├── .gitignore ├── .terraform.lock.hcl ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── THIRD-PARTY-LICENSES ├── access_log_policy.json ├── example ├── allocation-client.go ├── allocation.yaml ├── dashboard.json ├── go.mod └── go.sum ├── imgs ├── architecture.png ├── dashboard.png ├── log_groups.png └── overview.png ├── main.tf ├── modules ├── agones │ ├── main.tf │ ├── manifests │ │ ├── cert.yaml │ │ └── hpa.yaml │ ├── output.tf │ ├── values.yaml │ └── variable.tf ├── aws_otel │ ├── main.tf │ ├── manifests │ │ └── otel.yaml │ └── variable.tf ├── cert_manager │ ├── main.tf │ └── variable.tf ├── dgs_cluster │ ├── main.tf │ ├── manifests │ │ └── fleet.yaml │ ├── output.tf │ └── variable.tf ├── fluent_bit │ ├── main.tf │ ├── manifests │ │ └── fluent_bit.yaml │ └── variable.tf ├── karpenter │ ├── main.tf │ └── variable.tf ├── kubernetes_dashboard │ ├── main.tf │ ├── manifests │ │ └── manifest.yaml │ └── variable.tf ├── load_balancer_controller │ ├── iam_policy.json │ ├── main.tf │ ├── output.tf │ └── variable.tf └── routing_cluster │ ├── main.tf │ ├── manifests │ └── routing.yaml │ ├── output.tf │ └── variable.tf ├── output.tf └── variable.tf /.gitignore: -------------------------------------------------------------------------------- 1 | # Local .terraform directories 2 | **/.terraform/* 3 | 4 | # .tfstate files 5 | *.tfstate 6 | *.tfstate.* 7 | 8 | # Crash log files 9 | crash.log 10 | crash.*.log 11 | 12 | # Exclude all .tfvars files, which are likely to contain sentitive data, such as 13 | # password, private keys, and other secrets. These should not be part of version 14 | # control as they are data points which are potentially sensitive and subject 15 | # to change depending on the environment. 
16 | # 17 | *.tfvars 18 | 19 | # Ignore override files as they are usually used to override resources locally and so 20 | # are not checked in 21 | override.tf 22 | override.tf.json 23 | *_override.tf 24 | *_override.tf.json 25 | 26 | # Include override files you do wish to add to version control using negated pattern 27 | # 28 | # !example_override.tf 29 | 30 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 31 | # example: *tfplan* 32 | 33 | # Ignore CLI configuration files 34 | .terraformrc 35 | terraform.rc 36 | 37 | # checkov related files 38 | checkov.txt 39 | tf.plan 40 | tf.json 41 | 42 | # allocation-client files 43 | example/ca.crt 44 | example/client.key 45 | example/client.crt 46 | -------------------------------------------------------------------------------- /.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 
3 | 4 | provider "registry.terraform.io/gavinbunney/kubectl" { 5 | version = "1.14.0" 6 | constraints = ">= 1.7.0" 7 | hashes = [ 8 | "h1:mX2AOFIMIxJmW5kM8DT51gloIOKCr9iT6W8yodnUyfs=", 9 | "zh:0350f3122ff711984bbc36f6093c1fe19043173fad5a904bce27f86afe3cc858", 10 | "zh:07ca36c7aa7533e8325b38232c77c04d6ef1081cb0bac9d56e8ccd51f12f2030", 11 | "zh:0c351afd91d9e994a71fe64bbd1662d0024006b3493bb61d46c23ea3e42a7cf5", 12 | "zh:39f1a0aa1d589a7e815b62b5aa11041040903b061672c4cfc7de38622866cbc4", 13 | "zh:428d3a321043b78e23c91a8d641f2d08d6b97f74c195c654f04d2c455e017de5", 14 | "zh:4baf5b1de2dfe9968cc0f57fd4be5a741deb5b34ee0989519267697af5f3eee5", 15 | "zh:6131a927f9dffa014ab5ca5364ac965fe9b19830d2bbf916a5b2865b956fdfcf", 16 | "zh:c62e0c9fd052cbf68c5c2612af4f6408c61c7e37b615dc347918d2442dd05e93", 17 | "zh:f0beffd7ce78f49ead612e4b1aefb7cb6a461d040428f514f4f9cc4e5698ac65", 18 | ] 19 | } 20 | 21 | provider "registry.terraform.io/hashicorp/aws" { 22 | version = "4.22.0" 23 | constraints = ">= 2.23.0, >= 3.63.0, >= 3.72.0, >= 4.0.0, ~> 4.22.0" 24 | hashes = [ 25 | "h1:fmPkEDTodRW9XE0dqpTzBFUtfB3nYurbwzKy//8N93o=", 26 | "zh:299efb8ba733b7742f0ef1c5c5467819e0c7bf46264f5f36ba6b6674304a5244", 27 | "zh:4db198a41d248491204d4ca644662c32f748177d5cbe01f3c7adbb957d4d77f0", 28 | "zh:62ebc2b05b25eafecb1a75f19d6fc5551faf521ada9df9e5682440d927f642e1", 29 | "zh:636b590840095b4f817c176034cf649f543c0ce514dc051d6d0994f0a05c53ef", 30 | "zh:8594bd8d442288873eee56c0b4535cbdf02cacfcf8f6ddcf8cd5f45bb1d3bc80", 31 | "zh:8e18a370949799f20ba967eec07a84aaedf95b3ee5006fe5af6eae13fbf39dc3", 32 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 33 | "zh:aa968514231e404fb53311d8eae2e8b6bde1fdad1f4dd5a592ab93d9cbf11af4", 34 | "zh:af8e5c48bf36d4fff1a6fca760d5b85f14d657cbdf95e9cd5e898c68104bad31", 35 | "zh:d8a75ba36bf8b6f2e49be5682f48eccb6c667a4484afd676ae347213ae208622", 36 | "zh:dd7c419674a47e587dabe98b150a8f1f7e31c248c68e8bf5e9ca0a400b5e2c4e", 37 | 
"zh:fdeb6314a2ce97489bbbece59511f78306955e8a23b02cbd1485bd04185a3673", 38 | ] 39 | } 40 | 41 | provider "registry.terraform.io/hashicorp/cloudinit" { 42 | version = "2.2.0" 43 | constraints = ">= 2.0.0" 44 | hashes = [ 45 | "h1:siiI0wK6/jUDdA5P8ifTO0yc9YmXHml4hz5K9I9N+MA=", 46 | "zh:76825122171f9ea2287fd27e23e80a7eb482f6491a4f41a096d77b666896ee96", 47 | "zh:795a36dee548e30ca9c9d474af9ad6d29290e0a9816154ad38d55381cd0ab12d", 48 | "zh:9200f02cb917fb99e44b40a68936fd60d338e4d30a718b7e2e48024a795a61b9", 49 | "zh:a33cf255dc670c20678063aa84218e2c1b7a67d557f480d8ec0f68bc428ed472", 50 | "zh:ba3c1b2cd0879286c1f531862c027ec04783ece81de67c9a3b97076f1ce7f58f", 51 | "zh:bd575456394428a1a02191d2e46af0c00e41fd4f28cfe117d57b6aeb5154a0fb", 52 | "zh:c68dd1db83d8437c36c92dc3fc11d71ced9def3483dd28c45f8640cfcd59de9a", 53 | "zh:cbfe34a90852ed03cc074601527bb580a648127255c08589bc3ef4bf4f2e7e0c", 54 | "zh:d6ffd7398c6d1f359b96f5b757e77b99b339fbb91df1b96ac974fe71bc87695c", 55 | "zh:d9c15285f847d7a52df59e044184fb3ba1b7679fd0386291ed183782683d9517", 56 | "zh:f7dd02f6d36844da23c9a27bb084503812c29c1aec4aba97237fec16860fdc8c", 57 | ] 58 | } 59 | 60 | provider "registry.terraform.io/hashicorp/helm" { 61 | version = "2.6.0" 62 | hashes = [ 63 | "h1:QZcB0CGaRloxrq1JjHF4ZLauaoJ8fHF2MsXFezR0COw=", 64 | "zh:0ac248c28acc1a4fd11bd26a85e48ab78dd6abf0f7ac842bf1cd7edd05ac6cf8", 65 | "zh:3d32c8deae3740d8c5310136cc11c8afeffc350fbf88afaca0c34a223a5246f5", 66 | "zh:4055a27489733d19ca7fa2dfce14d323fe99ae9dede7d0fea21ee6db0b9ca74b", 67 | "zh:58a8ed39653fd4c874a2ecb128eccfa24c94266a00e349fd7fb13e22ad81f381", 68 | "zh:6c81508044913f25083de132d0ff81d083732aba07c506cc2db05aa0cefcde2c", 69 | "zh:7db5d18093047bfc4fe597f79610c0a281b21db0d61b0bacb3800585e976f814", 70 | "zh:8269207b7422db99e7be80a5352d111966c3dfc7eb98511f11c8ff7b2e813456", 71 | "zh:b1d7ababfb2374e72532308ff442cc906b79256b66b3fe7a98d42c68c4ddf9c5", 72 | "zh:ca63e226cbdc964a5d63ef21189f059ce45c3fa4a5e972204d6916a9177d2b44", 73 | 
"zh:d205a72d60e8cc362943d66f5bcdd6b6aaaa9aab2b89fd83bf6f1978ac0b1e4c", 74 | "zh:db47dc579a0e68e5bfe3a61f2e950e6e2af82b1f388d1069de014a937962b56a", 75 | "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", 76 | ] 77 | } 78 | 79 | provider "registry.terraform.io/hashicorp/kubernetes" { 80 | version = "2.12.1" 81 | constraints = ">= 2.10.0" 82 | hashes = [ 83 | "h1:iAS9NYD0DjjmKpge74+y6nRltWkF+jkEpavWOEgq4jY=", 84 | "zh:1ecb2adff52754fb4680c7cfe6143d1d8c264b00bb0c44f07f5583b1c7f978b8", 85 | "zh:1fbd155088cd5818ad5874e4d59ccf1801e4e1961ac0711442b963315f1967ab", 86 | "zh:29e927c7c8f112ee0e8ab70e71b498f2f2ae6f47df1a14e6fd0fdb6f14b57c00", 87 | "zh:42c2f421da6b5b7c997e42aa04ca1457fceb13dd66099a057057a0812b680836", 88 | "zh:522a7bccd5cd7acbb4ec3ef077d47f4888df7e59ff9f3d598b717ad3ee4fe9c9", 89 | "zh:b45d8dc5dcbc5e30ae570d0c2e198505f47d09098dfd5f004871be8262e6ec1e", 90 | "zh:c3ea0943f2050001c7d6a7115b9b990f148b082ebfc4ff3c2ff3463a8affcc4a", 91 | "zh:f111833a64e06659d2e21864de39b7b7dec462615294d02f04c777956742a930", 92 | "zh:f182dba5707b90b0952d5984c23f7a2da3baa62b4d71e78df7759f16cc88d957", 93 | "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", 94 | "zh:f76655a68680887daceabd947b2f68e2103f5bbec49a2bc29530f82ab8e3bca3", 95 | "zh:fadb77352caa570bd3259dfb59c31db614d55bc96df0ff15a3c0cd2e685678b9", 96 | ] 97 | } 98 | 99 | provider "registry.terraform.io/hashicorp/time" { 100 | version = "0.7.2" 101 | hashes = [ 102 | "h1:NKy1QrNLlP5mKy5Tea6lQSRsVoyydJQKh6WvNTdBF4I=", 103 | "zh:0bbe0158c2a9e3f5be911b7e94477586110c51746bb13d102054f22754565bda", 104 | "zh:3250af7fd49b8aaf2ccc895588af05197d886e38b727e3ba33bcbb8cc96ad34d", 105 | "zh:35e4de0437f4fa9c1ad69aaf8136413be2369ea607d78e04bb68dc66a6a520b8", 106 | "zh:369756417a6272e79cad31eb2c82c202f6a4b6e4204a893f656644ba9e149fa2", 107 | "zh:390370f1179d89b33c3a0731691e772d5450a7d59fc66671ec625e201db74aa2", 108 | "zh:3d12ac905259d225c685bc42e5507ed0fbdaa5a09c30dce7c1932d908df857f7", 109 | 
"zh:75f63e5e1c68e6c5bccba4568c3564e2774eb3a7a19189eb8e2b6e0d58c8f8cc", 110 | "zh:7c22a2078a608e3e0278c4cbc9c483909062ebd1843bddaf8f176346c6d378b1", 111 | "zh:7cfb3c02f78f0060d59c757c4726ab45a962ce4a9cf4833beca704a1020785bd", 112 | "zh:a0325917f47c28a2ed088dedcea0d9520d91b264e63cc667fe4336ac993c0c11", 113 | "zh:c181551d4c0a40b52e236f1755cc340aeca0fb5dcfd08b3b1c393a7667d2f327", 114 | ] 115 | } 116 | 117 | provider "registry.terraform.io/hashicorp/tls" { 118 | version = "3.4.0" 119 | constraints = ">= 3.0.0" 120 | hashes = [ 121 | "h1:fSRc/OyRitbAST9vE+mEcmgJiDp+Jx8pGPbUUeYEQRc=", 122 | "zh:2442a0df0cfb550b8eba9b2af39ac06f54b62447eb369ecc6b1c29f739b33bbb", 123 | "zh:3ebb82cacb677a099de55f844f0d02886bc804b1a2b94441bc40fabcb64d2a38", 124 | "zh:436125c2a7e66bc62a4a7c68bdca694f071d7aa894e8637dc83f4a68fe322546", 125 | "zh:5f03db9f1d77e8274ff4750ae32d5c16c42b862b06bcb0683e4d733c8db922e4", 126 | "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", 127 | "zh:8190142ae8a539ab34193b7e75da0fa04035d1dcd8af8be94df1eafeeffb44b6", 128 | "zh:8cdc7cd9221e27c189e5beaf78462fce4c2edb081f415a1eafc6da2949de31e2", 129 | "zh:a5de0f7f5d63c59ebf61d3c1d94040f410665ff0aa04f66674efe24b39a11f94", 130 | "zh:a9fce48db3c140cc3e06f8a3c7ef4d36735e457e7660442d6d5dcd2b0781adc3", 131 | "zh:beb92de584c790c7c7f047e45ccd22b6ee3263c7b5a91ae4d6882ae6e7700570", 132 | "zh:f373f8cc52846fb513f44f468d885f722ca4dc22af9ff1942368cafd16b796b3", 133 | "zh:f69627fd6e5a920b17ff423cdbad2715078ca6d13146dc67668795582ab43748", 134 | ] 135 | } 136 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. 
Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multi-cluster allocation demo for Agones on EKS 2 | This sample deploys [Agones](https://agones.dev/site/) multi-cluster configuration to Amazon EKS, one routing cluster and two DGS clusters, with [multi-cluster allocation feature](https://agones.dev/site/docs/advanced/multi-cluster-allocation/) enabled. 
3 | 4 | This sample also works as a good Terraform example for the following features: 5 | 6 | * Deploy Agones with Network Load Balancer (NLB) instead of Classic Load Balancer with [AWS Load Balancer Controller](https://kubernetes-sigs.github.io/aws-load-balancer-controller/) 7 | * Aggregate logs and metrics into Amazon CloudWatch using [Fluent Bit](https://fluentbit.io/) and [OpenTelemetry](https://aws-otel.github.io/) 8 | * View Kubernetes resources with [Kubernetes Dashboard](https://github.com/kubernetes/dashboard) 9 | * Adjust the number of nodes with [Karpenter](https://karpenter.sh/) (You can also see the cluster autoscaler version [in this tag](https://github.com/aws-samples/multi-cluster-allocation-demo-for-agones-on-eks/tree/cluster_autoscaler)) 10 | 11 | ## Architecture / How it works 12 | The architecture of this sample is shown in the image below. 13 | 14 | ![overview](imgs/overview.png) 15 | 16 | We adopt the *Dedicated Cluster Responsible For Routing* cluster topology, [which is discussed here](https://github.com/googleforgames/agones/issues/597). This way, your cluster configurations are symmetric - all the DGS clusters can share the same configuration, which is simpler than the *Single Cluster Responsible For Routing* topology, in which a special DGS cluster serves game servers and also performs the allocation routing function. The *All Clusters Responsible For Routing* topology seems overkill for a single-region deployment, because it is unlikely for only a single cluster to fail while other clusters in the same region are working normally. It might improve availability in a multi-region deployment though. 
17 | 18 | ## Steps to Deploy 19 | ### Prerequisites 20 | You must install the following tools before deploying this sample: 21 | 22 | * [Terraform CLI](https://learn.hashicorp.com/tutorials/terraform/install-cli) 23 | * [Kubectl](https://kubernetes.io/docs/tasks/tools/) 24 | * [Helm](https://helm.sh/docs/intro/install/) 25 | * [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) 26 | * After install, you must configure permission equivalent to Administrator IAM policy 27 | 28 | ### Check Terraform parameters 29 | Please open [`variable.tf`](./variable.tf) and check the parameters. 30 | 31 | You can continue to deploy without any modification, but you may want to change some of the settings such as AWS region to deploy. 32 | You can also improve the security by specifying CIDRs that can connect to servers. By default all the servers are protected by mTLS but can be connected from anyone. 33 | 34 | ### Deploy 35 | To deploy this sample, you need to run the following commands: 36 | 37 | ```sh 38 | # Install required modules 39 | terraform init 40 | 41 | # deploy to your account 42 | terraform apply -auto-approve 43 | ``` 44 | 45 | It usually takes 20-30 minutes to deploy. 46 | 47 | After a deployment, please check all the pods are running properly (i.e. 
`Running` state) by the commands below: 48 | 49 | ```sh 50 | aws eks update-kubeconfig --name dgs01 51 | kubectl get pods -A 52 | 53 | aws eks update-kubeconfig --name dgs02 54 | kubectl get pods -A 55 | 56 | aws eks update-kubeconfig --name router 57 | kubectl get pods -A 58 | ``` 59 | 60 | ## Usage 61 | ### Connect to a game server 62 | You can follow [the official guide](https://agones.dev/site/docs/getting-started/create-gameserver/#2-fetch-the-gameserver-status) to connect to a game server, or run the following commands: 63 | 64 | ```sh 65 | aws eks update-kubeconfig --name dgs01 # or --name dgs02 66 | kubectl get gs 67 | 68 | # you will get a output like below 69 | NAME STATE ADDRESS PORT NODE AGE 70 | dgs-fleet-2l7fs-8pjfb Ready ec2-redacted.us-west-2.compute.amazonaws.com 7684 ip-10-0-177-35.us-west-2.compute.internal 66m 71 | dgs-fleet-2l7fs-dtz7c Ready ec2-redacted.us-west-2.compute.amazonaws.com 7039 ip-10-0-177-35.us-west-2.compute.internal 66m 72 | 73 | # get IP address and PORT number from the above output 74 | nc -u {ADDRESS} {PORT} 75 | 76 | # now you can send some message and see ACK is returned 77 | ``` 78 | 79 | As a sample game server, we are running [simple-game-server](https://github.com/googleforgames/agones/tree/main/examples/simple-game-server). The available commands are described in the `README.md`. 80 | 81 | ### Allocate a game server 82 | You can allocate a game server pod either by using [`GameServerAllocation` API aggregation](https://agones.dev/site/docs/reference/gameserverallocation/) or [allocator service client](https://agones.dev/site/docs/advanced/multi-cluster-allocation/#allocate-multi-cluster). 
83 | 84 | To use GameServerAllocation API aggregation, run the following command: 85 | 86 | ```sh 87 | aws eks update-kubeconfig --name router 88 | kubectl create -f example/allocation.yaml 89 | # Try to run above command several times 90 | 91 | # You can see some DGS pods are allocated 92 | aws eks update-kubeconfig --name dgs01 93 | kubectl get gs 94 | 95 | aws eks update-kubeconfig --name dgs02 96 | kubectl get gs 97 | ``` 98 | 99 | To use an allocator service client, run the following commands. You can either use gRPC or REST interface. Since they are protected by mTLS, you need to set up TLS certificates and private keys first. 100 | 101 | ```sh 102 | NAMESPACE=default # replace with any namespace 103 | EXTERNAL_IP=$(terraform output -raw allocation_service_hostname) 104 | KEY_FILE=client.key 105 | CERT_FILE=client.crt 106 | TLS_CA_FILE=ca.crt 107 | 108 | # get certificates locally 109 | terraform output -raw allocation_service_client_tls_key | base64 -d > $KEY_FILE 110 | terraform output -raw allocation_service_client_tls_crt | base64 -d > $CERT_FILE 111 | terraform output -raw allocation_service_server_tls_crt | base64 -d > $TLS_CA_FILE 112 | 113 | mv $KEY_FILE $CERT_FILE $TLS_CA_FILE ./example 114 | cd ./example 115 | 116 | # Using go example code for gRPC interface 117 | go run allocation-client.go --ip ${EXTERNAL_IP} --namespace ${NAMESPACE} --key ${KEY_FILE} --cert ${CERT_FILE} --cacert ${TLS_CA_FILE} --multicluster true 118 | 119 | # Using curl for REST interface 120 | curl --key ${KEY_FILE} \ 121 | --cert ${CERT_FILE} \ 122 | --cacert ${TLS_CA_FILE} \ 123 | -H "Content-Type: application/json" \ 124 | --data '{"namespace":"'${NAMESPACE}'", "multiClusterSetting":{"enabled":true}}' \ 125 | https://${EXTERNAL_IP}/gameserverallocation \ 126 | -v 127 | ``` 128 | 129 | Note that allocation requests are forwarded from the routing cluster to the DGS clusters with Agones multi-cluster allocation feature. 
130 | 131 | ### Open Kubernetes Dashboard 132 | You can open Kubernetes Dashboard to see and manage Kubernetes resources in detail. 133 | It is already installed in all the clusters. You can follow the steps below to open it and log in. 134 | 135 | ```sh 136 | aws eks update-kubeconfig --name {CLUSTER_NAME} # cluster name: dgs01, dgs02, or router 137 | kubectl proxy 138 | # Now, open http://localhost:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:https/proxy/#/login 139 | 140 | # Retrieve the access token with the command below: 141 | kubectl -n kubernetes-dashboard get secret $(kubectl -n kubernetes-dashboard get sa/admin-user -o jsonpath="{.secrets[0].name}") -o go-template="{{.data.token | base64decode}}" 142 | ``` 143 | 144 | ### Monitor logs and metrics in CloudWatch 145 | Agones logs and metrics are aggregated into CloudWatch in this sample. You can easily check them in the [CloudWatch management console](https://console.aws.amazon.com/cloudwatch/home). 146 | 147 | To check logs, open the `Log groups` page and inspect the relevant log groups (e.g. `/aws/containerinsights/dgs01/application`). 148 | 149 | ![log_groups](imgs/log_groups.png) 150 | 151 | Here you can see application logs ingested in near real time by Fluent Bit. You can configure which logs should be included or excluded by modifying [`modules/fluent_bit/manifests/fluent_bit.yaml`](./modules/fluent_bit/manifests/fluent_bit.yaml). Please also check [the official documentation](https://docs.fluentbit.io/manual/administration/configuring-fluent-bit/classic-mode/configuration-file) for further details. 152 | 153 | For metrics, you can open either the `All metrics` page or the `Dashboards` page from the CloudWatch management console. 154 | In the All metrics page, you can check each metric one by one, which can be useful for checking metrics in an ad-hoc manner. 155 | 156 | In the CloudWatch Dashboards page, you can create a dashboard to monitor all the required metrics at a glance. 
157 | This sample includes a sample dashboard for monitoring Agones. You can import the dashboard with the following command: 158 | 159 | ```sh 160 | aws cloudwatch put-dashboard --dashboard-name agones-demo-dashboard --dashboard-body file://example/dashboard.json 161 | ``` 162 | 163 | Note that AWS region `us-west-2` is hard-coded in `dashboard.json`. If you deployed this sample in another region, please replace it before creating a dashboard. 164 | 165 | After `put-dashboard` succeeds, you can open the imported dashboard from the CloudWatch management console. 166 | 167 | ![dashboard](imgs/dashboard.png) 168 | 169 | You can freely and intuitively customize the dashboard via the management console. Please also refer to [this document](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch_Dashboards.html) if you need further information. 170 | 171 | You can add or remove metrics ingested into CloudWatch by modifying [`otel.yaml`](./modules/aws_otel/manifests/otel.yaml). 172 | The main documentation for AWS Distro for OpenTelemetry [is here](https://aws-otel.github.io/docs/getting-started/collector). 173 | You can also refer to the URLs in the comments of that file for further details on each config. 174 | 175 | ### Add more DGS clusters 176 | Currently there are only two DGS clusters, but you can add more of them easily. 177 | 178 | To add a DGS cluster, open [`main.tf`](./main.tf) and declare another instance of the `./modules/dgs_cluster` module. You also need to add the module to the `local.dgs_clusters` list variable. 
179 | 180 | ```tf 181 | # Add this 182 | module "dgs03" { 183 | source = "./modules/dgs_cluster" 184 | cluster_name = "dgs03" 185 | vpc = module.vpc 186 | 187 | cluster_endpoint_public_access_cidrs = var.cluster_endpoint_allowed_cidrs 188 | gameserver_allowed_cidrs = var.gameserver_allowed_cidrs 189 | } 190 | 191 | # Don't forget to also edit this variable 192 | locals { 193 | dgs_clusters = [module.dgs01, module.dgs02, module.dgs03] 194 | } 195 | ``` 196 | 197 | ## Clean up 198 | To avoid incurring future charges, clean up the resources you created. 199 | 200 | You can remove all the AWS resources deployed by this sample by running the following command: 201 | 202 | ```sh 203 | terraform destroy -auto-approve 204 | ``` 205 | 206 | ## Security 207 | 208 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 209 | 210 | ## License 211 | 212 | This library is licensed under the MIT-0 License. See the LICENSE file. 213 | -------------------------------------------------------------------------------- /THIRD-PARTY-LICENSES: -------------------------------------------------------------------------------- 1 | ** Agones; version 1.21.0 -- https://agones.dev/site/ 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. 
For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 
49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. 
Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. 
You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 
124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. 
In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | * For Agones see also this required NOTICE: 180 | © 2022 Copyright Google LLC All Rights Reserved. 
All Rights Reserved 181 | -------------------------------------------------------------------------------- /access_log_policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "AWSLogDeliveryWrite", 6 | "Effect": "Allow", 7 | "Principal": { 8 | "Service": "delivery.logs.amazonaws.com" 9 | }, 10 | "Action": "s3:PutObject", 11 | "Resource": "arn:aws:s3:::${bucket_name}/*AWSLogs/${account_id}/*", 12 | "Condition": { 13 | "StringEquals": { 14 | "s3:x-amz-acl": "bucket-owner-full-control" 15 | } 16 | } 17 | }, 18 | { 19 | "Sid": "AWSLogDeliveryAclCheck", 20 | "Effect": "Allow", 21 | "Principal": { 22 | "Service": "delivery.logs.amazonaws.com" 23 | }, 24 | "Action": ["s3:GetBucketAcl", "s3:ListBucket"], 25 | "Resource": "arn:aws:s3:::${bucket_name}" 26 | } 27 | ] 28 | } 29 | -------------------------------------------------------------------------------- /example/allocation-client.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | // Original file is here: https://github.com/googleforgames/agones/tree/main/examples/allocator-client 16 | 17 | package main 18 | 19 | import ( 20 | "context" 21 | "crypto/tls" 22 | "crypto/x509" 23 | "flag" 24 | "fmt" 25 | "io/ioutil" 26 | 27 | pb "agones.dev/agones/pkg/allocation/go" 28 | "github.com/pkg/errors" 29 | "google.golang.org/grpc" 30 | "google.golang.org/grpc/credentials" 31 | ) 32 | 33 | func main() { 34 | keyFile := flag.String("key", "missing key", "the private key file for the client certificate in PEM format") 35 | certFile := flag.String("cert", "missing cert", "the public key file for the client certificate in PEM format") 36 | cacertFile := flag.String("cacert", "missing cacert", "the CA cert file for server signing certificate in PEM format") 37 | externalIP := flag.String("ip", "missing external IP", "the external IP for allocator server") 38 | port := flag.String("port", "443", "the port for allocator server") 39 | namespace := flag.String("namespace", "default", "the game server kubernetes namespace") 40 | multicluster := flag.Bool("multicluster", false, "set to true to enable the multi-cluster allocation") 41 | 42 | flag.Parse() 43 | 44 | endpoint := *externalIP + ":" + *port 45 | cert, err := ioutil.ReadFile(*certFile) 46 | if err != nil { 47 | panic(err) 48 | } 49 | key, err := ioutil.ReadFile(*keyFile) 50 | if err != nil { 51 | panic(err) 52 | } 53 | cacert, err := ioutil.ReadFile(*cacertFile) 54 | if err != nil { 55 | panic(err) 56 | } 57 | 58 | request := &pb.AllocationRequest{ 59 | Namespace: *namespace, 60 | MultiClusterSetting: &pb.MultiClusterSetting{ 61 | Enabled: *multicluster, 62 | }, 63 | } 64 | 65 | dialOpts, err := createRemoteClusterDialOption(cert, key, cacert) 66 | if err != nil { 67 | panic(err) 68 | } 69 | conn, err := grpc.Dial(endpoint, dialOpts) 70 | if err != nil { 71 | panic(err) 72 | } 73 | defer conn.Close() 74 | 75 | grpcClient := pb.NewAllocationServiceClient(conn) 76 | response, err := 
grpcClient.Allocate(context.Background(), request) 77 | if err != nil { 78 | panic(err) 79 | } 80 | fmt.Printf("response: %s\n", response.String()) 81 | } 82 | 83 | // createRemoteClusterDialOption creates a grpc client dial option with TLS configuration. 84 | func createRemoteClusterDialOption(clientCert, clientKey, caCert []byte) (grpc.DialOption, error) { 85 | // Load client cert 86 | cert, err := tls.X509KeyPair(clientCert, clientKey) 87 | if err != nil { 88 | return nil, err 89 | } 90 | 91 | tlsConfig := &tls.Config{Certificates: []tls.Certificate{cert}} 92 | if len(caCert) != 0 { 93 | // Load CA cert, if provided and trust the server certificate. 94 | // This is required for self-signed certs. 95 | tlsConfig.RootCAs = x509.NewCertPool() 96 | if !tlsConfig.RootCAs.AppendCertsFromPEM(caCert) { 97 | return nil, errors.New("only PEM format is accepted for server CA") 98 | } 99 | } 100 | 101 | return grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig)), nil 102 | } 103 | -------------------------------------------------------------------------------- /example/allocation.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "allocation.agones.dev/v1" 2 | kind: GameServerAllocation 3 | spec: 4 | selectors: 5 | - matchLabels: 6 | agones.dev/fleet: dgs-fleet 7 | multiClusterSetting: 8 | enabled: true 9 | -------------------------------------------------------------------------------- /example/dashboard.json: -------------------------------------------------------------------------------- 1 | { 2 | "widgets": [ 3 | { 4 | "height": 6, 5 | "width": 12, 6 | "y": 5, 7 | "x": 0, 8 | "type": "metric", 9 | "properties": { 10 | "metrics": [ 11 | [ "ContainerInsights", "node_cpu_utilization", "NodeGroupName", "eks-gameserver", "ClusterName", "dgs01", { "label": "dgs01 eks-gameserver", "yAxis": "right" } ], 12 | [ "...", "eks-agones-system", ".", "." 
], 13 | [ "...", "eks-gameserver", ".", "dgs02", { "yAxis": "right" } ], 14 | [ "...", "eks-agones-system", ".", "." ], 15 | [ "...", "router" ] 16 | ], 17 | "view": "timeSeries", 18 | "stacked": false, 19 | "region": "us-west-2", 20 | "period": 60, 21 | "stat": "Maximum", 22 | "title": "Node CPU usage (max)" 23 | } 24 | }, 25 | { 26 | "height": 5, 27 | "width": 9, 28 | "y": 0, 29 | "x": 0, 30 | "type": "metric", 31 | "properties": { 32 | "metrics": [ 33 | [ { "expression": "m1+m2", "label": "#Total", "id": "e1", "yAxis": "left", "period": 60, "region": "us-west-2", "stat": "Maximum" } ], 34 | [ { "expression": "m3+m4", "label": "#Allocated", "id": "e3", "region": "us-west-2", "stat": "Maximum" } ], 35 | [ "ContainerInsights", "agones_fleets_replicas_count", "ClusterName", "dgs01", "type", "total", { "id": "m1", "visible": false } ], 36 | [ "...", "dgs02", ".", ".", { "id": "m2", "visible": false } ], 37 | [ "...", "dgs01", ".", "allocated", { "id": "m3", "visible": false } ], 38 | [ ".", "agones_gameservers_count", ".", "dgs02", ".", "Allocated", { "id": "m4", "visible": false } ] 39 | ], 40 | "sparkline": true, 41 | "view": "singleValue", 42 | "region": "us-west-2", 43 | "stat": "Maximum", 44 | "period": 60, 45 | "title": "Number of DGS pods" 46 | } 47 | }, 48 | { 49 | "height": 5, 50 | "width": 7, 51 | "y": 0, 52 | "x": 9, 53 | "type": "metric", 54 | "properties": { 55 | "metrics": [ 56 | [ { "expression": "m1+m2", "label": "#Total", "id": "e1", "yAxis": "left", "period": 60, "region": "us-west-2", "visible": false, "stat": "Maximum" } ], 57 | [ { "expression": "m3+m4", "label": "#Allocated", "id": "e3", "region": "us-west-2", "visible": false, "stat": "Maximum" } ], 58 | [ { "expression": "(e3/e1)*100", "label": "Allocated ratio", "id": "e2", "period": 60, "region": "us-west-2", "stat": "Maximum" } ], 59 | [ "ContainerInsights", "agones_fleets_replicas_count", "ClusterName", "dgs01", "type", "total", { "id": "m1", "visible": false } ], 60 | [ "...", "dgs02", 
".", ".", { "id": "m2", "visible": false } ], 61 | [ "...", "dgs01", ".", "allocated", { "id": "m3", "visible": false } ], 62 | [ ".", "agones_gameservers_count", ".", "dgs02", ".", "Allocated", { "id": "m4", "visible": false } ] 63 | ], 64 | "sparkline": true, 65 | "view": "gauge", 66 | "region": "us-west-2", 67 | "stat": "Maximum", 68 | "period": 60, 69 | "title": "Allocated ratio (%)", 70 | "yAxis": { 71 | "left": { 72 | "min": 0, 73 | "max": 100 74 | } 75 | }, 76 | "legend": { 77 | "position": "hidden" 78 | } 79 | } 80 | }, 81 | { 82 | "height": 5, 83 | "width": 8, 84 | "y": 0, 85 | "x": 16, 86 | "type": "metric", 87 | "properties": { 88 | "metrics": [ 89 | [ "ContainerInsights", "cluster_node_count", "ClusterName", "dgs01", { "yAxis": "right", "id": "m4" } ], 90 | [ "...", "dgs02", { "yAxis": "right", "id": "m5" } ], 91 | [ "...", "router", { "yAxis": "right", "id": "m6" } ] 92 | ], 93 | "view": "timeSeries", 94 | "stacked": false, 95 | "region": "us-west-2", 96 | "period": 60, 97 | "stat": "Average", 98 | "title": "Number of Nodes" 99 | } 100 | }, 101 | { 102 | "height": 6, 103 | "width": 12, 104 | "y": 11, 105 | "x": 0, 106 | "type": "metric", 107 | "properties": { 108 | "metrics": [ 109 | [ "ContainerInsights", "pod_cpu_utilization", "PodName", "agones-allocator", "ClusterName", "dgs01", "Namespace", "agones-system" ], 110 | [ "...", "agones-controller", ".", ".", ".", "." ], 111 | [ "...", "dgs-fleet", ".", ".", ".", "default" ], 112 | [ "...", "agones-allocator", ".", "dgs02", ".", "agones-system" ], 113 | [ "...", "agones-controller", ".", ".", ".", "." ], 114 | [ "...", "dgs-fleet", ".", ".", ".", "default" ], 115 | [ "...", "agones-allocator", ".", "router", ".", "agones-system" ], 116 | [ "...", "agones-controller", ".", ".", ".", "." 
] 117 | ], 118 | "view": "timeSeries", 119 | "stacked": false, 120 | "region": "us-west-2", 121 | "period": 60, 122 | "stat": "Average" 123 | } 124 | }, 125 | { 126 | "height": 6, 127 | "width": 12, 128 | "y": 11, 129 | "x": 12, 130 | "type": "metric", 131 | "properties": { 132 | "view": "timeSeries", 133 | "stacked": false, 134 | "metrics": [ 135 | [ "ContainerInsights", "pod_memory_utilization", "PodName", "agones-allocator", "ClusterName", "dgs01", "Namespace", "agones-system" ], 136 | [ "...", "agones-controller", ".", ".", ".", "." ], 137 | [ "...", "dgs-fleet", ".", ".", ".", "default" ], 138 | [ "...", "agones-allocator", ".", "dgs02", ".", "agones-system" ], 139 | [ "...", "agones-controller", ".", ".", ".", "." ], 140 | [ "...", "dgs-fleet", ".", ".", ".", "default" ], 141 | [ "...", "agones-allocator", ".", "router", ".", "agones-system" ], 142 | [ "...", "agones-controller", ".", ".", ".", "." ] 143 | ], 144 | "region": "us-west-2" 145 | } 146 | }, 147 | { 148 | "height": 6, 149 | "width": 12, 150 | "y": 5, 151 | "x": 12, 152 | "type": "metric", 153 | "properties": { 154 | "metrics": [ 155 | [ "ContainerInsights", "node_memory_utilization", "NodeGroupName", "eks-gameserver", "ClusterName", "dgs01", { "yAxis": "left" } ], 156 | [ "...", "eks-agones-system", ".", ".", { "yAxis": "left" } ], 157 | [ "...", "eks-gameserver", ".", "dgs02", { "yAxis": "left" } ], 158 | [ "...", "eks-agones-system", ".", ".", { "yAxis": "left" } ], 159 | [ "...", "router", { "yAxis": "left" } ] 160 | ], 161 | "view": "timeSeries", 162 | "stacked": false, 163 | "region": "us-west-2", 164 | "period": 60, 165 | "stat": "Maximum", 166 | "title": "Node RAM usage (max)" 167 | } 168 | } 169 | ] 170 | } -------------------------------------------------------------------------------- /example/go.mod: -------------------------------------------------------------------------------- 1 | module example.com/agones 2 | 3 | go 1.17 4 | 5 | require ( 6 | agones.dev/agones v1.21.0 7 | 
github.com/pkg/errors v0.9.1 8 | google.golang.org/grpc v1.56.3 9 | ) 10 | 11 | require ( 12 | github.com/golang/protobuf v1.5.3 // indirect 13 | github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect 14 | golang.org/x/net v0.38.0 // indirect 15 | golang.org/x/sys v0.31.0 // indirect 16 | golang.org/x/text v0.23.0 // indirect 17 | google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect 18 | google.golang.org/protobuf v1.33.0 // indirect 19 | ) 20 | -------------------------------------------------------------------------------- /imgs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/multi-cluster-allocation-demo-for-agones-on-eks/c80705f7c22c607a1bbb8ba0a6185daa1eff574a/imgs/architecture.png -------------------------------------------------------------------------------- /imgs/dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/multi-cluster-allocation-demo-for-agones-on-eks/c80705f7c22c607a1bbb8ba0a6185daa1eff574a/imgs/dashboard.png -------------------------------------------------------------------------------- /imgs/log_groups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/multi-cluster-allocation-demo-for-agones-on-eks/c80705f7c22c607a1bbb8ba0a6185daa1eff574a/imgs/log_groups.png -------------------------------------------------------------------------------- /imgs/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/multi-cluster-allocation-demo-for-agones-on-eks/c80705f7c22c607a1bbb8ba0a6185daa1eff574a/imgs/overview.png -------------------------------------------------------------------------------- /main.tf: 
-------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | aws = { 4 | source = "aws" 5 | version = "~> 4.22.0" 6 | } 7 | } 8 | } 9 | 10 | provider "aws" { 11 | region = var.aws_region 12 | } 13 | 14 | data "aws_availability_zones" "available" {} 15 | 16 | data "aws_caller_identity" "current" {} 17 | 18 | module "vpc" { 19 | version = "3.12.0" 20 | source = "terraform-aws-modules/vpc/aws" 21 | 22 | name = "agones-multi-cluster-demo" 23 | cidr = "10.0.0.0/16" 24 | azs = data.aws_availability_zones.available.names 25 | 26 | public_subnets = ["10.0.0.0/18", "10.0.64.0/18", "10.0.128.0/18"] 27 | private_subnets = ["10.0.192.0/20", "10.0.208.0/20", "10.0.224.0/20"] 28 | 29 | enable_flow_log = true 30 | flow_log_destination_arn = aws_s3_bucket.log_bucket.arn 31 | flow_log_destination_type = "s3" 32 | 33 | enable_nat_gateway = true 34 | single_nat_gateway = true 35 | one_nat_gateway_per_az = false 36 | 37 | manage_default_security_group = true 38 | 39 | # Enable these to use EKS private cluster endpoint 40 | # https://docs.aws.amazon.com/eks/latest/userguide/cluster-endpoint.html 41 | enable_dns_hostnames = true 42 | enable_dns_support = true 43 | 44 | # Add tags below for subnet discovery 45 | # https://aws.amazon.com/premiumsupport/knowledge-center/eks-vpc-subnet-discovery/ 46 | private_subnet_tags = { 47 | "kubernetes.io/role/internal-elb" = "1" 48 | } 49 | 50 | public_subnet_tags = { 51 | "kubernetes.io/role/elb" = "1" 52 | } 53 | } 54 | 55 | resource "aws_s3_bucket" "log_bucket" { 56 | bucket_prefix = "log-bucket" 57 | force_destroy = true 58 | } 59 | 60 | resource "aws_s3_bucket_versioning" "log_bucket" { 61 | bucket = aws_s3_bucket.log_bucket.id 62 | versioning_configuration { 63 | status = "Enabled" 64 | } 65 | } 66 | 67 | resource "aws_s3_bucket_server_side_encryption_configuration" "log_bucket" { 68 | bucket = aws_s3_bucket.log_bucket.id 69 | 70 | rule { 71 | 
apply_server_side_encryption_by_default { 72 | sse_algorithm = "AES256" 73 | } 74 | } 75 | } 76 | 77 | resource "aws_s3_bucket_public_access_block" "log_bucket" { 78 | bucket = aws_s3_bucket.log_bucket.id 79 | 80 | block_public_acls = true 81 | block_public_policy = true 82 | ignore_public_acls = true 83 | restrict_public_buckets = true 84 | } 85 | 86 | # Allow putting access log files from elb service 87 | resource "aws_s3_bucket_policy" "allow_elb_access_log" { 88 | bucket = aws_s3_bucket.log_bucket.id 89 | policy = templatefile("./access_log_policy.json", { 90 | bucket_name = aws_s3_bucket.log_bucket.bucket 91 | account_id = data.aws_caller_identity.current.account_id 92 | }) 93 | } 94 | 95 | module "dgs01" { 96 | source = "./modules/dgs_cluster" 97 | cluster_name = "dgs01" 98 | vpc = module.vpc 99 | 100 | cluster_endpoint_public_access_cidrs = var.cluster_endpoint_allowed_cidrs 101 | gameserver_allowed_cidrs = var.gameserver_allowed_cidrs 102 | } 103 | 104 | # We cannot use for_each or count here due to provider problem 105 | # https://www.terraform.io/language/modules/develop/providers#legacy-shared-modules-with-provider-configurations 106 | # https://github.com/hashicorp/terraform/issues/24476 107 | module "dgs02" { 108 | source = "./modules/dgs_cluster" 109 | cluster_name = "dgs02" 110 | vpc = module.vpc 111 | 112 | cluster_endpoint_public_access_cidrs = var.cluster_endpoint_allowed_cidrs 113 | gameserver_allowed_cidrs = var.gameserver_allowed_cidrs 114 | } 115 | 116 | locals { 117 | dgs_clusters = [module.dgs01, module.dgs02] 118 | } 119 | 120 | module "router" { 121 | source = "./modules/routing_cluster" 122 | vpc = module.vpc 123 | cluster_name = "router" 124 | 125 | allocation_targets = [for dgs in local.dgs_clusters : 126 | { 127 | cluster_name = dgs.cluster_name 128 | tls_crt = dgs.allocation_service_client_tls_crt 129 | tls_key = dgs.allocation_service_client_tls_key 130 | ca_crt = dgs.allocation_service_server_tls_crt 131 | endpoint = 
dgs.allocation_service_hostname 132 | gameserver_namespace = dgs.gameserver_namespace 133 | } 134 | ] 135 | 136 | cluster_endpoint_public_access_cidrs = var.cluster_endpoint_allowed_cidrs 137 | allocator_allowed_cidrs = var.agones_allocator_allowed_cidrs 138 | } 139 | -------------------------------------------------------------------------------- /modules/agones/main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | kubectl = { 4 | source = "gavinbunney/kubectl" 5 | version = ">= 1.7.0" 6 | } 7 | } 8 | } 9 | 10 | data "aws_region" "current" {} 11 | 12 | locals { 13 | allocator_server_cert_name = "allocator-tls-org" 14 | allocator_client_cert_name = "allocator-client-tls" 15 | # CIDRs which can access allocator service 16 | # those related to its VPC are allowed by default 17 | allocator_allowed_cidrs = concat( 18 | [var.vpc.vpc_cidr_block], 19 | [for ip in var.vpc.nat_public_ips : "${ip}/32"], 20 | var.allocator_allowed_cidrs, 21 | ) 22 | } 23 | 24 | resource "kubernetes_namespace" "this" { 25 | # Agones depends on load balancer controller's finalizer 26 | depends_on = [var.load_balancer_controller_module, var.eks_cluster_addons] 27 | metadata { 28 | annotations = { 29 | name = var.namespace 30 | } 31 | 32 | name = var.namespace 33 | } 34 | } 35 | 36 | # https://artifacthub.io/packages/helm/agones/agones 37 | resource "helm_release" "this" { 38 | depends_on = [module.agones_system_node_group] 39 | namespace = var.namespace 40 | create_namespace = false 41 | 42 | repository = "https://agones.dev/chart/stable" 43 | name = "agones" 44 | chart = "agones" 45 | version = "1.24.0" 46 | wait_for_jobs = true 47 | 48 | # Use set block for certificates as yaml multiline string is bothering 49 | set { 50 | name = "agones.allocator.tlsCert" 51 | value = data.kubernetes_secret.allocator_server_cert.data["tls.crt"] 52 | } 53 | 54 | set_sensitive { 55 | name = "agones.allocator.tlsKey" 56 | value = 
data.kubernetes_secret.allocator_server_cert.data["tls.key"] 57 | } 58 | 59 | set { 60 | // Note that the suffix of name must be .crt or .pem 61 | // https://github.com/googleforgames/agones/blob/fbe538013a6ebd0eaa0933fd0bc5d862ab6b8d7c/cmd/allocator/main.go#L469 62 | name = "agones.allocator.clientCAs.default\\.crt" 63 | value = data.kubernetes_secret.allocator_client_cert.data["tls.crt"] 64 | } 65 | 66 | # https://agones.dev/site/docs/installation/install-agones/helm/#configuration 67 | values = [ 68 | templatefile("${path.module}/values.yaml", { 69 | allocator_allowed_cidrs = local.allocator_allowed_cidrs 70 | gameserver_namespace = var.gameserver_namespace 71 | }) 72 | ] 73 | } 74 | 75 | # Creating Agones dedicated node group is recommended here 76 | # https://agones.dev/site/docs/installation/install-agones/helm/ 77 | module "agones_system_node_group" { 78 | source = "terraform-aws-modules/eks/aws//modules/eks-managed-node-group" 79 | 80 | name = var.namespace 81 | cluster_name = var.cluster_name 82 | 83 | min_size = 1 84 | max_size = 10 85 | desired_size = 1 86 | ami_type = "AL2_ARM_64" 87 | instance_types = ["t4g.large"] 88 | subnet_ids = var.vpc.private_subnets 89 | vpc_id = var.vpc.vpc_id 90 | vpc_security_group_ids = [var.node_security_group_id] 91 | 92 | security_group_rules = { 93 | ingress_cluster_agones = { 94 | description = "allow access from EKS control plane to Agones controller" 95 | protocol = "TCP" 96 | from_port = 8080 97 | to_port = 8081 98 | type = "ingress" 99 | source_security_group_id = var.cluster_security_group_id 100 | } 101 | } 102 | 103 | taints = [ 104 | { 105 | key = "agones.dev/agones-system" 106 | value = "true" 107 | effect = "NO_EXECUTE" 108 | } 109 | ] 110 | 111 | labels = { 112 | "agones.dev/agones-system" = "true" 113 | } 114 | 115 | block_device_mappings = { 116 | default = { 117 | device_name = "/dev/xvda" 118 | ebs = { 119 | volume_size = 20 120 | encrypted = true 121 | } 122 | } 123 | } 124 | } 125 | 126 | module 
"agones_gameserver_node_group" { 127 | source = "terraform-aws-modules/eks/aws//modules/eks-managed-node-group" 128 | 129 | name = "gameserver" 130 | cluster_name = var.cluster_name 131 | 132 | min_size = 0 133 | max_size = 30 134 | desired_size = 0 135 | ami_type = "AL2_ARM_64" 136 | instance_types = ["t4g.medium"] 137 | 138 | subnet_ids = var.vpc.public_subnets 139 | vpc_id = var.vpc.vpc_id 140 | 141 | vpc_security_group_ids = [var.node_security_group_id] 142 | 143 | # Deny executing pods other than gameserver 144 | taints = [ 145 | { 146 | key = "gameserver" 147 | value = "true" 148 | effect = "NO_EXECUTE" 149 | } 150 | ] 151 | 152 | block_device_mappings = { 153 | default = { 154 | device_name = "/dev/xvda" 155 | ebs = { 156 | volume_size = 20 157 | encrypted = true 158 | } 159 | } 160 | } 161 | 162 | security_group_rules = { 163 | ingress_websocket_internet = { 164 | description = "Allow tcp/udp access from Internet to gameserver pods" 165 | from_port = 7000 166 | to_port = 8000 167 | protocol = "all" 168 | type = "ingress" 169 | cidr_blocks = var.gameserver_allowed_cidrs 170 | } 171 | } 172 | 173 | security_group_tags = { 174 | "karpenter.sh/discovery/${var.cluster_name}" = var.cluster_name 175 | } 176 | } 177 | 178 | # Tag required for Scaling from zero and Node selector 179 | # https://docs.aws.amazon.com/eks/latest/userguide/autoscaling.html#cluster-autoscaler 180 | resource "aws_autoscaling_group_tag" "gameserver_autoscaler" { 181 | autoscaling_group_name = module.agones_gameserver_node_group.node_group_resources[0].autoscaling_groups[0].name 182 | 183 | tag { 184 | key = "k8s.io/cluster-autoscaler/node-template/label/usage" 185 | value = "gameserver" 186 | propagate_at_launch = false 187 | } 188 | } 189 | 190 | data "kubernetes_service" "allocation_service" { 191 | depends_on = [helm_release.this] 192 | metadata { 193 | name = "agones-allocator" 194 | namespace = var.namespace 195 | } 196 | } 197 | 198 | module "cert_manager" { 199 | source = 
"../cert_manager" 200 | } 201 | 202 | data "kubectl_path_documents" "cert" { 203 | pattern = "${path.module}/manifests/cert.yaml" 204 | vars = { 205 | aws_region = data.aws_region.current.name 206 | namespace = var.namespace 207 | allocator_server_cert_name = local.allocator_server_cert_name 208 | allocator_client_cert_name = local.allocator_client_cert_name 209 | } 210 | } 211 | 212 | resource "kubectl_manifest" "cert" { 213 | depends_on = [module.cert_manager, kubernetes_namespace.this] 214 | count = 3 # To avoid this problem https://github.com/gavinbunney/terraform-provider-kubectl/issues/58 215 | yaml_body = element(data.kubectl_path_documents.cert.documents, count.index) 216 | } 217 | 218 | # It takes a few seconds for cert-manager to actually create secret after Certificate CRD is created. 219 | # We will get an error if we try to access the secret before it is created, so here we will wait for seconds. 220 | resource "time_sleep" "wait_for_cert_creation" { 221 | depends_on = [kubectl_manifest.cert] 222 | 223 | create_duration = "10s" 224 | } 225 | 226 | data "kubernetes_secret" "allocator_server_cert" { 227 | depends_on = [time_sleep.wait_for_cert_creation] 228 | metadata { 229 | name = local.allocator_server_cert_name 230 | namespace = var.namespace 231 | } 232 | } 233 | 234 | data "kubernetes_secret" "allocator_client_cert" { 235 | depends_on = [time_sleep.wait_for_cert_creation] 236 | metadata { 237 | name = local.allocator_client_cert_name 238 | namespace = var.namespace 239 | } 240 | } 241 | 242 | # HPA for allocator service 243 | data "kubectl_path_documents" "hpa" { 244 | pattern = "${path.module}/manifests/hpa.yaml" 245 | vars = { 246 | namespace = var.namespace 247 | } 248 | } 249 | 250 | resource "kubectl_manifest" "hpa" { 251 | depends_on = [helm_release.this] 252 | count = 1 253 | yaml_body = element(data.kubectl_path_documents.hpa.documents, count.index) 254 | } 255 | 
-------------------------------------------------------------------------------- /modules/agones/manifests/cert.yaml: -------------------------------------------------------------------------------- 1 | # Issue mTLS certificates for allocator service and its client using cert-manager 2 | # https://cert-manager.io/docs/usage/certificate/ 3 | apiVersion: cert-manager.io/v1 4 | kind: ClusterIssuer 5 | metadata: 6 | name: selfsigned 7 | spec: 8 | selfSigned: {} 9 | --- 10 | apiVersion: cert-manager.io/v1 11 | kind: Certificate 12 | metadata: 13 | name: ${allocator_server_cert_name} 14 | namespace: ${namespace} 15 | spec: 16 | # Set dnsName as ELB domain name 17 | dnsNames: 18 | - "*.elb.${aws_region}.amazonaws.com" 19 | secretName: ${allocator_server_cert_name} 20 | commonName: allocation-ca 21 | issuerRef: 22 | name: selfsigned 23 | kind: ClusterIssuer 24 | duration: 87600h 25 | --- 26 | apiVersion: cert-manager.io/v1 27 | kind: Certificate 28 | metadata: 29 | name: ${allocator_client_cert_name} 30 | namespace: ${namespace} 31 | spec: 32 | # You can use arbitrary CN for a client certificate. 
33 | commonName: allocation-ca 34 | secretName: ${allocator_client_cert_name} 35 | issuerRef: 36 | name: selfsigned 37 | kind: ClusterIssuer 38 | duration: 87600h 39 | --- 40 | -------------------------------------------------------------------------------- /modules/agones/manifests/hpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling/v1 2 | kind: HorizontalPodAutoscaler 3 | metadata: 4 | annotations: 5 | name: agones-allocator-hpa 6 | namespace: ${namespace} 7 | spec: 8 | maxReplicas: 20 9 | minReplicas: 1 10 | scaleTargetRef: 11 | apiVersion: apps/v1 12 | kind: Deployment 13 | name: agones-allocator 14 | targetCPUUtilizationPercentage: 60 15 | --- 16 | -------------------------------------------------------------------------------- /modules/agones/output.tf: -------------------------------------------------------------------------------- 1 | output "allocation_service_hostname" { 2 | value = data.kubernetes_service.allocation_service.status.0.load_balancer.0.ingress.0.hostname 3 | } 4 | 5 | output "allocation_service_client_tls_crt" { 6 | value = base64encode(data.kubernetes_secret.allocator_client_cert.data["tls.crt"]) 7 | } 8 | 9 | output "allocation_service_client_tls_key" { 10 | value = base64encode(data.kubernetes_secret.allocator_client_cert.data["tls.key"]) 11 | sensitive = true 12 | } 13 | 14 | output "allocation_service_server_tls_crt" { 15 | value = base64encode(data.kubernetes_secret.allocator_server_cert.data["tls.crt"]) 16 | } 17 | 18 | output "gameserver_iam_role_name" { 19 | value = module.agones_gameserver_node_group.iam_role_name 20 | } 21 | -------------------------------------------------------------------------------- /modules/agones/values.yaml: -------------------------------------------------------------------------------- 1 | # https://github.com/googleforgames/agones/blob/v1.21.0/install/helm/agones/values.yaml 2 | agones: 3 | metrics: 4 | prometheusEnabled: true 5 | 
prometheusServiceDiscovery: true 6 | controller: 7 | logLevel: warn 8 | resources: 9 | requests: 10 | cpu: 400m 11 | memory: 256Mi 12 | nodeSelector: 13 | "agones.dev/agones-system": "true" 14 | # Install this if you need the ping service https://agones.dev/site/docs/guides/ping-service/ 15 | ping: 16 | install: false 17 | allocator: 18 | # Currently we have a lot of invalid warnings when logLevel is below warn 19 | # https://github.com/googleforgames/agones/issues/2498 20 | logLevel: error 21 | resources: 22 | requests: 23 | cpu: 400m 24 | memory: 256Mi 25 | replicas: null 26 | nodeSelector: 27 | "agones.dev/agones-system": "true" 28 | service: 29 | # https://aws.amazon.com/premiumsupport/knowledge-center/eks-cidr-ip-address-loadbalancer/ 30 | loadBalancerSourceRanges: 31 | %{ for cidr in allocator_allowed_cidrs ~} 32 | - ${cidr} 33 | %{ endfor ~} 34 | 35 | annotations: 36 | # Annotations to deploy NLB instead of CLB 37 | # https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.4/guide/service/annotations/ 38 | service.beta.kubernetes.io/aws-load-balancer-type: external 39 | 40 | # https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.4/guide/service/nlb/#instance-mode 41 | service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip 42 | 43 | # Set this to "internal" if you do not need internet access. 44 | service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing 45 | 46 | # We need this to use loadBalancerSourceRanges 47 | service.beta.kubernetes.io/aws-load-balancer-target-group-attributes: preserve_client_ip.enabled=true 48 | 49 | service.beta.kubernetes.io/aws-load-balancer-healthcheck-interval: "30" 50 | 51 | # Enable this if you need the REST API service 52 | http: 53 | enabled: true 54 | # TLS certificates for allocator service will be generated by cert-manager 55 | # because Helm-generated certs have an invalid CN.
56 | generateTLS: false 57 | # TLS certificate for allocator service clients will also be generated by cert-manager 58 | # because otherwise certs are updated every time we update Helm chart. 59 | generateClientTLS: false 60 | 61 | gameservers: 62 | namespaces: 63 | - ${gameserver_namespace} 64 | -------------------------------------------------------------------------------- /modules/agones/variable.tf: -------------------------------------------------------------------------------- 1 | variable "cluster_name" { 2 | description = "Name of the EKS cluster this module is deployed to" 3 | type = string 4 | } 5 | 6 | variable "gameserver_namespace" { 7 | description = "Kubernetes namespace gameserver pods are placed into" 8 | default = "default" 9 | type = string 10 | } 11 | 12 | variable "vpc" { 13 | } 14 | 15 | variable "cluster_security_group_id" { 16 | type = string 17 | } 18 | 19 | variable "node_security_group_id" { 20 | type = string 21 | } 22 | 23 | variable "load_balancer_controller_module" { 24 | description = "Load balancer controller module. We will use it for explicit dependency." 25 | default = "" 26 | } 27 | 28 | variable "eks_cluster_addons" { 29 | description = "EKS cluster addons. We will use it for explicit dependency." 30 | default = "" 31 | } 32 | 33 | variable "namespace" { 34 | description = "Kubernetes namespace Agones pods are placed into" 35 | default = "agones-system" 36 | type = string 37 | } 38 | 39 | variable "allocator_allowed_cidrs" { 40 | description = "CIDRs which can access Agones allocator service (e.g.
[\"1.1.1.1/32\"])" 41 | default = [] 42 | type = list(string) 43 | } 44 | 45 | variable "gameserver_allowed_cidrs" { 46 | description = "CIDRs which can access gameservers" 47 | default = ["10.0.0.0/32"] 48 | type = list(string) 49 | } 50 | -------------------------------------------------------------------------------- /modules/aws_otel/main.tf: -------------------------------------------------------------------------------- 1 | # Install AWS Distro for OpenTelemetry 2 | # used for ingesting metrics including Agones to CloudWatch Metrics 3 | # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/Container-Insights-EKS-otel.html 4 | 5 | terraform { 6 | required_providers { 7 | kubectl = { 8 | source = "gavinbunney/kubectl" 9 | version = ">= 1.7.0" 10 | } 11 | } 12 | } 13 | 14 | data "aws_region" "current" {} 15 | 16 | locals { 17 | service_account_name = "aws-otel-sa" 18 | } 19 | 20 | data "aws_iam_policy" "CloudWatchAgentServerPolicy" { 21 | arn = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy" 22 | } 23 | 24 | module "iam_assumable_role" { 25 | source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc" 26 | version = "4.13.2" 27 | 28 | create_role = true 29 | role_name_prefix = "aws-otel-" 30 | role_description = "IRSA role for AWS Distro for OpenTelemetry" 31 | 32 | provider_url = var.oidc_provider 33 | role_policy_arns = [data.aws_iam_policy.CloudWatchAgentServerPolicy.arn] 34 | oidc_fully_qualified_subjects = ["system:serviceaccount:${var.namespace}:${local.service_account_name}"] 35 | oidc_fully_qualified_audiences = ["sts.amazonaws.com"] 36 | } 37 | 38 | resource "kubernetes_namespace" "this" { 39 | metadata { 40 | annotations = { 41 | name = var.namespace 42 | } 43 | 44 | name = var.namespace 45 | } 46 | } 47 | 48 | data "kubectl_path_documents" "this" { 49 | pattern = "${path.module}/manifests/*.yaml" 50 | vars = { 51 | role_arn = module.iam_assumable_role.iam_role_arn 52 | namespace = var.namespace 53 | 
service_account_name = local.service_account_name 54 | cluster_name = var.cluster_name 55 | hash = filesha256("${path.module}/manifests/otel.yaml") 56 | } 57 | } 58 | 59 | # https://github.com/gavinbunney/terraform-provider-kubectl/issues/58 60 | data "kubectl_path_documents" "dummy" { 61 | pattern = "${path.module}/manifests/*.yaml" 62 | vars = { 63 | role_arn = "" 64 | namespace = "" 65 | service_account_name = "" 66 | cluster_name = "" 67 | hash = "" 68 | } 69 | } 70 | 71 | resource "kubectl_manifest" "this" { 72 | depends_on = [kubernetes_namespace.this] 73 | count = length(data.kubectl_path_documents.dummy.documents) 74 | yaml_body = element(data.kubectl_path_documents.this.documents, count.index) 75 | } 76 | -------------------------------------------------------------------------------- /modules/aws_otel/manifests/otel.yaml: -------------------------------------------------------------------------------- 1 | # create cwagent service account and role binding 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: ${service_account_name} 6 | namespace: ${namespace} 7 | annotations: 8 | eks.amazonaws.com/role-arn: ${role_arn} 9 | 10 | --- 11 | kind: ClusterRole 12 | apiVersion: rbac.authorization.k8s.io/v1 13 | metadata: 14 | name: aoc-agent-role 15 | rules: 16 | - apiGroups: [""] 17 | resources: ["pods", "nodes", "endpoints"] 18 | verbs: ["list", "watch"] 19 | - apiGroups: ["apps"] 20 | resources: ["replicasets"] 21 | verbs: ["list", "watch"] 22 | - apiGroups: ["batch"] 23 | resources: ["jobs"] 24 | verbs: ["list", "watch"] 25 | - apiGroups: [""] 26 | resources: ["nodes/proxy"] 27 | verbs: ["get"] 28 | - apiGroups: [""] 29 | resources: ["nodes/stats", "configmaps", "events"] 30 | verbs: ["create", "get"] 31 | - apiGroups: [""] 32 | resources: ["configmaps"] 33 | resourceNames: ["otel-container-insight-clusterleader"] 34 | verbs: ["get","update"] 35 | 36 | --- 37 | kind: ClusterRoleBinding 38 | apiVersion: rbac.authorization.k8s.io/v1 39 | metadata: 
40 | name: aoc-agent-role-binding 41 | subjects: 42 | - kind: ServiceAccount 43 | name: ${service_account_name} 44 | namespace: ${namespace} 45 | roleRef: 46 | kind: ClusterRole 47 | name: aoc-agent-role 48 | apiGroup: rbac.authorization.k8s.io 49 | 50 | --- 51 | apiVersion: v1 52 | kind: ConfigMap 53 | metadata: 54 | # Pass a hash to make sure pods are restarted when this configMap is changed 55 | name: otel-agent-conf-${hash} 56 | namespace: ${namespace} 57 | labels: 58 | app: opentelemetry 59 | component: otel-agent-conf 60 | data: 61 | # Pass extracfg.txt to set otel log level 62 | # https://github.com/aws-observability/aws-otel-collector/blob/v0.16.0/pkg/extraconfig/extraconfig.go 63 | otel-agent-extracfg: | 64 | loggingLevel=ERROR 65 | otel-agent-config: | 66 | extensions: 67 | health_check: 68 | 69 | receivers: 70 | # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/awscontainerinsightreceiver 71 | awscontainerinsightreceiver: 72 | 73 | # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/prometheusreceiver 74 | prometheus: 75 | config: 76 | # https://prometheus.io/docs/prometheus/latest/configuration/configuration/ 77 | scrape_configs: 78 | - job_name: 'otel-collector' 79 | scrape_interval: 60s 80 | kubernetes_sd_configs: 81 | - role: pod 82 | relabel_configs: 83 | # Extract Prometheus endpoint from Pods' annotations 84 | # https://github.com/prometheus/prometheus/blob/v2.33.4/documentation/examples/prometheus-kubernetes.yml#L157-L178 85 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] 86 | action: keep 87 | regex: true 88 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] 89 | action: replace 90 | target_label: __metrics_path__ 91 | regex: (.+) 92 | - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] 93 | action: replace 94 | regex: ([^:]+)(?::\d+)?;(\d+) 95 | replacement: $$1:$$2 96 | target_label: 
__address__ 97 | 98 | # Set labels same as awscontainerinsightreceiver 99 | - source_labels: [__meta_kubernetes_namespace] 100 | action: replace 101 | target_label: Namespace 102 | - source_labels: [__meta_kubernetes_pod_node_name] 103 | action: replace 104 | target_label: NodeName 105 | - source_labels: [__meta_kubernetes_pod_name] 106 | action: replace 107 | target_label: PodName 108 | # Pass cluster name because it is not available in __meta labels 109 | # https://albersdevelopment.net/2019/08/28/prometheus-adding-a-label-to-a-target/ 110 | - source_labels: [__address__] 111 | target_label: ClusterName 112 | replacement: "${cluster_name}" 113 | # Set container name as service name 114 | - source_labels: [__meta_kubernetes_pod_container_name] 115 | action: replace 116 | target_label: Service 117 | 118 | # You can refer to this article to optimize your otel setting 119 | # https://aws.amazon.com/blogs/containers/cost-savings-by-customizing-metrics-sent-by-container-insights-in-amazon-eks/ 120 | processors: 121 | resource: 122 | attributes: 123 | - key: Sources 124 | action: delete 125 | - key: kubernetes 126 | action: delete 127 | # Replace gameserver pod names to a common name, because otherwise too many metrics are created. 
128 | # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.45.1/processor/attributesprocessor/testdata/config.yaml#L24-L41 129 | - key: PodName 130 | action: extract 131 | pattern: "(?P<PodName>dgs-fleet).*" 132 | # By default Agones fleet pods do not have Service attribute, adding explicitly here 133 | - key: PodName 134 | action: extract 135 | pattern: "(?P<Service>dgs-fleet).*" 136 | - key: AutoScalingGroupName 137 | action: extract 138 | # Extract a prefix of AutoScalingGroupName (it has Terraform generated suffix starting with numbers) 139 | pattern: "(?P<AutoScalingGroupName>.*?)-[0-9]{6,}.*" 140 | 141 | # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/filterprocessor 142 | filter/exclude: 143 | metrics: 144 | exclude: 145 | match_type: regexp 146 | metric_names: 147 | - container_.* 148 | - agones_k8s_.* 149 | - agones_grpc_.* 150 | - go_.* 151 | - redis_.* 152 | 153 | batch/metrics: 154 | timeout: 60s 155 | 156 | exporters: 157 | # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/awsemfexporter 158 | awsemf: 159 | namespace: ContainerInsights 160 | log_group_name: '/aws/containerinsights/{ClusterName}/performance' 161 | log_stream_name: '{NodeName}' 162 | resource_to_telemetry_conversion: 163 | enabled: true 164 | dimension_rollup_option: NoDimensionRollup 165 | parse_json_encoded_attr_values: [Sources, kubernetes] 166 | metric_declarations: 167 | # node metrics 168 | - dimensions: [[NodeGroupName, ClusterName]] 169 | metric_name_selectors: 170 | - node_cpu_utilization 171 | - node_memory_utilization 172 | - node_network_total_bytes 173 | - node_cpu_reserved_capacity 174 | - node_memory_reserved_capacity 175 | - node_number_of_running_pods 176 | - node_number_of_running_containers 177 | 178 | # pod metrics 179 | - dimensions: [[PodName, Namespace, ClusterName], [Service, Namespace, ClusterName]] 180 | metric_name_selectors: 181 | - pod_cpu_utilization 182 | - pod_memory_utilization 183 | -
pod_network_rx_bytes 184 | - pod_network_tx_bytes 185 | - pod_cpu_utilization_over_pod_limit 186 | - pod_memory_utilization_over_pod_limit 187 | - dimensions: [[PodName, Namespace, ClusterName]] 188 | metric_name_selectors: 189 | - pod_cpu_reserved_capacity 190 | - pod_memory_reserved_capacity 191 | - dimensions: [[PodName, Namespace, ClusterName]] 192 | metric_name_selectors: 193 | - pod_number_of_container_restarts 194 | 195 | # cluster metrics 196 | - dimensions: [[ClusterName]] 197 | metric_name_selectors: 198 | - cluster_node_count 199 | - cluster_failed_node_count 200 | - agones_fleet_autoscalers_current_replicas_count 201 | 202 | # service metrics 203 | - dimensions: [[Service, Namespace, ClusterName]] 204 | metric_name_selectors: 205 | - service_number_of_running_pods 206 | 207 | # namespace metrics 208 | - dimensions: [[Namespace, ClusterName]] 209 | metric_name_selectors: 210 | - namespace_number_of_running_pods 211 | 212 | # agones metrics 213 | # https://agones.dev/site/docs/guides/metrics/ 214 | - dimensions: [[type, ClusterName]] 215 | metric_name_selectors: 216 | - agones_fleets_replicas_count 217 | - agones_gameservers_count 218 | 219 | service: 220 | pipelines: 221 | metrics: 222 | receivers: [awscontainerinsightreceiver, prometheus] 223 | processors: [filter/exclude, resource, batch/metrics] 224 | exporters: [awsemf] 225 | 226 | extensions: [health_check] 227 | 228 | 229 | --- 230 | # create Daemonset 231 | apiVersion: apps/v1 232 | kind: DaemonSet 233 | metadata: 234 | name: aws-otel-eks-ci 235 | namespace: ${namespace} 236 | spec: 237 | selector: 238 | matchLabels: 239 | name: aws-otel-eks-ci 240 | template: 241 | metadata: 242 | labels: 243 | name: aws-otel-eks-ci 244 | spec: 245 | containers: 246 | - name: aws-otel-collector 247 | image: amazon/aws-otel-collector:v0.18.0 248 | env: 249 | - name: K8S_NODE_NAME 250 | valueFrom: 251 | fieldRef: 252 | fieldPath: spec.nodeName 253 | - name: HOST_IP 254 | valueFrom: 255 | fieldRef: 256 | fieldPath: 
status.hostIP 257 | - name: HOST_NAME 258 | valueFrom: 259 | fieldRef: 260 | fieldPath: spec.nodeName 261 | - name: K8S_NAMESPACE 262 | valueFrom: 263 | fieldRef: 264 | fieldPath: metadata.namespace 265 | imagePullPolicy: Always 266 | command: 267 | - "/awscollector" 268 | - "--config=/conf/otel-agent-config.yaml" 269 | volumeMounts: 270 | - name: rootfs 271 | mountPath: /rootfs 272 | readOnly: true 273 | - name: dockersock 274 | mountPath: /var/run/docker.sock 275 | readOnly: true 276 | - name: varlibdocker 277 | mountPath: /var/lib/docker 278 | readOnly: true 279 | - name: sys 280 | mountPath: /sys 281 | readOnly: true 282 | - name: devdisk 283 | mountPath: /dev/disk 284 | readOnly: true 285 | - name: otel-agent-config-vol 286 | mountPath: /conf 287 | - name: otel-agent-extracfg-vol 288 | mountPath: /opt/aws/aws-otel-collector/etc/ 289 | resources: 290 | limits: 291 | cpu: 200m 292 | memory: 200Mi 293 | requests: 294 | cpu: 200m 295 | memory: 200Mi 296 | volumes: 297 | - name: otel-agent-config-vol 298 | configMap: 299 | name: otel-agent-conf-${hash} 300 | items: 301 | - key: otel-agent-config 302 | path: otel-agent-config.yaml 303 | - name: otel-agent-extracfg-vol 304 | configMap: 305 | name: otel-agent-conf-${hash} 306 | items: 307 | - key: otel-agent-extracfg 308 | path: extracfg.txt 309 | - name: rootfs 310 | hostPath: 311 | path: / 312 | - name: dockersock 313 | hostPath: 314 | path: /var/run/docker.sock 315 | - name: varlibdocker 316 | hostPath: 317 | path: /var/lib/docker 318 | - name: sys 319 | hostPath: 320 | path: /sys 321 | - name: devdisk 322 | hostPath: 323 | path: /dev/disk/ 324 | serviceAccountName: ${service_account_name} 325 | tolerations: 326 | - key: node-role.kubernetes.io/master 327 | operator: Exists 328 | effect: NoSchedule 329 | - operator: "Exists" 330 | effect: "NoExecute" 331 | - operator: "Exists" 332 | effect: "NoSchedule" 333 | -------------------------------------------------------------------------------- 
/modules/aws_otel/variable.tf: -------------------------------------------------------------------------------- 1 | variable "cluster_name" { 2 | type = string 3 | } 4 | 5 | variable "oidc_provider" { 6 | type = string 7 | } 8 | 9 | variable "namespace" { 10 | type = string 11 | default = "aws-otel" 12 | } 13 | -------------------------------------------------------------------------------- /modules/cert_manager/main.tf: -------------------------------------------------------------------------------- 1 | # https://artifacthub.io/packages/helm/cert-manager/cert-manager 2 | resource "helm_release" "this" { 3 | name = "cert-manager" 4 | namespace = "cert-manager" 5 | repository = "https://charts.jetstack.io" 6 | chart = "cert-manager" 7 | version = "1.8.1" 8 | create_namespace = true 9 | 10 | set { 11 | name = "installCRDs" 12 | value = "true" 13 | } 14 | 15 | set { 16 | name = "prometheus.enabled" 17 | value = "false" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /modules/cert_manager/variable.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/multi-cluster-allocation-demo-for-agones-on-eks/c80705f7c22c607a1bbb8ba0a6185daa1eff574a/modules/cert_manager/variable.tf -------------------------------------------------------------------------------- /modules/dgs_cluster/main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | kubectl = { 4 | source = "gavinbunney/kubectl" 5 | version = ">= 1.7.0" 6 | } 7 | } 8 | } 9 | 10 | provider "helm" { 11 | kubernetes { 12 | host = data.aws_eks_cluster.eks.endpoint 13 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks.certificate_authority[0].data) 14 | token = data.aws_eks_cluster_auth.eks.token 15 | } 16 | } 17 | 18 | provider "kubectl" { 19 | host = data.aws_eks_cluster.eks.endpoint 20 | cluster_ca_certificate 
= base64decode(data.aws_eks_cluster.eks.certificate_authority[0].data) 21 | token = data.aws_eks_cluster_auth.eks.token 22 | load_config_file = false 23 | } 24 | 25 | provider "kubernetes" { 26 | host = data.aws_eks_cluster.eks.endpoint 27 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks.certificate_authority[0].data) 28 | token = data.aws_eks_cluster_auth.eks.token 29 | } 30 | 31 | data "aws_region" "current" {} 32 | 33 | # https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster_auth 34 | data "aws_eks_cluster" "eks" { 35 | name = module.eks.cluster_id 36 | } 37 | 38 | data "aws_eks_cluster_auth" "eks" { 39 | name = module.eks.cluster_id 40 | } 41 | 42 | resource "aws_kms_key" "this" { 43 | description = "KMS key for cluster encryption: ${var.cluster_name}" 44 | enable_key_rotation = true 45 | } 46 | 47 | module "eks" { 48 | source = "terraform-aws-modules/eks/aws" 49 | version = "18.23.0" 50 | 51 | cluster_version = "1.22" 52 | cluster_name = var.cluster_name 53 | vpc_id = var.vpc.vpc_id 54 | subnet_ids = var.vpc.private_subnets 55 | enable_irsa = true 56 | 57 | cluster_endpoint_private_access = true 58 | cluster_endpoint_public_access_cidrs = var.cluster_endpoint_public_access_cidrs 59 | 60 | # Envelope encryption for Kubernetes secrets 61 | # https://aws.amazon.com/blogs/containers/using-eks-encryption-provider-support-for-defense-in-depth/ 62 | cluster_encryption_config = [{ 63 | provider_key_arn = aws_kms_key.this.arn 64 | resources = ["secrets"] 65 | }] 66 | 67 | # Control plane logging https://docs.aws.amazon.com/eks/latest/userguide/control-plane-logs.html 68 | cluster_enabled_log_types = ["audit", "api", "authenticator", "controllerManager", "scheduler"] 69 | 70 | cluster_addons = { 71 | coredns = {} 72 | kube-proxy = {} 73 | vpc-cni = {} 74 | } 75 | 76 | eks_managed_node_group_defaults = { 77 | block_device_mappings = { 78 | default = { 79 | device_name = "/dev/xvda" 80 | ebs = { 81 | volume_size = 20 82
| encrypted = true 83 | } 84 | } 85 | } 86 | } 87 | 88 | node_security_group_additional_rules = { 89 | # Allow all the traffic between each node 90 | ingress_all = { 91 | type = "ingress" 92 | from_port = 0 93 | to_port = 65535 94 | protocol = "all" 95 | self = true 96 | } 97 | 98 | egress_all = { 99 | type = "egress" 100 | from_port = 0 101 | to_port = 65535 102 | protocol = "all" 103 | self = true 104 | } 105 | # Control plane invoke Karpenter webhook 106 | ingress_karpenter_webhook_tcp = { 107 | description = "Cluster API to Node group for Karpenter webhook" 108 | protocol = "tcp" 109 | from_port = 8443 110 | to_port = 8443 111 | type = "ingress" 112 | source_cluster_security_group = true 113 | } 114 | } 115 | 116 | node_security_group_tags = { 117 | "karpenter.sh/discovery/${var.cluster_name}" = var.cluster_name 118 | } 119 | 120 | eks_managed_node_groups = { 121 | default = { 122 | desired_size = 1 123 | ami_type = "AL2_ARM_64" 124 | instance_types = ["t4g.medium"] 125 | subnet_ids = var.vpc.private_subnets 126 | create_security_group = false 127 | 128 | iam_role_additional_policies = [ 129 | # Required by Karpenter 130 | "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" 131 | ] 132 | } 133 | } 134 | 135 | tags = { 136 | # using subnetSelector/securityGroupSelector at Provisioner 137 | "karpenter.sh/discovery/${var.cluster_name}" = var.cluster_name 138 | } 139 | } 140 | 141 | module "karpenter" { 142 | source = "../karpenter" 143 | cluster_name = var.cluster_name 144 | cluster_id = module.eks.cluster_id 145 | cluster_endpoint = module.eks.cluster_endpoint 146 | iam_role_arn = module.eks.eks_managed_node_groups["default"].iam_role_arn 147 | iam_role_name = module.eks.eks_managed_node_groups["default"].iam_role_name 148 | oidc_provider_arn = module.eks.oidc_provider_arn 149 | public_subnet_ids = var.vpc.public_subnets 150 | } 151 | 152 | module "aws_otel" { 153 | source = "../aws_otel" 154 | 155 | oidc_provider = module.eks.oidc_provider 156 | cluster_name 
= module.eks.cluster_id 157 | } 158 | 159 | module "load_balancer_controller" { 160 | source = "../load_balancer_controller" 161 | depends_on = [module.eks.eks_managed_node_groups] 162 | 163 | oidc_provider = module.eks.oidc_provider 164 | cluster_name = module.eks.cluster_id 165 | vpc_id = var.vpc.vpc_id 166 | node_security_group_id = module.eks.node_security_group_id 167 | cluster_security_group_id = module.eks.cluster_security_group_id 168 | } 169 | 170 | module "fluent_bit" { 171 | source = "../fluent_bit" 172 | 173 | cluster_name = module.eks.cluster_id 174 | oidc_provider = module.eks.oidc_provider 175 | } 176 | 177 | module "kubernetes_dashboard" { 178 | source = "../kubernetes_dashboard" 179 | node_security_group_id = module.eks.node_security_group_id 180 | cluster_security_group_id = module.eks.cluster_security_group_id 181 | } 182 | 183 | module "agones" { 184 | source = "../agones" 185 | 186 | cluster_name = module.eks.cluster_id 187 | vpc = var.vpc 188 | node_security_group_id = module.eks.node_security_group_id 189 | cluster_security_group_id = module.eks.cluster_security_group_id 190 | gameserver_namespace = var.gameserver_namespace 191 | load_balancer_controller_module = module.load_balancer_controller 192 | eks_cluster_addons = module.eks.cluster_addons 193 | gameserver_allowed_cidrs = var.gameserver_allowed_cidrs 194 | allocator_allowed_cidrs = var.allocator_allowed_cidrs 195 | } 196 | 197 | data "kubectl_path_documents" "fleet" { 198 | pattern = "${path.module}/manifests/fleet.yaml" 199 | } 200 | 201 | resource "kubectl_manifest" "fleet" { 202 | depends_on = [module.agones] 203 | count = length(data.kubectl_path_documents.fleet.documents) 204 | yaml_body = element(data.kubectl_path_documents.fleet.documents, count.index) 205 | } 206 | -------------------------------------------------------------------------------- /modules/dgs_cluster/manifests/fleet.yaml: -------------------------------------------------------------------------------- 1 | # 
https://agones.dev/site/docs/reference/fleet/ 2 | apiVersion: "agones.dev/v1" 3 | kind: Fleet 4 | metadata: 5 | name: dgs-fleet 6 | spec: 7 | scheduling: Packed 8 | template: 9 | spec: 10 | container: dgs 11 | ports: 12 | - name: default 13 | containerPort: 7654 14 | protocol: TCPUDP 15 | template: 16 | spec: 17 | containers: 18 | - name: dgs 19 | image: gcr.io/agones-images/simple-game-server:0.13 20 | args: ["7654", "true"] 21 | env: 22 | resources: 23 | requests: 24 | memory: "64Mi" 25 | cpu: "50m" 26 | tolerations: 27 | - key: "gameserver" 28 | operator: "Exists" 29 | effect: "NoExecute" 30 | --- 31 | # https://agones.dev/site/docs/reference/fleetautoscaler/ 32 | apiVersion: "autoscaling.agones.dev/v1" 33 | kind: FleetAutoscaler 34 | metadata: 35 | name: dgs-autoscaler 36 | spec: 37 | fleetName: dgs-fleet 38 | policy: 39 | type: Buffer 40 | buffer: 41 | bufferSize: 25% 42 | minReplicas: 5 43 | maxReplicas: 100 44 | -------------------------------------------------------------------------------- /modules/dgs_cluster/output.tf: -------------------------------------------------------------------------------- 1 | output "allocation_service_hostname" { 2 | value = module.agones.allocation_service_hostname 3 | } 4 | 5 | output "allocation_service_client_tls_crt" { 6 | value = module.agones.allocation_service_client_tls_crt 7 | } 8 | 9 | output "allocation_service_client_tls_key" { 10 | value = module.agones.allocation_service_client_tls_key 11 | } 12 | 13 | output "allocation_service_server_tls_crt" { 14 | value = module.agones.allocation_service_server_tls_crt 15 | } 16 | 17 | output "gameserver_namespace" { 18 | value = var.gameserver_namespace 19 | } 20 | 21 | output "cluster_name" { 22 | value = module.eks.cluster_id 23 | } 24 | -------------------------------------------------------------------------------- /modules/dgs_cluster/variable.tf: -------------------------------------------------------------------------------- 1 | variable "vpc" { 2 | } 3 | 4 | 
variable "cluster_name" { 5 | type = string 6 | } 7 | 8 | variable "gameserver_namespace" { 9 | default = "default" 10 | type = string 11 | } 12 | 13 | variable "cluster_endpoint_public_access_cidrs" { 14 | default = ["0.0.0.0/0"] 15 | type = list(string) 16 | } 17 | 18 | variable "gameserver_allowed_cidrs" { 19 | description = "CIDRs which can access gameservers" 20 | default = ["0.0.0.0/0"] 21 | type = list(string) 22 | } 23 | 24 | variable "allocator_allowed_cidrs" { 25 | description = "CIDRs which can access Agones allocator service (e.g. [\"1.1.1.1/32\"])" 26 | default = [] 27 | type = list(string) 28 | } 29 | -------------------------------------------------------------------------------- /modules/fluent_bit/main.tf: -------------------------------------------------------------------------------- 1 | # Install Fluent Bit 2 | # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/Container-Insights-setup-logs-FluentBit.html 3 | 4 | terraform { 5 | required_providers { 6 | kubectl = { 7 | source = "gavinbunney/kubectl" 8 | version = ">= 1.7.0" 9 | } 10 | } 11 | } 12 | 13 | data "aws_region" "current" {} 14 | 15 | locals { 16 | service_account_name = "fluent-bit" 17 | } 18 | 19 | # https://github.com/aws/amazon-cloudwatch-logs-for-fluent-bit#permissions 20 | data "aws_iam_policy_document" "this" { 21 | statement { 22 | actions = [ 23 | "logs:CreateLogGroup", 24 | "logs:CreateLogStream", 25 | "logs:DescribeLogStreams", 26 | "logs:PutLogEvents", 27 | "logs:PutRetentionPolicy", 28 | ] 29 | resources = ["*"] 30 | } 31 | } 32 | 33 | resource "aws_iam_policy" "this" { 34 | policy = data.aws_iam_policy_document.this.json 35 | } 36 | 37 | module "iam_assumable_role" { 38 | source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc" 39 | version = "4.13.2" 40 | 41 | create_role = true 42 | role_name_prefix = "fluent-bit-" 43 | role_description = "IRSA role for fluentBit" 44 | 45 | provider_url = var.oidc_provider 46 | role_policy_arns =
[aws_iam_policy.this.arn] 47 | oidc_fully_qualified_subjects = ["system:serviceaccount:${var.namespace}:${local.service_account_name}"] 48 | oidc_fully_qualified_audiences = ["sts.amazonaws.com"] 49 | } 50 | 51 | resource "kubernetes_namespace" "this" { 52 | metadata { 53 | annotations = { 54 | name = var.namespace 55 | } 56 | 57 | name = var.namespace 58 | } 59 | } 60 | 61 | data "kubectl_path_documents" "this" { 62 | pattern = "${path.module}/manifests/*.yaml" 63 | vars = { 64 | cluster_name = var.cluster_name 65 | aws_region = data.aws_region.current.name 66 | role_arn = module.iam_assumable_role.iam_role_arn 67 | namespace = var.namespace 68 | service_account_name = local.service_account_name 69 | hash = filesha256("${path.module}/manifests/fluent_bit.yaml") 70 | } 71 | } 72 | 73 | # https://github.com/gavinbunney/terraform-provider-kubectl/issues/58 74 | data "kubectl_path_documents" "dummy" { 75 | pattern = "${path.module}/manifests/*.yaml" 76 | vars = { 77 | cluster_name = "dummy" 78 | aws_region = "dummy" 79 | role_arn = "dummy" 80 | namespace = "dummy" 81 | service_account_name = "dummy" 82 | hash = "dummy" 83 | } 84 | } 85 | 86 | resource "kubectl_manifest" "this" { 87 | depends_on = [kubernetes_namespace.this] 88 | count = length(data.kubectl_path_documents.dummy.documents) 89 | yaml_body = element(data.kubectl_path_documents.this.documents, count.index) 90 | } 91 | -------------------------------------------------------------------------------- /modules/fluent_bit/manifests/fluent_bit.yaml: -------------------------------------------------------------------------------- 1 | # https://raw.githubusercontent.com/aws-samples/amazon-cloudwatch-container-insights/latest/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/fluent-bit/fluent-bit.yaml 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: fluent-bit-cluster-info 6 | namespace: ${namespace} 7 | data: 8 | cluster.name: ${cluster_name} 9 | read.head: 
"Off" 10 | read.tail: "On" 11 | logs.region: ${aws_region} 12 | --- 13 | apiVersion: v1 14 | kind: ServiceAccount 15 | metadata: 16 | name: ${service_account_name} 17 | namespace: ${namespace} 18 | annotations: 19 | eks.amazonaws.com/role-arn: ${role_arn} 20 | --- 21 | apiVersion: rbac.authorization.k8s.io/v1 22 | kind: ClusterRole 23 | metadata: 24 | name: fluent-bit-role 25 | rules: 26 | - nonResourceURLs: 27 | - /metrics 28 | verbs: 29 | - get 30 | - apiGroups: [""] 31 | resources: 32 | - namespaces 33 | - pods 34 | - pods/logs 35 | verbs: ["get", "list", "watch"] 36 | --- 37 | apiVersion: rbac.authorization.k8s.io/v1 38 | kind: ClusterRoleBinding 39 | metadata: 40 | name: fluent-bit-role-binding 41 | roleRef: 42 | apiGroup: rbac.authorization.k8s.io 43 | kind: ClusterRole 44 | name: fluent-bit-role 45 | subjects: 46 | - kind: ServiceAccount 47 | name: ${service_account_name} 48 | namespace: ${namespace} 49 | --- 50 | apiVersion: v1 51 | kind: ConfigMap 52 | metadata: 53 | name: fluent-bit-config-${hash} 54 | namespace: ${namespace} 55 | labels: 56 | k8s-app: fluent-bit 57 | data: 58 | fluent-bit.conf: | 59 | [SERVICE] 60 | Flush 5 61 | Log_Level warn 62 | Daemon off 63 | Parsers_File parsers.conf 64 | HTTP_Listen 0.0.0.0 65 | storage.path /var/fluent-bit/state/flb-storage/ 66 | storage.sync normal 67 | storage.checksum off 68 | storage.backlog.mem_limit 5M 69 | 70 | @INCLUDE application-log.conf 71 | @INCLUDE dataplane-log.conf 72 | @INCLUDE host-log.conf 73 | 74 | application-log.conf: | 75 | [INPUT] 76 | Name tail 77 | Tag application.* 78 | Exclude_Path /var/log/containers/cloudwatch-agent*, /var/log/containers/fluent-bit*, /var/log/containers/aws-node*, /var/log/containers/kube-proxy* 79 | Path /var/log/containers/*.log 80 | Docker_Mode On 81 | Docker_Mode_Flush 5 82 | Docker_Mode_Parser container_firstline 83 | Parser docker 84 | DB /var/fluent-bit/state/flb_container.db 85 | Mem_Buf_Limit 50MB 86 | Skip_Long_Lines On 87 | Refresh_Interval 10 88 | 
Rotate_Wait 30 89 | storage.type filesystem 90 | Read_from_Head $${READ_FROM_HEAD} 91 | 92 | [INPUT] 93 | Name tail 94 | Tag application.* 95 | Path /var/log/containers/fluent-bit* 96 | Parser docker 97 | DB /var/fluent-bit/state/flb_log.db 98 | Mem_Buf_Limit 5MB 99 | Skip_Long_Lines On 100 | Refresh_Interval 10 101 | Read_from_Head $${READ_FROM_HEAD} 102 | 103 | [INPUT] 104 | Name tail 105 | Tag application.* 106 | Path /var/log/containers/cloudwatch-agent* 107 | Docker_Mode On 108 | Docker_Mode_Flush 5 109 | Docker_Mode_Parser cwagent_firstline 110 | Parser docker 111 | DB /var/fluent-bit/state/flb_cwagent.db 112 | Mem_Buf_Limit 5MB 113 | Skip_Long_Lines On 114 | Refresh_Interval 10 115 | Read_from_Head $${READ_FROM_HEAD} 116 | 117 | [FILTER] 118 | Name kubernetes 119 | Match application.* 120 | Kube_URL https://kubernetes.default.svc:443 121 | Kube_Tag_Prefix application.var.log.containers. 122 | Merge_Log On 123 | Merge_Log_Key log_processed 124 | Keep_Log Off 125 | K8S-Logging.Parser On 126 | K8S-Logging.Exclude Off 127 | Labels Off 128 | Annotations Off 129 | 130 | [OUTPUT] 131 | Name cloudwatch_logs 132 | Match application.* 133 | region $${AWS_REGION} 134 | log_group_name /aws/containerinsights/$${CLUSTER_NAME}/application 135 | log_stream_prefix . 
136 | auto_create_group true 137 | extra_user_agent container-insights 138 | log_retention_days 60 139 | 140 | dataplane-log.conf: | 141 | [INPUT] 142 | Name systemd 143 | Tag dataplane.systemd.* 144 | Systemd_Filter _SYSTEMD_UNIT=docker.service 145 | Systemd_Filter _SYSTEMD_UNIT=kubelet.service 146 | DB /var/fluent-bit/state/systemd.db 147 | Path /var/log/journal 148 | Read_From_Tail $${READ_FROM_TAIL} 149 | 150 | [INPUT] 151 | Name tail 152 | Tag dataplane.tail.* 153 | Path /var/log/containers/aws-node*, /var/log/containers/kube-proxy* 154 | Docker_Mode On 155 | Docker_Mode_Flush 5 156 | Docker_Mode_Parser container_firstline 157 | Parser docker 158 | DB /var/fluent-bit/state/flb_dataplane_tail.db 159 | Mem_Buf_Limit 50MB 160 | Skip_Long_Lines On 161 | Refresh_Interval 10 162 | Rotate_Wait 30 163 | storage.type filesystem 164 | Read_from_Head $${READ_FROM_HEAD} 165 | 166 | [FILTER] 167 | Name modify 168 | Match dataplane.systemd.* 169 | Rename _HOSTNAME hostname 170 | Rename _SYSTEMD_UNIT systemd_unit 171 | Rename MESSAGE message 172 | Remove_regex ^((?!hostname|systemd_unit|message).)*$ 173 | 174 | [FILTER] 175 | Name aws 176 | Match dataplane.* 177 | imds_version v2 178 | 179 | [OUTPUT] 180 | Name cloudwatch_logs 181 | Match dataplane.* 182 | region $${AWS_REGION} 183 | log_group_name /aws/containerinsights/$${CLUSTER_NAME}/dataplane 184 | log_stream_prefix $${HOST_NAME}- 185 | auto_create_group true 186 | extra_user_agent container-insights 187 | log_retention_days 60 188 | 189 | host-log.conf: | 190 | [INPUT] 191 | Name tail 192 | Tag host.dmesg 193 | Path /var/log/dmesg 194 | Parser syslog 195 | DB /var/fluent-bit/state/flb_dmesg.db 196 | Mem_Buf_Limit 5MB 197 | Skip_Long_Lines On 198 | Refresh_Interval 10 199 | Read_from_Head $${READ_FROM_HEAD} 200 | 201 | [INPUT] 202 | Name tail 203 | Tag host.messages 204 | Path /var/log/messages 205 | Parser syslog 206 | DB /var/fluent-bit/state/flb_messages.db 207 | Mem_Buf_Limit 5MB 208 | Skip_Long_Lines On 209 | 
Refresh_Interval 10 210 | Read_from_Head $${READ_FROM_HEAD} 211 | 212 | [INPUT] 213 | Name tail 214 | Tag host.secure 215 | Path /var/log/secure 216 | Parser syslog 217 | DB /var/fluent-bit/state/flb_secure.db 218 | Mem_Buf_Limit 5MB 219 | Skip_Long_Lines On 220 | Refresh_Interval 10 221 | Read_from_Head $${READ_FROM_HEAD} 222 | 223 | [FILTER] 224 | Name aws 225 | Match host.* 226 | imds_version v2 227 | 228 | [OUTPUT] 229 | Name cloudwatch_logs 230 | Match host.* 231 | region $${AWS_REGION} 232 | log_group_name /aws/containerinsights/$${CLUSTER_NAME}/host 233 | log_stream_prefix $${HOST_NAME}. 234 | auto_create_group true 235 | extra_user_agent container-insights 236 | log_retention_days 60 237 | 238 | parsers.conf: | 239 | [PARSER] 240 | Name docker 241 | Format json 242 | Time_Key time 243 | Time_Format %Y-%m-%dT%H:%M:%S.%LZ 244 | 245 | [PARSER] 246 | Name syslog 247 | Format regex 248 | Regex ^(?