├── .github
│   └── workflows
│       └── documentation.yml
├── .gitignore
├── LICENSE
├── README.md
├── docs
│   └── architecture.png
├── examples
│   └── complete-cluster-asg
│       ├── nomad-clients.tf
│       └── nomad-servers.tf
├── modules
│   ├── nomad-clients
│   │   ├── README.mkdn
│   │   ├── asg.tf
│   │   ├── data.tf
│   │   ├── ec2.tf
│   │   ├── iam.tf
│   │   ├── launch_template.tf
│   │   ├── locals.tf
│   │   ├── outputs.tf
│   │   ├── scripts
│   │   │   └── setup_client.tftpl.sh
│   │   ├── templates
│   │   │   └── nomad.tftpl
│   │   ├── variables.tf
│   │   └── versions.tf
│   └── nomad-servers
│       ├── README.mkdn
│       ├── alb.tf
│       ├── asg.tf
│       ├── data.tf
│       ├── firewall.tf
│       ├── iam.tf
│       ├── launch_template.tf
│       ├── locals.tf
│       ├── outputs.tf
│       ├── scripts
│       │   └── setup_server.tftpl.sh
│       ├── templates
│       │   └── nomad.tftpl
│       ├── variables.tf
│       └── versions.tf
└── packer
    ├── Makefile
    ├── ami.pkr.hcl
    └── setup.sh
/.github/workflows/documentation.yml:
--------------------------------------------------------------------------------
1 | name: Generate Terraform Module Documentation
2 |
3 | on:
4 |   push:
5 |     branches: ['main']
6 |     paths: ['**.tf']
7 |
8 | jobs:
9 |   deploy:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v3
13 |
14 |       - name: Render Terraform Nomad Clients Module Documentation
15 |         uses: terraform-docs/gh-actions@v1.0.0
16 |         with:
17 |           working-dir: ./modules/nomad-clients
18 |           output-file: README.mkdn
19 |           output-method: inject
20 |           git-push: "true"
21 |
22 |       - name: Render Terraform Nomad Servers Module Documentation
23 |         uses: terraform-docs/gh-actions@v1.0.0
24 |         with:
25 |           working-dir: ./modules/nomad-servers
26 |           output-file: README.mkdn
27 |           output-method: inject
28 |           git-push: "true"
29 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Local .terraform directories
2 | **/.terraform/*
3 |
4 | # .tfstate files
5 | *.tfstate
6 | *.tfstate.*
7 |
8 | # Crash log files
9 | crash.log
10 | crash.*.log
11 |
12 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as
13 | # passwords, private keys, and other secrets. These should not be part of version
14 | # control as they are data points which are potentially sensitive and subject
15 | # to change depending on the environment.
16 | *.tfvars
17 | *.tfvars.json
18 |
19 | # Ignore override files as they are usually used to override resources locally and so
20 | # are not checked in
21 | override.tf
22 | override.tf.json
23 | *_override.tf
24 | *_override.tf.json
25 |
26 | # Include override files you do wish to add to version control using negated pattern
27 | # !example_override.tf
28 |
29 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan
30 | # example: *tfplan*
31 |
32 | # Ignore CLI configuration files
33 | .terraformrc
34 | terraform.rc
35 |
36 | .env
37 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Zerodha Tech
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Nomad Cluster Setup
4 |
5 | Terraform modules to deploy a [HashiCorp Nomad](https://www.nomadproject.io/) cluster on AWS using an Auto Scaling Group (ASG). The modules are designed to provision Nomad servers and clients in an ASG, making it easy to manage the infrastructure for a Nomad cluster. Additionally, the repository includes Packer scripts to build a custom Amazon Machine Image (AMI) with Nomad pre-installed.
6 |
7 | ![Nomad cluster architecture](./docs/architecture.png)
8 |
9 | - [Nomad Cluster Setup](#nomad-cluster-setup)
10 | - [AMI](#ami)
11 | - [AWS Resources](#aws-resources)
12 | - [Auto Scaling Group (ASG)](#auto-scaling-group-asg)
13 | - [Security Group](#security-group)
14 | - [IAM Role](#iam-role)
15 | - [ALB](#alb)
16 | - [Nomad Server](#nomad-server)
17 | - [Terraform Module Reference](#terraform-module-reference)
18 | - [Nomad Client](#nomad-client)
19 | - [Terraform Module Reference](#terraform-module-reference-1)
20 | - [Example Usage](#example-usage)
21 | - [Nomad Servers](#nomad-servers)
22 | - [Nomad Clients](#nomad-clients)
23 | - [Other Examples](#other-examples)
24 | - [Contributors](#contributors)
25 | - [Contributing](#contributing)
26 | - [LICENSE](#license)
27 |
28 | ## AMI
29 |
30 | The repository includes a [Packer file](./packer/ami.pkr.hcl) to build a custom Amazon Machine Image (AMI) with Nomad and `docker` pre-installed. This AMI is used by the Terraform modules when creating the ASG instances.
31 |
32 | To build the AMI, run:
33 |
34 | ```bash
35 | cd packer
36 | make build
37 | ```
38 |
39 | NOTE: `dry_run` mode is enabled by default. To actually build the AMI, set the `dry_run` variable in the [`Makefile`](./packer/Makefile) to `false`.
40 |
41 | ## AWS Resources
42 |
43 | The key resources provisioned by this module are:
44 |
45 | 1. Auto Scaling Group (ASG)
46 | 2. Security Group
47 | 3. IAM Role
48 | 4. Application Load Balancer (ALB) (optional)
49 |
50 | ### Auto Scaling Group (ASG)
51 |
52 | The module deploys Nomad on top of an Auto Scaling Group (ASG). For optimal performance and fault tolerance, it is recommended to run the Nomad server ASG with 3 or 5 EC2 instances distributed across multiple Availability Zones. Each EC2 instance should utilize an AMI built using the provided Packer script.
53 |
54 | **NOTE:** The Nomad client Terraform module can also provision standalone EC2 instances instead of an ASG. Check out the [`nomad_clients` Terraform Module Reference](./modules/nomad-clients/README.mkdn) for more information.
55 |
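For illustration, switching the client module from an ASG to standalone EC2 nodes is driven by the `client_type` and `ec2_count` inputs. A minimal, hypothetical sketch (the values mirror the examples later in this README):

```hcl
module "nomad_client_single" {
  source = "git::https://github.com/zerodha/nomad-cluster-setup//modules/nomad-clients?ref=main"

  cluster_name              = "demo-nomad"
  nomad_join_tag_value      = "demo"
  client_name               = "standalone-app"
  ami                       = "ami-abc"
  vpc                       = "vpc-xyz"
  subnets                   = ["subnet-xyz"]
  route_53_resolver_address = "10.0.0.2"

  client_type = "ec2" # plain EC2 instances instead of an ASG
  ec2_count   = 2     # number of client instances to run
}
```
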
56 | ### Security Group
57 |
58 | Each EC2 instance within the ASG is assigned a Security Group that permits:
59 |
60 | - All outbound requests
61 | - All inbound ports specified in the [Nomad documentation](https://developer.hashicorp.com/nomad/docs/install/production/requirements#ports-used)
62 |
63 | The common Security Group is attached to both client and server nodes, enabling the Nomad agent to communicate and discover other agents within the cluster. The Security Group ID is exposed as an output variable for adding additional rules as needed. Furthermore, you can provide your own list of security groups as a variable to the module.
64 |
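As an illustration, additional rules can be attached to the exported Security Group from outside the module. A hedged sketch (the SSH port and CIDR below are purely illustrative):

```hcl
# Illustrative only: allow SSH to the cluster nodes from a private range,
# using the agent security group ID exported by the nomad-servers module.
resource "aws_security_group_rule" "allow_ssh" {
  type              = "ingress"
  from_port         = 22
  to_port           = 22
  protocol          = "tcp"
  cidr_blocks       = ["10.0.0.0/8"]
  security_group_id = module.nomad_servers.nomad_agent_security_group_id
}
```
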
65 | ### IAM Role
66 |
67 | An IAM Role is attached to each EC2 instance within the ASG. This role is granted a minimal set of IAM permissions, allowing each instance to automatically discover other instances in the same ASG and form a cluster with them.
68 |
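The exact permissions are managed inside the module; the core requirement for tag-based cloud auto-join is that instances can describe their peers. A rough sketch of such a policy document (illustrative, not the module's actual policy):

```hcl
# Illustrative only: ec2:DescribeInstances is what AWS cloud auto-join relies on.
data "aws_iam_policy_document" "nomad_auto_join" {
  statement {
    effect    = "Allow"
    actions   = ["ec2:DescribeInstances"]
    resources = ["*"]
  }
}
```
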
69 | ### ALB
70 |
71 | An internal Application Load Balancer (ALB) is _optionally_ created for the Nomad servers. The ALB listens on ports 80/443 and forwards requests to the Nomad servers on port 4646. The ALB's DNS name and security group ID are exposed as output variables for further customization as needed.
72 |
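Because the ALB is internal, a private DNS record typically maps the `nomad_alb_hostname` to it. A sketch using the module's `alb_dns_name` output (the Route53 zone below is an assumed, pre-existing private hosted zone):

```hcl
resource "aws_route53_record" "nomad" {
  zone_id = aws_route53_zone.internal.zone_id # assumed private hosted zone
  name    = "nomad.example.internal"
  type    = "CNAME"
  ttl     = 300
  records = [module.nomad_servers.alb_dns_name]
}
```
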
73 | ## Nomad Server
74 |
75 | The [`setup_server`](./modules/nomad-servers/scripts/setup_server.tftpl.sh) script included in this project configures and bootstraps Nomad server nodes in an AWS Auto Scaling group. The script performs the following steps:
76 |
77 | - Configures the Nomad agent as a server on the EC2 instances and uses the `nomad_join_tag_value` tag to auto-join the cluster (see the join stanza sketch after this list). Once all the server instances discover each other, they elect a leader.
78 | - Bootstraps the Nomad ACL system with a pre-configured token on the first server.
79 |   - It waits for the cluster leader to be elected before bootstrapping the ACL system.
80 |   - The token must be passed as the `nomad_acl_bootstrap_token` variable.
81 |
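Auto-join works through Nomad's cloud auto-join against the `nomad_ec2_join` EC2 tag. A simplified view of the join stanza the rendered server configuration ends up with (the actual template lives in `modules/nomad-servers/templates/nomad.tftpl`; the values here are examples only):

```hcl
server {
  enabled          = true
  bootstrap_expect = 3

  server_join {
    # Discover peers by EC2 tag; the tag value comes from var.nomad_join_tag_value.
    retry_join = ["provider=aws tag_key=nomad_ec2_join tag_value=demo region=ap-south-1"]
  }
}
```
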
82 | ### Terraform Module Reference
83 |
84 | Check out [`nomad_servers`](./modules/nomad-servers/README.mkdn) documentation for module reference.
85 |
86 | ## Nomad Client
87 |
88 | The [`setup_client`](./modules/nomad-clients/scripts/setup_client.tftpl.sh) script included in this project configures Nomad client nodes in an AWS Auto Scaling group. The script performs the following steps:
89 |
90 | - Configures the Nomad agent as a client on the EC2 instances and uses the `nomad_join_tag_value` tag to auto-join the cluster.
91 | - Configures DNS resolution for the Nomad cluster inside `exec` driver.
92 | - Prepares configurations for different task drivers (see the Docker plugin sketch after this list).
93 |
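When `enable_docker_plugin` is `true`, the client configuration also gets a Docker plugin block. The exact options are decided by the setup script; the sketch below only shows the general shape of such a block, and the options in it are assumptions:

```hcl
plugin "docker" {
  config {
    # Example option only; the module's script sets the real values.
    volumes {
      enabled = true
    }
  }
}
```
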
94 | ### Terraform Module Reference
95 |
96 | Check out [`nomad_clients`](./modules/nomad-clients/README.mkdn) documentation for module reference.
97 |
98 | ## Example Usage
99 |
100 | ### Nomad Servers
101 |
102 | ```hcl
103 | module "nomad_servers" {
104 | source = "git::https://github.com/zerodha/nomad-cluster-setup//modules/nomad-servers?ref=main"
105 |
106 | cluster_name = "demo-nomad"
107 | nomad_join_tag_value = "demo"
108 | instance_count = 3
109 | ami = "ami-xyz"
110 | vpc = "vpc-xyz"
111 | subnets = ["subnet-xyz"]
112 | create_alb = true
113 | nomad_alb_hostname = "nomad.example.internal"
114 |
115 | nomad_gossip_encrypt_key = var.nomad_gossip_encrypt_key
116 | nomad_acl_bootstrap_token = var.nomad_acl_bootstrap_token
117 | }
118 | ```
119 |
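The gossip encryption key is a base64-encoded random key (32 bytes in recent Nomad versions). If you prefer to generate it in Terraform rather than with the Nomad CLI, one option (an assumption, not part of this module) is the `random_id` resource; note that the key will then be stored in Terraform state:

```hcl
resource "random_id" "gossip_key" {
  byte_length = 32
}

# e.g. nomad_gossip_encrypt_key = random_id.gossip_key.b64_std
```
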
120 | ### Nomad Clients
121 |
122 | ```hcl
123 | module "nomad_client_demo" {
124 | source = "git::https://github.com/zerodha/nomad-cluster-setup//modules/nomad-clients?ref=main"
125 |
126 | cluster_name = "demo-nomad"
127 | nomad_join_tag_value = "demo"
128 | client_name = "example-app"
129 | enable_docker_plugin = true
130 | ami = "ami-abc"
131 | instance_type = "c6a.xlarge"
132 | instance_desired_count = 10
133 | vpc = "vpc-xyz"
134 | subnets = ["subnet-xyz"]
135 | route_53_resolver_address = "10.0.0.2"
136 | }
137 | ```
138 |
139 | **NOTE:** This module does not set up an ALB for accessing applications running on Nomad Clients. This is left up to the user to configure. Check out [`terraform-aws-alb`](https://github.com/terraform-aws-modules/terraform-aws-alb) or [Other Examples](#other-examples) for more information. You may also need to set [`target_group_arns`](./modules/nomad-clients#input_target_group_arns) if Auto-Scaling Groups are used.
140 |
141 | ### Other Examples
142 |
143 | * [Complete Cluster Setup](./examples)
144 |
145 | ## Contributors
146 |
147 | - [Karan Sharma](https://github.com/mr-karan)
148 | - [Chinmay Pai](https://github.com/thunderbottom)
149 |
150 |
151 | ## Contributing
152 |
153 | Contributions to this repository are welcome. Please submit a pull request or open an issue to suggest improvements or report bugs.
154 |
155 |
156 | ## LICENSE
157 |
158 | [LICENSE](./LICENSE)
159 |
--------------------------------------------------------------------------------
/docs/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zerodha/nomad-cluster-setup/6b6e7873bc24662c5a0c13b65f90eeee9d129f18/docs/architecture.png
--------------------------------------------------------------------------------
/examples/complete-cluster-asg/nomad-clients.tf:
--------------------------------------------------------------------------------
1 | # This example sets up 10 Nomad client nodes in a cluster.
2 | # It also includes a load balancer setup to connect to any applications
3 | # running within the cluster.
4 |
5 | module "nomad_client_demo" {
6 | source = "git::https://github.com/zerodha/nomad-cluster-setup//modules/nomad-clients?ref=main"
7 |
8 | client_name = "example-application"
9 | cluster_name = "demo-nomad"
10 | enable_docker_plugin = true
11 | instance_desired_count = 10
12 | instance_type = "c6a.xlarge"
13 | nomad_join_tag_value = "demo"
14 | route_53_resolver_address = "10.0.0.2"
15 |
16 | client_security_groups = [module.nomad_servers.nomad_agent_security_group_id]
17 |
18 | ami = "ami-abc"
19 | subnets = ["subnet-xyz"]
20 | vpc = "vpc-xyz"
21 |
22 | # Set this to allow the ALB to connect to the "demo-nomad"
23 | # client nodes.
24 | #
25 | # This also requires a security group rule allowing the ALB
26 | # to access the `backend_port` specified in the ALB configuration.
27 | # The additional security group can be created and appended to the
28 | # `client_security_groups` list.
29 | target_group_arns = module.demo_client_alb.target_group_arns
30 | }
31 |
32 | # Set up an example load balancer to connect to applications
33 | # running on the "demo-nomad" nomad clients
34 | #
35 | # By default, specifying `target_group_arns` will ensure that
36 | # all the "demo-nomad" client nodes get added to the ALB target
37 | # group. This setup is meant for High Availability (HA),
38 | # where the application or the reverse proxy
39 | # runs on all the nodes on this client
40 | module "demo_client_alb" {
41 | source = "terraform-aws-modules/alb/aws"
42 | version = "~> 8.2.1"
43 |
44 | name = "demo-nomad-client-alb"
45 |
46 | load_balancer_type = "application"
47 | internal = false
48 |
49 | vpc_id = "vpc-xyz"
50 | subnets = ["subnet-abc", "subnet-xyz"]
51 | security_groups = [aws_security_group.demo_client_nomad.id, "sg-12345678"]
52 |
53 | target_groups = [
54 | {
55 | name_prefix = "nomad-"
56 | backend_protocol = "HTTP"
57 | backend_port = 80 # This is where the reverse proxy runs on nomad
58 | target_type = "instance"
59 | health_check = {
60 | enabled = true
61 | interval = 30
62 | path = "/"
63 | port = "traffic-port"
64 | healthy_threshold = 5
65 | unhealthy_threshold = 2
66 | timeout = 10
67 | protocol = "HTTP"
68 | matcher = "200"
69 | }
70 | },
71 | ]
72 |
73 | http_tcp_listeners = [
74 | {
75 | port = 80
76 | protocol = "HTTP"
77 | target_group_index = 0
78 | }
79 | ]
80 |
81 | tags = {
82 | Name = "demo-nomad-client-alb"
83 | }
84 | }
85 |
86 | # Security Group for the demo-nomad-client ALB
87 | resource "aws_security_group" "demo_client_nomad" {
88 | name = "demo-client-nomad-alb"
89 | description = "ALB SG for demo-client-nomad"
90 | vpc_id = "vpc-xyz"
91 |
92 | ingress = []
93 | egress = [
94 | {
95 | description = "Allow all outgoing traffic"
96 | from_port = 0
97 | to_port = 0
98 | protocol = "-1"
99 | cidr_blocks = ["0.0.0.0/0"]
100 | ipv6_cidr_blocks = ["::/0"]
101 | prefix_list_ids = []
102 | security_groups = []
103 | self = false
104 | }
105 | ]
106 |
107 | tags = {
108 | Name = "demo-client-nomad-alb"
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/examples/complete-cluster-asg/nomad-servers.tf:
--------------------------------------------------------------------------------
1 | # This example sets up a 3-node Nomad server cluster.
2 | # Ensure that all the clients that need to connect to this cluster
3 | # use the same `nomad_join_tag_value`
4 |
5 | module "nomad_servers" {
6 | source = "git::https://github.com/zerodha/nomad-cluster-setup//modules/nomad-servers?ref=main"
7 |
8 | instance_count = 3
9 |
10 | cluster_name = "demo-nomad"
11 | create_alb = true
12 | nomad_alb_hostname = "nomad.example.internal"
13 | nomad_join_tag_value = "demo"
14 |
15 | ami = "ami-xyz"
16 | vpc = "vpc-xyz"
17 | subnets = ["subnet-xyz"]
18 |
19 | nomad_gossip_encrypt_key = var.nomad_gossip_encrypt_key
20 | nomad_acl_bootstrap_token = var.nomad_acl_bootstrap_token
21 | }
22 |
--------------------------------------------------------------------------------
/modules/nomad-clients/README.mkdn:
--------------------------------------------------------------------------------
1 |
2 | ## Requirements
3 |
4 | | Name | Version |
5 | |------|---------|
6 | | [terraform](#requirement\_terraform) | >= 1.0 |
7 | | [aws](#requirement\_aws) | >= 4.59 |
8 |
9 | ## Providers
10 |
11 | | Name | Version |
12 | |------|---------|
13 | | [aws](#provider\_aws) | >= 4.59 |
14 | | [cloudinit](#provider\_cloudinit) | n/a |
15 |
16 | ## Modules
17 |
18 | No modules.
19 |
20 | ## Resources
21 |
22 | | Name | Type |
23 | |------|------|
24 | | [aws_autoscaling_group.nomad_client](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/autoscaling_group) | resource |
25 | | [aws_iam_instance_profile.nomad_client](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_instance_profile) | resource |
26 | | [aws_iam_role.nomad_client](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
27 | | [aws_iam_role_policy_attachment.default_iam_policies](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
28 | | [aws_instance.nomad_client](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/instance) | resource |
29 | | [aws_launch_template.nomad_client](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/launch_template) | resource |
30 | | [aws_ec2_instance_type.type](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ec2_instance_type) | data source |
31 | | [aws_iam_policy_document.instance-assume-role-policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
32 | | [cloudinit_config.config](https://registry.terraform.io/providers/hashicorp/cloudinit/latest/docs/data-sources/config) | data source |
33 |
34 | ## Inputs
35 |
36 | | Name | Description | Type | Default | Required |
37 | |------|-------------|------|---------|:--------:|
38 | | [ami](#input\_ami) | Amazon Machine Image (AMI) ID used for deploying Nomad clients | `string` | n/a | yes |
39 | | [autoscale\_metrics](#input\_autoscale\_metrics) | List of autoscaling metrics to monitor for Auto Scaling Group (ASG) instances | `list(string)` |
[| no | 40 | | [aws\_region](#input\_aws\_region) | AWS region to deploy the cluster in | `string` | `"ap-south-1"` | no | 41 | | [client\_name](#input\_client\_name) | Name of the Auto Scaling Group (ASG) nodes deployed as Nomad clients | `string` | n/a | yes | 42 | | [client\_security\_groups](#input\_client\_security\_groups) | List of security groups to attach to the Nomad client nodes | `list(string)` | `[]` | no | 43 | | [client\_type](#input\_client\_type) | Type of client to deploy: 'ec2' or 'asg' | `string` | `"asg"` | no | 44 | | [cluster\_name](#input\_cluster\_name) | Identifier used for naming all resources associated with the cluster | `string` | n/a | yes | 45 | | [cluster\_tags](#input\_cluster\_tags) | Key-value pairs of tags to assign to the EC2 instances spawned by the ASG | `map(string)` | n/a | yes | 46 | | [default\_iam\_policies](#input\_default\_iam\_policies) | List of IAM policies to assign to the Nomad clients | `list(string)` | `[]` | no | 47 | | [ebs\_encryption](#input\_ebs\_encryption) | Enable EBS encryption | `bool` | `true` | no | 48 | | [ebs\_tags](#input\_ebs\_tags) | A map of custom tags to be assigned to the EBS volumes | `map(string)` | `{}` | no | 49 | | [ebs\_volume\_size](#input\_ebs\_volume\_size) | The size of the EBS volume in gigabytes | `number` | `100` | no | 50 | | [ec2\_count](#input\_ec2\_count) | Number of Nomad client EC2 instances to run | `number` | `1` | no | 51 | | [enable\_docker\_plugin](#input\_enable\_docker\_plugin) | Whether to enable the Docker plugin on the client nodes | `bool` | `true` | no | 52 | | [extra\_script](#input\_extra\_script) | Path to custom script to be run as part of cloud-init | `string` | `""` | no | 53 | | [health\_check\_grace\_period](#input\_health\_check\_grace\_period) | The time (in seconds) to allow instances in the Auto Scaling group to warm up before beginning health checks. | `number` | `180` | no | 54 | | [healthcheck\_type](#input\_healthcheck\_type) | Health check type for the ASG, either 'EC2' or 'ELB' | `string` | `"EC2"` | no | 55 | | [http\_put\_response\_hop\_limit](#input\_http\_put\_response\_hop\_limit) | The hop limit for HTTP PUT response for the EC2 instance metadata service | `number` | `2` | no | 56 | | [http\_tokens](#input\_http\_tokens) | Whether the metadata service requires session tokens, also referred to as Instance Metadata Service Version 2 (IMDSv2). Can be 'optional', 'required', or 'no-preference'. | `string` | `"optional"` | no | 57 | | [iam\_instance\_profile](#input\_iam\_instance\_profile) | Name of the existing IAM Instance Profile to use | `string` | `""` | no | 58 | | [iam\_tags](#input\_iam\_tags) | A map of custom tags to be assigned to the IAM role | `map(string)` | `{}` | no | 59 | | [instance\_desired\_count](#input\_instance\_desired\_count) | Desired number of Nomad clients to run | `number` | `1` | no | 60 | | [instance\_max\_count](#input\_instance\_max\_count) | Maximum number of Nomad clients to run | `number` | `3` | no | 61 | | [instance\_min\_count](#input\_instance\_min\_count) | Minimum number of Nomad clients to run | `number` | `0` | no | 62 | | [instance\_type](#input\_instance\_type) | Instance type to use for the Nomad clients | `string` | `"c5a.large"` | no | 63 | | [nomad\_acl\_enable](#input\_nomad\_acl\_enable) | Whether to enable ACLs on the Nomad cluster or not | `bool` | `true` | no | 64 | | [nomad\_client\_exec\_host\_volumes](#input\_nomad\_client\_exec\_host\_volumes) | A map of host volumes to configure for the Nomad client |
"GroupMinSize",
"GroupMaxSize",
"GroupDesiredCapacity",
"GroupInServiceInstances",
"GroupPendingInstances",
"GroupStandbyInstances",
"GroupTerminatingInstances",
"GroupTotalInstances"
]
map(object({| `{}` | no | 65 | | [nomad\_file\_limit](#input\_nomad\_file\_limit) | Value for LimitNOFILE in nomad systemd config | `number` | `900000` | no | 66 | | [nomad\_join\_tag\_value](#input\_nomad\_join\_tag\_value) | The value of the tag used for Nomad server auto-join | `string` | n/a | yes | 67 | | [override\_instance\_types](#input\_override\_instance\_types) | List of instance types to define in the mixed\_instances\_policy block | `list(string)` | `[]` | no | 68 | | [route\_53\_resolver\_address](#input\_route\_53\_resolver\_address) | Route53 resolver address for querying DNS inside exec tasks | `string` | n/a | yes | 69 | | [ssh\_public\_keys](#input\_ssh\_public\_keys) | List of SSH public keys to add to authorized\_keys | `list(string)` | `[]` | no | 70 | | [ssh\_user](#input\_ssh\_user) | The system user to add SSH keys for | `string` | `"ubuntu"` | no | 71 | | [subnets](#input\_subnets) | List of subnets to assign for deploying instances | `list(string)` | `[]` | no | 72 | | [target\_group\_arns](#input\_target\_group\_arns) | List of target groups assigned in the ALB to connect to the ASG | `list(string)` | `[]` | no | 73 | | [vpc](#input\_vpc) | AWS Virtual Private Cloud (VPC) to deploy all resources in | `string` | n/a | yes | 74 | | [wait\_for\_capacity\_timeout](#input\_wait\_for\_capacity\_timeout) | Time for which Terraform waits after ASG creation to see if instances are running. | `string` | `"10m"` | no | 75 | 76 | ## Outputs 77 | 78 | | Name | Description | 79 | |------|-------------| 80 | | [nomad\_client\_asg](#output\_nomad\_client\_asg) | Autoscaling group for the Nomad client nodes | 81 | | [nomad\_client\_ec2](#output\_nomad\_client\_ec2) | EC2 client ID for the Nomad client node | 82 | | [nomad\_client\_ec2\_availability\_zones](#output\_nomad\_client\_ec2\_availability\_zones) | Availability zones for the Nomad client nodes | 83 | | [nomad\_client\_iam\_profile](#output\_nomad\_client\_iam\_profile) | IAM Profile created for Nomad Client | 84 | | [nomad\_client\_iam\_role\_arn](#output\_nomad\_client\_iam\_role\_arn) | ARN of the IAM role for the Nomad client nodes | 85 | | [nomad\_client\_launch\_template\_id](#output\_nomad\_client\_launch\_template\_id) | ID of the launch template for the Nomad client nodes | 86 | 87 | -------------------------------------------------------------------------------- /modules/nomad-clients/asg.tf: -------------------------------------------------------------------------------- 1 | resource "aws_autoscaling_group" "nomad_client" { 2 | count = var.client_type == "asg" ? 
1 : 0 3 | 4 | name = "${var.cluster_name}-${var.client_name}" 5 | max_size = var.instance_max_count 6 | min_size = var.instance_min_count 7 | desired_capacity = var.instance_desired_count 8 | health_check_grace_period = var.health_check_grace_period 9 | health_check_type = var.healthcheck_type 10 | vpc_zone_identifier = var.subnets 11 | wait_for_capacity_timeout = var.wait_for_capacity_timeout 12 | enabled_metrics = var.autoscale_metrics 13 | termination_policies = ["OldestInstance"] 14 | 15 | target_group_arns = var.target_group_arns 16 | 17 | mixed_instances_policy { 18 | launch_template { 19 | launch_template_specification { 20 | launch_template_id = aws_launch_template.nomad_client[0].id 21 | version = "$Latest" 22 | } 23 | 24 | dynamic "override" { 25 | for_each = var.override_instance_types 26 | content { 27 | instance_type = override.value 28 | } 29 | } 30 | } 31 | } 32 | 33 | dynamic "tag" { 34 | for_each = var.cluster_tags 35 | content { 36 | key = tag.key 37 | value = tag.value 38 | propagate_at_launch = true 39 | } 40 | } 41 | 42 | tag { 43 | key = "role" 44 | value = "nomad-client" 45 | propagate_at_launch = true 46 | } 47 | 48 | tag { 49 | key = "nomad_client" 50 | value = var.client_name 51 | propagate_at_launch = true 52 | } 53 | 54 | tag { 55 | key = "nomad_ec2_join" 56 | value = var.nomad_join_tag_value 57 | propagate_at_launch = true 58 | } 59 | 60 | timeouts { 61 | delete = "15m" 62 | } 63 | 64 | lifecycle { 65 | create_before_destroy = true 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /modules/nomad-clients/data.tf: -------------------------------------------------------------------------------- 1 | # Set to ensure instance type exists 2 | data "aws_ec2_instance_type" "type" { 3 | instance_type = var.instance_type 4 | } 5 | 6 | data "cloudinit_config" "config" { 7 | gzip = false 8 | base64_encode = false 9 | 10 | part { 11 | filename = "setup_client.sh" 12 | content_type = "text/x-shellscript" 13 | content = local.setup_client 14 | } 15 | 16 | part { 17 | filename = "extra_script.sh" 18 | content_type = "text/x-shellscript" 19 | merge_type = "str(append)" 20 | content = var.extra_script != "" ? file(var.extra_script) : "#!/bin/bash" 21 | } 22 | } -------------------------------------------------------------------------------- /modules/nomad-clients/ec2.tf: -------------------------------------------------------------------------------- 1 | resource "aws_instance" "nomad_client" { 2 | count = var.client_type == "ec2" ? var.ec2_count : 0 3 | 4 | ami = var.ami 5 | instance_type = data.aws_ec2_instance_type.type.instance_type 6 | iam_instance_profile = var.iam_instance_profile == "" ? 
aws_iam_instance_profile.nomad_client[0].name : var.iam_instance_profile 7 | disable_api_termination = false 8 | subnet_id = element(var.subnets, count.index) 9 | vpc_security_group_ids = var.client_security_groups 10 | 11 | root_block_device { 12 | encrypted = var.ebs_encryption 13 | delete_on_termination = true 14 | volume_size = var.ebs_volume_size 15 | volume_type = "gp3" 16 | tags = merge( 17 | { 18 | Name = "${var.cluster_name}-client-${var.client_name}-root-${count.index + 1}" 19 | cluster = var.cluster_name 20 | }, 21 | var.ebs_tags 22 | ) 23 | } 24 | 25 | metadata_options { 26 | http_tokens = var.http_tokens 27 | http_endpoint = "enabled" 28 | http_put_response_hop_limit = var.http_put_response_hop_limit 29 | instance_metadata_tags = "enabled" 30 | } 31 | 32 | user_data_base64 = base64encode(data.cloudinit_config.config.rendered) 33 | 34 | tags = { 35 | Name = "${var.client_name}-${count.index + 1}" 36 | role = "nomad-client" 37 | nomad_client = var.client_name 38 | nomad_ec2_join = var.nomad_join_tag_value 39 | } 40 | 41 | lifecycle { 42 | create_before_destroy = true 43 | ignore_changes = [ami, user_data, user_data_base64] 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /modules/nomad-clients/iam.tf: -------------------------------------------------------------------------------- 1 | # IAM roles, policies that apply to Nomad client role. 2 | 3 | # This assume role policy is applied to all EC2 instances. 4 | data "aws_iam_policy_document" "instance-assume-role-policy" { 5 | statement { 6 | actions = ["sts:AssumeRole"] 7 | 8 | principals { 9 | type = "Service" 10 | identifiers = ["ec2.amazonaws.com"] 11 | } 12 | } 13 | } 14 | 15 | resource "aws_iam_role" "nomad_client" { 16 | count = var.iam_instance_profile == "" ? 1 : 0 17 | name = "${var.cluster_name}-${var.client_name}" 18 | tags = var.iam_tags 19 | assume_role_policy = data.aws_iam_policy_document.instance-assume-role-policy.json 20 | } 21 | 22 | resource "aws_iam_role_policy_attachment" "default_iam_policies" { 23 | for_each = var.iam_instance_profile == "" ? toset(var.default_iam_policies) : [] 24 | role = aws_iam_role.nomad_client[0].name 25 | policy_arn = each.key 26 | } 27 | 28 | resource "aws_iam_instance_profile" "nomad_client" { 29 | count = var.iam_instance_profile == "" ? 1 : 0 30 | name = "${var.cluster_name}-${var.client_name}" 31 | role = aws_iam_role.nomad_client[0].id 32 | } 33 | -------------------------------------------------------------------------------- /modules/nomad-clients/launch_template.tf: -------------------------------------------------------------------------------- 1 | resource "aws_launch_template" "nomad_client" { 2 | count = var.client_type == "asg" ? 
1 : 0 3 | description = "Launch template for nomad client ${var.client_name} in ${var.cluster_name} cluster" 4 | disable_api_termination = false 5 | image_id = var.ami 6 | instance_type = data.aws_ec2_instance_type.type.instance_type 7 | name = "${var.cluster_name}-client-${var.client_name}" 8 | tags = {} 9 | vpc_security_group_ids = concat(var.client_security_groups) 10 | update_default_version = true 11 | 12 | user_data = base64encode(data.cloudinit_config.config.rendered) 13 | 14 | metadata_options { 15 | http_tokens = var.http_tokens 16 | http_endpoint = "enabled" 17 | http_put_response_hop_limit = var.http_put_response_hop_limit 18 | instance_metadata_tags = "enabled" 19 | } 20 | 21 | block_device_mappings { 22 | device_name = "/dev/sda1" 23 | 24 | ebs { 25 | encrypted = var.ebs_encryption 26 | delete_on_termination = true 27 | volume_size = var.ebs_volume_size 28 | volume_type = "gp3" 29 | } 30 | } 31 | iam_instance_profile { 32 | name = var.iam_instance_profile == "" ? aws_iam_instance_profile.nomad_client[0].name : var.iam_instance_profile 33 | } 34 | 35 | monitoring { 36 | enabled = true 37 | } 38 | 39 | tag_specifications { 40 | resource_type = "instance" 41 | 42 | tags = { 43 | Name = var.client_name 44 | } 45 | } 46 | 47 | tag_specifications { 48 | resource_type = "volume" 49 | tags = merge( 50 | { 51 | Name = "${var.cluster_name}-client-${var.client_name}" 52 | }, 53 | var.ebs_tags 54 | ) 55 | } 56 | 57 | lifecycle { 58 | create_before_destroy = true 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /modules/nomad-clients/locals.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | setup_client = templatefile("${path.module}/scripts/setup_client.tftpl.sh", { 3 | aws_region = var.aws_region 4 | route_53_resolver_address = var.route_53_resolver_address 5 | enable_docker_plugin = var.enable_docker_plugin 6 | nomad_join_tag_key = "nomad_ec2_join" 7 | nomad_join_tag_value = var.nomad_join_tag_value 8 | nomad_file_limit = var.nomad_file_limit 9 | nomad_client_exec_host_volumes = var.nomad_client_exec_host_volumes 10 | ssh_user = var.ssh_user 11 | ssh_public_keys = var.ssh_public_keys 12 | nomad_client_cfg = templatefile("${path.module}/templates/nomad.tftpl", { 13 | nomad_dc = var.cluster_name 14 | nomad_acl_enable = var.nomad_acl_enable 15 | }) 16 | }) 17 | } 18 | -------------------------------------------------------------------------------- /modules/nomad-clients/outputs.tf: -------------------------------------------------------------------------------- 1 | output "nomad_client_iam_profile" { 2 | description = "IAM Profile created for Nomad Client" 3 | value = var.iam_instance_profile == "" ? aws_iam_instance_profile.nomad_client[0].id : var.iam_instance_profile 4 | } 5 | 6 | output "nomad_client_asg" { 7 | description = "Autoscaling group for the Nomad client nodes" 8 | value = var.client_type == "asg" ? aws_autoscaling_group.nomad_client[0].name : "" 9 | } 10 | 11 | output "nomad_client_ec2" { 12 | description = "EC2 client ID for the Nomad client node" 13 | value = var.client_type == "ec2" ? aws_instance.nomad_client[*].id : [] 14 | } 15 | 16 | output "nomad_client_ec2_availability_zones" { 17 | description = "Availability zones for the Nomad client nodes" 18 | value = var.client_type == "ec2" ? 
aws_instance.nomad_client[*].availability_zone : [] 19 | } 20 | 21 | output "nomad_client_iam_role_arn" { 22 | description = "ARN of the IAM role for the Nomad client nodes" 23 | value = var.iam_instance_profile == "" ? aws_iam_role.nomad_client[0].arn : "" 24 | } 25 | 26 | output "nomad_client_launch_template_id" { 27 | description = "ID of the launch template for the Nomad client nodes" 28 | value = var.client_type == "asg" ? aws_launch_template.nomad_client[0].id : "" 29 | } 30 | -------------------------------------------------------------------------------- /modules/nomad-clients/scripts/setup_client.tftpl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Script to bootstrap Nomad as client node. 4 | 5 | # This script performs the following tasks: 6 | # - Prepares DNS configuration for exec tasks 7 | # - Renders the Nomad client configuration 8 | # - Optionally, adds Docker configuration to Nomad if the 'enable_docker_plugin' variable is set to true 9 | # - Adds SSH public keys to authorized_keys if provided 10 | # - Starts the Nomad service 11 | 12 | set -Eeuo pipefail 13 | 14 | declare -r SCRIPT_NAME="$(basename "$0")" 15 | declare -ag AWS_TAGS=() 16 | 17 | # Send the log output from this script to user-data.log, syslog, and the console. 18 | exec > >(tee /var/log/user-data.log | logger -t user-data -s 2>/dev/console) 2>&1 19 | 20 | # Wrapper to log any outputs from the script to stderr 21 | function log { 22 | declare -r LVL="$1" 23 | declare -r MSG="$2" 24 | declare -r TS=$(date +"%Y-%m-%d %H:%M:%S") 25 | echo >&2 -e "$TS [$LVL] [$SCRIPT_NAME] $MSG" 26 | } 27 | 28 | # Stores AWS tags to use as nomad client meta 29 | # Requires `nomad-cluster` tag to be defined 30 | # within AWS instance tags 31 | store_tags() { 32 | max_attempts=3 33 | count=0 34 | 35 | while true; do 36 | TOKEN=$(curl -s --connect-timeout 1 --retry 3 --retry-delay 3 \ 37 | -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") 38 | 39 | TAGS=$(curl -s --connect-timeout 1 --retry 3 --retry-delay 3 \ 40 | -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/tags/instance) 41 | 42 | # If there's no 'nomad-cluster' found in tags, retry. 43 | if [[ "$${TAGS}" != *"nomad-cluster"* ]]; then 44 | sleep 1 45 | 46 | count=$((count + 1)) 47 | 48 | # If max retries still didn't get the data, fail. 49 | if [[ $count -eq $max_attempts ]]; then 50 | log "ERROR" "aborting as max attempts reached" 51 | exit 1 52 | fi 53 | continue 54 | fi 55 | 56 | readarray -t AWS_TAGS <<<"$TAGS" 57 | break 58 | 59 | done 60 | } 61 | 62 | # Sets hostname for the system 63 | # Replaces `ip` in the hostname with the AWS instance `Name` tag 64 | set_hostname() { 65 | for t in "$${AWS_TAGS[@]}"; do 66 | # For servers we'll use the NAME tag of the EC2 instance. 67 | if [ "$t" == "Name" ]; then 68 | TAG=$(curl -s --retry 3 --retry-delay 3 --connect-timeout 3 \ 69 | -H "Accept: application/json" -H "X-aws-ec2-metadata-token: $TOKEN" "http://169.254.169.254/latest/meta-data/tags/instance/$t") 70 | 71 | # The original hostname is like `ip-10-x-y-z` 72 | CURR_HOSTNAME=$(sudo hostnamectl --static) 73 | # Replace `ip` with tag value. 
74 | HOSTNAME="$${CURR_HOSTNAME//ip/$TAG}" 75 | log "INFO" "setting hostname as $HOSTNAME" 76 | sudo hostnamectl set-hostname "$HOSTNAME" 77 | fi 78 | done 79 | } 80 | 81 | # Ensures the resolv.conf within nomad `exec` jobs 82 | # can access other machines 83 | # see: https://github.com/hashicorp/nomad/issues/11033 84 | prepare_dns_config() { 85 | cat <
path = string
read_only = bool
}))
[| no | 44 | | [aws\_region](#input\_aws\_region) | AWS region to deploy the Nomad cluster in | `string` | `"ap-south-1"` | no | 45 | | [cluster\_name](#input\_cluster\_name) | Identifier for the cluster, used as a prefix for all resources | `string` | n/a | yes | 46 | | [cluster\_tags](#input\_cluster\_tags) | Map of tag key-value pairs to assign to the EC2 instances spawned by the ASG | `map(string)` | n/a | yes | 47 | | [create\_alb](#input\_create\_alb) | Whether to create an ALB for the Nomad servers or not | `bool` | `false` | no | 48 | | [default\_iam\_policies](#input\_default\_iam\_policies) | List of IAM policy ARNs to attach to the Nomad server instances | `list(string)` | `[]` | no | 49 | | [default\_security\_groups](#input\_default\_security\_groups) | List of security group IDs to assign to the Nomad server instances | `list(string)` | `[]` | no | 50 | | [ebs\_encryption](#input\_ebs\_encryption) | Enable EBS encryption | `bool` | `true` | no | 51 | | [ebs\_tags](#input\_ebs\_tags) | A map of additional tags to apply to the EBS volumes | `map(string)` | `{}` | no | 52 | | [extra\_script](#input\_extra\_script) | Path to custom script to be run as part of cloud-init | `string` | `""` | no | 53 | | [http\_put\_response\_hop\_limit](#input\_http\_put\_response\_hop\_limit) | The hop limit for HTTP PUT response for the EC2 instance metadata service | `number` | `2` | no | 54 | | [http\_tokens](#input\_http\_tokens) | Whether the metadata service requires session tokens, also referred to as Instance Metadata Service Version 2 (IMDSv2). Can be 'optional', 'required', or 'no-preference'. | `string` | `"optional"` | no | 55 | | [iam\_tags](#input\_iam\_tags) | A map of custom tags to be assigned to the IAM role | `map(string)` | `{}` | no | 56 | | [instance\_count](#input\_instance\_count) | Number of Nomad server instances to run | `number` | `3` | no | 57 | | [instance\_type](#input\_instance\_type) | Instance type to use for the Nomad server instances | `string` | `"c5a.large"` | no | 58 | | [nomad\_acl\_bootstrap\_token](#input\_nomad\_acl\_bootstrap\_token) | Nomad ACL bootstrap token to use for bootstrapping ACLs | `string` | `""` | no | 59 | | [nomad\_acl\_enable](#input\_nomad\_acl\_enable) | Whether to enable ACLs on the Nomad cluster or not | `bool` | `true` | no | 60 | | [nomad\_alb\_hostname](#input\_nomad\_alb\_hostname) | ALB hostname to use for accessing the Nomad web UI | `string` | `"nomad.example.internal"` | no | 61 | | [nomad\_bootstrap\_expect](#input\_nomad\_bootstrap\_expect) | Number of instances expected to bootstrap a new Nomad cluster | `number` | `3` | no | 62 | | [nomad\_file\_limit](#input\_nomad\_file\_limit) | Value for LimitNOFILE in nomad systemd config | `number` | `900000` | no | 63 | | [nomad\_gossip\_encrypt\_key](#input\_nomad\_gossip\_encrypt\_key) | Gossip encryption key to use for Nomad servers | `string` | n/a | yes | 64 | | [nomad\_join\_tag\_value](#input\_nomad\_join\_tag\_value) | Value of the tag used for Nomad server auto-join | `string` | n/a | yes | 65 | | [nomad\_server\_incoming\_ips](#input\_nomad\_server\_incoming\_ips) | List of IPs to allow incoming connections from to Nomad server ALBs | `list(string)` | `[]` | no | 66 | | [nomad\_server\_incoming\_security\_groups](#input\_nomad\_server\_incoming\_security\_groups) | List of Security Groups to allow incoming connections from to Nomad server ALBs | `list(string)` | `[]` | no | 67 | | [subnets](#input\_subnets) | List of subnets to assign for deploying instances | 
`list(string)` | `[]` | no | 68 | | [vpc](#input\_vpc) | ID of the AWS VPC to deploy all the resources in | `string` | n/a | yes | 69 | 70 | ## Outputs 71 | 72 | | Name | Description | 73 | |------|-------------| 74 | | [alb\_dns\_name](#output\_alb\_dns\_name) | The DNS name of the ALB | 75 | | [alb\_security\_group\_id](#output\_alb\_security\_group\_id) | The ID of the ALB security group | 76 | | [nomad\_agent\_security\_group\_id](#output\_nomad\_agent\_security\_group\_id) | The ID of the Nomad agent security group | 77 | | [nomad\_server\_asg\_arn](#output\_nomad\_server\_asg\_arn) | The ARN of the Nomad server Auto Scaling Group | 78 | | [nomad\_server\_asg\_name](#output\_nomad\_server\_asg\_name) | The name of the Nomad server Auto Scaling Group | 79 | | [nomad\_server\_iam\_role\_arn](#output\_nomad\_server\_iam\_role\_arn) | The ARN of the Nomad server IAM role | 80 | | [nomad\_server\_launch\_template\_id](#output\_nomad\_server\_launch\_template\_id) | The ID of the Nomad server launch template | 81 | -------------------------------------------------------------------------------- /modules/nomad-servers/alb.tf: -------------------------------------------------------------------------------- 1 | module "alb" { 2 | count = var.create_alb ? 1 : 0 3 | 4 | source = "terraform-aws-modules/alb/aws" 5 | version = "~> 6.0" 6 | 7 | name = "${var.cluster_name}-web" 8 | 9 | load_balancer_type = "application" 10 | internal = true 11 | 12 | vpc_id = var.vpc 13 | subnets = var.subnets 14 | security_groups = concat([aws_security_group.alb[0].id], var.nomad_server_incoming_security_groups) 15 | 16 | target_groups = [ 17 | { 18 | name_prefix = "nomad-" 19 | backend_protocol = "HTTP" 20 | backend_port = 4646 21 | target_type = "instance" 22 | health_check = { 23 | enabled = true 24 | interval = 30 25 | path = "/ui/" 26 | port = "traffic-port" 27 | healthy_threshold = 5 28 | unhealthy_threshold = 2 29 | timeout = 10 30 | protocol = "HTTP" 31 | matcher = "200" 32 | } 33 | } 34 | ] 35 | 36 | https_listeners = var.alb_certificate_arn == "" ? [] : [ 37 | { 38 | port = 443 39 | protocol = "HTTPS" 40 | certificate_arn = var.alb_certificate_arn 41 | target_group_index = 0 42 | 43 | action_type = "fixed-response" 44 | fixed_response = { 45 | content_type = "text/plain" 46 | status_code = "404" 47 | } 48 | } 49 | ] 50 | 51 | https_listener_rules = var.alb_certificate_arn == "" ? [] : [ 52 | { 53 | https_listener_index = 0 54 | actions = [{ 55 | type = "forward" 56 | target_group_index = 0 57 | }] 58 | conditions = [{ 59 | host_headers = ["${var.nomad_alb_hostname}"] 60 | }] 61 | } 62 | ] 63 | 64 | http_tcp_listeners = var.alb_certificate_arn != "" ? [] : [ 65 | { 66 | port = 80 67 | protocol = "HTTP" 68 | } 69 | ] 70 | 71 | http_tcp_listener_rules = var.alb_certificate_arn != "" ? [] : [ 72 | { 73 | http_tcp_listener_index = 0 74 | actions = [{ 75 | type = "forward" 76 | target_group_index = 0 77 | }] 78 | 79 | conditions = [{ 80 | host_headers = ["${var.nomad_alb_hostname}"] 81 | }] 82 | } 83 | ] 84 | 85 | tags = { 86 | Name = "${var.cluster_name}-web" 87 | } 88 | } 89 | 90 | resource "aws_security_group" "alb" { 91 | count = var.create_alb ? 1 : 0 92 | 93 | name = "${var.cluster_name}-alb" 94 | description = "Security Group for ${var.cluster_name} ALB" 95 | ingress = [ 96 | { 97 | description = "Allow access to Nomad ALB" 98 | from_port = var.alb_certificate_arn == "" ? 80 : 443 99 | to_port = var.alb_certificate_arn == "" ? 
80 : 443 100 | cidr_blocks = var.nomad_server_incoming_ips 101 | ipv6_cidr_blocks = [] 102 | security_groups = [] 103 | prefix_list_ids = [] 104 | self = false 105 | protocol = "tcp" 106 | } 107 | ] 108 | 109 | egress = [ 110 | { 111 | description = "Allow all outgoing traffic" 112 | from_port = 0 113 | to_port = 0 114 | protocol = "-1" 115 | cidr_blocks = ["0.0.0.0/0"] 116 | ipv6_cidr_blocks = ["::/0"] 117 | prefix_list_ids = [] 118 | security_groups = [] 119 | self = false 120 | } 121 | ] 122 | 123 | vpc_id = var.vpc 124 | tags = { 125 | Name = "${var.cluster_name}-alb" 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /modules/nomad-servers/asg.tf: -------------------------------------------------------------------------------- 1 | resource "aws_autoscaling_group" "nomad_server" { 2 | launch_template { 3 | id = aws_launch_template.nomad_server.id 4 | version = "$Latest" 5 | } 6 | 7 | name = "${var.cluster_name}-server" 8 | max_size = var.instance_count 9 | min_size = var.instance_count 10 | desired_capacity = var.instance_count 11 | health_check_grace_period = 60 12 | health_check_type = "EC2" 13 | vpc_zone_identifier = var.subnets 14 | wait_for_capacity_timeout = "10m" 15 | enabled_metrics = var.autoscale_metrics 16 | termination_policies = ["OldestInstance"] 17 | 18 | target_group_arns = var.create_alb ? module.alb[0].target_group_arns : [] 19 | 20 | dynamic "tag" { 21 | for_each = var.cluster_tags 22 | content { 23 | key = tag.key 24 | value = tag.value 25 | propagate_at_launch = true 26 | } 27 | } 28 | 29 | tag { 30 | key = "role" 31 | value = "nomad-server" 32 | propagate_at_launch = true 33 | } 34 | 35 | tag { 36 | key = "nomad_ec2_join" 37 | value = var.nomad_join_tag_value 38 | propagate_at_launch = true 39 | } 40 | 41 | timeouts { 42 | delete = "15m" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /modules/nomad-servers/data.tf: -------------------------------------------------------------------------------- 1 | # Set to ensure instance type exists 2 | data "aws_ec2_instance_type" "type" { 3 | instance_type = var.instance_type 4 | } 5 | 6 | data "cloudinit_config" "config" { 7 | gzip = false 8 | base64_encode = false 9 | 10 | part { 11 | filename = "setup_server.sh" 12 | content_type = "text/x-shellscript" 13 | content = local.setup_server 14 | } 15 | 16 | part { 17 | filename = "extra_script.sh" 18 | content_type = "text/x-shellscript" 19 | merge_type = "str(append)" 20 | content = var.extra_script != "" ? file(var.extra_script) : "#!/bin/bash" 21 | } 22 | } -------------------------------------------------------------------------------- /modules/nomad-servers/firewall.tf: -------------------------------------------------------------------------------- 1 | 2 | # --- 3 | # Common security group for both Nomad server and clients. 4 | # --- 5 | 6 | resource "aws_security_group" "nomad_agent" { 7 | name = "${var.cluster_name}-agent" 8 | description = "Security Group for Nomad agents - ${title(var.cluster_name)}" 9 | # description = "Nomad Agent Security Group for cluster ${title(var.cluster_name)}" 10 | vpc_id = var.vpc 11 | 12 | ingress = flatten([ 13 | var.create_alb ? 
[ 14 | { 15 | description = "Allow ALB to access Nomad" 16 | from_port = 4646 17 | to_port = 4646 18 | cidr_blocks = [] 19 | ipv6_cidr_blocks = [] 20 | security_groups = [aws_security_group.alb[0].id] 21 | prefix_list_ids = [] 22 | self = false 23 | protocol = "tcp" 24 | } 25 | ] : [], 26 | [ 27 | { 28 | description = "Allow nomad agents to talk to each other on all ports" 29 | from_port = 0 30 | to_port = 0 31 | cidr_blocks = [] 32 | ipv6_cidr_blocks = [] 33 | security_groups = [] 34 | prefix_list_ids = [] 35 | self = true 36 | protocol = "-1" 37 | } 38 | ] 39 | ]) 40 | 41 | egress = [ 42 | { 43 | description = "Allow all outgoing traffic" 44 | from_port = 0 45 | to_port = 0 46 | protocol = "-1" 47 | cidr_blocks = ["0.0.0.0/0"] 48 | ipv6_cidr_blocks = ["::/0"] 49 | prefix_list_ids = [] 50 | security_groups = [] 51 | self = false 52 | } 53 | ] 54 | 55 | tags = { 56 | Name = "${var.cluster_name}-agent" 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /modules/nomad-servers/iam.tf: -------------------------------------------------------------------------------- 1 | # IAM roles, policies that apply to Nomad server role. 2 | 3 | # This assume role policy is applied to all EC2 instances. 4 | data "aws_iam_policy_document" "instance-assume-role-policy" { 5 | statement { 6 | actions = ["sts:AssumeRole"] 7 | 8 | principals { 9 | type = "Service" 10 | identifiers = ["ec2.amazonaws.com"] 11 | } 12 | } 13 | } 14 | 15 | resource "aws_iam_role" "nomad_server" { 16 | name = "${var.cluster_name}-server" 17 | tags = var.iam_tags 18 | assume_role_policy = data.aws_iam_policy_document.instance-assume-role-policy.json 19 | } 20 | 21 | resource "aws_iam_role_policy_attachment" "default_iam_policies" { 22 | for_each = toset(var.default_iam_policies) 23 | role = aws_iam_role.nomad_server.name 24 | policy_arn = each.key 25 | } 26 | 27 | resource "aws_iam_instance_profile" "nomad_server" { 28 | name = "${var.cluster_name}-server" 29 | role = aws_iam_role.nomad_server.id 30 | } 31 | -------------------------------------------------------------------------------- /modules/nomad-servers/launch_template.tf: -------------------------------------------------------------------------------- 1 | resource "aws_launch_template" "nomad_server" { 2 | description = "Launch template for nomad servers in ${var.cluster_name} cluster" 3 | disable_api_termination = false 4 | image_id = var.ami 5 | instance_type = data.aws_ec2_instance_type.type.instance_type 6 | name = "${var.cluster_name}-server" 7 | tags = {} 8 | vpc_security_group_ids = concat(var.default_security_groups, [aws_security_group.nomad_agent.id]) 9 | update_default_version = true 10 | 11 | user_data = base64encode(data.cloudinit_config.config.rendered) 12 | 13 | metadata_options { 14 | http_tokens = var.http_tokens 15 | http_endpoint = "enabled" 16 | http_put_response_hop_limit = var.http_put_response_hop_limit 17 | instance_metadata_tags = "enabled" 18 | } 19 | 20 | block_device_mappings { 21 | device_name = "/dev/sda1" 22 | 23 | ebs { 24 | encrypted = var.ebs_encryption 25 | delete_on_termination = true 26 | volume_size = 100 27 | volume_type = "gp3" 28 | } 29 | } 30 | iam_instance_profile { 31 | name = aws_iam_instance_profile.nomad_server.name 32 | } 33 | 34 | monitoring { 35 | enabled = true 36 | } 37 | 38 | tag_specifications { 39 | resource_type = "instance" 40 | 41 | tags = { 42 | Name = "${var.cluster_name}-server" 43 | } 44 | } 45 | 46 | tag_specifications { 47 | resource_type = "volume" 48 | tags = merge( 
49 | { 50 | Name = "${var.cluster_name}-server" 51 | }, 52 | var.ebs_tags 53 | ) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /modules/nomad-servers/locals.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | setup_server = templatefile("${path.module}/scripts/setup_server.tftpl.sh", { 3 | nomad_acl_bootstrap_token = var.nomad_acl_bootstrap_token 4 | nomad_acl_enable = var.nomad_acl_enable 5 | nomad_file_limit = var.nomad_file_limit 6 | nomad_server_cfg = templatefile("${path.module}/templates/nomad.tftpl", { 7 | nomad_dc = var.cluster_name 8 | aws_region = var.aws_region 9 | nomad_bootstrap_expect = var.nomad_bootstrap_expect 10 | nomad_gossip_encrypt_key = var.nomad_gossip_encrypt_key 11 | nomad_join_tag_key = "nomad_ec2_join" 12 | nomad_join_tag_value = var.nomad_join_tag_value 13 | nomad_acl_enable = var.nomad_acl_enable 14 | }) 15 | nomad_file_limit = var.nomad_file_limit 16 | }) 17 | } -------------------------------------------------------------------------------- /modules/nomad-servers/outputs.tf: -------------------------------------------------------------------------------- 1 | ### Outputs 2 | 3 | output "alb_dns_name" { 4 | description = "The DNS name of the ALB" 5 | value = var.create_alb ? module.alb[0].lb_dns_name : "" 6 | } 7 | 8 | output "alb_security_group_id" { 9 | description = "The ID of the ALB security group" 10 | value = var.create_alb ? aws_security_group.alb[0].id : "" 11 | } 12 | 13 | output "nomad_server_asg_name" { 14 | description = "The name of the Nomad server Auto Scaling Group" 15 | value = aws_autoscaling_group.nomad_server.name 16 | } 17 | 18 | output "nomad_server_asg_arn" { 19 | description = "The ARN of the Nomad server Auto Scaling Group" 20 | value = aws_autoscaling_group.nomad_server.arn 21 | } 22 | 23 | output "nomad_server_launch_template_id" { 24 | description = "The ID of the Nomad server launch template" 25 | value = aws_launch_template.nomad_server.id 26 | } 27 | 28 | output "nomad_agent_security_group_id" { 29 | description = "The ID of the Nomad agent security group" 30 | value = aws_security_group.nomad_agent.id 31 | } 32 | 33 | output "nomad_server_iam_role_arn" { 34 | description = "The ARN of the Nomad server IAM role" 35 | value = aws_iam_role.nomad_server.arn 36 | } 37 | -------------------------------------------------------------------------------- /modules/nomad-servers/scripts/setup_server.tftpl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Script to configure and bootstrap Nomad server nodes in an AWS Auto Scaling group. 4 | # 5 | # This script performs the following steps: 6 | # - Configures the Nomad agent as a server on the EC2 instances. 7 | # - Bootstraps the Nomad ACL system with a pre-configured token on the first server. 8 | # - Joins the Nomad server nodes to form a cluster. 9 | # - Starts the Nomad agent service. 10 | # 11 | # This script should be run on each Nomad server node as part of the EC2 instance launch process. 12 | # 13 | 14 | set -Eeuo pipefail 15 | 16 | declare -r SCRIPT_NAME="$(basename "$0")" 17 | 18 | declare -ag AWS_TAGS=() 19 | 20 | # Send the log output from this script to user-data.log, syslog, and the console. 
21 | exec > >(tee /var/log/user-data.log | logger -t user-data -s 2>/dev/console) 2>&1 22 | 23 | # Wrapper to log any outputs from the script to stderr 24 | function log { 25 | declare -r LVL="$1" 26 | declare -r MSG="$2" 27 | declare -r TS=$(date +"%Y-%m-%d %H:%M:%S") 28 | echo >&2 -e "$TS [$LVL] [$SCRIPT_NAME] $MSG" 29 | } 30 | 31 | # Stores AWS tags to use as nomad client meta 32 | # Requires `nomad-cluster` tag to be defined 33 | # within AWS instance tags 34 | store_tags() { 35 | max_attempts=3 36 | count=0 37 | 38 | while true; do 39 | TOKEN=$(curl -s --connect-timeout 1 --retry 3 --retry-delay 3 \ 40 | -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") 41 | 42 | TAGS=$(curl -s --connect-timeout 1 --retry 3 --retry-delay 3 \ 43 | -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/tags/instance) 44 | 45 | # If there's no 'nomad-cluster' found in tags, retry. 46 | if [[ "$${TAGS}" != *"nomad-cluster"* ]]; then 47 | sleep 1 48 | 49 | count=$((count + 1)) 50 | 51 | # If max retries still didn't get the data, fail. 52 | if [[ $count -eq $max_attempts ]]; then 53 | log "ERROR" "aborting as max attempts reached" 54 | exit 1 55 | fi 56 | continue 57 | fi 58 | 59 | readarray -t AWS_TAGS <<<"$TAGS" 60 | break 61 | 62 | done 63 | } 64 | 65 | # Sets hostname for the system 66 | # Replaces `ip` in the hostname with the AWS instance `Name` tag 67 | set_hostname() { 68 | for t in "$${AWS_TAGS[@]}"; do 69 | # For servers we'll use the NAME tag of the EC2 instance. 70 | if [ "$t" == "Name" ]; then 71 | TAG=$(curl -s --retry 3 --retry-delay 3 --connect-timeout 3 \ 72 | -H "Accept: application/json" -H "X-aws-ec2-metadata-token: $TOKEN" "http://169.254.169.254/latest/meta-data/tags/instance/$t") 73 | 74 | # The original hostname is like `ip-10-x-y-z` 75 | CURR_HOSTNAME=$(sudo hostnamectl --static) 76 | # Replace `ip` with tag value. 77 | HOSTNAME="$${CURR_HOSTNAME//ip/$TAG}" 78 | log "INFO" "setting hostname as $HOSTNAME" 79 | sudo hostnamectl set-hostname "$HOSTNAME" 80 | fi 81 | done 82 | } 83 | 84 | # Increase the file limit 85 | modify_nomad_systemd_config() { 86 | if [ ${nomad_file_limit} > 65536 ]; then 87 | sudo sed -i '/^LimitNOFILE/s/=.*$/=${nomad_file_limit}/' /lib/systemd/system/nomad.service 88 | fi 89 | } 90 | 91 | # Enables nomad systemd service 92 | start_nomad() { 93 | sudo systemctl daemon-reload 94 | sudo systemctl enable --now nomad 95 | } 96 | 97 | # Restarts nomad systemd service 98 | restart_nomad() { 99 | sudo systemctl restart nomad 100 | } 101 | 102 | # Sets up `/etc/nomad.d` 103 | prepare_nomad_server_config() { 104 | cat <
"GroupMinSize",
"GroupMaxSize",
"GroupDesiredCapacity",
"GroupInServiceInstances",
"GroupPendingInstances",
"GroupStandbyInstances",
"GroupTerminatingInstances",
"GroupTotalInstances"
]