├── LICENSE
├── iam.tf
├── main.tf
├── auto-scaling.tf
├── files
│   └── userdata.template
├── security-groups.tf
└── README.md

/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 Tom Hill

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/iam.tf:
--------------------------------------------------------------------------------
# Autoscaling lifecycle hook role
# Allows lifecycle hooks to add messages to the SQS queue
resource "aws_iam_role" "lifecycle_role" {

    name = "${var.cluster_name}-lifecycle-hooks"

    # Trust policy allowing the EC2 Auto Scaling service to assume this role
    # when publishing lifecycle hook notifications.
    assume_role_policy = <<EOF
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "autoscaling.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}
EOF

}

--------------------------------------------------------------------------------
/files/userdata.template:
--------------------------------------------------------------------------------
#!/bin/bash

# Start docker
service docker start

# Setup initial vars
serverUrl=https://${environment_access_key}:${environment_secret_key}@${server_hostname}
projectId=${environment_id}

# Make initial POST request for a registration token and record the id
response=$(curl -s -X POST $serverUrl/v1/registrationtokens?projectId=$projectId)
requestId=$(echo $response | jq -r '.id')
requestState=$(echo $response | jq -r '.state')

# The registration token request is async so keep checking until it's complete
while [[ "$requestState" != "active" ]]; do
    sleep 2
    response=$(curl -s $serverUrl/v1/registrationtokens/$requestId)
    requestState=$(echo $response | jq -r '.state')
done

# Get the instance id and private IP from metadata
instanceId=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
instancePrivateIp=$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)

# Labels
instanceLabels="HOSTID=$instanceId&CLOUD=aws&CLUSTER=${cluster_name}"
customLabels="${cluster_instance_labels}"
if [ -n "$customLabels" ]; then
    instanceLabels="$instanceLabels&$customLabels"
fi

# Add external DNS label if there's a public IP address
instancePublicIp=$(curl -f -s http://169.254.169.254/latest/meta-data/public-ipv4)
if [ -n "$instancePublicIp" ]; then
    instanceLabels="$instanceLabels&io.rancher.host.external_dns_ip=$instancePublicIp"
fi

# Use the command in the response to start the rancher agent
cmd=$(echo $response | jq -r '.command')
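# Note: the "$$" prefix stops Terraform interpolating this expression when the
# template is rendered, leaving a plain bash parameter substitution. It rewrites
# the registration command returned by the API ("sudo docker run ...") so the
# agent container is also started with the instance's private IP and the host
# labels assembled above.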
eval $${cmd/sudo docker run /docker run -e CATTLE_AGENT_IP=$instancePrivateIp -e CATTLE_HOST_LABELS=\"$instanceLabels\" }

# Fix to allow rancher-nfs
ln -s /var/run/rancher/storage/rancher-nfs.sock /run/docker/plugins/rancher-nfs.sock

--------------------------------------------------------------------------------
/security-groups.tf:
--------------------------------------------------------------------------------
# Attach IPSEC rules to host instance security group.
# Enables the rancher overlay network for connected hosts.
# Traffic only allowed to and from other machines with this security group.
resource "aws_security_group_rule" "ipsec_ingress_1" {

    security_group_id = "${var.cluster_instance_security_group_id}"
    type = "ingress"
    from_port = 4500
    to_port = 4500
    protocol = "udp"
    source_security_group_id = "${var.cluster_instance_security_group_id}"

    lifecycle {
        create_before_destroy = true
    }

}

resource "aws_security_group_rule" "ipsec_egress_1" {

    security_group_id = "${var.cluster_instance_security_group_id}"
    type = "egress"
    from_port = 4500
    to_port = 4500
    protocol = "udp"
    source_security_group_id = "${var.cluster_instance_security_group_id}"

    lifecycle {
        create_before_destroy = true
    }

}

resource "aws_security_group_rule" "ipsec_ingress_2" {

    security_group_id = "${var.cluster_instance_security_group_id}"
    type = "ingress"
    from_port = 500
    to_port = 500
    protocol = "udp"
    source_security_group_id = "${var.cluster_instance_security_group_id}"

    lifecycle {
        create_before_destroy = true
    }

}

resource "aws_security_group_rule" "ipsec_egress_2" {

    security_group_id = "${var.cluster_instance_security_group_id}"
    type = "egress"
    from_port = 500
    to_port = 500
    protocol = "udp"
    source_security_group_id = "${var.cluster_instance_security_group_id}"

    lifecycle {
        create_before_destroy = true
    }

}

# SSH ingress
# Required for the server to connect & configure the host.
resource "aws_security_group_rule" "ssh_ingress" {

    security_group_id = "${var.cluster_instance_security_group_id}"
    type = "ingress"
    from_port = 22
    to_port = 22
    protocol = "tcp"
    source_security_group_id = "${var.server_security_group_id}"

    lifecycle {
        create_before_destroy = true
    }

}

# Outgoing HTTP
# Allows pulling of remote docker images, installing packages, etc.
resource "aws_security_group_rule" "http_egress" {

    security_group_id = "${var.cluster_instance_security_group_id}"
    type = "egress"
    from_port = 80
    to_port = 80
    protocol = "tcp"
    cidr_blocks = ["0.0.0.0/0"]

    lifecycle {
        create_before_destroy = true
    }

}

# Outgoing HTTPS
# Allows pulling of remote docker images, installing packages, etc.
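# Also needed by files/userdata.template, which registers the host with the
# Rancher server API over HTTPS.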
resource "aws_security_group_rule" "https_egress" {

    security_group_id = "${var.cluster_instance_security_group_id}"
    type = "egress"
    from_port = 443
    to_port = 443
    protocol = "tcp"
    cidr_blocks = ["0.0.0.0/0"]

    lifecycle {
        create_before_destroy = true
    }

}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Rancher host cluster Terraform module

This is a Terraform module to help with creating a Rancher host cluster. It is intended for use in combination with [my Rancher server module](https://github.com/greensheep/terraform-aws-rancher-server).

### Features

- Flexible for use with different deployment scenarios.
- Automatically adds hosts launched by autoscaling to the Rancher server.
- Registers an autoscaling lifecycle hook used to automatically remove instances from the Rancher server on scale down (see [my Rancher server module](https://github.com/greensheep/terraform-aws-rancher-server)).
- Designed for use in private VPC subnets, so it can serve private backend services or proxy traffic from an ELB for public services.
- Can be used any number of times in a Terraform config, allowing creation of separate clusters for dev, staging, production, etc.

### Requirements

Terraform 0.6.6 is required.

On its own this module doesn't do very much. It needs to be included in a Terraform config that creates the following resources:

- Security group
- Autoscaling launch configuration
- Autoscaling group

Because these resources may vary significantly between deployments (e.g. the type of app you're deploying, expected workload, etc.), you need to create them yourself and pass in the necessary variables.

You'll also need to have your Rancher server set up and configured (did I mention [my Rancher server module](https://github.com/greensheep/terraform-aws-rancher-server)?). Don't be tempted to use this as part of some mega-config that also creates the server; you need to specify an environment id and API access keys for it to work!

### Usage

Include the following in your existing Terraform config:

    module "staging_cluster" {

        # Import the module from Github
        # It's probably better to fork or clone this repo if you intend to use it in
        # production so any future changes don't mess up your existing infrastructure.
        source = "github.com/greensheep/terraform-aws-rancher-hosts"

        # Add Rancher server details
        server_security_group_id = "sg-XXXXXXXX"
        server_hostname = "rancher-server.yourdomain.tld"

        # Rancher environment
        # In your Rancher server, create an environment and an API keypair. You can have
        # multiple host clusters per environment if necessary. Instances will be labelled
        # with the cluster name so you can differentiate between multiple clusters.
        environment_id = "1a7"
        environment_access_key = "ACCESS-KEY"
        environment_secret_key = "SECRET-KEY"

        # Name your cluster and provide the autoscaling group name and security group id.
        # See examples below.
        cluster_name = "${var.cluster_name}"
        cluster_autoscaling_group_name = "${aws_autoscaling_group.cluster_autoscale_group.id}"
        cluster_instance_security_group_id = "${aws_security_group.rancher_host_sg.id}"

        # Lifecycle hooks queue ARN
        # This is specific to my Rancher server module, which creates the SQS queue used
        # to receive autoscaling lifecycle hook notifications. This module creates a
        # lifecycle hook for the provided autoscaling group so that instances can be
        # removed from the Rancher server before they are terminated.
        lifecycle_hooks_sqs_queue_arn = "${var.lifecycle_hooks_sqs_queue_arn}"

    }
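
The module renders `files/userdata.template` with the details above and exposes the result as the `host_user_data` output. Pass this to your launch configuration so each new instance registers itself with the Rancher server on boot (see the full example below):

    user_data = "${module.staging_cluster.host_user_data}"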

### Examples of required resources

##### Security group

    # Cluster instance security group
    resource "aws_security_group" "cluster_instance_sg" {

        name = "Cluster-Instances"
        description = "Rules for connected Rancher host machines. These are the hosts that run containers placed on the cluster."
        vpc_id = "${TARGET-VPC-ID}"

        # NOTE: To allow ELB proxied traffic to private VPC
        # hosts, open the necessary ports here.

        lifecycle {
            create_before_destroy = true
        }

    }

##### Autoscaling

    # Autoscaling launch configuration
    resource "aws_launch_configuration" "cluster_launch_conf" {

        name = "Launch-Config"

        # Amazon Linux, eu-west-1
        image_id = "ami-69b9941e"

        # No public ip when instances are placed in private subnets. See notes
        # about creating an ELB to proxy public traffic into the cluster.
        associate_public_ip_address = false

        # Security groups
        security_groups = [
            "${aws_security_group.cluster_instance_sg.id}"
        ]

        # Key
        # NOTE: It's a good idea to use the same key as the Rancher server here.
        key_name = "${UPLOADED-KEY-NAME}"

        # Add rendered userdata template
        user_data = "${module.staging_cluster.host_user_data}"

        # Misc
        instance_type = "t2.micro"
        enable_monitoring = true

        lifecycle {
            create_before_destroy = true
        }

    }

    # Autoscaling group
    resource "aws_autoscaling_group" "cluster_autoscale_group" {

        name = "Cluster-ASG"
        launch_configuration = "${aws_launch_configuration.cluster_launch_conf.name}"
        min_size = "2"
        max_size = "2"
        desired_capacity = "2"
        health_check_grace_period = 180
        health_check_type = "EC2"
        force_delete = false
        termination_policies = ["OldestInstance"]

        # Add ELBs here if you're proxying public traffic into the cluster
        # load_balancers = ["${var.instance_cluster_load_balancers}"]

        # Target subnets
        vpc_zone_identifier = ["${LIST-OF-VPC-PRIVATE-SUBNET-IDS}"]

        tag {
            key = "Name"
            value = "Test-Cluster-Instance"
            propagate_at_launch = true
        }

        lifecycle {
            create_before_destroy = true
        }

    }
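
##### Load balancer (optional)

If you want to proxy public traffic into the cluster, an ELB in your public subnets can forward to the hosts. This is not part of the module; it's a minimal sketch assuming a hypothetical service listening on port 8080 on the hosts — adjust the ports, subnets and security groups for your setup, open the instance port in the cluster instance security group, and reference the ELB from the autoscaling group's `load_balancers` list shown above.

    # Public-facing ELB forwarding to the cluster hosts
    resource "aws_elb" "cluster_elb" {

        name = "Cluster-ELB"
        subnets = ["${LIST-OF-VPC-PUBLIC-SUBNET-IDS}"]
        security_groups = ["${ELB-SECURITY-GROUP-ID}"]

        listener {
            lb_port = 80
            lb_protocol = "http"
            instance_port = 8080
            instance_protocol = "http"
        }

        health_check {
            target = "TCP:8080"
            healthy_threshold = 2
            unhealthy_threshold = 2
            timeout = 3
            interval = 30
        }

    }

--------------------------------------------------------------------------------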