├── .gitignore
├── README.md
├── iam
    ├── iam.tf
    ├── outputs.tf
    ├── policies
    │   ├── policy.json
    │   └── role.json
    └── variables.tf
├── main.tf
├── outputs.tf
├── templates
    └── user-data.tpl
└── variables.tf


/.gitignore:
--------------------------------------------------------------------------------
1 | terraform.tfstate*
2 | .terraform*
3 | environment
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Elasticsearch cluster on AWS using Terraform
  2 | =============
  3 | 
  4 | This project will create an elasticsearch cluster in AWS using multiple availability zones. The cluster is located in a private subnet and communicates via private ip addresses.
  5 | 
  6 | ## Requirements
  7 | 
  8 | * Terraform >= v0.6.15
  9 | * Elasticsearch IAM profile called elasticSearchNode with [EC2 permissions](https://github.com/elastic/elasticsearch-cloud-aws#recommended-ec2-permissions). You can also use the iam project found in the iam directory. This only needs to be done once per account.
 10 | 
 11 | Packer AMI's
 12 | 
 13 | We use prebuilt Packer AMIs built from these projects:
 14 | 
 15 | * [packer-elasticsearch](https://github.com/nadnerb/packer-elasticsearch)
 16 | 
 17 | ## Installation
 18 | 
 19 | * install [Terraform](https://www.terraform.io/) and add it to your PATH.
 20 | * clone this repo.
 21 | * `terraform get`
 22 | 
 23 | ## Configuration
 24 | 
 25 | ### AWS Credentials
 26 | 
 27 | We rely on AWS credentials having been set elsewhere, for example using environment variables. We also use [terraform_exec](https://github.com/nadnerb/terraform_exec) to execute terraform,
 28 | which saves environment state to S3.
 29 | 
 30 | ### KMS encrypted consul atlas token
 31 | 
 32 | aws kms encrypt --key-id <kms-key-id> --plaintext fileb://<(echo <atlas-token>) --output text --query CiphertextBlob | base64 | base64 -d
 33 | 
 34 | This is then provided to terraform via `encrypted_atlas_token`.
 35 | 
 36 | ### Terraform configuration
 37 | 
 38 | Create a configuration file such as `~/.aws/default.tfvars` which can include mandatory and optional variables such as:
 39 | 
 40 | ```
 41 | key_name="<key name>"
 42 | 
 43 | stream_tag="<used for aws resource groups>"
 44 | 
 45 | aws_region="ap-southeast-2"
 46 | ami="ami-7ff38945"
 47 | 
 48 | vpc_id="xxx"
 49 | additional_security_groups=""
 50 | 
 51 | es_cluster="cluster name"
 52 | es_environment="dev"
 53 | volume_name="/dev/sdh"
 54 | volume_size="10"
 55 | 
 56 | instances="3"
 57 | availability_zones="ap-southeast-2a,ap-southeast-2b"
 58 | subnets="subnet-xxxxx,subnet-yyyyy"
 59 | 
 60 | # consul variables
 61 | dns_server  = "172.100.0.2"
 62 | consul_dc   = "dc0"
 63 | atlas       = "atlas user"
 64 | encrypted_atlas_token = "<KMS encrypted atlas token, see above>"
 65 | # internal hosted zone
 66 | ```
 67 | 
 68 | These variables can also be overridden when running terraform like so:
 69 | 
 70 | ```
 71 | terraform (plan|apply|destroy) -var 'ami=foozie'
 72 | ```
 73 | 
 74 | The variables.tf terraform file can be further modified, for example it defaults to `ap-southeast-2` for the AWS region.
 75 | 
 76 | ## Using Terraform
 77 | 
 78 | Execute the plan to see if everything works as expected.
 79 | 
 80 | ```
 81 | terraform plan -var-file ~/.aws/default.tfvars -state='environment/development.tfstate'
 82 | ```
 83 | 
 84 | If all looks good, let's build our infrastructure!
 85 | 
 86 | ```
 87 | terraform apply -var-file ~/.aws/default.tfvars -state='environment/development.tfstate'
 88 | ```
 89 | 
 90 | ### Multiple security groups
 91 | 
 92 | A security group is created using terraform that opens up Elasticsearch and ssh ports. We can also add extra pre-existing security groups to our Elasticsearch instances like so:
 93 | 
 94 | ```
 95 | terraform plan -var-file '~/.aws/default.tfvars' -var 'additional_security_groups=sg-xxxx, sg-yyyy'
 96 | ```
 97 | 
 98 | ## TODO
 99 | 
100 | * Update this readme
101 | 
102 | 


--------------------------------------------------------------------------------
/iam/iam.tf:
--------------------------------------------------------------------------------
 1 | resource "aws_iam_role" "elasticsearch" {  # role whose trust policy is read from policies/role.json
 2 |   name               = "${var.es_cluster}-elasticsearch-discovery-role"
 3 |   assume_role_policy = "${file("${path.module}/policies/role.json")}"  # path.module: resolve next to this file, not against the working directory
 4 | }
 5 | 
 6 | resource "aws_iam_role_policy" "elasticsearch" {  # inline policy (policies/policy.json) attached to the discovery role
 7 |   name     = "${var.es_cluster}-elasticsearch-discovery-policy"
 8 |   policy   = "${file("${path.module}/policies/policy.json")}"  # path.module: resolve next to this file, not against the working directory
 9 |   role     = "${aws_iam_role.elasticsearch.id}"
10 | }
11 | 
12 | resource "aws_iam_instance_profile" "elasticsearch" {  # instance profile wrapping the discovery role
13 |   path  = "/"
14 |   name  = "${var.es_cluster}-elasticsearch-discovery-profile"
15 |   roles = ["${aws_iam_role.elasticsearch.name}"]
16 | }
17 | 


--------------------------------------------------------------------------------
/iam/outputs.tf:
--------------------------------------------------------------------------------
1 | output "ecs iam id" {  # exposes the id of the elasticsearch instance profile
2 |   value = "${aws_iam_instance_profile.elasticsearch.id}"
3 | }
4 | 


--------------------------------------------------------------------------------
/iam/policies/policy.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "Statement": [
 3 |         {
 4 |             "Action": [
 5 |                 "ec2:DescribeInstances"
 6 |             ],
 7 |             "Effect": "Allow",
 8 |             "Resource": [
 9 |                 "*"
10 |             ]
11 |         }
12 |     ],
13 |     "Version": "2012-10-17"
14 | }
15 | 


--------------------------------------------------------------------------------
/iam/policies/role.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "Version": "2008-10-17",
 3 |   "Statement": [
 4 |     {
 5 |       "Action": "sts:AssumeRole",
 6 |       "Principal": {
 7 |         "Service": ["ecs.amazonaws.com", "ec2.amazonaws.com"]
 8 |       },
 9 |       "Effect": "Allow"
10 |     }
11 |   ]
12 | }
13 | 
14 | 


--------------------------------------------------------------------------------
/iam/variables.tf:
--------------------------------------------------------------------------------
1 | variable "es_cluster" {  # prefix of the IAM role, policy and instance profile names
2 |   default     = "default"
3 |   description = "the name of the elasticsearch cluster"
4 | }
5 | 
6 | 


--------------------------------------------------------------------------------
/main.tf:
--------------------------------------------------------------------------------
  1 | provider "aws" {  # only the region is set here; the README expects credentials to be set elsewhere
  2 |   region = "${var.aws_region}"
  3 | }
  4 | 
  5 | ##############################################################################
  6 | # Elasticsearch
  7 | ##############################################################################
  8 | 
  9 | resource "aws_security_group" "elasticsearch" {
 10 |   vpc_id      = "${var.vpc_id}"
 11 |   name        = "${var.security_group_name}-elasticsearch"
 12 |   description = "Elasticsearch ports with ssh"
 13 | 
 14 |   # SSH, allowed from var.internal_cidr_blocks (comma separated list)
 15 |   ingress {
 16 |     protocol    = "tcp"
 17 |     from_port   = 22
 18 |     to_port     = 22
 19 |     cidr_blocks = ["${split(",", var.internal_cidr_blocks)}"]
 20 |   }
 21 | 
 22 |   # Elasticsearch port range 9200-9400, allowed from the same CIDR blocks;
 23 |   # the launch configuration does not associate a public ip with the nodes
 24 |   ingress {
 25 |     protocol    = "tcp"
 26 |     from_port   = 9200
 27 |     to_port     = 9400
 28 |     cidr_blocks = ["${split(",", var.internal_cidr_blocks)}"]
 29 |   }
 30 | 
 31 |   egress {  # all outbound traffic, any protocol
 32 |     protocol    = "-1"
 33 |     from_port   = 0
 34 |     to_port     = 0
 35 |     cidr_blocks = ["0.0.0.0/0"]
 36 |   }
 37 | 
 38 |   tags {
 39 |     Name    = "${var.es_cluster}-elasticsearch"
 40 |     cluster = "${var.es_cluster}"
 41 |     stream  = "${var.stream_tag}"
 42 |   }
 43 | 
 44 |   lifecycle {
 45 |     create_before_destroy = true
 46 |   }
 47 | }
 48 | 
 49 | resource "template_file" "user_data" {
 50 |   template = "${file("${path.root}/templates/user-data.tpl")}"
 51 | 
 52 |   vars {
 53 |     es_cluster              = "${var.es_cluster}"  # elasticsearch.yml settings
 54 |     es_environment          = "${var.es_environment}"
 55 |     elasticsearch_data_dir  = "${var.elasticsearch_data}"
 56 |     heap_size               = "${var.heap_size}"
 57 |     volume_name             = "${var.volume_name}"
 58 |     security_groups         = "${aws_security_group.elasticsearch.id}"  # ec2 discovery filters
 59 |     availability_zones      = "${var.availability_zones}"
 60 |     aws_region              = "${var.aws_region}"
 61 |     dns_server              = "${var.dns_server}"  # consul agent settings
 62 |     consul_dc               = "${var.consul_dc}"
 63 |     atlas                   = "${var.atlas}"
 64 |     encrypted_atlas_token   = "${var.encrypted_atlas_token}"
 65 |   }
 66 | 
 67 |   lifecycle {
 68 |     create_before_destroy = true
 69 |   }
 70 | }
 71 | 
 72 | resource "aws_launch_configuration" "elasticsearch" {  # launch settings shared by every node of the autoscaling group
 73 |   image_id = "${var.ami}"
 74 |   instance_type = "${var.instance_type}"
 75 |   security_groups = ["${split(",", replace(replace(concat(aws_security_group.elasticsearch.id, ",", var.additional_security_groups), "/\\s+/", ""), "/,$/", ""))}"]  # own group plus optional extras; whitespace and a trailing comma are stripped before splitting
 76 |   associate_public_ip_address = false
 77 |   ebs_optimized = false
 78 |   key_name = "${var.key_name}"
 79 |   iam_instance_profile = "${var.iam_profile}"
 80 |   user_data = "${template_file.user_data.rendered}"  # rendered templates/user-data.tpl
 81 | 
 82 |   lifecycle {
 83 |     create_before_destroy = true
 84 |   }
 85 | 
 86 |   ebs_block_device {  # data volume; user-data formats it and mounts it as the elasticsearch data directory
 87 |     device_name = "${var.volume_name}"
 88 |     volume_size = "${var.volume_size}"
 89 |     encrypted = "${var.volume_encryption}"
 90 |   }
 91 | }
 92 | 
 93 | resource "aws_autoscaling_group" "elasticsearch" {
 94 |   launch_configuration = "${aws_launch_configuration.elasticsearch.id}"
 95 |   vpc_zone_identifier  = ["${split(",", var.subnets)}"]
 96 |   availability_zones   = ["${split(",", var.availability_zones)}"]
 97 |   min_size             = "${var.instances}"  # min, max and desired are all var.instances
 98 |   max_size             = "${var.instances}"
 99 |   desired_capacity     = "${var.instances}"
100 |   default_cooldown     = 30
101 |   force_delete         = true
102 | 
103 |   tag {
104 |     key                 = "Name"
105 |     value               = "${var.es_cluster}-elasticsearch"
106 |     propagate_at_launch = true
107 |   }
108 |   tag {
109 |     key                 = "Stream"
110 |     value               = "${var.stream_tag}"
111 |     propagate_at_launch = true
112 |   }
113 |   tag {
114 |     key                 = "ServerRole"
115 |     value               = "Elasticsearch"
116 |     propagate_at_launch = true
117 |   }
118 |   tag {
119 |     key                 = "Cost Center"
120 |     value               = "${var.costcenter_tag}"
121 |     propagate_at_launch = true
122 |   }
123 |   tag {
124 |     key                 = "Environment"
125 |     value               = "${var.environment_tag}"
126 |     propagate_at_launch = true
127 |   }
128 |   tag {
129 |     key                 = "consul"
130 |     value               = "agent"
131 |     propagate_at_launch = true
132 |   }
133 |   tag {  # matched by discovery.ec2.tag.es_env in the rendered elasticsearch.yml
134 |     key                 = "es_env"
135 |     value               = "${var.es_environment}"
136 |     propagate_at_launch = true
137 |   }
138 | 
139 |   lifecycle {
140 |     create_before_destroy = true
141 |   }
142 | }
143 | 
144 | 


--------------------------------------------------------------------------------
/outputs.tf:
--------------------------------------------------------------------------------
1 | output "launch_configuration" {  # launch configuration currently referenced by the autoscaling group
2 |   value = "${aws_autoscaling_group.elasticsearch.launch_configuration}"
3 | }
4 | 


--------------------------------------------------------------------------------
/templates/user-data.tpl:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | set -e
  3 | 
  4 | # Ideally move all this to a proper config management tool.
  5 | # This file is a Terraform template: placeholders are filled from the vars of template_file.user_data in main.tf.
  6 | # Configure elasticsearch (the heredoc delimiter is quoted, so the shell does not expand the body)
  7 | 
  8 | cat <<'EOF' >/etc/elasticsearch/elasticsearch.yml
  9 | cluster.name: ${es_cluster}
 10 | 
 11 | # our init.d script sets the default to this as well
 12 | path.data: ${elasticsearch_data_dir}
 13 | 
 14 | bootstrap.mlockall: true
 15 | network.host: _ec2:privateIpv4_
 16 | discovery.type: ec2
 17 | discovery.ec2.groups: ${security_groups}
 18 | discovery.ec2.tag.es_env: ${es_environment}
 19 | cloud.aws.region: ${aws_region}
 20 | discovery.ec2.availability_zones: ${availability_zones}
 21 | script.inline: true
 22 | EOF
 23 | 
 24 | ##############################################
 25 | # The following have been installed via Packer
 26 | ##############################################
 27 | 
 28 | # memory lock limit and heap size in /etc/sysconfig/elasticsearch
 29 | sudo sed -i 's/#MAX_LOCKED_MEMORY=unlimited/MAX_LOCKED_MEMORY=unlimited/' /etc/sysconfig/elasticsearch
 30 | sudo sed -i "s/#ES_HEAP_SIZE=.*$/ES_HEAP_SIZE=${heap_size}/" /etc/sysconfig/elasticsearch
 31 | 
 32 | sudo mkfs -t ext4 ${volume_name}  # format the data volume, then mount it as the elasticsearch data directory
 33 | sudo mkdir -p ${elasticsearch_data_dir}
 34 | sudo mount ${volume_name} ${elasticsearch_data_dir}
 35 | echo "${volume_name} ${elasticsearch_data_dir} ext4 defaults,nofail 0 2" | sudo tee -a /etc/fstab >/dev/null  # tee runs under sudo; a shell redirect after "sudo echo" would not
 36 | sudo chown -R elasticsearch:elasticsearch ${elasticsearch_data_dir}
 37 | 
 38 | # Configure the consul agent: write the config to /tmp, then move it into /etc/consul.d with sudo
 39 | cat <<EOF >/tmp/consul.json
 40 | {
 41 |     "addresses"                   : {
 42 |         "http" : "0.0.0.0"
 43 |     },
 44 |     "recursor"                    : "${dns_server}",
 45 |     "disable_anonymous_signature" : true,
 46 |     "disable_update_check"        : true,
 47 |     "data_dir"                    : "/mnt/consul/data"
 48 | }
 49 | EOF
 50 | sudo mv /tmp/consul.json /etc/consul.d/consul.json
 51 | 
 52 | # Setup the consul agent init script (upstart job, installed as /etc/init/consul.conf; quoted heredoc, so the shell expands nothing in the body)
 53 | cat <<'EOF' >/tmp/upstart
 54 | description "Consul agent"
 55 | 
 56 | start on runlevel [2345]
 57 | stop on runlevel [!2345]
 58 | 
 59 | respawn
 60 | 
 61 | env PIDFILE=/var/run/consul.pid
 62 | 
 63 | script
 64 |   # Make sure to use all our CPUs, because Consul can block a scheduler thread
 65 |   export GOMAXPROCS=`nproc`
 66 | 
 67 |   # Get the IP
 68 |   BIND=`ifconfig eth0 | grep "inet addr" | awk '{ print substr($2,6) }'`
 69 |   ATLAS_TOKEN=`sudo -H -u ec2-user bash -c 'aws kms decrypt --ciphertext-blob fileb://<(echo '${encrypted_atlas_token}' | base64 -d) --output text --query Plaintext --region ${aws_region} | base64 -d'`
 70 | 
 71 |   echo $$ > $${PIDFILE}
 72 |   exec /usr/local/bin/consul agent \
 73 |     -config-dir="/etc/consul.d" \
 74 |     -bind=$${BIND} \
 75 |     -node="elasticsearch-$${BIND}" \
 76 |     -dc="${consul_dc}" \
 77 |     -atlas=${atlas} \
 78 |     -atlas-join \
 79 |     -atlas-token="$${ATLAS_TOKEN}" \
 80 |     >>/var/log/consul.log 2>&1
 81 | end script
 82 | 
 83 | # to gracefully remove agents
 84 | pre-stop script
 85 |     [ -e $PIDFILE ] && kill -INT $(cat $PIDFILE)
 86 |     rm -f $PIDFILE
 87 | end script
 88 | EOF
 89 | sudo mv /tmp/upstart /etc/init/consul.conf
 90 | 
 91 | # Setup the consul service definitions: elasticsearch on 9200 (curl check) and on 9300 (nc check plus check.py cluster health)
 92 | cat <<'EOF' >/tmp/elasticsearch-consul.json
 93 | {
 94 |     "services": [{
 95 |         "name": "elasticsearch",
 96 |         "leave_on_terminate": true,
 97 |         "tags": [
 98 |             "http", "query"
 99 |         ],
100 |         "port": 9200,
101 |         "checks": [{
102 |             "id": "1",
103 |             "name": "Elasticsearch HTTP",
104 |             "notes": "Use curl to check the web service every 10 seconds",
105 |             "script": "curl `ifconfig eth0 | grep 'inet addr' | awk '{ print substr($2,6) }'`:9200 >/dev/null 2>&1",
106 |             "interval": "10s"
107 |         } ]
108 |     },
109 |     {
110 |         "name": "elasticsearch-9300",
111 |         "leave_on_terminate": true,
112 |         "tags": [
113 |             "tcp", "index"
114 |         ],
115 |         "port": 9300,
116 |         "checks": [{
117 |             "id": "1",
118 |             "name": "Elasticsearch TCP",
119 |             "notes": "Use nc to check the tcp port every 10 seconds",
120 |             "script": "nc -zv `ifconfig eth0 | grep 'inet addr' | awk '{ print substr($2,6) }'` 9300 >/dev/null 2>&1 ",
121 |             "interval": "10s"
122 |         }, {
123 |             "id": "2",
124 |             "name": "Cluster health",
125 |             "notes": "Check cluster health every 30 seconds",
126 |             "script": "python /etc/consul.d/check.py",
127 |             "interval": "30s"
128 |         }]
129 |     }]
130 | }
131 | EOF
132 | sudo mv /tmp/elasticsearch-consul.json /etc/consul.d/elasticsearch.json
133 | 
134 | cat <<EOF >/tmp/check.py
135 | import requests
136 | import sys
137 | 
138 | ip = requests.get("http://169.254.169.254/latest/meta-data/local-ipv4").text
139 | url = "http://{ip}:9200/_cat/health".format(**locals())
140 | 
141 | def green():
142 |     sys.exit()
143 | 
144 | def yellow():
145 |     sys.exit(1)
146 | 
147 | def red():
148 |     sys.exit(2)
149 | 
150 | codes = {
151 |         "green": green,
152 |         "yellow": yellow,
153 |         "red": red,
154 |     }
155 | 
156 | r = requests.get(url)
157 | codes.get(r.text.split()[3], red)()  # unknown status: call red() so the check fails instead of exiting 0
158 | EOF
159 | sudo mv /tmp/check.py /etc/consul.d/check.py
160 | 
161 | # Start Elasticsearch: register the init script with chkconfig, then start the service
162 | sudo chkconfig --add elasticsearch
163 | sudo service elasticsearch start
164 | 
165 | # Start Consul via the upstart job written to /etc/init/consul.conf above
166 | sudo start consul
167 | 
168 | 


--------------------------------------------------------------------------------
/variables.tf:
--------------------------------------------------------------------------------
  1 | ### OPTIONAL: every tag variable below has a default ###
  2 | variable "role_tag" {
  3 |   description = "Role of the ec2 instance, defaults to <SERVICE>"
  4 |   default = "SERVICE"
  5 | }
  6 | 
  7 | variable "environment_tag" {
  8 |   description = "Environment of the ec2 instance, defaults to <DEV>"
  9 |   default = "DEV"
 10 | }
 11 | 
 12 | variable "costcenter_tag" {
 13 |   description = "Cost center of the ec2 instance, defaults to <DEV>"
 14 |   default = "DEV"
 15 | }
 16 | 
 17 | # group our resources (used as the stream / Stream tag value)
 18 | variable "stream_tag" {
 19 |   default = "default"
 20 | }
 21 | 
 22 | variable "environment" {
 23 |   default = "default"
 24 | }
 25 | 
 26 | # NOTE: "es_environment" is declared in the Elasticsearch section further
 27 | # down (mandatory, no default). The second declaration that used to live
 28 | # here was removed: variable names must be unique within a configuration.
 29 | 
 30 | variable "es_cluster" {
 31 |   description = "Name of the elasticsearch cluster, used in node discovery"
 32 |   default = "elasticsearch"
 33 | }
 34 | 
 35 | ###################################################################
 36 | # AWS configuration below
 37 | ###################################################################
 38 | variable "key_name" {
 39 |   description = "Name of the SSH keypair to use in AWS."
 40 |   default = "elastic"
 41 | }
 42 | 
 43 | ### MANDATORY ###
 44 | variable "iam_profile" {
 45 |   description = "Elasticsearch IAM profile"
 46 | }
 47 | 
 48 | variable "aws_region" {
 49 |   description = "AWS region to launch servers."
 50 |   default = "ap-southeast-2"
 51 | }
 52 | 
 53 | variable "availability_zones" {  # split on "," for the autoscaling group and passed as-is to elasticsearch discovery
 54 |   description = "Comma separated list of availability zones to launch servers in."
 55 |   default = "ap-southeast-2a,ap-southeast-2b"
 56 | }
 57 | 
 58 | variable "security_group_name" {
 59 |   description = "Name of security group to use in AWS."
 60 |   default = "elasticsearch"
 61 | }
 62 | 
 63 | ###################################################################
 64 | # Vpc configuration below
 65 | ###################################################################
 66 | 
 67 | ### MANDATORY ###
 68 | variable "vpc_id" {  # VPC the security group is created in
 69 |   description = "VPC id"
 70 | }
 71 | 
 72 | variable "internal_cidr_blocks" {  # comma separated CIDR blocks allowed to reach the ssh and elasticsearch ports
 73 |   default = "0.0.0.0/0"
 74 | }
 75 | 
 76 | ###################################################################
 77 | # Subnet configuration below
 78 | ###################################################################
 79 | 
 80 | ### MANDATORY ###
 81 | variable "subnets" {  # comma separated subnet ids, split for the autoscaling group
 82 |   description = "subnets to deploy into"
 83 | }
 84 | 
 85 | ###################################################################
 86 | # Elasticsearch configuration below
 87 | ###################################################################
 88 | 
 89 | ### MANDATORY ###
 90 | # AMI to launch: an Amazon Linux elasticsearch image built by packer
 91 | # See https://github.com/nadnerb/packer-elastic-search
 92 | variable "ami" {
 93 | }
 94 | 
 95 | variable "instance_type" {
 96 |   default     = "t2.medium"
 97 |   description = "Elasticsearch instance type."
 98 | }
 99 | 
100 | ### MANDATORY ###
101 | variable "es_environment" {  # value of the es_env instance tag and of discovery.ec2.tag.es_env
102 |   description = "Elastic environment tag for auto discovery"
103 | }
104 | 
105 | # size of the autoscaling group (used for min, max and desired capacity)
106 | variable "instances" {
107 |   default     = "2"
108 |   description = "total instances"
109 | }
110 | 
111 | #DEPRECATED
112 | # number of nodes in zone a
113 | variable "subnet_a_num_nodes" {
114 |   default     = "1"
115 |   description = "Elastic nodes in a"
116 | }
117 | 
118 | #DEPRECATED
119 | # number of nodes in zone b
120 | variable "subnet_b_num_nodes" {
121 |   default     = "1"
122 |   description = "Elastic nodes in b"
123 | }
124 | 
125 | # comma separated ids of existing security groups to attach in addition to
126 | # the one created here. In our case we have consul running as agents on the box
127 | variable "additional_security_groups" {
128 |   default = ""
129 | }
130 | 
131 | variable "volume_name" {  # device name of the EBS data volume
132 |   default = "/dev/sdh"
133 | }
134 | 
135 | variable "volume_size" {  # passed to ebs_block_device.volume_size
136 |   default = "10"
137 | }
138 | 
139 | variable "volume_encryption" {  # passed to ebs_block_device.encrypted
140 |   default = true
141 | }
142 | 
143 | variable "elasticsearch_data" {  # mount point of the data volume and elasticsearch path.data
144 |   default = "/opt/elasticsearch/data"
145 | }
146 | 
147 | # elasticsearch heap size, written to ES_HEAP_SIZE by user-data
148 | variable "heap_size" {
149 |   default = "256m"
150 | }
151 | 
152 | ###################################################################
153 | # Consul configuration below
154 | ###################################################################
155 | 
156 | ### MANDATORY ###
157 | variable "dns_server" {  # written as "recursor" into the consul agent config
158 | }
159 | 
160 | variable "consul_dc" {  # passed to the consul agent as -dc
161 |   default = "dev"
162 | }
163 | 
164 | variable "atlas" {  # passed to the consul agent as -atlas
165 |   default = "example/atlas"
166 | }
167 | 
168 | ### MANDATORY ###
169 | variable "encrypted_atlas_token" {  # base64 KMS ciphertext; decrypted with "aws kms decrypt" when the consul job starts
170 | }
171 | 


--------------------------------------------------------------------------------