├── .gitignore ├── LICENSE ├── README.md ├── ansible ├── .gitignore ├── ansible.cfg ├── group_vars │ └── all │ │ └── vars.yml ├── hosts │ ├── ec2.ini │ ├── ec2.py │ └── groups ├── infra.yaml ├── kubectl.yaml ├── kubernetes-nginx.yaml ├── kubernetes-routing.yaml └── roles │ ├── common │ └── tasks │ │ └── main.yaml │ ├── controller │ ├── files │ │ ├── authorization-policy.jsonl │ │ └── token.csv │ ├── tasks │ │ └── main.yml │ ├── templates │ │ ├── kube-apiserver.service.j2 │ │ ├── kube-controller-manager.service.j2 │ │ └── kube-scheduler.service.j2 │ └── vars │ │ └── main.yml │ ├── etcd │ ├── tasks │ │ └── main.yml │ ├── templates │ │ └── etcd.service.j2 │ └── vars │ │ └── main.yml │ └── worker │ ├── handlers │ └── main.yml │ ├── tasks │ └── main.yml │ ├── templates │ ├── docker.service.j2 │ ├── kube-proxy.service.j2 │ ├── kubeconfig.j2 │ └── kubelet.service.j2 │ └── vars │ └── main.yml ├── cert ├── .gitignore ├── ca-config.json └── ca-csr.json └── terraform ├── .gitignore ├── aws.tf ├── certificates.tf ├── etcf.tf ├── iam.tf ├── k8s_controllers.tf ├── sshcfg.tf ├── template ├── kubernetes-csr.json └── ssh.cfg ├── terraform.tfvars.example ├── variables.tf ├── vpc.tf └── workers.tf /.gitignore: -------------------------------------------------------------------------------- 1 | ### Secrets ### 2 | *.pem 3 | /ssh.cfg 4 | /secrets.sh 5 | 6 | 7 | ### OSX ### 8 | *.DS_Store 9 | .AppleDouble 10 | .LSOverride 11 | 12 | # Icon must end with two \r 13 | Icon 14 | 15 | 16 | # Thumbnails 17 | ._* 18 | 19 | # Files that might appear in the root of a volume 20 | .DocumentRevisions-V100 21 | .fseventsd 22 | .Spotlight-V100 23 | .TemporaryItems 24 | .Trashes 25 | .VolumeIcon.icns 26 | .com.apple.timemachine.donotpresent 27 | 28 | # Directories potentially created on remote AFP share 29 | .AppleDB 30 | .AppleDesktop 31 | Network Trash Folder 32 | Temporary Items 33 | .apdisk 34 | 35 | 36 | ### Linux ### 37 | *~ 38 | 39 | # temporary files which can be created if a process still has a handle open of a deleted file 40 | .fuse_hidden* 41 | 42 | # KDE directory preferences 43 | .directory 44 | 45 | # Linux trash folder which might appear on any partition or disk 46 | .Trash-* 47 | 48 | 49 | ### Terraform ### 50 | # Compiled files 51 | *.tfstate 52 | *.tfstate.backup 53 | 54 | # Module directory 55 | .terraform/ 56 | 57 | 58 | ### Ansible ### 59 | *.retry 60 | 61 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 OpenCredo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kubernetes not the hardest way (or "Provisioning a Kubernetes Cluster on AWS using Terraform and Ansible") 2 | 3 | A worked example to provision a Kubernetes cluster on AWS from scratch, using Terraform and Ansible. A scripted version of the famous tutorial [Kubernetes the hard way](https://github.com/kelseyhightower/kubernetes-the-hard-way). 4 | 5 | See the companion article https://opencredo.com/kubernetes-aws-terraform-ansible-1/ for details about goals, design decisions and simplifications. 6 | 7 | - AWS VPC 8 | - 3 EC2 instances for HA Kubernetes Control Plane: Kubernetes API, Scheduler and Controller Manager 9 | - 3 EC2 instances for *etcd* cluster 10 | - 3 EC2 instances as Kubernetes Workers (aka Minions or Nodes) 11 | - Kubenet Pod networking (using CNI) 12 | - HTTPS between components and control API 13 | - Sample *nginx* service deployed to check everything works 14 | 15 | *This is a learning tool, not a production-ready setup.* 16 | 17 | ## Requirements 18 | 19 | Requirements on control machine: 20 | 21 | - Terraform (tested with Terraform 0.7.0; **NOT compatible with Terraform 0.6.x**) 22 | - Python (tested with Python 2.7.12, may be not compatible with older versions; requires Jinja2 2.8) 23 | - Python *netaddr* module 24 | - Ansible (tested with Ansible 2.1.0.0) 25 | - *cfssl* and *cfssljson*: https://github.com/cloudflare/cfssl 26 | - Kubernetes CLI 27 | - SSH Agent 28 | - (optionally) AWS CLI 29 | 30 | 31 | ## AWS Credentials 32 | 33 | ### AWS KeyPair 34 | 35 | You need a valid AWS Identity (`.pem`) file and the corresponding Public Key. Terraform imports the [KeyPair](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html) in your AWS account. Ansible uses the Identity to SSH into machines. 36 | 37 | Please read [AWS Documentation](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html#how-to-generate-your-own-key-and-import-it-to-aws) about supported formats. 38 | 39 | ### Terraform and Ansible authentication 40 | 41 | Both Terraform and Ansible expect AWS credentials set in environment variables: 42 | ``` 43 | $ export AWS_ACCESS_KEY_ID= 44 | $ export AWS_SECRET_ACCESS_KEY="" 45 | ``` 46 | 47 | If you plan to use AWS CLI you have to set `AWS_DEFAULT_REGION`. 48 | 49 | Ansible expects the SSH identity loaded by SSH agent: 50 | ``` 51 | $ ssh-add .pem 52 | ``` 53 | 54 | ## Defining the environment 55 | 56 | Terraform expects some variables to define your working environment: 57 | 58 | - `control_cidr`: The CIDR of your IP. All instances will accept only traffic from this address only. Note this is a CIDR, not a single IP. e.g. `123.45.67.89/32` (mandatory) 59 | - `default_keypair_public_key`: Valid public key corresponding to the Identity you will use to SSH into VMs. e.g. `"ssh-rsa AAA....xyz"` (mandatory) 60 | 61 | **Note that Instances and Kubernetes API will be accessible only from the "control IP"**. If you fail to set it correctly, you will not be able to SSH into machines or run Ansible playbooks. 
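If you are unsure of your own public IP, one quick way to look it up (a sketch assuming `curl` is installed; the AWS check-ip endpoint below is just one of many "what is my IP" services) is:
```
$ curl -s https://checkip.amazonaws.com
123.45.67.89
```
Append `/32` to the returned address (e.g. `123.45.67.89/32`) and use it as `control_cidr`.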
62 | 63 | You may optionally redefine: 64 | 65 | - `default_keypair_name`: AWS key-pair name for all instances. (Default: "k8s-not-the-hardest-way") 66 | - `vpc_name`: VPC Name. Must be unique in the AWS Account (Default: "kubernetes") 67 | - `elb_name`: ELB Name for Kubernetes API. Can only contain characters valid for DNS names. Must be unique in the AWS Account (Default: "kubernetes") 68 | - `owner`: `Owner` tag added to all AWS resources. No functional use. It becomes useful to filter your resources on AWS console if you are sharing the same AWS account with others. (Default: "kubernetes") 69 | 70 | 71 | 72 | The easiest way is creating a `terraform.tfvars` [variable file](https://www.terraform.io/docs/configuration/variables.html#variable-files) in `./terraform` directory. Terraform automatically imports it. 73 | 74 | Sample `terraform.tfvars`: 75 | ``` 76 | default_keypair_public_key = "ssh-rsa AAA...zzz" 77 | control_cidr = "123.45.67.89/32" 78 | default_keypair_name = "lorenzo-glf" 79 | vpc_name = "Lorenzo ETCD" 80 | elb_name = "lorenzo-etcd" 81 | owner = "Lorenzo" 82 | ``` 83 | 84 | 85 | ### Changing AWS Region 86 | 87 | By default, the project uses `eu-west-1`. To use a different AWS Region, set additional Terraform variables: 88 | 89 | - `region`: AWS Region (default: "eu-west-1"). 90 | - `zone`: AWS Availability Zone (default: "eu-west-1a") 91 | - `default_ami`: Pick the AMI for the new Region from https://cloud-images.ubuntu.com/locator/ec2/: Ubuntu 16.04 LTS (xenial), HVM:EBS-SSD 92 | 93 | You also have to edit `./ansible/hosts/ec2.ini`, changing `regions = eu-west-1` to the new Region. 94 | 95 | ## Provision infrastructure, with Terraform 96 | 97 | Run Terraform commands from `./terraform` subdirectory. 98 | 99 | ``` 100 | $ terraform plan 101 | $ terraform apply 102 | ``` 103 | 104 | Terraform outputs public DNS name of Kubernetes API and Workers public IPs. 105 | ``` 106 | Apply complete! Resources: 12 added, 2 changed, 0 destroyed. 107 | ... 108 | Outputs: 109 | 110 | kubernetes_api_dns_name = lorenzo-kubernetes-api-elb-1566716572.eu-west-1.elb.amazonaws.com 111 | kubernetes_workers_public_ip = 54.171.180.238,54.229.249.240,54.229.251.124 112 | ``` 113 | 114 | You will need them later (you may show them at any moment with `terraform output`). 115 | 116 | ### Generated SSH config 117 | 118 | Terraform generates `ssh.cfg`, SSH configuration file in the project directory. 119 | It is convenient for manually SSH into machines using node names (`controller0`...`controller2`, `etcd0`...`2`, `worker0`...`2`), but it is NOT used by Ansible. 120 | 121 | e.g. 122 | ``` 123 | $ ssh -F ssh.cfg worker0 124 | ``` 125 | 126 | ## Install Kubernetes, with Ansible 127 | 128 | Run Ansible commands from `./ansible` subdirectory. 129 | 130 | We have multiple playbooks. 131 | 132 | ### Install and set up Kubernetes cluster 133 | 134 | Install Kubernetes components and *etcd* cluster. 135 | ``` 136 | $ ansible-playbook infra.yaml 137 | ``` 138 | 139 | ### Setup Kubernetes CLI 140 | 141 | Configure Kubernetes CLI (`kubectl`) on your machine, setting Kubernetes API endpoint (as returned by Terraform). 142 | ``` 143 | $ ansible-playbook kubectl.yaml --extra-vars "kubernetes_api_endpoint=" 144 | ``` 145 | 146 | Verify all components and minions (workers) are up and running, using Kubernetes CLI (`kubectl`). 
147 | 148 | ``` 149 | $ kubectl get componentstatuses 150 | NAME STATUS MESSAGE ERROR 151 | controller-manager Healthy ok 152 | scheduler Healthy ok 153 | etcd-2 Healthy {"health": "true"} 154 | etcd-1 Healthy {"health": "true"} 155 | etcd-0 Healthy {"health": "true"} 156 | 157 | $ kubectl get nodes 158 | NAME STATUS AGE 159 | ip-10-43-0-30.eu-west-1.compute.internal Ready 6m 160 | ip-10-43-0-31.eu-west-1.compute.internal Ready 6m 161 | ip-10-43-0-32.eu-west-1.compute.internal Ready 6m 162 | ``` 163 | 164 | ### Setup Pod cluster routing 165 | 166 | Set up additional routes for traffic between Pods. 167 | ``` 168 | $ ansible-playbook kubernetes-routing.yaml 169 | ``` 170 | 171 | ### Smoke test: Deploy *nginx* service 172 | 173 | Deploy a *ngnix* service inside Kubernetes. 174 | ``` 175 | $ ansible-playbook kubernetes-nginx.yaml 176 | ``` 177 | 178 | Verify pods and service are up and running. 179 | 180 | ``` 181 | $ kubectl get pods -o wide 182 | NAME READY STATUS RESTARTS AGE IP NODE 183 | nginx-2032906785-9chju 1/1 Running 0 3m 10.200.1.2 ip-10-43-0-31.eu-west-1.compute.internal 184 | nginx-2032906785-anu2z 1/1 Running 0 3m 10.200.2.3 ip-10-43-0-30.eu-west-1.compute.internal 185 | nginx-2032906785-ynuhi 1/1 Running 0 3m 10.200.0.3 ip-10-43-0-32.eu-west-1.compute.internal 186 | 187 | > kubectl get svc nginx --output=json 188 | { 189 | "kind": "Service", 190 | "apiVersion": "v1", 191 | "metadata": { 192 | "name": "nginx", 193 | "namespace": "default", 194 | ... 195 | ``` 196 | 197 | Retrieve the port *nginx* has been exposed on: 198 | 199 | ``` 200 | $ kubectl get svc nginx --output=jsonpath='{range .spec.ports[0]}{.nodePort}' 201 | 32700 202 | ``` 203 | 204 | Now you should be able to access *nginx* default page: 205 | ``` 206 | $ curl http://: 207 | 208 | 209 | 210 | Welcome to nginx! 211 | ... 212 | ``` 213 | 214 | The service is exposed on all Workers using the same port (see Workers public IPs in Terraform output). 215 | 216 | 217 | # Known simplifications 218 | 219 | There are many known simplifications, compared to a production-ready solution: 220 | 221 | - Networking setup is very simple: ALL instances have a public IP (though only accessible from a configurable Control IP). 222 | - Infrastructure managed by direct SSH into instances (no VPN, no Bastion). 223 | - Very basic Service Account and Secret (to change them, modify: `./ansible/roles/controller/files/token.csv` and `./ansible/roles/worker/templates/kubeconfig.j2`) 224 | - No actual integration between Kubernetes and AWS. 225 | - No additional Kubernetes add-on (DNS, Dashboard, Logging...) 226 | - Simplified Ansible lifecycle. Playbooks support changes in a simplistic way, including possibly unnecessary restarts. 
227 | - Instances use static private IP addresses 228 | - No stable private or public DNS naming (only dynamic DNS names, generated by AWS) 229 | -------------------------------------------------------------------------------- /ansible/.gitignore: -------------------------------------------------------------------------------- 1 | *.retry 2 | -------------------------------------------------------------------------------- /ansible/ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | remote_user = ubuntu 3 | host_key_checking = False 4 | inventory = ./hosts/ 5 | -------------------------------------------------------------------------------- /ansible/group_vars/all/vars.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | internal_cidr: "10.0.0.0/8" # Includes VPC and Kubernetes subnets 4 | kubernetes_service_cluster_cidr: "10.32.0.0/24" 5 | kubernetes_pod_cluster_cidr: "10.200.0.0/16" 6 | kubernetes_cluster_dns: "10.32.0.10" 7 | 8 | kubernetes_version: "v1.3.6" 9 | -------------------------------------------------------------------------------- /ansible/hosts/ec2.ini: -------------------------------------------------------------------------------- 1 | # Ansible EC2 external inventory script settings 2 | # 3 | 4 | [ec2] 5 | # Retrieve only instances with (key=value) env=staging tag 6 | instance_filters = tag:ansibleFilter=Kubernetes01 7 | 8 | # AWS regions to make calls to. Set this to 'all' to make request to all regions 9 | # in AWS and merge the results together. Alternatively, set this to a comma 10 | # separated list of regions. E.g. 'us-east-1,us-west-1,us-west-2' 11 | # regions = all 12 | regions_exclude = 13 | regions = eu-west-1 14 | 15 | # When generating inventory, Ansible needs to know how to address a server. 16 | # Each EC2 instance has a lot of variables associated with it. Here is the list: 17 | # http://docs.pythonboto.org/en/latest/ref/ec2.html#module-boto.ec2.instance 18 | # Below are 2 variables that are used as the address of a server: 19 | # - destination_variable 20 | # - vpc_destination_variable 21 | 22 | # This is the normal destination variable to use. If you are running Ansible 23 | # from outside EC2, then 'public_dns_name' makes the most sense. If you are 24 | # running Ansible from within EC2, then perhaps you want to use the internal 25 | # address, and should set this to 'private_dns_name'. The key of an EC2 tag 26 | # may optionally be used; however the boto instance variables hold precedence 27 | # in the event of a collision. 28 | #destination_varie = public_dns_name 29 | destination_variable = ip_address 30 | 31 | # This allows you to override the inventory_name with an ec2 variable, instead 32 | # of using the destination_variable above. Addressing (aka ansible_ssh_host) 33 | # will still use destination_variable. Tags should be written as 'tag_TAGNAME'. 34 | #hostname_variable = tag_Name 35 | hostname_variable = tag_ansibleNodeName 36 | 37 | # For server inside a VPC, using DNS names may not make sense. When an instance 38 | # has 'subnet_id' set, this variable is used. If the subnet is public, setting 39 | # this to 'ip_address' will return the public IP address. For instances in a 40 | # private subnet, this should be set to 'private_ip_address', and Ansible must 41 | # be run from within EC2. The key of an EC2 tag may optionally be used; however 42 | # the boto instance variables hold precedence in the event of a collision. 
43 | # WARNING: - instances that are in the private vpc, _without_ public ip address 44 | # will not be listed in the inventory until You set: 45 | #vpc_destination_variable = private_ip_address 46 | vpc_destination_variable = ip_address 47 | 48 | # To tag instances on EC2 with the resource records that point to them from 49 | # Route53, uncomment and set 'route53' to True. 50 | route53 = False 51 | 52 | # To exclude RDS instances from the inventory, uncomment and set to False. 53 | rds = False 54 | 55 | # To exclude ElastiCache instances from the inventory, uncomment and set to False. 56 | elasticache = False 57 | 58 | # By default, only EC2 instances in the 'running' state are returned. Set 59 | # 'all_instances' to True to return all instances regardless of state. 60 | all_instances = False 61 | 62 | # By default, only RDS instances in the 'available' state are returned. Set 63 | # 'all_rds_instances' to True return all RDS instances regardless of state. 64 | all_rds_instances = False 65 | 66 | # By default, only ElastiCache clusters and nodes in the 'available' state 67 | # are returned. Set 'all_elasticache_clusters' and/or 'all_elastic_nodes' 68 | # to True return all ElastiCache clusters and nodes, regardless of state. 69 | # 70 | # Note that all_elasticache_nodes only applies to listed clusters. That means 71 | # if you set all_elastic_clusters to false, no node will be return from 72 | # unavailable clusters, regardless of the state and to what you set for 73 | # all_elasticache_nodes. 74 | all_elasticache_replication_groups = False 75 | all_elasticache_clusters = False 76 | all_elasticache_nodes = False 77 | 78 | # API calls to EC2 are slow. For this reason, we cache the results of an API 79 | # call. Set this to the path you want cache files to be written to. Two files 80 | # will be written to this directory: 81 | # - ansible-ec2.cache 82 | # - ansible-ec2.index 83 | cache_path = ~/.ansible/tmp 84 | 85 | # The number of seconds a cache file is considered valid. After this many 86 | # seconds, a new API call will be made, and the cache file will be updated. 87 | # To disable the cache, set this value to 0 88 | cache_max_age = 300 89 | 90 | # Organize groups into a nested/hierarchy instead of a flat namespace. 91 | nested_groups = False 92 | 93 | # Replace - tags when creating groups to avoid issues with ansible 94 | replace_dash_in_groups = True 95 | 96 | # If set to true, any tag of the form "a,b,c" is expanded into a list 97 | # and the results are used to create additional tag_* inventory groups. 98 | expand_csv_tags = False 99 | 100 | # The EC2 inventory output can become very large. To manage its size, 101 | # configure which groups should be created. 102 | group_by_instance_id = True 103 | group_by_region = True 104 | group_by_availability_zone = True 105 | group_by_ami_id = True 106 | group_by_instance_type = True 107 | group_by_key_pair = True 108 | group_by_vpc_id = True 109 | group_by_security_group = True 110 | group_by_tag_keys = True 111 | group_by_tag_none = True 112 | group_by_route53_names = True 113 | group_by_rds_engine = True 114 | group_by_rds_parameter_group = True 115 | group_by_elasticache_engine = True 116 | group_by_elasticache_cluster = True 117 | group_by_elasticache_parameter_group = True 118 | group_by_elasticache_replication_group = True 119 | 120 | 121 | [credentials] 122 | 123 | # The AWS credentials can optionally be specified here. 
Credentials specified 124 | # here are ignored if the environment variable AWS_ACCESS_KEY_ID or 125 | # AWS_PROFILE is set, or if the boto_profile property above is set. 126 | # 127 | # Supplying AWS credentials here is not recommended, as it introduces 128 | # non-trivial security concerns. When going down this route, please make sure 129 | # to set access permissions for this file correctly, e.g. handle it the same 130 | # way as you would a private SSH key. 131 | # 132 | # Unlike the boto and AWS configure files, this section does not support 133 | # profiles. 134 | # 135 | # aws_access_key_id = AXXXXXXXXXXXXXX 136 | # aws_secret_access_key = XXXXXXXXXXXXXXXXXXX 137 | # aws_security_token = XXXXXXXXXXXXXXXXXXXXXXXXXXXX 138 | -------------------------------------------------------------------------------- /ansible/hosts/ec2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | EC2 external inventory script 5 | ================================= 6 | 7 | Generates inventory that Ansible can understand by making API request to 8 | AWS EC2 using the Boto library. 9 | 10 | NOTE: This script assumes Ansible is being executed where the environment 11 | variables needed for Boto have already been set: 12 | export AWS_ACCESS_KEY_ID='AK123' 13 | export AWS_SECRET_ACCESS_KEY='abc123' 14 | 15 | This script also assumes there is an ec2.ini file alongside it. To specify a 16 | different path to ec2.ini, define the EC2_INI_PATH environment variable: 17 | 18 | export EC2_INI_PATH=/path/to/my_ec2.ini 19 | 20 | If you're using eucalyptus you need to set the above variables and 21 | you need to define: 22 | 23 | export EC2_URL=http://hostname_of_your_cc:port/services/Eucalyptus 24 | 25 | If you're using boto profiles (requires boto>=2.24.0) you can choose a profile 26 | using the --boto-profile command line argument (e.g. 
ec2.py --boto-profile prod) or using 27 | the AWS_PROFILE variable: 28 | 29 | AWS_PROFILE=prod ansible-playbook -i ec2.py myplaybook.yml 30 | 31 | For more details, see: http://docs.pythonboto.org/en/latest/boto_config_tut.html 32 | 33 | When run against a specific host, this script returns the following variables: 34 | - ec2_ami_launch_index 35 | - ec2_architecture 36 | - ec2_association 37 | - ec2_attachTime 38 | - ec2_attachment 39 | - ec2_attachmentId 40 | - ec2_client_token 41 | - ec2_deleteOnTermination 42 | - ec2_description 43 | - ec2_deviceIndex 44 | - ec2_dns_name 45 | - ec2_eventsSet 46 | - ec2_group_name 47 | - ec2_hypervisor 48 | - ec2_id 49 | - ec2_image_id 50 | - ec2_instanceState 51 | - ec2_instance_type 52 | - ec2_ipOwnerId 53 | - ec2_ip_address 54 | - ec2_item 55 | - ec2_kernel 56 | - ec2_key_name 57 | - ec2_launch_time 58 | - ec2_monitored 59 | - ec2_monitoring 60 | - ec2_networkInterfaceId 61 | - ec2_ownerId 62 | - ec2_persistent 63 | - ec2_placement 64 | - ec2_platform 65 | - ec2_previous_state 66 | - ec2_private_dns_name 67 | - ec2_private_ip_address 68 | - ec2_publicIp 69 | - ec2_public_dns_name 70 | - ec2_ramdisk 71 | - ec2_reason 72 | - ec2_region 73 | - ec2_requester_id 74 | - ec2_root_device_name 75 | - ec2_root_device_type 76 | - ec2_security_group_ids 77 | - ec2_security_group_names 78 | - ec2_shutdown_state 79 | - ec2_sourceDestCheck 80 | - ec2_spot_instance_request_id 81 | - ec2_state 82 | - ec2_state_code 83 | - ec2_state_reason 84 | - ec2_status 85 | - ec2_subnet_id 86 | - ec2_tenancy 87 | - ec2_virtualization_type 88 | - ec2_vpc_id 89 | 90 | These variables are pulled out of a boto.ec2.instance object. There is a lack of 91 | consistency with variable spellings (camelCase and underscores) since this 92 | just loops through all variables the object exposes. It is preferred to use the 93 | ones with underscores when multiple exist. 94 | 95 | In addition, if an instance has AWS Tags associated with it, each tag is a new 96 | variable named: 97 | - ec2_tag_[Key] = [Value] 98 | 99 | Security groups are comma-separated in 'ec2_security_group_ids' and 100 | 'ec2_security_group_names'. 101 | ''' 102 | 103 | # (c) 2012, Peter Sankauskas 104 | # 105 | # This file is part of Ansible, 106 | # 107 | # Ansible is free software: you can redistribute it and/or modify 108 | # it under the terms of the GNU General Public License as published by 109 | # the Free Software Foundation, either version 3 of the License, or 110 | # (at your option) any later version. 111 | # 112 | # Ansible is distributed in the hope that it will be useful, 113 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 114 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 115 | # GNU General Public License for more details. 116 | # 117 | # You should have received a copy of the GNU General Public License 118 | # along with Ansible. If not, see . 
119 | 120 | ###################################################################### 121 | 122 | import sys 123 | import os 124 | import argparse 125 | import re 126 | from time import time 127 | import boto 128 | from boto import ec2 129 | from boto import rds 130 | from boto import elasticache 131 | from boto import route53 132 | import six 133 | 134 | from six.moves import configparser 135 | from collections import defaultdict 136 | 137 | try: 138 | import json 139 | except ImportError: 140 | import simplejson as json 141 | 142 | 143 | class Ec2Inventory(object): 144 | 145 | def _empty_inventory(self): 146 | return {"_meta" : {"hostvars" : {}}} 147 | 148 | def __init__(self): 149 | ''' Main execution path ''' 150 | 151 | # Inventory grouped by instance IDs, tags, security groups, regions, 152 | # and availability zones 153 | self.inventory = self._empty_inventory() 154 | 155 | # Index of hostname (address) to instance ID 156 | self.index = {} 157 | 158 | # Boto profile to use (if any) 159 | self.boto_profile = None 160 | 161 | # AWS credentials. 162 | self.credentials = {} 163 | 164 | # Read settings and parse CLI arguments 165 | self.parse_cli_args() 166 | self.read_settings() 167 | 168 | # Make sure that profile_name is not passed at all if not set 169 | # as pre 2.24 boto will fall over otherwise 170 | if self.boto_profile: 171 | if not hasattr(boto.ec2.EC2Connection, 'profile_name'): 172 | self.fail_with_error("boto version must be >= 2.24 to use profile") 173 | 174 | # Cache 175 | if self.args.refresh_cache: 176 | self.do_api_calls_update_cache() 177 | elif not self.is_cache_valid(): 178 | self.do_api_calls_update_cache() 179 | 180 | # Data to print 181 | if self.args.host: 182 | data_to_print = self.get_host_info() 183 | 184 | elif self.args.list: 185 | # Display list of instances for inventory 186 | if self.inventory == self._empty_inventory(): 187 | data_to_print = self.get_inventory_from_cache() 188 | else: 189 | data_to_print = self.json_format_dict(self.inventory, True) 190 | 191 | print(data_to_print) 192 | 193 | 194 | def is_cache_valid(self): 195 | ''' Determines if the cache files have expired, or if it is still valid ''' 196 | 197 | if os.path.isfile(self.cache_path_cache): 198 | mod_time = os.path.getmtime(self.cache_path_cache) 199 | current_time = time() 200 | if (mod_time + self.cache_max_age) > current_time: 201 | if os.path.isfile(self.cache_path_index): 202 | return True 203 | 204 | return False 205 | 206 | 207 | def read_settings(self): 208 | ''' Reads the settings from the ec2.ini file ''' 209 | if six.PY3: 210 | config = configparser.ConfigParser() 211 | else: 212 | config = configparser.SafeConfigParser() 213 | ec2_default_ini_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'ec2.ini') 214 | ec2_ini_path = os.path.expanduser(os.path.expandvars(os.environ.get('EC2_INI_PATH', ec2_default_ini_path))) 215 | config.read(ec2_ini_path) 216 | 217 | # is eucalyptus? 
218 | self.eucalyptus_host = None 219 | self.eucalyptus = False 220 | if config.has_option('ec2', 'eucalyptus'): 221 | self.eucalyptus = config.getboolean('ec2', 'eucalyptus') 222 | if self.eucalyptus and config.has_option('ec2', 'eucalyptus_host'): 223 | self.eucalyptus_host = config.get('ec2', 'eucalyptus_host') 224 | 225 | # Regions 226 | self.regions = [] 227 | configRegions = config.get('ec2', 'regions') 228 | configRegions_exclude = config.get('ec2', 'regions_exclude') 229 | if (configRegions == 'all'): 230 | if self.eucalyptus_host: 231 | self.regions.append(boto.connect_euca(host=self.eucalyptus_host).region.name, **self.credentials) 232 | else: 233 | for regionInfo in ec2.regions(): 234 | if regionInfo.name not in configRegions_exclude: 235 | self.regions.append(regionInfo.name) 236 | else: 237 | self.regions = configRegions.split(",") 238 | 239 | # Destination addresses 240 | self.destination_variable = config.get('ec2', 'destination_variable') 241 | self.vpc_destination_variable = config.get('ec2', 'vpc_destination_variable') 242 | 243 | if config.has_option('ec2', 'hostname_variable'): 244 | self.hostname_variable = config.get('ec2', 'hostname_variable') 245 | else: 246 | self.hostname_variable = None 247 | 248 | if config.has_option('ec2', 'destination_format') and \ 249 | config.has_option('ec2', 'destination_format_tags'): 250 | self.destination_format = config.get('ec2', 'destination_format') 251 | self.destination_format_tags = config.get('ec2', 'destination_format_tags').split(',') 252 | else: 253 | self.destination_format = None 254 | self.destination_format_tags = None 255 | 256 | # Route53 257 | self.route53_enabled = config.getboolean('ec2', 'route53') 258 | self.route53_excluded_zones = [] 259 | if config.has_option('ec2', 'route53_excluded_zones'): 260 | self.route53_excluded_zones.extend( 261 | config.get('ec2', 'route53_excluded_zones', '').split(',')) 262 | 263 | # Include RDS instances? 264 | self.rds_enabled = True 265 | if config.has_option('ec2', 'rds'): 266 | self.rds_enabled = config.getboolean('ec2', 'rds') 267 | 268 | # Include ElastiCache instances? 269 | self.elasticache_enabled = True 270 | if config.has_option('ec2', 'elasticache'): 271 | self.elasticache_enabled = config.getboolean('ec2', 'elasticache') 272 | 273 | # Return all EC2 instances? 274 | if config.has_option('ec2', 'all_instances'): 275 | self.all_instances = config.getboolean('ec2', 'all_instances') 276 | else: 277 | self.all_instances = False 278 | 279 | # Instance states to be gathered in inventory. Default is 'running'. 280 | # Setting 'all_instances' to 'yes' overrides this option. 281 | ec2_valid_instance_states = [ 282 | 'pending', 283 | 'running', 284 | 'shutting-down', 285 | 'terminated', 286 | 'stopping', 287 | 'stopped' 288 | ] 289 | self.ec2_instance_states = [] 290 | if self.all_instances: 291 | self.ec2_instance_states = ec2_valid_instance_states 292 | elif config.has_option('ec2', 'instance_states'): 293 | for instance_state in config.get('ec2', 'instance_states').split(','): 294 | instance_state = instance_state.strip() 295 | if instance_state not in ec2_valid_instance_states: 296 | continue 297 | self.ec2_instance_states.append(instance_state) 298 | else: 299 | self.ec2_instance_states = ['running'] 300 | 301 | # Return all RDS instances? 
(if RDS is enabled) 302 | if config.has_option('ec2', 'all_rds_instances') and self.rds_enabled: 303 | self.all_rds_instances = config.getboolean('ec2', 'all_rds_instances') 304 | else: 305 | self.all_rds_instances = False 306 | 307 | # Return all ElastiCache replication groups? (if ElastiCache is enabled) 308 | if config.has_option('ec2', 'all_elasticache_replication_groups') and self.elasticache_enabled: 309 | self.all_elasticache_replication_groups = config.getboolean('ec2', 'all_elasticache_replication_groups') 310 | else: 311 | self.all_elasticache_replication_groups = False 312 | 313 | # Return all ElastiCache clusters? (if ElastiCache is enabled) 314 | if config.has_option('ec2', 'all_elasticache_clusters') and self.elasticache_enabled: 315 | self.all_elasticache_clusters = config.getboolean('ec2', 'all_elasticache_clusters') 316 | else: 317 | self.all_elasticache_clusters = False 318 | 319 | # Return all ElastiCache nodes? (if ElastiCache is enabled) 320 | if config.has_option('ec2', 'all_elasticache_nodes') and self.elasticache_enabled: 321 | self.all_elasticache_nodes = config.getboolean('ec2', 'all_elasticache_nodes') 322 | else: 323 | self.all_elasticache_nodes = False 324 | 325 | # boto configuration profile (prefer CLI argument) 326 | self.boto_profile = self.args.boto_profile 327 | if config.has_option('ec2', 'boto_profile') and not self.boto_profile: 328 | self.boto_profile = config.get('ec2', 'boto_profile') 329 | 330 | # AWS credentials (prefer environment variables) 331 | if not (self.boto_profile or os.environ.get('AWS_ACCESS_KEY_ID') or 332 | os.environ.get('AWS_PROFILE')): 333 | if config.has_option('credentials', 'aws_access_key_id'): 334 | aws_access_key_id = config.get('credentials', 'aws_access_key_id') 335 | else: 336 | aws_access_key_id = None 337 | if config.has_option('credentials', 'aws_secret_access_key'): 338 | aws_secret_access_key = config.get('credentials', 'aws_secret_access_key') 339 | else: 340 | aws_secret_access_key = None 341 | if config.has_option('credentials', 'aws_security_token'): 342 | aws_security_token = config.get('credentials', 'aws_security_token') 343 | else: 344 | aws_security_token = None 345 | if aws_access_key_id: 346 | self.credentials = { 347 | 'aws_access_key_id': aws_access_key_id, 348 | 'aws_secret_access_key': aws_secret_access_key 349 | } 350 | if aws_security_token: 351 | self.credentials['security_token'] = aws_security_token 352 | 353 | # Cache related 354 | cache_dir = os.path.expanduser(config.get('ec2', 'cache_path')) 355 | if self.boto_profile: 356 | cache_dir = os.path.join(cache_dir, 'profile_' + self.boto_profile) 357 | if not os.path.exists(cache_dir): 358 | os.makedirs(cache_dir) 359 | 360 | cache_name = 'ansible-ec2' 361 | aws_profile = lambda: (self.boto_profile or 362 | os.environ.get('AWS_PROFILE') or 363 | os.environ.get('AWS_ACCESS_KEY_ID') or 364 | self.credentials.get('aws_access_key_id', None)) 365 | if aws_profile(): 366 | cache_name = '%s-%s' % (cache_name, aws_profile()) 367 | self.cache_path_cache = cache_dir + "/%s.cache" % cache_name 368 | self.cache_path_index = cache_dir + "/%s.index" % cache_name 369 | self.cache_max_age = config.getint('ec2', 'cache_max_age') 370 | 371 | if config.has_option('ec2', 'expand_csv_tags'): 372 | self.expand_csv_tags = config.getboolean('ec2', 'expand_csv_tags') 373 | else: 374 | self.expand_csv_tags = False 375 | 376 | # Configure nested groups instead of flat namespace. 
377 | if config.has_option('ec2', 'nested_groups'): 378 | self.nested_groups = config.getboolean('ec2', 'nested_groups') 379 | else: 380 | self.nested_groups = False 381 | 382 | # Replace dash or not in group names 383 | if config.has_option('ec2', 'replace_dash_in_groups'): 384 | self.replace_dash_in_groups = config.getboolean('ec2', 'replace_dash_in_groups') 385 | else: 386 | self.replace_dash_in_groups = True 387 | 388 | # Configure which groups should be created. 389 | group_by_options = [ 390 | 'group_by_instance_id', 391 | 'group_by_region', 392 | 'group_by_availability_zone', 393 | 'group_by_ami_id', 394 | 'group_by_instance_type', 395 | 'group_by_key_pair', 396 | 'group_by_vpc_id', 397 | 'group_by_security_group', 398 | 'group_by_tag_keys', 399 | 'group_by_tag_none', 400 | 'group_by_route53_names', 401 | 'group_by_rds_engine', 402 | 'group_by_rds_parameter_group', 403 | 'group_by_elasticache_engine', 404 | 'group_by_elasticache_cluster', 405 | 'group_by_elasticache_parameter_group', 406 | 'group_by_elasticache_replication_group', 407 | ] 408 | for option in group_by_options: 409 | if config.has_option('ec2', option): 410 | setattr(self, option, config.getboolean('ec2', option)) 411 | else: 412 | setattr(self, option, True) 413 | 414 | # Do we need to just include hosts that match a pattern? 415 | try: 416 | pattern_include = config.get('ec2', 'pattern_include') 417 | if pattern_include and len(pattern_include) > 0: 418 | self.pattern_include = re.compile(pattern_include) 419 | else: 420 | self.pattern_include = None 421 | except configparser.NoOptionError: 422 | self.pattern_include = None 423 | 424 | # Do we need to exclude hosts that match a pattern? 425 | try: 426 | pattern_exclude = config.get('ec2', 'pattern_exclude'); 427 | if pattern_exclude and len(pattern_exclude) > 0: 428 | self.pattern_exclude = re.compile(pattern_exclude) 429 | else: 430 | self.pattern_exclude = None 431 | except configparser.NoOptionError: 432 | self.pattern_exclude = None 433 | 434 | # Instance filters (see boto and EC2 API docs). Ignore invalid filters. 
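# NOTE: in this repository, hosts/ec2.ini sets instance_filters = tag:ansibleFilter=Kubernetes01,
# so only instances carrying that tag (presumably applied by the Terraform configuration)
# end up in the generated Ansible inventory.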
435 | self.ec2_instance_filters = defaultdict(list) 436 | if config.has_option('ec2', 'instance_filters'): 437 | 438 | filters = [f for f in config.get('ec2', 'instance_filters').split(',') if f] 439 | 440 | for instance_filter in filters: 441 | instance_filter = instance_filter.strip() 442 | if not instance_filter or '=' not in instance_filter: 443 | continue 444 | filter_key, filter_value = [x.strip() for x in instance_filter.split('=', 1)] 445 | if not filter_key: 446 | continue 447 | self.ec2_instance_filters[filter_key].append(filter_value) 448 | 449 | def parse_cli_args(self): 450 | ''' Command line argument processing ''' 451 | 452 | parser = argparse.ArgumentParser(description='Produce an Ansible Inventory file based on EC2') 453 | parser.add_argument('--list', action='store_true', default=True, 454 | help='List instances (default: True)') 455 | parser.add_argument('--host', action='store', 456 | help='Get all the variables about a specific instance') 457 | parser.add_argument('--refresh-cache', action='store_true', default=False, 458 | help='Force refresh of cache by making API requests to EC2 (default: False - use cache files)') 459 | parser.add_argument('--profile', '--boto-profile', action='store', dest='boto_profile', 460 | help='Use boto profile for connections to EC2') 461 | self.args = parser.parse_args() 462 | 463 | 464 | def do_api_calls_update_cache(self): 465 | ''' Do API calls to each region, and save data in cache files ''' 466 | 467 | if self.route53_enabled: 468 | self.get_route53_records() 469 | 470 | for region in self.regions: 471 | self.get_instances_by_region(region) 472 | if self.rds_enabled: 473 | self.get_rds_instances_by_region(region) 474 | if self.elasticache_enabled: 475 | self.get_elasticache_clusters_by_region(region) 476 | self.get_elasticache_replication_groups_by_region(region) 477 | 478 | self.write_to_cache(self.inventory, self.cache_path_cache) 479 | self.write_to_cache(self.index, self.cache_path_index) 480 | 481 | def connect(self, region): 482 | ''' create connection to api server''' 483 | if self.eucalyptus: 484 | conn = boto.connect_euca(host=self.eucalyptus_host, **self.credentials) 485 | conn.APIVersion = '2010-08-31' 486 | else: 487 | conn = self.connect_to_aws(ec2, region) 488 | return conn 489 | 490 | def boto_fix_security_token_in_profile(self, connect_args): 491 | ''' monkey patch for boto issue boto/boto#2100 ''' 492 | profile = 'profile ' + self.boto_profile 493 | if boto.config.has_option(profile, 'aws_security_token'): 494 | connect_args['security_token'] = boto.config.get(profile, 'aws_security_token') 495 | return connect_args 496 | 497 | def connect_to_aws(self, module, region): 498 | connect_args = self.credentials 499 | 500 | # only pass the profile name if it's set (as it is not supported by older boto versions) 501 | if self.boto_profile: 502 | connect_args['profile_name'] = self.boto_profile 503 | self.boto_fix_security_token_in_profile(connect_args) 504 | 505 | conn = module.connect_to_region(region, **connect_args) 506 | # connect_to_region will fail "silently" by returning None if the region name is wrong or not supported 507 | if conn is None: 508 | self.fail_with_error("region name: %s likely not supported, or AWS is down. connection to region failed." 
% region) 509 | return conn 510 | 511 | def get_instances_by_region(self, region): 512 | ''' Makes an AWS EC2 API call to the list of instances in a particular 513 | region ''' 514 | 515 | try: 516 | conn = self.connect(region) 517 | reservations = [] 518 | if self.ec2_instance_filters: 519 | for filter_key, filter_values in self.ec2_instance_filters.items(): 520 | reservations.extend(conn.get_all_instances(filters = { filter_key : filter_values })) 521 | else: 522 | reservations = conn.get_all_instances() 523 | 524 | # Pull the tags back in a second step 525 | # AWS are on record as saying that the tags fetched in the first `get_all_instances` request are not 526 | # reliable and may be missing, and the only way to guarantee they are there is by calling `get_all_tags` 527 | instance_ids = [] 528 | for reservation in reservations: 529 | instance_ids.extend([instance.id for instance in reservation.instances]) 530 | 531 | max_filter_value = 199 532 | tags = [] 533 | for i in range(0, len(instance_ids), max_filter_value): 534 | tags.extend(conn.get_all_tags(filters={'resource-type': 'instance', 'resource-id': instance_ids[i:i+max_filter_value]})) 535 | 536 | tags_by_instance_id = defaultdict(dict) 537 | for tag in tags: 538 | tags_by_instance_id[tag.res_id][tag.name] = tag.value 539 | 540 | for reservation in reservations: 541 | for instance in reservation.instances: 542 | instance.tags = tags_by_instance_id[instance.id] 543 | self.add_instance(instance, region) 544 | 545 | except boto.exception.BotoServerError as e: 546 | if e.error_code == 'AuthFailure': 547 | error = self.get_auth_error_message() 548 | else: 549 | backend = 'Eucalyptus' if self.eucalyptus else 'AWS' 550 | error = "Error connecting to %s backend.\n%s" % (backend, e.message) 551 | self.fail_with_error(error, 'getting EC2 instances') 552 | 553 | def get_rds_instances_by_region(self, region): 554 | ''' Makes an AWS API call to the list of RDS instances in a particular 555 | region ''' 556 | 557 | try: 558 | conn = self.connect_to_aws(rds, region) 559 | if conn: 560 | marker = None 561 | while True: 562 | instances = conn.get_all_dbinstances(marker=marker) 563 | marker = instances.marker 564 | for instance in instances: 565 | self.add_rds_instance(instance, region) 566 | if not marker: 567 | break 568 | except boto.exception.BotoServerError as e: 569 | error = e.reason 570 | 571 | if e.error_code == 'AuthFailure': 572 | error = self.get_auth_error_message() 573 | if not e.reason == "Forbidden": 574 | error = "Looks like AWS RDS is down:\n%s" % e.message 575 | self.fail_with_error(error, 'getting RDS instances') 576 | 577 | def get_elasticache_clusters_by_region(self, region): 578 | ''' Makes an AWS API call to the list of ElastiCache clusters (with 579 | nodes' info) in a particular region.''' 580 | 581 | # ElastiCache boto module doesn't provide a get_all_intances method, 582 | # that's why we need to call describe directly (it would be called by 583 | # the shorthand method anyway...) 
584 | try: 585 | conn = self.connect_to_aws(elasticache, region) 586 | if conn: 587 | # show_cache_node_info = True 588 | # because we also want nodes' information 589 | response = conn.describe_cache_clusters(None, None, None, True) 590 | 591 | except boto.exception.BotoServerError as e: 592 | error = e.reason 593 | 594 | if e.error_code == 'AuthFailure': 595 | error = self.get_auth_error_message() 596 | if not e.reason == "Forbidden": 597 | error = "Looks like AWS ElastiCache is down:\n%s" % e.message 598 | self.fail_with_error(error, 'getting ElastiCache clusters') 599 | 600 | try: 601 | # Boto also doesn't provide wrapper classes to CacheClusters or 602 | # CacheNodes. Because of that wo can't make use of the get_list 603 | # method in the AWSQueryConnection. Let's do the work manually 604 | clusters = response['DescribeCacheClustersResponse']['DescribeCacheClustersResult']['CacheClusters'] 605 | 606 | except KeyError as e: 607 | error = "ElastiCache query to AWS failed (unexpected format)." 608 | self.fail_with_error(error, 'getting ElastiCache clusters') 609 | 610 | for cluster in clusters: 611 | self.add_elasticache_cluster(cluster, region) 612 | 613 | def get_elasticache_replication_groups_by_region(self, region): 614 | ''' Makes an AWS API call to the list of ElastiCache replication groups 615 | in a particular region.''' 616 | 617 | # ElastiCache boto module doesn't provide a get_all_intances method, 618 | # that's why we need to call describe directly (it would be called by 619 | # the shorthand method anyway...) 620 | try: 621 | conn = self.connect_to_aws(elasticache, region) 622 | if conn: 623 | response = conn.describe_replication_groups() 624 | 625 | except boto.exception.BotoServerError as e: 626 | error = e.reason 627 | 628 | if e.error_code == 'AuthFailure': 629 | error = self.get_auth_error_message() 630 | if not e.reason == "Forbidden": 631 | error = "Looks like AWS ElastiCache [Replication Groups] is down:\n%s" % e.message 632 | self.fail_with_error(error, 'getting ElastiCache clusters') 633 | 634 | try: 635 | # Boto also doesn't provide wrapper classes to ReplicationGroups 636 | # Because of that wo can't make use of the get_list method in the 637 | # AWSQueryConnection. Let's do the work manually 638 | replication_groups = response['DescribeReplicationGroupsResponse']['DescribeReplicationGroupsResult']['ReplicationGroups'] 639 | 640 | except KeyError as e: 641 | error = "ElastiCache [Replication Groups] query to AWS failed (unexpected format)." 
642 | self.fail_with_error(error, 'getting ElastiCache clusters') 643 | 644 | for replication_group in replication_groups: 645 | self.add_elasticache_replication_group(replication_group, region) 646 | 647 | def get_auth_error_message(self): 648 | ''' create an informative error message if there is an issue authenticating''' 649 | errors = ["Authentication error retrieving ec2 inventory."] 650 | if None in [os.environ.get('AWS_ACCESS_KEY_ID'), os.environ.get('AWS_SECRET_ACCESS_KEY')]: 651 | errors.append(' - No AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY environment vars found') 652 | else: 653 | errors.append(' - AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment vars found but may not be correct') 654 | 655 | boto_paths = ['/etc/boto.cfg', '~/.boto', '~/.aws/credentials'] 656 | boto_config_found = list(p for p in boto_paths if os.path.isfile(os.path.expanduser(p))) 657 | if len(boto_config_found) > 0: 658 | errors.append(" - Boto configs found at '%s', but the credentials contained may not be correct" % ', '.join(boto_config_found)) 659 | else: 660 | errors.append(" - No Boto config found at any expected location '%s'" % ', '.join(boto_paths)) 661 | 662 | return '\n'.join(errors) 663 | 664 | def fail_with_error(self, err_msg, err_operation=None): 665 | '''log an error to std err for ansible-playbook to consume and exit''' 666 | if err_operation: 667 | err_msg = 'ERROR: "{err_msg}", while: {err_operation}'.format( 668 | err_msg=err_msg, err_operation=err_operation) 669 | sys.stderr.write(err_msg) 670 | sys.exit(1) 671 | 672 | def get_instance(self, region, instance_id): 673 | conn = self.connect(region) 674 | 675 | reservations = conn.get_all_instances([instance_id]) 676 | for reservation in reservations: 677 | for instance in reservation.instances: 678 | return instance 679 | 680 | def add_instance(self, instance, region): 681 | ''' Adds an instance to the inventory and index, as long as it is 682 | addressable ''' 683 | 684 | # Only return instances with desired instance states 685 | if instance.state not in self.ec2_instance_states: 686 | return 687 | 688 | # Select the best destination address 689 | if self.destination_format and self.destination_format_tags: 690 | dest = self.destination_format.format(*[ getattr(instance, 'tags').get(tag, '') for tag in self.destination_format_tags ]) 691 | elif instance.subnet_id: 692 | dest = getattr(instance, self.vpc_destination_variable, None) 693 | if dest is None: 694 | dest = getattr(instance, 'tags').get(self.vpc_destination_variable, None) 695 | else: 696 | dest = getattr(instance, self.destination_variable, None) 697 | if dest is None: 698 | dest = getattr(instance, 'tags').get(self.destination_variable, None) 699 | 700 | if not dest: 701 | # Skip instances we cannot address (e.g. 
private VPC subnet) 702 | return 703 | 704 | # Set the inventory name 705 | hostname = None 706 | if self.hostname_variable: 707 | if self.hostname_variable.startswith('tag_'): 708 | hostname = instance.tags.get(self.hostname_variable[4:], None) 709 | else: 710 | hostname = getattr(instance, self.hostname_variable) 711 | 712 | # If we can't get a nice hostname, use the destination address 713 | if not hostname: 714 | hostname = dest 715 | else: 716 | hostname = self.to_safe(hostname).lower() 717 | 718 | # if we only want to include hosts that match a pattern, skip those that don't 719 | if self.pattern_include and not self.pattern_include.match(hostname): 720 | return 721 | 722 | # if we need to exclude hosts that match a pattern, skip those 723 | if self.pattern_exclude and self.pattern_exclude.match(hostname): 724 | return 725 | 726 | # Add to index 727 | self.index[hostname] = [region, instance.id] 728 | 729 | # Inventory: Group by instance ID (always a group of 1) 730 | if self.group_by_instance_id: 731 | self.inventory[instance.id] = [hostname] 732 | if self.nested_groups: 733 | self.push_group(self.inventory, 'instances', instance.id) 734 | 735 | # Inventory: Group by region 736 | if self.group_by_region: 737 | self.push(self.inventory, region, hostname) 738 | if self.nested_groups: 739 | self.push_group(self.inventory, 'regions', region) 740 | 741 | # Inventory: Group by availability zone 742 | if self.group_by_availability_zone: 743 | self.push(self.inventory, instance.placement, hostname) 744 | if self.nested_groups: 745 | if self.group_by_region: 746 | self.push_group(self.inventory, region, instance.placement) 747 | self.push_group(self.inventory, 'zones', instance.placement) 748 | 749 | # Inventory: Group by Amazon Machine Image (AMI) ID 750 | if self.group_by_ami_id: 751 | ami_id = self.to_safe(instance.image_id) 752 | self.push(self.inventory, ami_id, hostname) 753 | if self.nested_groups: 754 | self.push_group(self.inventory, 'images', ami_id) 755 | 756 | # Inventory: Group by instance type 757 | if self.group_by_instance_type: 758 | type_name = self.to_safe('type_' + instance.instance_type) 759 | self.push(self.inventory, type_name, hostname) 760 | if self.nested_groups: 761 | self.push_group(self.inventory, 'types', type_name) 762 | 763 | # Inventory: Group by key pair 764 | if self.group_by_key_pair and instance.key_name: 765 | key_name = self.to_safe('key_' + instance.key_name) 766 | self.push(self.inventory, key_name, hostname) 767 | if self.nested_groups: 768 | self.push_group(self.inventory, 'keys', key_name) 769 | 770 | # Inventory: Group by VPC 771 | if self.group_by_vpc_id and instance.vpc_id: 772 | vpc_id_name = self.to_safe('vpc_id_' + instance.vpc_id) 773 | self.push(self.inventory, vpc_id_name, hostname) 774 | if self.nested_groups: 775 | self.push_group(self.inventory, 'vpcs', vpc_id_name) 776 | 777 | # Inventory: Group by security group 778 | if self.group_by_security_group: 779 | try: 780 | for group in instance.groups: 781 | key = self.to_safe("security_group_" + group.name) 782 | self.push(self.inventory, key, hostname) 783 | if self.nested_groups: 784 | self.push_group(self.inventory, 'security_groups', key) 785 | except AttributeError: 786 | self.fail_with_error('\n'.join(['Package boto seems a bit older.', 787 | 'Please upgrade boto >= 2.3.0.'])) 788 | 789 | # Inventory: Group by tag keys 790 | if self.group_by_tag_keys: 791 | for k, v in instance.tags.items(): 792 | if self.expand_csv_tags and v and ',' in v: 793 | values = map(lambda x: x.strip(), 
v.split(',')) 794 | else: 795 | values = [v] 796 | 797 | for v in values: 798 | if v: 799 | key = self.to_safe("tag_" + k + "=" + v) 800 | else: 801 | key = self.to_safe("tag_" + k) 802 | self.push(self.inventory, key, hostname) 803 | if self.nested_groups: 804 | self.push_group(self.inventory, 'tags', self.to_safe("tag_" + k)) 805 | if v: 806 | self.push_group(self.inventory, self.to_safe("tag_" + k), key) 807 | 808 | # Inventory: Group by Route53 domain names if enabled 809 | if self.route53_enabled and self.group_by_route53_names: 810 | route53_names = self.get_instance_route53_names(instance) 811 | for name in route53_names: 812 | self.push(self.inventory, name, hostname) 813 | if self.nested_groups: 814 | self.push_group(self.inventory, 'route53', name) 815 | 816 | # Global Tag: instances without tags 817 | if self.group_by_tag_none and len(instance.tags) == 0: 818 | self.push(self.inventory, 'tag_none', hostname) 819 | if self.nested_groups: 820 | self.push_group(self.inventory, 'tags', 'tag_none') 821 | 822 | # Global Tag: tag all EC2 instances 823 | self.push(self.inventory, 'ec2', hostname) 824 | 825 | self.inventory["_meta"]["hostvars"][hostname] = self.get_host_info_dict_from_instance(instance) 826 | self.inventory["_meta"]["hostvars"][hostname]['ansible_ssh_host'] = dest 827 | 828 | 829 | def add_rds_instance(self, instance, region): 830 | ''' Adds an RDS instance to the inventory and index, as long as it is 831 | addressable ''' 832 | 833 | # Only want available instances unless all_rds_instances is True 834 | if not self.all_rds_instances and instance.status != 'available': 835 | return 836 | 837 | # Select the best destination address 838 | dest = instance.endpoint[0] 839 | 840 | if not dest: 841 | # Skip instances we cannot address (e.g. 
private VPC subnet) 842 | return 843 | 844 | # Set the inventory name 845 | hostname = None 846 | if self.hostname_variable: 847 | if self.hostname_variable.startswith('tag_'): 848 | hostname = instance.tags.get(self.hostname_variable[4:], None) 849 | else: 850 | hostname = getattr(instance, self.hostname_variable) 851 | 852 | # If we can't get a nice hostname, use the destination address 853 | if not hostname: 854 | hostname = dest 855 | 856 | hostname = self.to_safe(hostname).lower() 857 | 858 | # Add to index 859 | self.index[hostname] = [region, instance.id] 860 | 861 | # Inventory: Group by instance ID (always a group of 1) 862 | if self.group_by_instance_id: 863 | self.inventory[instance.id] = [hostname] 864 | if self.nested_groups: 865 | self.push_group(self.inventory, 'instances', instance.id) 866 | 867 | # Inventory: Group by region 868 | if self.group_by_region: 869 | self.push(self.inventory, region, hostname) 870 | if self.nested_groups: 871 | self.push_group(self.inventory, 'regions', region) 872 | 873 | # Inventory: Group by availability zone 874 | if self.group_by_availability_zone: 875 | self.push(self.inventory, instance.availability_zone, hostname) 876 | if self.nested_groups: 877 | if self.group_by_region: 878 | self.push_group(self.inventory, region, instance.availability_zone) 879 | self.push_group(self.inventory, 'zones', instance.availability_zone) 880 | 881 | # Inventory: Group by instance type 882 | if self.group_by_instance_type: 883 | type_name = self.to_safe('type_' + instance.instance_class) 884 | self.push(self.inventory, type_name, hostname) 885 | if self.nested_groups: 886 | self.push_group(self.inventory, 'types', type_name) 887 | 888 | # Inventory: Group by VPC 889 | if self.group_by_vpc_id and instance.subnet_group and instance.subnet_group.vpc_id: 890 | vpc_id_name = self.to_safe('vpc_id_' + instance.subnet_group.vpc_id) 891 | self.push(self.inventory, vpc_id_name, hostname) 892 | if self.nested_groups: 893 | self.push_group(self.inventory, 'vpcs', vpc_id_name) 894 | 895 | # Inventory: Group by security group 896 | if self.group_by_security_group: 897 | try: 898 | if instance.security_group: 899 | key = self.to_safe("security_group_" + instance.security_group.name) 900 | self.push(self.inventory, key, hostname) 901 | if self.nested_groups: 902 | self.push_group(self.inventory, 'security_groups', key) 903 | 904 | except AttributeError: 905 | self.fail_with_error('\n'.join(['Package boto seems a bit older.', 906 | 'Please upgrade boto >= 2.3.0.'])) 907 | 908 | 909 | # Inventory: Group by engine 910 | if self.group_by_rds_engine: 911 | self.push(self.inventory, self.to_safe("rds_" + instance.engine), hostname) 912 | if self.nested_groups: 913 | self.push_group(self.inventory, 'rds_engines', self.to_safe("rds_" + instance.engine)) 914 | 915 | # Inventory: Group by parameter group 916 | if self.group_by_rds_parameter_group: 917 | self.push(self.inventory, self.to_safe("rds_parameter_group_" + instance.parameter_group.name), hostname) 918 | if self.nested_groups: 919 | self.push_group(self.inventory, 'rds_parameter_groups', self.to_safe("rds_parameter_group_" + instance.parameter_group.name)) 920 | 921 | # Global Tag: all RDS instances 922 | self.push(self.inventory, 'rds', hostname) 923 | 924 | self.inventory["_meta"]["hostvars"][hostname] = self.get_host_info_dict_from_instance(instance) 925 | self.inventory["_meta"]["hostvars"][hostname]['ansible_ssh_host'] = dest 926 | 927 | def add_elasticache_cluster(self, cluster, region): 928 | ''' Adds an ElastiCache 
cluster to the inventory and index, as long as 929 | it's nodes are addressable ''' 930 | 931 | # Only want available clusters unless all_elasticache_clusters is True 932 | if not self.all_elasticache_clusters and cluster['CacheClusterStatus'] != 'available': 933 | return 934 | 935 | # Select the best destination address 936 | if 'ConfigurationEndpoint' in cluster and cluster['ConfigurationEndpoint']: 937 | # Memcached cluster 938 | dest = cluster['ConfigurationEndpoint']['Address'] 939 | is_redis = False 940 | else: 941 | # Redis sigle node cluster 942 | # Because all Redis clusters are single nodes, we'll merge the 943 | # info from the cluster with info about the node 944 | dest = cluster['CacheNodes'][0]['Endpoint']['Address'] 945 | is_redis = True 946 | 947 | if not dest: 948 | # Skip clusters we cannot address (e.g. private VPC subnet) 949 | return 950 | 951 | # Add to index 952 | self.index[dest] = [region, cluster['CacheClusterId']] 953 | 954 | # Inventory: Group by instance ID (always a group of 1) 955 | if self.group_by_instance_id: 956 | self.inventory[cluster['CacheClusterId']] = [dest] 957 | if self.nested_groups: 958 | self.push_group(self.inventory, 'instances', cluster['CacheClusterId']) 959 | 960 | # Inventory: Group by region 961 | if self.group_by_region and not is_redis: 962 | self.push(self.inventory, region, dest) 963 | if self.nested_groups: 964 | self.push_group(self.inventory, 'regions', region) 965 | 966 | # Inventory: Group by availability zone 967 | if self.group_by_availability_zone and not is_redis: 968 | self.push(self.inventory, cluster['PreferredAvailabilityZone'], dest) 969 | if self.nested_groups: 970 | if self.group_by_region: 971 | self.push_group(self.inventory, region, cluster['PreferredAvailabilityZone']) 972 | self.push_group(self.inventory, 'zones', cluster['PreferredAvailabilityZone']) 973 | 974 | # Inventory: Group by node type 975 | if self.group_by_instance_type and not is_redis: 976 | type_name = self.to_safe('type_' + cluster['CacheNodeType']) 977 | self.push(self.inventory, type_name, dest) 978 | if self.nested_groups: 979 | self.push_group(self.inventory, 'types', type_name) 980 | 981 | # Inventory: Group by VPC (information not available in the current 982 | # AWS API version for ElastiCache) 983 | 984 | # Inventory: Group by security group 985 | if self.group_by_security_group and not is_redis: 986 | 987 | # Check for the existence of the 'SecurityGroups' key and also if 988 | # this key has some value. When the cluster is not placed in a SG 989 | # the query can return None here and cause an error. 
990 | if 'SecurityGroups' in cluster and cluster['SecurityGroups'] is not None: 991 | for security_group in cluster['SecurityGroups']: 992 | key = self.to_safe("security_group_" + security_group['SecurityGroupId']) 993 | self.push(self.inventory, key, dest) 994 | if self.nested_groups: 995 | self.push_group(self.inventory, 'security_groups', key) 996 | 997 | # Inventory: Group by engine 998 | if self.group_by_elasticache_engine and not is_redis: 999 | self.push(self.inventory, self.to_safe("elasticache_" + cluster['Engine']), dest) 1000 | if self.nested_groups: 1001 | self.push_group(self.inventory, 'elasticache_engines', self.to_safe(cluster['Engine'])) 1002 | 1003 | # Inventory: Group by parameter group 1004 | if self.group_by_elasticache_parameter_group: 1005 | self.push(self.inventory, self.to_safe("elasticache_parameter_group_" + cluster['CacheParameterGroup']['CacheParameterGroupName']), dest) 1006 | if self.nested_groups: 1007 | self.push_group(self.inventory, 'elasticache_parameter_groups', self.to_safe(cluster['CacheParameterGroup']['CacheParameterGroupName'])) 1008 | 1009 | # Inventory: Group by replication group 1010 | if self.group_by_elasticache_replication_group and 'ReplicationGroupId' in cluster and cluster['ReplicationGroupId']: 1011 | self.push(self.inventory, self.to_safe("elasticache_replication_group_" + cluster['ReplicationGroupId']), dest) 1012 | if self.nested_groups: 1013 | self.push_group(self.inventory, 'elasticache_replication_groups', self.to_safe(cluster['ReplicationGroupId'])) 1014 | 1015 | # Global Tag: all ElastiCache clusters 1016 | self.push(self.inventory, 'elasticache_clusters', cluster['CacheClusterId']) 1017 | 1018 | host_info = self.get_host_info_dict_from_describe_dict(cluster) 1019 | 1020 | self.inventory["_meta"]["hostvars"][dest] = host_info 1021 | 1022 | # Add the nodes 1023 | for node in cluster['CacheNodes']: 1024 | self.add_elasticache_node(node, cluster, region) 1025 | 1026 | def add_elasticache_node(self, node, cluster, region): 1027 | ''' Adds an ElastiCache node to the inventory and index, as long as 1028 | it is addressable ''' 1029 | 1030 | # Only want available nodes unless all_elasticache_nodes is True 1031 | if not self.all_elasticache_nodes and node['CacheNodeStatus'] != 'available': 1032 | return 1033 | 1034 | # Select the best destination address 1035 | dest = node['Endpoint']['Address'] 1036 | 1037 | if not dest: 1038 | # Skip nodes we cannot address (e.g. 
private VPC subnet) 1039 | return 1040 | 1041 | node_id = self.to_safe(cluster['CacheClusterId'] + '_' + node['CacheNodeId']) 1042 | 1043 | # Add to index 1044 | self.index[dest] = [region, node_id] 1045 | 1046 | # Inventory: Group by node ID (always a group of 1) 1047 | if self.group_by_instance_id: 1048 | self.inventory[node_id] = [dest] 1049 | if self.nested_groups: 1050 | self.push_group(self.inventory, 'instances', node_id) 1051 | 1052 | # Inventory: Group by region 1053 | if self.group_by_region: 1054 | self.push(self.inventory, region, dest) 1055 | if self.nested_groups: 1056 | self.push_group(self.inventory, 'regions', region) 1057 | 1058 | # Inventory: Group by availability zone 1059 | if self.group_by_availability_zone: 1060 | self.push(self.inventory, cluster['PreferredAvailabilityZone'], dest) 1061 | if self.nested_groups: 1062 | if self.group_by_region: 1063 | self.push_group(self.inventory, region, cluster['PreferredAvailabilityZone']) 1064 | self.push_group(self.inventory, 'zones', cluster['PreferredAvailabilityZone']) 1065 | 1066 | # Inventory: Group by node type 1067 | if self.group_by_instance_type: 1068 | type_name = self.to_safe('type_' + cluster['CacheNodeType']) 1069 | self.push(self.inventory, type_name, dest) 1070 | if self.nested_groups: 1071 | self.push_group(self.inventory, 'types', type_name) 1072 | 1073 | # Inventory: Group by VPC (information not available in the current 1074 | # AWS API version for ElastiCache) 1075 | 1076 | # Inventory: Group by security group 1077 | if self.group_by_security_group: 1078 | 1079 | # Check for the existence of the 'SecurityGroups' key and also if 1080 | # this key has some value. When the cluster is not placed in a SG 1081 | # the query can return None here and cause an error. 1082 | if 'SecurityGroups' in cluster and cluster['SecurityGroups'] is not None: 1083 | for security_group in cluster['SecurityGroups']: 1084 | key = self.to_safe("security_group_" + security_group['SecurityGroupId']) 1085 | self.push(self.inventory, key, dest) 1086 | if self.nested_groups: 1087 | self.push_group(self.inventory, 'security_groups', key) 1088 | 1089 | # Inventory: Group by engine 1090 | if self.group_by_elasticache_engine: 1091 | self.push(self.inventory, self.to_safe("elasticache_" + cluster['Engine']), dest) 1092 | if self.nested_groups: 1093 | self.push_group(self.inventory, 'elasticache_engines', self.to_safe("elasticache_" + cluster['Engine'])) 1094 | 1095 | # Inventory: Group by parameter group (done at cluster level) 1096 | 1097 | # Inventory: Group by replication group (done at cluster level) 1098 | 1099 | # Inventory: Group by ElastiCache Cluster 1100 | if self.group_by_elasticache_cluster: 1101 | self.push(self.inventory, self.to_safe("elasticache_cluster_" + cluster['CacheClusterId']), dest) 1102 | 1103 | # Global Tag: all ElastiCache nodes 1104 | self.push(self.inventory, 'elasticache_nodes', dest) 1105 | 1106 | host_info = self.get_host_info_dict_from_describe_dict(node) 1107 | 1108 | if dest in self.inventory["_meta"]["hostvars"]: 1109 | self.inventory["_meta"]["hostvars"][dest].update(host_info) 1110 | else: 1111 | self.inventory["_meta"]["hostvars"][dest] = host_info 1112 | 1113 | def add_elasticache_replication_group(self, replication_group, region): 1114 | ''' Adds an ElastiCache replication group to the inventory and index ''' 1115 | 1116 | # Only want available clusters unless all_elasticache_replication_groups is True 1117 | if not self.all_elasticache_replication_groups and replication_group['Status'] != 'available': 
1118 | return 1119 | 1120 | # Select the best destination address (PrimaryEndpoint) 1121 | dest = replication_group['NodeGroups'][0]['PrimaryEndpoint']['Address'] 1122 | 1123 | if not dest: 1124 | # Skip clusters we cannot address (e.g. private VPC subnet) 1125 | return 1126 | 1127 | # Add to index 1128 | self.index[dest] = [region, replication_group['ReplicationGroupId']] 1129 | 1130 | # Inventory: Group by ID (always a group of 1) 1131 | if self.group_by_instance_id: 1132 | self.inventory[replication_group['ReplicationGroupId']] = [dest] 1133 | if self.nested_groups: 1134 | self.push_group(self.inventory, 'instances', replication_group['ReplicationGroupId']) 1135 | 1136 | # Inventory: Group by region 1137 | if self.group_by_region: 1138 | self.push(self.inventory, region, dest) 1139 | if self.nested_groups: 1140 | self.push_group(self.inventory, 'regions', region) 1141 | 1142 | # Inventory: Group by availability zone (doesn't apply to replication groups) 1143 | 1144 | # Inventory: Group by node type (doesn't apply to replication groups) 1145 | 1146 | # Inventory: Group by VPC (information not available in the current 1147 | # AWS API version for replication groups 1148 | 1149 | # Inventory: Group by security group (doesn't apply to replication groups) 1150 | # Check this value in cluster level 1151 | 1152 | # Inventory: Group by engine (replication groups are always Redis) 1153 | if self.group_by_elasticache_engine: 1154 | self.push(self.inventory, 'elasticache_redis', dest) 1155 | if self.nested_groups: 1156 | self.push_group(self.inventory, 'elasticache_engines', 'redis') 1157 | 1158 | # Global Tag: all ElastiCache clusters 1159 | self.push(self.inventory, 'elasticache_replication_groups', replication_group['ReplicationGroupId']) 1160 | 1161 | host_info = self.get_host_info_dict_from_describe_dict(replication_group) 1162 | 1163 | self.inventory["_meta"]["hostvars"][dest] = host_info 1164 | 1165 | def get_route53_records(self): 1166 | ''' Get and store the map of resource records to domain names that 1167 | point to them. ''' 1168 | 1169 | r53_conn = route53.Route53Connection() 1170 | all_zones = r53_conn.get_zones() 1171 | 1172 | route53_zones = [ zone for zone in all_zones if zone.name[:-1] 1173 | not in self.route53_excluded_zones ] 1174 | 1175 | self.route53_records = {} 1176 | 1177 | for zone in route53_zones: 1178 | rrsets = r53_conn.get_all_rrsets(zone.id) 1179 | 1180 | for record_set in rrsets: 1181 | record_name = record_set.name 1182 | 1183 | if record_name.endswith('.'): 1184 | record_name = record_name[:-1] 1185 | 1186 | for resource in record_set.resource_records: 1187 | self.route53_records.setdefault(resource, set()) 1188 | self.route53_records[resource].add(record_name) 1189 | 1190 | 1191 | def get_instance_route53_names(self, instance): 1192 | ''' Check if an instance is referenced in the records we have from 1193 | Route53. If it is, return the list of domain names pointing to said 1194 | instance. If nothing points to it, return an empty list. 
''' 1195 | 1196 | instance_attributes = [ 'public_dns_name', 'private_dns_name', 1197 | 'ip_address', 'private_ip_address' ] 1198 | 1199 | name_list = set() 1200 | 1201 | for attrib in instance_attributes: 1202 | try: 1203 | value = getattr(instance, attrib) 1204 | except AttributeError: 1205 | continue 1206 | 1207 | if value in self.route53_records: 1208 | name_list.update(self.route53_records[value]) 1209 | 1210 | return list(name_list) 1211 | 1212 | def get_host_info_dict_from_instance(self, instance): 1213 | instance_vars = {} 1214 | for key in vars(instance): 1215 | value = getattr(instance, key) 1216 | key = self.to_safe('ec2_' + key) 1217 | 1218 | # Handle complex types 1219 | # state/previous_state changed to properties in boto in https://github.com/boto/boto/commit/a23c379837f698212252720d2af8dec0325c9518 1220 | if key == 'ec2__state': 1221 | instance_vars['ec2_state'] = instance.state or '' 1222 | instance_vars['ec2_state_code'] = instance.state_code 1223 | elif key == 'ec2__previous_state': 1224 | instance_vars['ec2_previous_state'] = instance.previous_state or '' 1225 | instance_vars['ec2_previous_state_code'] = instance.previous_state_code 1226 | elif type(value) in [int, bool]: 1227 | instance_vars[key] = value 1228 | elif isinstance(value, six.string_types): 1229 | instance_vars[key] = value.strip() 1230 | elif type(value) == type(None): 1231 | instance_vars[key] = '' 1232 | elif key == 'ec2_region': 1233 | instance_vars[key] = value.name 1234 | elif key == 'ec2__placement': 1235 | instance_vars['ec2_placement'] = value.zone 1236 | elif key == 'ec2_tags': 1237 | for k, v in value.items(): 1238 | if self.expand_csv_tags and ',' in v: 1239 | v = map(lambda x: x.strip(), v.split(',')) 1240 | key = self.to_safe('ec2_tag_' + k) 1241 | instance_vars[key] = v 1242 | elif key == 'ec2_groups': 1243 | group_ids = [] 1244 | group_names = [] 1245 | for group in value: 1246 | group_ids.append(group.id) 1247 | group_names.append(group.name) 1248 | instance_vars["ec2_security_group_ids"] = ','.join([str(i) for i in group_ids]) 1249 | instance_vars["ec2_security_group_names"] = ','.join([str(i) for i in group_names]) 1250 | else: 1251 | pass 1252 | # TODO Product codes if someone finds them useful 1253 | #print key 1254 | #print type(value) 1255 | #print value 1256 | 1257 | return instance_vars 1258 | 1259 | def get_host_info_dict_from_describe_dict(self, describe_dict): 1260 | ''' Parses the dictionary returned by the API call into a flat list 1261 | of parameters. This method should be used only when 'describe' is 1262 | used directly because Boto doesn't provide specific classes. ''' 1263 | 1264 | # I really don't agree with prefixing everything with 'ec2' 1265 | # because EC2, RDS and ElastiCache are different services. 1266 | # I'm just following the pattern used until now to not break any 1267 | # compatibility. 
1268 | 1269 | host_info = {} 1270 | for key in describe_dict: 1271 | value = describe_dict[key] 1272 | key = self.to_safe('ec2_' + self.uncammelize(key)) 1273 | 1274 | # Handle complex types 1275 | 1276 | # Target: Memcached Cache Clusters 1277 | if key == 'ec2_configuration_endpoint' and value: 1278 | host_info['ec2_configuration_endpoint_address'] = value['Address'] 1279 | host_info['ec2_configuration_endpoint_port'] = value['Port'] 1280 | 1281 | # Target: Cache Nodes and Redis Cache Clusters (single node) 1282 | if key == 'ec2_endpoint' and value: 1283 | host_info['ec2_endpoint_address'] = value['Address'] 1284 | host_info['ec2_endpoint_port'] = value['Port'] 1285 | 1286 | # Target: Redis Replication Groups 1287 | if key == 'ec2_node_groups' and value: 1288 | host_info['ec2_endpoint_address'] = value[0]['PrimaryEndpoint']['Address'] 1289 | host_info['ec2_endpoint_port'] = value[0]['PrimaryEndpoint']['Port'] 1290 | replica_count = 0 1291 | for node in value[0]['NodeGroupMembers']: 1292 | if node['CurrentRole'] == 'primary': 1293 | host_info['ec2_primary_cluster_address'] = node['ReadEndpoint']['Address'] 1294 | host_info['ec2_primary_cluster_port'] = node['ReadEndpoint']['Port'] 1295 | host_info['ec2_primary_cluster_id'] = node['CacheClusterId'] 1296 | elif node['CurrentRole'] == 'replica': 1297 | host_info['ec2_replica_cluster_address_'+ str(replica_count)] = node['ReadEndpoint']['Address'] 1298 | host_info['ec2_replica_cluster_port_'+ str(replica_count)] = node['ReadEndpoint']['Port'] 1299 | host_info['ec2_replica_cluster_id_'+ str(replica_count)] = node['CacheClusterId'] 1300 | replica_count += 1 1301 | 1302 | # Target: Redis Replication Groups 1303 | if key == 'ec2_member_clusters' and value: 1304 | host_info['ec2_member_clusters'] = ','.join([str(i) for i in value]) 1305 | 1306 | # Target: All Cache Clusters 1307 | elif key == 'ec2_cache_parameter_group': 1308 | host_info["ec2_cache_node_ids_to_reboot"] = ','.join([str(i) for i in value['CacheNodeIdsToReboot']]) 1309 | host_info['ec2_cache_parameter_group_name'] = value['CacheParameterGroupName'] 1310 | host_info['ec2_cache_parameter_apply_status'] = value['ParameterApplyStatus'] 1311 | 1312 | # Target: Almost everything 1313 | elif key == 'ec2_security_groups': 1314 | 1315 | # Skip if SecurityGroups is None 1316 | # (it is possible to have the key defined but no value in it). 
1317 | if value is not None: 1318 | sg_ids = [] 1319 | for sg in value: 1320 | sg_ids.append(sg['SecurityGroupId']) 1321 | host_info["ec2_security_group_ids"] = ','.join([str(i) for i in sg_ids]) 1322 | 1323 | # Target: Everything 1324 | # Preserve booleans and integers 1325 | elif type(value) in [int, bool]: 1326 | host_info[key] = value 1327 | 1328 | # Target: Everything 1329 | # Sanitize string values 1330 | elif isinstance(value, six.string_types): 1331 | host_info[key] = value.strip() 1332 | 1333 | # Target: Everything 1334 | # Replace None by an empty string 1335 | elif type(value) == type(None): 1336 | host_info[key] = '' 1337 | 1338 | else: 1339 | # Remove non-processed complex types 1340 | pass 1341 | 1342 | return host_info 1343 | 1344 | def get_host_info(self): 1345 | ''' Get variables about a specific host ''' 1346 | 1347 | if len(self.index) == 0: 1348 | # Need to load index from cache 1349 | self.load_index_from_cache() 1350 | 1351 | if not self.args.host in self.index: 1352 | # try updating the cache 1353 | self.do_api_calls_update_cache() 1354 | if not self.args.host in self.index: 1355 | # host might not exist anymore 1356 | return self.json_format_dict({}, True) 1357 | 1358 | (region, instance_id) = self.index[self.args.host] 1359 | 1360 | instance = self.get_instance(region, instance_id) 1361 | return self.json_format_dict(self.get_host_info_dict_from_instance(instance), True) 1362 | 1363 | def push(self, my_dict, key, element): 1364 | ''' Push an element onto an array that may not have been defined in 1365 | the dict ''' 1366 | group_info = my_dict.setdefault(key, []) 1367 | if isinstance(group_info, dict): 1368 | host_list = group_info.setdefault('hosts', []) 1369 | host_list.append(element) 1370 | else: 1371 | group_info.append(element) 1372 | 1373 | def push_group(self, my_dict, key, element): 1374 | ''' Push a group as a child of another group. 
''' 1375 | parent_group = my_dict.setdefault(key, {}) 1376 | if not isinstance(parent_group, dict): 1377 | parent_group = my_dict[key] = {'hosts': parent_group} 1378 | child_groups = parent_group.setdefault('children', []) 1379 | if element not in child_groups: 1380 | child_groups.append(element) 1381 | 1382 | def get_inventory_from_cache(self): 1383 | ''' Reads the inventory from the cache file and returns it as a JSON 1384 | object ''' 1385 | 1386 | cache = open(self.cache_path_cache, 'r') 1387 | json_inventory = cache.read() 1388 | return json_inventory 1389 | 1390 | 1391 | def load_index_from_cache(self): 1392 | ''' Reads the index from the cache file sets self.index ''' 1393 | 1394 | cache = open(self.cache_path_index, 'r') 1395 | json_index = cache.read() 1396 | self.index = json.loads(json_index) 1397 | 1398 | 1399 | def write_to_cache(self, data, filename): 1400 | ''' Writes data in JSON format to a file ''' 1401 | 1402 | json_data = self.json_format_dict(data, True) 1403 | cache = open(filename, 'w') 1404 | cache.write(json_data) 1405 | cache.close() 1406 | 1407 | def uncammelize(self, key): 1408 | temp = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', key) 1409 | return re.sub('([a-z0-9])([A-Z])', r'\1_\2', temp).lower() 1410 | 1411 | def to_safe(self, word): 1412 | ''' Converts 'bad' characters in a string to underscores so they can be used as Ansible groups ''' 1413 | regex = "[^A-Za-z0-9\_" 1414 | if not self.replace_dash_in_groups: 1415 | regex += "\-" 1416 | return re.sub(regex + "]", "_", word) 1417 | 1418 | def json_format_dict(self, data, pretty=False): 1419 | ''' Converts a dict to a JSON object and dumps it as a formatted 1420 | string ''' 1421 | 1422 | if pretty: 1423 | return json.dumps(data, sort_keys=True, indent=2) 1424 | else: 1425 | return json.dumps(data) 1426 | 1427 | 1428 | # Run the script 1429 | Ec2Inventory() 1430 | -------------------------------------------------------------------------------- /ansible/hosts/groups: -------------------------------------------------------------------------------- 1 | [tag_ansibleNodeType_etcd] 2 | 3 | [tag_ansibleNodeType_worker] 4 | 5 | [tag_ansibleNodeType_controller] 6 | 7 | [etcd:children] 8 | tag_ansibleNodeType_etcd 9 | 10 | [worker:children] 11 | tag_ansibleNodeType_worker 12 | 13 | [controller:children] 14 | tag_ansibleNodeType_controller 15 | -------------------------------------------------------------------------------- /ansible/infra.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | ##################### 3 | # Install Python 4 | ##################### 5 | 6 | - hosts: all 7 | gather_facts: false # As Python is not yet installed, we cannot gather host facts 8 | 9 | tasks: 10 | - name: Install Python 11 | raw: "apt-get -y -q install python" 12 | become: true 13 | retries: 10 14 | delay: 20 15 | # If you run this playbook immediately after Terraform, ssh may not be ready to respond yet 16 | 17 | ####################### 18 | # Setup K8s components 19 | ####################### 20 | 21 | - hosts: etcd 22 | roles: 23 | - common 24 | - etcd 25 | 26 | - hosts: controller 27 | roles: 28 | - common 29 | - controller 30 | 31 | - hosts: worker 32 | roles: 33 | - common 34 | - worker 35 | -------------------------------------------------------------------------------- /ansible/kubectl.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | ########################### 3 | # Setup Kubernetes client 4 | ########################### 5 | 6 | # Expects 
`kubernetes_api_endpoint` to be defined and passed in 7 | # as `--extra-vars "kubernetes_api_endpoint=xxxx"` 8 | 9 | - hosts: localhost 10 | connection: local 11 | 12 | vars: 13 | kubernetes_api_endpoint_port: 6443 14 | kubernetes_cluster_name: "kubernetes-not-the-hardest-way" 15 | certificate_path: "{{ playbook_dir }}/../cert" 16 | 17 | # Credentials must match those defined in roles/controller/files/token.csv (deployed to /var/lib/kubernetes/token.csv) 18 | kubectl_user: admin 19 | kubectl_token: chAng3m3 20 | 21 | 22 | tasks: 23 | 24 | - name: Check certificate file 25 | stat: 26 | path: "{{ certificate_path }}/ca.pem" 27 | register: cert 28 | 29 | - assert: 30 | that: "cert.stat.exists == True" 31 | 32 | - name: Set kubectl endpoint 33 | shell: "kubectl config set-cluster {{ kubernetes_cluster_name }} --certificate-authority={{ certificate_path }}/ca.pem --embed-certs=true --server=https://{{ kubernetes_api_endpoint }}:{{ kubernetes_api_endpoint_port }}" 34 | 35 | - name: Set kubectl credentials 36 | shell: "kubectl config set-credentials {{ kubectl_user }} --token {{ kubectl_token }}" 37 | 38 | - name: Set kubectl default context 39 | shell: "kubectl config set-context default-context --cluster={{ kubernetes_cluster_name }} --user={{ kubectl_user }}" 40 | 41 | - name: Switch kubectl to default context 42 | shell: "kubectl config use-context default-context" 43 | -------------------------------------------------------------------------------- /ansible/kubernetes-nginx.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | ####################################### 4 | # Deploy and expose Nginx service 5 | ####################################### 6 | 7 | # Expects kubectl to be configured on the local machine 8 | # using the kubectl.yaml playbook 9 | 10 | 11 | - hosts: localhost 12 | connection: local 13 | 14 | tasks: 15 | 16 | - name: Launch 3 nginx pods 17 | command: "kubectl run nginx --image=nginx --port=80 --replicas=3" 18 | 19 | - name: Expose nginx 20 | command: "kubectl expose deployment nginx --type NodePort" 21 | 22 | - name: Get exposed port 23 | command: "kubectl get svc nginx --output=jsonpath='{range .spec.ports[0]}{.nodePort}'" 24 | register: result 25 | - set_fact: 26 | node_port: "{{ result.stdout }}" 27 | 28 | - debug: msg="Exposed port {{ node_port }}" 29 | -------------------------------------------------------------------------------- /ansible/kubernetes-routing.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | ############################################ 4 | # Setup Kubernetes cluster internal routing 5 | ############################################ 6 | 7 | # Expects kubectl to be configured on the local machine 8 | # using the kubectl.yaml playbook 9 | 10 | # 1) Infer the routing mapping for the Pod cluster 11 | # kubectl get nodes --output=jsonpath='{range .items[*]}{.status.addresses[?(@.type=="InternalIP")].address} {.spec.podCIDR} {"\n"}{end}' 12 | # 2) Set additional routes on every worker 13 | # e.g.
route add -net 10.200.0.0 netmask 255.255.255.0 gw 10.43.0.31 metric 1 14 | 15 | - hosts: worker 16 | 17 | tasks: 18 | 19 | # Run kubectl on the local machine to gather Kubernetes node "facts" 20 | - name: Gather Kubernetes node facts (on localhost) 21 | local_action: command kubectl get nodes --output=json 22 | register: result 23 | - set_fact: 24 | kubernetes_nodes: "{{ result.stdout | from_json }}" 25 | 26 | # Extract Pod addresses 27 | # The following is a hack to extract the list of addresses from the Kubernetes facts, 28 | # as Jinja2 doesn't support list comprehension (list flattening) 29 | - debug: msg="Extract Kubernetes node addresses" 30 | with_flattened: 31 | - "{{ kubernetes_nodes['items']|map(attribute='status')|map(attribute='addresses')|list }}" 32 | register: node_addresses_tmp 33 | no_log: True 34 | - set_fact: 35 | kubernetes_nodes_addresses: "{{ node_addresses_tmp.results|map(attribute='item')|selectattr('type','equalto','InternalIP')|map(attribute='address')|list }}" 36 | # - debug: var=kubernetes_nodes_addresses 37 | 38 | # Extract Pod CIDRs 39 | - set_fact: 40 | kubernetes_pod_cidrs: "{{ kubernetes_nodes['items']|map(attribute='spec')|map(attribute='podCIDR')|list }}" 41 | # - debug: var=kubernetes_pod_cidrs 42 | 43 | - name: Add pod routes 44 | command: "route add -net {{ item.0|ipaddr('network') }} netmask {{ item.0|ipaddr('netmask') }} gw {{ item.1|ipaddr('address') }} metric 1" 45 | with_together: 46 | - "{{ kubernetes_pod_cidrs }}" 47 | - "{{ kubernetes_nodes_addresses }}" 48 | ignore_errors: true # The route command fails if the route already exists 49 | become: true 50 | 51 | - name: Verify routes 52 | command: "route -n" 53 | become: true 54 | register: result 55 | 56 | - assert: 57 | that: # TODO Make assertions more strict 58 | - "'{{ kubernetes_pod_cidrs[0]|ipaddr('network') }}' in result.stdout" 59 | - "'{{ kubernetes_pod_cidrs[1]|ipaddr('network') }}' in result.stdout" 60 | - "'{{ kubernetes_pod_cidrs[2]|ipaddr('network') }}' in result.stdout" 61 | - "'{{ kubernetes_nodes_addresses[0] }}' in result.stdout" 62 | - "'{{ kubernetes_nodes_addresses[1] }}' in result.stdout" 63 | - "'{{ kubernetes_nodes_addresses[2] }}' in result.stdout" 64 | 65 | # The VPC knows nothing about our Pod subnet, so we must prevent internal traffic from being routed as outbound Internet traffic 66 | # For an explanation, see: http://kubernetes.io/docs/admin/networking/#google-compute-engine-gce 67 | - name: Add iptables rule to masquerade traffic bound outside the VPC CIDR 68 | command: "iptables -t nat -A POSTROUTING ! -d {{ internal_cidr }} -o eth0 -j MASQUERADE" 69 | become: true 70 | -------------------------------------------------------------------------------- /ansible/roles/common/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | #################### 4 | # Set hostname 5 | #################### 6 | 7 | # Set the hostname to the internal DNS FQDN. The default hostname is not fully qualified. 8 | # Nodes register themselves using their hostname; if it is not the FQDN, other nodes cannot resolve it.
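# For example (hypothetical values), on a node whose EC2 private DNS name is
# ip-10-43-0-30.eu-west-1.compute.internal, the two tasks below result in:
#   /etc/hosts -> 127.0.0.1 localhost ip-10-43-0-30.eu-west-1.compute.internal
#   hostname   -> ip-10-43-0-30.eu-west-1.compute.internal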
9 | 10 | - name: Add internal DNS name to hosts file 11 | lineinfile: 12 | dest: /etc/hosts 13 | regexp: '^127\.0\.0\.1' 14 | line: '127.0.0.1 localhost {{ ec2_private_dns_name }}' 15 | owner: root 16 | group: root 17 | mode: 0644 18 | become: true 19 | 20 | - name: Set internal DNS name as hostname 21 | shell: "hostname {{ ec2_private_dns_name }}" 22 | become: true 23 | -------------------------------------------------------------------------------- /ansible/roles/controller/files/authorization-policy.jsonl: -------------------------------------------------------------------------------- 1 | {"apiVersion": "abac.authorization.kubernetes.io/v1beta1", "kind": "Policy", "spec": {"user":"*", "nonResourcePath": "*", "readonly": true}} 2 | {"apiVersion": "abac.authorization.kubernetes.io/v1beta1", "kind": "Policy", "spec": {"user":"admin", "namespace": "*", "resource": "*", "apiGroup": "*"}} 3 | {"apiVersion": "abac.authorization.kubernetes.io/v1beta1", "kind": "Policy", "spec": {"user":"scheduler", "namespace": "*", "resource": "*", "apiGroup": "*"}} 4 | {"apiVersion": "abac.authorization.kubernetes.io/v1beta1", "kind": "Policy", "spec": {"user":"kubelet", "namespace": "*", "resource": "*", "apiGroup": "*"}} 5 | {"apiVersion": "abac.authorization.kubernetes.io/v1beta1", "kind": "Policy", "spec": {"group":"system:serviceaccounts", "namespace": "*", "resource": "*", "apiGroup": "*", "nonResourcePath": "*"}} 6 | -------------------------------------------------------------------------------- /ansible/roles/controller/files/token.csv: -------------------------------------------------------------------------------- 1 | chAng3m3,admin,admin 2 | chAng3m3,scheduler,scheduler 3 | chAng3m3,kubelet,kubelet 4 | -------------------------------------------------------------------------------- /ansible/roles/controller/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #################### 3 | # Load Certificates 4 | #################### 5 | 6 | - name: Create kubernetes data dir 7 | file: path=/var/lib/kubernetes state=directory 8 | become: true 9 | 10 | - name: Copy certificates 11 | copy: 12 | src: "{{ playbook_dir }}/../cert/{{ item }}" 13 | dest: /var/lib/kubernetes 14 | owner: root 15 | group: root 16 | mode: 0644 17 | with_items: 18 | - ca.pem 19 | - kubernetes.pem 20 | - kubernetes-key.pem 21 | become: true 22 | 23 | 24 | ############# 25 | # Kubernetes 26 | ############# 27 | 28 | - name: Download Kubernetes controller binaries 29 | get_url: 30 | url: "{{ kubernetes_download_path }}/{{ item }}" 31 | dest: /usr/bin 32 | owner: root 33 | group: root 34 | mode: 0755 35 | # TODO Add hash check 36 | with_items: 37 | - kube-apiserver 38 | - kube-controller-manager 39 | - kube-scheduler 40 | - kubectl 41 | become: true 42 | 43 | - name: Copy Authentication and Authorisation files 44 | copy: 45 | src: "{{ item }}" 46 | dest: /var/lib/kubernetes 47 | owner: root 48 | group: root 49 | mode: 0644 50 | with_items: 51 | - token.csv 52 | - authorization-policy.jsonl 53 | become: true 54 | 55 | - name: Add kube-* systemd units 56 | template: 57 | src: "{{ item }}.service.j2" 58 | dest: /etc/systemd/system/{{ item }}.service 59 | mode: 700 60 | with_items: 61 | - kube-controller-manager 62 | - kube-apiserver 63 | - kube-scheduler 64 | become: true 65 | 66 | - name: Reload systemd 67 | command: systemctl daemon-reload 68 | become: true 69 | 70 | - name: Enable kube-* services 71 | command: "systemctl enable {{ item }}" 72 | with_items: 73 | - kube-apiserver
74 | - kube-controller-manager 75 | - kube-scheduler 76 | become: true 77 | 78 | - name: Restart kube-* services 79 | service: 80 | name: "{{ item }}" 81 | state: restarted 82 | enabled: yes 83 | with_items: 84 | - kube-apiserver 85 | - kube-controller-manager 86 | - kube-scheduler 87 | become: true 88 | 89 | - name: Verify Kubernetes status 90 | shell: kubectl get componentstatuses 91 | register: cmd_result 92 | retries: 5 93 | delay: 10 94 | 95 | - assert: 96 | that: 97 | - "'scheduler Healthy' in cmd_result.stdout" 98 | - "'controller-manager Healthy' in cmd_result.stdout" 99 | - "'etcd-0 Healthy' in cmd_result.stdout" 100 | - "'etcd-1 Healthy' in cmd_result.stdout" 101 | - "'etcd-2 Healthy' in cmd_result.stdout" 102 | -------------------------------------------------------------------------------- /ansible/roles/controller/templates/kube-apiserver.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes API Server 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | 5 | [Service] 6 | ExecStart=/usr/bin/kube-apiserver \ 7 | --admission-control=NamespaceLifecycle,LimitRanger,SecurityContextDeny,ServiceAccount,ResourceQuota \ 8 | --advertise-address={{ ansible_eth0.ipv4.address }} \ 9 | --allow-privileged=true \ 10 | --apiserver-count=3 \ 11 | --authorization-mode=ABAC \ 12 | --authorization-policy-file=/var/lib/kubernetes/authorization-policy.jsonl \ 13 | --bind-address=0.0.0.0 \ 14 | --enable-swagger-ui=true \ 15 | --etcd-cafile=/var/lib/kubernetes/ca.pem \ 16 | --insecure-bind-address=0.0.0.0 \ 17 | --kubelet-certificate-authority=/var/lib/kubernetes/ca.pem \ 18 | --etcd-servers={% for node in groups['etcd'] %}https://{{ hostvars[node].ansible_eth0.ipv4.address }}:2379{% if not loop.last %},{% endif %}{% endfor %} \ 19 | --service-account-key-file=/var/lib/kubernetes/kubernetes-key.pem \ 20 | --service-cluster-ip-range={{ kubernetes_service_cluster_cidr }} \ 21 | --service-node-port-range=30000-32767 \ 22 | --tls-cert-file=/var/lib/kubernetes/kubernetes.pem \ 23 | --tls-private-key-file=/var/lib/kubernetes/kubernetes-key.pem \ 24 | --token-auth-file=/var/lib/kubernetes/token.csv \ 25 | --v=2 26 | Restart=on-failure 27 | RestartSec=5 28 | 29 | [Install] 30 | WantedBy=multi-user.target 31 | -------------------------------------------------------------------------------- /ansible/roles/controller/templates/kube-controller-manager.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Controller Manager 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | 5 | [Service] 6 | ExecStart=/usr/bin/kube-controller-manager \ 7 | --allocate-node-cidrs=true \ 8 | --cluster-cidr={{ kubernetes_pod_cluster_cidr }} \ 9 | --cluster-name=kubernetes \ 10 | --leader-elect=true \ 11 | --master=http://{{ ansible_eth0.ipv4.address }}:8080 \ 12 | --root-ca-file=/var/lib/kubernetes/ca.pem \ 13 | --service-account-private-key-file=/var/lib/kubernetes/kubernetes-key.pem \ 14 | --service-cluster-ip-range={{ kubernetes_service_cluster_cidr }} \ 15 | --v=2 16 | Restart=on-failure 17 | RestartSec=5 18 | 19 | [Install] 20 | WantedBy=multi-user.target 21 | -------------------------------------------------------------------------------- /ansible/roles/controller/templates/kube-scheduler.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Scheduler 3 | 
Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | 5 | [Service] 6 | ExecStart=/usr/bin/kube-scheduler \ 7 | --leader-elect=true \ 8 | --master=http://{{ ansible_eth0.ipv4.address }}:8080 \ 9 | --v=2 10 | Restart=on-failure 11 | RestartSec=5 12 | 13 | [Install] 14 | WantedBy=multi-user.target 15 | -------------------------------------------------------------------------------- /ansible/roles/controller/vars/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | kubernetes_version: "v1.3.6" 4 | kubernetes_download_path: "https://storage.googleapis.com/kubernetes-release/release/{{ kubernetes_version }}/bin/linux/amd64" 5 | -------------------------------------------------------------------------------- /ansible/roles/etcd/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - name: Create etcd config dir 4 | file: path=/etc/etcd state=directory 5 | become: true 6 | 7 | - name: Copy certificates 8 | copy: 9 | src: "{{ playbook_dir }}/../cert/{{ item }}" 10 | dest: "/etc/etcd/" 11 | owner: root 12 | group: root 13 | mode: 0644 14 | become: true 15 | with_items: 16 | - ca.pem 17 | - kubernetes.pem 18 | - kubernetes-key.pem 19 | 20 | - name: Download etcd binaries 21 | get_url: 22 | url: "{{ etcd_download_url }}" 23 | dest: "/usr/local/src" 24 | # TODO Add hash check 25 | become: true 26 | 27 | - name: Unpack etcd binaries 28 | unarchive: 29 | copy: no 30 | src: "/usr/local/src/{{ etcd_release }}.tar.gz" 31 | dest: "/usr/local/src/" 32 | creates: "/usr/local/src/{{ etcd_release }}/etcd" 33 | become: true 34 | 35 | - name: Copy etcd binaries 36 | copy: 37 | remote_src: true 38 | src: "/usr/local/src/{{ etcd_release }}/{{ item }}" 39 | dest: "/usr/bin" 40 | owner: root 41 | group: root 42 | mode: 0755 43 | with_items: 44 | - etcd 45 | - etcdctl 46 | become: true 47 | 48 | - name: Create etcd data dir 49 | file: path=/var/lib/etcd state=directory 50 | become: true 51 | 52 | - name: Add etcd systemd unit 53 | template: 54 | src: etcd.service.j2 55 | dest: /etc/systemd/system/etcd.service 56 | mode: 700 57 | become: true 58 | 59 | - name: Reload systemd 60 | command: systemctl daemon-reload 61 | become: true 62 | 63 | - name: Enable etcd service 64 | command: systemctl enable etcd 65 | become: true 66 | 67 | - name: Restart etcd 68 | service: 69 | name: etcd 70 | state: restarted 71 | enabled: yes 72 | become: true 73 | 74 | - name: Wait for etcd listening 75 | wait_for: port=2379 timeout=60 76 | 77 | - name: Verify etcd cluster health 78 | shell: etcdctl --ca-file=/etc/etcd/ca.pem cluster-health 79 | register: cmd_result 80 | until: cmd_result.stdout.find("cluster is healthy") != -1 81 | retries: 5 82 | delay: 5 83 | -------------------------------------------------------------------------------- /ansible/roles/etcd/templates/etcd.service.j2: -------------------------------------------------------------------------------- 1 | # {{ ansible_managed }} 2 | 3 | [Unit] 4 | Description=etcd 5 | Documentation=https://github.com/coreos 6 | 7 | [Service] 8 | ExecStart=/usr/bin/etcd --name {{ inventory_hostname }} \ 9 | --cert-file=/etc/etcd/kubernetes.pem \ 10 | --key-file=/etc/etcd/kubernetes-key.pem \ 11 | --peer-cert-file=/etc/etcd/kubernetes.pem \ 12 | --peer-key-file=/etc/etcd/kubernetes-key.pem \ 13 | --trusted-ca-file=/etc/etcd/ca.pem \ 14 | --peer-trusted-ca-file=/etc/etcd/ca.pem \ 15 | --initial-advertise-peer-urls https://{{ ansible_eth0.ipv4.address }}:2380 \ 16 | 
--listen-peer-urls https://{{ ansible_eth0.ipv4.address }}:2380 \ 17 | --listen-client-urls https://{{ ansible_eth0.ipv4.address }}:2379,http://127.0.0.1:2379 \ 18 | --advertise-client-urls https://{{ ansible_eth0.ipv4.address }}:2379 \ 19 | --initial-cluster-token etcd-cluster-0 \ 20 | --initial-cluster {% for node in groups['etcd'] %}{{ node }}=https://{{ hostvars[node].ansible_eth0.ipv4.address }}:2380{% if not loop.last %},{% endif %}{% endfor %} \ 21 | --initial-cluster-state new \ 22 | --data-dir=/var/lib/etcd 23 | Restart=on-failure 24 | RestartSec=5 25 | 26 | [Install] 27 | WantedBy=multi-user.target 28 | -------------------------------------------------------------------------------- /ansible/roles/etcd/vars/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | etcd_version: "v3.0.1" 4 | etcd_release: "etcd-{{ etcd_version }}-linux-amd64" 5 | etcd_download_url: "https://github.com/coreos/etcd/releases/download/{{ etcd_version }}/{{ etcd_release }}.tar.gz" 6 | -------------------------------------------------------------------------------- /ansible/roles/worker/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - name: Reload systemd 4 | command: systemctl daemon-reload 5 | become: true 6 | 7 | - name: Enable docker service 8 | command: systemctl enable docker 9 | become: true 10 | 11 | - name: Enable kubelet service 12 | command: systemctl enable kubelet 13 | become: true 14 | 15 | - name: Enable kube-proxy service 16 | command: systemctl enable kube-proxy 17 | become: true 18 | 19 | - name: Restart docker service 20 | service: 21 | name: docker 22 | state: restarted 23 | enabled: yes 24 | become: true 25 | 26 | - name: Restart kubelet service 27 | service: 28 | name: kubelet 29 | state: restarted 30 | enabled: yes 31 | become: true 32 | 33 | - name: Restart kube-proxy service 34 | service: 35 | name: kube-proxy 36 | state: restarted 37 | enabled: yes 38 | become: true 39 | -------------------------------------------------------------------------------- /ansible/roles/worker/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | #################### 4 | # Load Certificates 5 | #################### 6 | 7 | - name: Create kubernetes data dir 8 | file: path=/var/lib/kubernetes state=directory 9 | become: true 10 | 11 | - name: Copy certificates 12 | copy: 13 | src: "{{ playbook_dir }}/../cert/{{ item }}" 14 | dest: /var/lib/kubernetes 15 | owner: root 16 | group: root 17 | mode: 0644 18 | with_items: 19 | - ca.pem 20 | - kubernetes.pem 21 | - kubernetes-key.pem 22 | become: true 23 | 24 | ######### 25 | # Docker 26 | ######### 27 | 28 | - name: Download docker binaries 29 | get_url: 30 | url: "{{ docker_download_url }}" 31 | dest: "/usr/local/src" 32 | # TODO Add hash check 33 | become: true 34 | 35 | - name: Unpack docker binaries 36 | unarchive: 37 | copy: no 38 | src: "/usr/local/src/{{ docker_package_file }}" 39 | dest: "/usr/local/src/" 40 | creates: "/local/src/docker/docker" 41 | become: true 42 | 43 | - name: Copy docker binaries 44 | copy: 45 | remote_src: true 46 | src: "/usr/local/src/docker/{{ item }}" 47 | dest: /usr/bin 48 | mode: 0755 49 | with_items: # Remote-to-remote copy doesn't support file glob yet 50 | - docker 51 | - docker-containerd 52 | - docker-containerd-ctr 53 | - docker-containerd-shim 54 | - docker-runc 55 | become: true 56 | 57 | - name: Add docker systemd unit 58 | template: 59 
| src: docker.service.j2 60 | dest: /etc/systemd/system/docker.service 61 | mode: 700 62 | become: true 63 | notify: 64 | - Reload systemd 65 | - Enable docker service 66 | - Restart docker service 67 | 68 | 69 | ########### 70 | # Kubelet 71 | ########### 72 | 73 | - name: Create CNI dir 74 | file: path=/opt/cni state=directory 75 | become: true 76 | 77 | - name: Download CNI binaries 78 | get_url: 79 | url: "{{ cni_download_url }}" 80 | dest: "/usr/local/src" 81 | # TODO Add hash check 82 | become: true 83 | 84 | - name: Unpack CNI binaries 85 | unarchive: 86 | copy: no 87 | src: "/usr/local/src/{{ cni_package_file }}" 88 | dest: "/opt/cni/" 89 | creates: "/opt/cni/bin/cnitool" 90 | become: true 91 | 92 | - name: Create Kubelet directory 93 | file: path=/var/lib/kubelet state=directory 94 | become: true 95 | 96 | - name: Download Kubelets binaries 97 | get_url: 98 | url: "{{ kubelet_download_path}}/{{ item }}" 99 | dest: "/usr/bin" 100 | group: root 101 | owner: root 102 | mode: 0755 103 | # TODO Add hash check 104 | with_items: 105 | - kubectl 106 | - kube-proxy 107 | - kubelet 108 | become: true 109 | 110 | - name: Add Kubelet configuration 111 | template: 112 | src: kubeconfig.j2 113 | dest: /var/lib/kubelet/kubeconfig 114 | mode: 644 115 | become: true 116 | 117 | - name: Add kubelet systemd unit 118 | template: 119 | src: kubelet.service.j2 120 | dest: /etc/systemd/system/kubelet.service 121 | mode: 700 122 | become: true 123 | notify: 124 | - Reload systemd 125 | - Enable kubelet service 126 | - Restart kubelet service 127 | 128 | ############# 129 | # Kube-proxy 130 | ############# 131 | 132 | - name: Add kube-proxy systemd unit 133 | template: 134 | src: kube-proxy.service.j2 135 | dest: /etc/systemd/system/kube-proxy.service 136 | mode: 700 137 | become: true 138 | notify: 139 | - Reload systemd 140 | - Enable kube-proxy service 141 | - Restart kube-proxy service 142 | -------------------------------------------------------------------------------- /ansible/roles/worker/templates/docker.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Docker Application Container Engine 3 | Documentation=http://docs.docker.io 4 | 5 | [Service] 6 | ExecStart=/usr/bin/docker daemon \ 7 | --iptables=false \ 8 | --ip-masq=false \ 9 | --host=unix:///var/run/docker.sock \ 10 | --log-level=error \ 11 | --storage-driver=overlay 12 | Restart=on-failure 13 | RestartSec=5 14 | 15 | [Install] 16 | WantedBy=multi-user.target 17 | -------------------------------------------------------------------------------- /ansible/roles/worker/templates/kube-proxy.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Kube Proxy 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | 5 | [Service] 6 | ExecStart=/usr/bin/kube-proxy \ 7 | --master=https://{{ hostvars['controller0'].ansible_eth0.ipv4.address }}:6443 \ 8 | --kubeconfig=/var/lib/kubelet/kubeconfig \ 9 | --proxy-mode=iptables \ 10 | --v=2 11 | 12 | Restart=on-failure 13 | RestartSec=5 14 | 15 | [Install] 16 | WantedBy=multi-user.target 17 | -------------------------------------------------------------------------------- /ansible/roles/worker/templates/kubeconfig.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Config 3 | clusters: 4 | - cluster: 5 | certificate-authority: /var/lib/kubernetes/ca.pem 6 | server: https://{{ 
hostvars['controller0'].ansible_eth0.ipv4.address }}:6443 7 | name: kubernetes 8 | contexts: 9 | - context: 10 | cluster: kubernetes 11 | user: kubelet 12 | name: kubelet 13 | current-context: kubelet 14 | users: 15 | - name: kubelet 16 | user: 17 | token: chAng3m3 18 | -------------------------------------------------------------------------------- /ansible/roles/worker/templates/kubelet.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Kubelet 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | After=docker.service 5 | Requires=docker.service 6 | 7 | [Service] 8 | ExecStart=/usr/bin/kubelet \ 9 | --allow-privileged=true \ 10 | --api-servers={% for node in groups['controller'] %}https://{{ hostvars[node].ansible_eth0.ipv4.address }}:6443{% if not loop.last %},{% endif %}{% endfor %} \ 11 | --cloud-provider= \ 12 | --cluster-dns={{ kubernetes_cluster_dns }} \ 13 | --cluster-domain=cluster.local \ 14 | --configure-cbr0=true \ 15 | --container-runtime=docker \ 16 | --docker=unix:///var/run/docker.sock \ 17 | --network-plugin=kubenet \ 18 | --kubeconfig=/var/lib/kubelet/kubeconfig \ 19 | --reconcile-cidr=true \ 20 | --serialize-image-pulls=false \ 21 | --tls-cert-file=/var/lib/kubernetes/kubernetes.pem \ 22 | --tls-private-key-file=/var/lib/kubernetes/kubernetes-key.pem \ 23 | --v=2 24 | 25 | Restart=on-failure 26 | RestartSec=5 27 | 28 | [Install] 29 | WantedBy=multi-user.target 30 | -------------------------------------------------------------------------------- /ansible/roles/worker/vars/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | docker_version: "1.11.2" 4 | docker_package_file: "docker-{{ docker_version }}.tgz" 5 | docker_download_url: "https://get.docker.com/builds/Linux/x86_64/{{ docker_package_file }}" 6 | cni_package_file: "cni-c864f0e1ea73719b8f4582402b0847064f9883b0.tar.gz" 7 | cni_download_url: "https://storage.googleapis.com/kubernetes-release/network-plugins/{{ cni_package_file }}" 8 | kubernetes_version: "v1.3.6" 9 | kubelet_download_path: "https://storage.googleapis.com/kubernetes-release/release/{{ kubernetes_version }}/bin/linux/amd64" 10 | -------------------------------------------------------------------------------- /cert/.gitignore: -------------------------------------------------------------------------------- 1 | /kubernetes-csr.json 2 | *.csr 3 | -------------------------------------------------------------------------------- /cert/ca-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "signing": { 3 | "default": { 4 | "expiry": "8760h" 5 | }, 6 | "profiles": { 7 | "kubernetes": { 8 | "usages": ["signing", "key encipherment", "server auth", "client auth"], 9 | "expiry": "8760h" 10 | } 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /cert/ca-csr.json: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "Kubernetes", 3 | "key": { 4 | "algo": "rsa", 5 | "size": 2048 6 | }, 7 | "names": [ 8 | { 9 | "C": "UK", 10 | "L": "London", 11 | "O": "OpenCredo", 12 | "OU": "CA" 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /terraform/.gitignore: -------------------------------------------------------------------------------- 1 | /terraform.tfvars 2 | /util/ 3 | 
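The two JSON files under cert/ above drive cfssl: ca-csr.json describes the self-signed CA to generate, and ca-config.json defines the `kubernetes` signing profile (one-year expiry, server and client auth) used to sign the cluster certificate. A minimal sketch of the CA step that Terraform later runs via `local-exec` in certificates.tf; the `openssl` inspection line is an extra assumption, not part of the repo:

```
cd cert
cfssl gencert -initca ca-csr.json | cfssljson -bare ca    # writes ca.pem, ca-key.pem and ca.csr
openssl x509 -in ca.pem -noout -subject -dates            # optional: check subject (CN=Kubernetes, O=OpenCredo) and expiry
```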
-------------------------------------------------------------------------------- /terraform/aws.tf: -------------------------------------------------------------------------------- 1 | # Retrieve AWS credentials from env variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY 2 | provider "aws" { 3 | access_key = "" 4 | secret_key = "" 5 | region = "${var.region}" 6 | } 7 | -------------------------------------------------------------------------------- /terraform/certificates.tf: -------------------------------------------------------------------------------- 1 | ######################### 2 | ## Generate certificates 3 | ######################### 4 | 5 | # Generate Certificates 6 | data "template_file" "certificates" { 7 | template = "${file("${path.module}/template/kubernetes-csr.json")}" 8 | depends_on = ["aws_elb.kubernetes_api","aws_instance.etcd","aws_instance.controller","aws_instance.worker"] 9 | vars { 10 | kubernetes_api_elb_dns_name = "${aws_elb.kubernetes_api.dns_name}" 11 | kubernetes_cluster_dns = "${var.kubernetes_cluster_dns}" 12 | 13 | # Unfortunately, variables must be primitives, neither lists nor maps 14 | etcd0_ip = "${aws_instance.etcd.0.private_ip}" 15 | etcd1_ip = "${aws_instance.etcd.1.private_ip}" 16 | etcd2_ip = "${aws_instance.etcd.2.private_ip}" 17 | controller0_ip = "${aws_instance.controller.0.private_ip}" 18 | controller1_ip = "${aws_instance.controller.1.private_ip}" 19 | controller2_ip = "${aws_instance.controller.2.private_ip}" 20 | worker0_ip = "${aws_instance.worker.0.private_ip}" 21 | worker1_ip = "${aws_instance.worker.1.private_ip}" 22 | worker2_ip = "${aws_instance.worker.2.private_ip}" 23 | 24 | etcd0_dns = "${aws_instance.etcd.0.private_dns}" 25 | etcd1_dns = "${aws_instance.etcd.1.private_dns}" 26 | etcd2_dns = "${aws_instance.etcd.2.private_dns}" 27 | controller0_dns = "${aws_instance.controller.0.private_dns}" 28 | controller1_dns = "${aws_instance.controller.1.private_dns}" 29 | controller2_dns = "${aws_instance.controller.2.private_dns}" 30 | worker0_dns = "${aws_instance.worker.0.private_dns}" 31 | worker1_dns = "${aws_instance.worker.1.private_dns}" 32 | worker2_dns = "${aws_instance.worker.2.private_dns}" 33 | } 34 | } 35 | resource "null_resource" "certificates" { 36 | triggers { 37 | template_rendered = "${ data.template_file.certificates.rendered }" 38 | } 39 | provisioner "local-exec" { 40 | command = "echo '${ data.template_file.certificates.rendered }' > ../cert/kubernetes-csr.json" 41 | } 42 | provisioner "local-exec" { 43 | command = "cd ../cert; cfssl gencert -initca ca-csr.json | cfssljson -bare ca; cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes kubernetes-csr.json | cfssljson -bare kubernetes" 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /terraform/etcf.tf: -------------------------------------------------------------------------------- 1 | ######################### 2 | # etcd cluster instances 3 | ######################### 4 | 5 | resource "aws_instance" "etcd" { 6 | count = 3 7 | ami = "${lookup(var.amis, var.region)}" 8 | instance_type = "${var.etcd_instance_type}" 9 | 10 | subnet_id = "${aws_subnet.kubernetes.id}" 11 | private_ip = "${cidrhost(var.vpc_cidr, 10 + count.index)}" 12 | associate_public_ip_address = true # Instances have public, dynamic IP 13 | 14 | availability_zone = "${var.zone}" 15 | vpc_security_group_ids = ["${aws_security_group.kubernetes.id}"] 16 | key_name = "${var.default_keypair_name}" 17 | 18 | tags { 19 | 
Owner = "${var.owner}" 20 | Name = "etcd-${count.index}" 21 | ansibleFilter = "${var.ansibleFilter}" 22 | ansibleNodeType = "etcd" 23 | ansibleNodeName = "etcd${count.index}" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /terraform/iam.tf: -------------------------------------------------------------------------------- 1 | ########################## 2 | # IAM: Policies and Roles 3 | ########################## 4 | 5 | # The following Roles and Policy are mostly for future use 6 | 7 | resource "aws_iam_role" "kubernetes" { 8 | name = "kubernetes" 9 | assume_role_policy = <