├── Dockerfile ├── LICENSE ├── README.md └── etcd-aws-cluster /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gliderlabs/alpine:3.4 2 | 3 | RUN apk --update add \ 4 | python \ 5 | py-pip \ 6 | jq \ 7 | curl \ 8 | wget \ 9 | bash &&\ 10 | pip install --upgrade awscli &&\ 11 | mkdir /root/.aws 12 | 13 | COPY etcd-aws-cluster /etcd-aws-cluster 14 | 15 | # Expose volume for adding credentials 16 | VOLUME ["/root/.aws"] 17 | 18 | # Expose directory to write output to, and to potentially read certs from 19 | VOLUME ["/etc/sysconfig/", "/etc/certs"] 20 | 21 | ENTRYPOINT /etcd-aws-cluster 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Monsanto Company 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of Monsanto Company nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | etcd-aws-cluster 2 | ============== 3 | 4 | This container serves to assist in the creation of an etcd (2.x) cluster from an AWS auto scaling group. It writes a file to /etc/sysconfig/etcd-peers that contains parameters for etcd: 5 | 6 | - ETCD_INITIAL_CLUSTER_STATE 7 | - either `new` or `existing` 8 | - used to specify whether we are creating a new cluster or joining an existing one 9 | - ETCD_NAME 10 | - the name of the machine joining the etcd cluster 11 | - this is obtained by getting the instance id of the host from Amazon (e.g. 
i-694fad83) 12 | - ETCD_INITIAL_CLUSTER 13 | - this is a list of the machines (id and ip) expected to be in the cluster, including the new machine 14 | - e.g., "i-5fc4c9e1=http://10.0.0.1:2380,i-694fad83=http://10.0.0.2:2380" 15 | 16 | This file can then be loaded as an EnvironmentFile in an etcd2 drop-in to properly configure etcd2: 17 | 18 | ``` 19 | [Service] 20 | EnvironmentFile=/etc/sysconfig/etcd-peers 21 | ``` 22 | 23 | Workflow 24 | -------- 25 | 26 | - get the instance id and ip from Amazon 27 | - fetch the autoscaling group this machine belongs to 28 | - obtain the ip of every member of the auto scaling group 29 | - for each member of the autoscaling group detect if they are running etcd and if so who they see as members of the cluster 30 | 31 | if no machines respond OR there are existing peers but my instance id is listed as a member of the cluster 32 | 33 | - assume that this is a new cluster 34 | - write a file using the ids/ips of the autoscaling group 35 | 36 | else 37 | 38 | - assume that we are joining an existing cluster 39 | - check to see if any machines are listed as being part of the cluster but are not part of the autoscaling group 40 | - if so remove it from the etcd cluster 41 | - add this machine to the current cluster 42 | - write a file using the ids/ips obtained by querying etcd for members of the cluster 43 | 44 | 45 | Usage 46 | ----- 47 | 48 | ```docker run -v /etc/sysconfig/:/etc/sysconfig/ monsantoco/etcd-aws-cluster``` 49 | 50 | Environment Variables 51 | * PROXY_ASG - If specified, forces into proxy=on and uses the value of PROXY_ASG as the autoscaling group that contains the master servers 52 | * ASG_BY_TAG - If specified in conjunction with PROXY_ASG, uses the value of PROXY_ASG to look up the server by the Tag Name 53 | * ETCD_CLIENT_SCHEME - defaults to http 54 | * ETCD_PEER_SCHEME - defaults to http 55 | 56 | 57 | Demo 58 | ---- 59 | 60 | We have created a CloudFormation script that shows sample usage of this container for 
creating a simple etcd cluster: https://gist.github.com/tj-corrigan/3baf86051471062b2fb7 61 | -------------------------------------------------------------------------------- /etcd-aws-cluster: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | pkg="etcd-aws-cluster" 3 | version="0.5" 4 | etcd_peers_file_path="/etc/sysconfig/etcd-peers" 5 | export AWS_DEFAULT_REGION=$(curl -s http://169.254.169.254/latest/dynamic/instance-identity/document | jq --raw-output .region) 6 | if [[ ! $AWS_DEFAULT_REGION ]]; then 7 | echo "$pkg: failed to get region" 8 | exit 1 9 | fi 10 | 11 | # Allow default client/server ports to be changed if necessary 12 | client_port=${ETCD_CLIENT_PORT:-2379} 13 | server_port=${ETCD_SERVER_PORT:-2380} 14 | 15 | # ETCD API https://coreos.com/etcd/docs/2.0.11/other_apis.html 16 | add_ok=201 17 | already_added=409 18 | delete_ok=204 19 | delete_gone=410 20 | 21 | # Retry N times before giving up 22 | retry_times=${RETRY_TIMES:-10} 23 | # Add a sleep time to allow etcd client requets to finish 24 | wait_time=3 25 | 26 | #if the script has already run just exit 27 | if [ -f "$etcd_peers_file_path" ]; then 28 | echo "$pkg: etcd-peers file $etcd_peers_file_path already created, exiting" 29 | exit 0 30 | fi 31 | 32 | ec2_instance_id=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) 33 | if [[ ! $ec2_instance_id ]]; then 34 | echo "$pkg: failed to get instance id from instance metadata" 35 | exit 2 36 | fi 37 | 38 | ec2_instance_ip=$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4) 39 | if [[ ! $ec2_instance_ip ]]; then 40 | echo "$pkg: failed to get instance IP address" 41 | exit 3 42 | fi 43 | 44 | # If we're in proxy mode we don't have to look this up and expect an env var 45 | if [[ ! 
$PROXY_ASG ]]; then 46 | etcd_proxy=off 47 | asg_name=$(aws autoscaling describe-auto-scaling-groups | jq --raw-output ".[] | map(select(.Instances[].InstanceId | contains(\"$ec2_instance_id\"))) | .[].AutoScalingGroupName") 48 | if [[ ! "$asg_name" ]]; then 49 | echo "$pkg: failed to get the auto scaling group name" 50 | exit 4 51 | fi 52 | else 53 | etcd_proxy=on 54 | if [[ -n $ASG_BY_TAG ]]; then 55 | asg_name=$(aws autoscaling describe-auto-scaling-groups | jq --raw-output ".[] | map(select(.Tags[].Value == \"$PROXY_ASG\")) | .[].AutoScalingGroupName") 56 | else 57 | asg_name=$PROXY_ASG 58 | fi 59 | fi 60 | 61 | etcd_client_scheme=${ETCD_CLIENT_SCHEME:-http} 62 | echo "client_client_scheme=$etcd_client_scheme" 63 | 64 | etcd_peer_scheme=${ETCD_PEER_SCHEME:-http} 65 | echo "peer_peer_scheme=$etcd_peer_scheme" 66 | 67 | etcd_peer_urls=$(aws ec2 describe-instances --instance-ids $(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-name "$asg_name" | jq '.AutoScalingGroups[0].Instances[] | select(.LifecycleState == "InService") | .InstanceId' | xargs) | jq -r ".Reservations[].Instances | map(\"$etcd_client_scheme://\" + .NetworkInterfaces[].PrivateIpAddress + \":$client_port\")[]") 68 | if [[ ! $etcd_peer_urls ]]; then 69 | echo "$pkg: unable to find members of auto scaling group" 70 | exit 5 71 | fi 72 | 73 | echo "etcd_peer_urls=$etcd_peer_urls" 74 | 75 | etcd_existing_peer_urls= 76 | etcd_existing_peer_names= 77 | etcd_good_member_url= 78 | 79 | for url in $etcd_peer_urls; do 80 | case "$url" in 81 | # If we're in proxy mode this is an error, but unlikely to happen? 82 | *$ec2_instance_ip*) continue;; 83 | esac 84 | 85 | etcd_members=$(curl $ETCD_CURLOPTS -f -s $url/v2/members) 86 | 87 | if [[ $? 
== 0 && $etcd_members ]]; then 88 | etcd_good_member_url="$url" 89 | echo "etcd_members=$etcd_members" 90 | etcd_existing_peer_urls=$(echo "$etcd_members" | jq --raw-output .[][].peerURLs[0]) 91 | etcd_existing_peer_names=$(echo "$etcd_members" | jq --raw-output .[][].name) 92 | break 93 | fi 94 | done 95 | 96 | echo "etcd_good_member_url=$etcd_good_member_url" 97 | echo "etcd_existing_peer_urls=$etcd_existing_peer_urls" 98 | echo "etcd_existing_peer_names=$etcd_existing_peer_names" 99 | 100 | # if I am not listed as a member of the cluster assume that this is a existing cluster 101 | # this will also be the case for a proxy situation 102 | if [[ $etcd_existing_peer_urls && $etcd_existing_peer_names != *"$ec2_instance_id"* ]]; then 103 | echo "joining existing cluster" 104 | 105 | # eject bad members from cluster 106 | peer_regexp=$(echo "$etcd_peer_urls" | sed 's/^.*https\{0,1\}:\/\/\([0-9.]*\):[0-9]*.*$/contains(\\"\/\/\1:\\")/' | xargs | sed 's/ */ or /g') 107 | if [[ ! $peer_regexp ]]; then 108 | echo "$pkg: failed to create peer regular expression" 109 | exit 6 110 | fi 111 | 112 | echo "peer_regexp=$peer_regexp" 113 | bad_peer=$(echo "$etcd_members" | jq --raw-output ".[] | map(select(.peerURLs[] | $peer_regexp | not )) | .[].id") 114 | echo "bad_peer=$bad_peer" 115 | 116 | if [[ $bad_peer ]]; then 117 | for bp in $bad_peer; do 118 | status=0 119 | retry=1 120 | until [[ $status = $delete_ok || $status = $delete_gone || $retry = $retry_times ]]; do 121 | status=$(curl $ETCD_CURLOPTS -f -s -w %{http_code} "$etcd_good_member_url/v2/members/$bp" -XDELETE) 122 | echo "$pkg: removing bad peer $bp, retry $((retry++)), return code $status." 123 | sleep $wait_time 124 | done 125 | if [[ $status != $delete_ok && $status != $delete_gone ]]; then 126 | echo "$pkg: ERROR: failed to remove bad peer: $bad_peer, return code $status." 127 | exit 7 128 | else 129 | echo "$pkg: removed bad peer: $bad_peer, return code $status." 
130 | fi 131 | done 132 | fi 133 | 134 | # If we're not a proxy we add ourselves as a member to the cluster 135 | if [[ ! $PROXY_ASG ]]; then 136 | peer_url="$etcd_peer_scheme://$ec2_instance_ip:$server_port" 137 | etcd_initial_cluster=$(curl $ETCD_CURLOPTS -s -f "$etcd_good_member_url/v2/members" | jq --raw-output '.[] | map(.name + "=" + .peerURLs[0]) | .[]' | xargs | sed 's/ */,/g')$(echo ",$ec2_instance_id=$peer_url") 138 | echo "etcd_initial_cluster=$etcd_initial_cluster" 139 | if [[ ! $etcd_initial_cluster ]]; then 140 | echo "$pkg: docker command to get etcd peers failed" 141 | exit 8 142 | fi 143 | 144 | # join an existing cluster 145 | status=0 146 | retry=1 147 | until [[ $status = $add_ok || $status = $already_added || $retry = $retry_times ]]; do 148 | status=$(curl $ETCD_CURLOPTS -f -s -w %{http_code} -o /dev/null -XPOST "$etcd_good_member_url/v2/members" -H "Content-Type: application/json" -d "{\"peerURLs\": [\"$peer_url\"], \"name\": \"$ec2_instance_id\"}") 149 | echo "$pkg: adding instance ID $ec2_instance_id with peer URL $peer_url, retry $((retry++)), return code $status." 150 | sleep $wait_time 151 | done 152 | if [[ $status != $add_ok && $status != $already_added ]]; then 153 | echo "$pkg: unable to add $peer_url to the cluster: return code $status." 154 | exit 9 155 | else 156 | echo "$pkg: added $peer_url to existing cluster, return code $status" 157 | fi 158 | # If we are a proxy we just want the list for the actual cluster 159 | else 160 | etcd_initial_cluster=$(curl $ETCD_CURLOPTS -s -f "$etcd_good_member_url/v2/members" | jq --raw-output '.[] | map(.name + "=" + .peerURLs[0]) | .[]' | xargs | sed 's/ */,/g') 161 | echo "etcd_initial_cluster=$etcd_initial_cluster" 162 | if [[ ! $etcd_initial_cluster ]]; then 163 | echo "$pkg: docker command to get etcd peers failed" 164 | exit 8 165 | fi 166 | fi 167 | 168 | cat > "$etcd_peers_file_path" < "$etcd_peers_file_path" <