├── README.md
├── destroy.sh
├── nginx.conf
└── provision.sh

/README.md:
--------------------------------------------------------------------------------
# Docker Swarm and Spark example

You can see the details in https://medium.com/@aoc/running-spark-on-docker-swarm-777b87b5aa3#.b74932l0u

Running this will create AWS instances, so make sure you understand what the code is
doing and what costs it will incur. Run at your own risk.

To create the Swarm:
```
VPC_ID=<your-vpc-id> ./provision.sh
```

To destroy all instances:
```
./destroy.sh
```
This will destroy the EC2 instances, but that does not necessarily mean there won't be any
additional charges, such as persistent volumes if the instances were created with any. Always
check in the AWS console that there are no services left running that you will be charged for.

--------------------------------------------------------------------------------
/destroy.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

CLUSTER_PREFIX=spark-cluster

# Remove every docker-machine host whose name starts with the cluster prefix.
docker-machine ls | grep "^${CLUSTER_PREFIX}" | cut -d' ' -f1 | xargs docker-machine rm -y

--------------------------------------------------------------------------------
/nginx.conf:
--------------------------------------------------------------------------------
user nginx;
worker_processes 1;

error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;

events {
    worker_connections 1024;
}

http {
    # Spark master web UI, published on port 80.
    server {
        listen 80;

        location / {
            proxy_pass http://master:8080;
        }
    }

    # Spark worker web UI, published on port 8081.
    server {
        listen 8081;

        location / {
            proxy_pass http://worker:8081;
        }
    }
}

--------------------------------------------------------------------------------
/provision.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -evx

REGION=eu-west-1
ZONE=c
CLUSTER_PREFIX=spark-cluster
MASTER_TYPE=m4.large
WORKER_TYPE=m3.medium
SPOT_PRICE=0.067
NUM_WORKERS=1
SPARK_IMAGE=gettyimages/spark:2.0.2-hadoop-2.7

DRIVER_OPTIONS="\
  --driver amazonec2 \
  --amazonec2-security-group=default \
  --amazonec2-vpc-id $VPC_ID \
  --amazonec2-zone $ZONE \
  --amazonec2-region $REGION"

MASTER_OPTIONS="$DRIVER_OPTIONS \
  --engine-label role=master \
  --amazonec2-instance-type=$MASTER_TYPE"

# Create the master host and initialise the Swarm on its private IP.
MASTER_MACHINE_NAME=${CLUSTER_PREFIX}-master
docker-machine create $MASTER_OPTIONS $MASTER_MACHINE_NAME

MASTER_IP=$(aws ec2 describe-instances --output json | jq -r \
  ".Reservations[].Instances[] | select(.KeyName==\"$MASTER_MACHINE_NAME\" and .State.Name==\"running\") | .PrivateIpAddress")
docker-machine ssh $MASTER_MACHINE_NAME sudo docker swarm init --advertise-addr $MASTER_IP
TOKEN=$(docker-machine ssh $MASTER_MACHINE_NAME sudo docker swarm join-token worker -q)

WORKER_OPTIONS="$DRIVER_OPTIONS \
  --amazonec2-request-spot-instance \
  --amazonec2-spot-price=$SPOT_PRICE \
  --amazonec2-instance-type=$WORKER_TYPE"
WORKER_MACHINE_NAME=${CLUSTER_PREFIX}-worker-

# Create the worker hosts in parallel and join each one to the Swarm.
for INDEX in $(seq $NUM_WORKERS)
do
  (
    docker-machine create $WORKER_OPTIONS $WORKER_MACHINE_NAME$INDEX
    docker-machine ssh $WORKER_MACHINE_NAME$INDEX sudo docker swarm join --token $TOKEN $MASTER_IP:2377
  ) &
done
wait
eval $(docker-machine env $MASTER_MACHINE_NAME)

# Overlay network so the Spark services can resolve each other by service name.
docker network create --driver overlay spark-network

# Spark master, pinned to the node labelled role=master.
docker service create \
  --name master \
  --constraint engine.labels.role==master \
  --replicas 1 \
  --network spark-network \
  ${SPARK_IMAGE} \
  bin/spark-class org.apache.spark.deploy.master.Master

# Spark workers, one replica per worker node (any node not labelled master).
docker service create \
  --name worker \
  --constraint engine.labels.role!=master \
  --replicas $NUM_WORKERS \
  --network spark-network \
  ${SPARK_IMAGE} \
  bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077

# nginx proxy service, publishing ports 80 and 8081 on the master node.
docker service create \
  --name proxy \
  --constraint engine.labels.role==master \
  --replicas 1 \
  --publish "80:80" \
  --publish "8081:8081" \
  --network spark-network \
  library/nginx:stable
--------------------------------------------------------------------------------
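
To sanity-check the cluster after `provision.sh` completes, something along the following lines should work. It assumes the default `spark-cluster` prefix from the script and that docker-machine can still reach the master host; reaching the web UIs also requires the `default` security group to allow inbound traffic on ports 80 and 8081.
```
# Point the local docker client at the Swarm manager (assumes the default
# CLUSTER_PREFIX=spark-cluster used by provision.sh).
eval $(docker-machine env spark-cluster-master)

# The three services (master, worker, proxy) should all report their replicas as running.
docker service ls
docker service ps master

# The nginx proxy publishes the web UIs on the master host:
# port 80 for the Spark master UI, port 8081 for the worker UI.
MASTER_HOST=$(docker-machine ip spark-cluster-master)
echo "Spark master UI: http://${MASTER_HOST}/"
echo "Spark worker UI: http://${MASTER_HOST}:8081/"
```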
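
For a smoke test of Spark itself, one option is to exec into the container backing the `master` service and submit the bundled SparkPi example. The container-name lookup and the examples-jar path below are assumptions about the `gettyimages/spark:2.0.2-hadoop-2.7` image layout (Spark home as working directory, Scala 2.11 build) and may need adjusting.
```
# Run against the master host's docker engine, where the single master replica is scheduled.
eval $(docker-machine env spark-cluster-master)

# Find the running container backing the "master" service (assumption: swarm task
# containers are named after the service, with a task suffix).
MASTER_CONTAINER=$(docker ps --filter name=master --format '{{.Names}}' | head -n 1)

# Submit the SparkPi example against the standalone master. The jar path assumes
# the stock Spark 2.0.2 / Scala 2.11 layout inside the image.
docker exec -it "$MASTER_CONTAINER" \
  bin/spark-submit \
    --master spark://master:7077 \
    --class org.apache.spark.examples.SparkPi \
    examples/jars/spark-examples_2.11-2.0.2.jar 100
```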
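
Scaling out later follows the same pattern as the worker loop in `provision.sh`: create another docker-machine host with the worker options, join it to the Swarm, then raise the worker service's replica count. A rough sketch, assuming `$WORKER_OPTIONS`, `$TOKEN` and `$MASTER_IP` are derived exactly as the script derives them:
```
# Add one more spot-instance worker host (variables as defined in provision.sh).
docker-machine create $WORKER_OPTIONS spark-cluster-worker-2
docker-machine ssh spark-cluster-worker-2 sudo docker swarm join --token $TOKEN $MASTER_IP:2377

# Then match the Spark worker service's replica count to the new node count.
eval $(docker-machine env spark-cluster-master)
docker service scale worker=2
```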