.
├── .gitignore
├── Dockerfile
├── Makefile
├── README.md
├── VERSION
├── build
├── scripts
│   └── run.sh
└── spark_conf
    ├── spark-defaults.conf
    └── spark-env.sh
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.swp
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM java:openjdk-8-jdk

MAINTAINER Dmitry B "ficha83@gmail.com"

RUN apt-get update && \
    apt-get install -y python libnss3 curl wget

RUN cd /tmp && \
    wget http://repos.mesosphere.com/debian/pool/main/m/mesos/mesos_1.1.0-2.0.107.debian81_amd64.deb && \
    dpkg --unpack mesos_1.1.0-2.0.107.debian81_amd64.deb && \
    apt-get install -f -y && \
    rm mesos_1.1.0-2.0.107.debian81_amd64.deb && \
    apt-get clean

RUN cd /tmp && \
    wget https://d3kbcqa49mib13.cloudfront.net/spark-2.1.1-bin-hadoop2.7.tgz && \
    tar -xzvf spark-2.1.1-bin-hadoop2.7.tgz && \
    mv spark-2.1.1-bin-hadoop2.7 /opt/spark && \
    rm spark-2.1.1-bin-hadoop2.7.tgz

ENV MESOS_NATIVE_JAVA_LIBRARY /usr/lib/libmesos.so
ENV SPARK_HOME /opt/spark

COPY spark_conf/* /opt/spark/conf/
COPY scripts /scripts

ENTRYPOINT ["/scripts/run.sh"]
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
REPO=dmitryb/mesos-spark
V=2.1.1

docker-build:
	docker build -t $(REPO):$(V) .

docker-push:
	docker push $(REPO):$(V)

docker-pull:
	docker pull $(REPO):$(V)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Spark Docker for Mesos (DC/OS)

Docker image for running Spark on Mesos (DC/OS).
Built with:

* Spark 2.1.1 (Hadoop 2.7) (https://d3kbcqa49mib13.cloudfront.net/spark-2.1.1-bin-hadoop2.7.tgz)
* Mesos 1.1.0 (http://repos.mesosphere.com/debian/pool/main/m/mesos/mesos_1.1.0-2.0.107.debian81_amd64.deb)

# How to use

Build the image

```bash
make docker-build
```

Pull the image

```bash
make docker-pull
```

Start a Spark driver (spark-shell) inside a Docker container

```bash
MESOS_IP=mesos://<mesos-master>:<port>
DRIVER_IP=<routable-ip-of-this-host>
EXECUTOR_IMAGE=dmitryb/mesos-spark:2.1.1
CORES=2
RAM=2g

docker run -it --rm --net=host dmitryb/mesos-spark:2.1.1 bash /opt/spark/bin/spark-shell \
  --conf spark.master=${MESOS_IP} \
  --conf spark.driver.host=${DRIVER_IP} \
  --conf spark.mesos.coarse=true \
  --conf spark.mesos.executor.docker.image=${EXECUTOR_IMAGE} \
  --conf spark.mesos.executor.home=/opt/spark \
  --conf spark.task.maxFailures=10 \
  --conf spark.sql.parquet.compression.codec=gzip \
  --conf spark.sql.warehouse.dir=file:///tmp \
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
  --conf spark.kryoserializer.buffer.max=1g \
  --conf spark.task.cpus=1 \
  --conf spark.executor.memory=${RAM} \
  --conf spark.cores.max=${CORES} \
  --conf spark.sql.shuffle.partitions=2000 \
  --conf spark.shuffle.spill=true \
  --conf spark.executor.heartbeatInterval=10s
```
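The same pattern should work for batch jobs via `spark-submit`. A minimal sketch, reusing the variables above; the jar path and main class are placeholders, not files shipped with this image:

```bash
docker run -it --rm --net=host dmitryb/mesos-spark:2.1.1 bash /opt/spark/bin/spark-submit \
  --conf spark.master=${MESOS_IP} \
  --conf spark.driver.host=${DRIVER_IP} \
  --conf spark.mesos.coarse=true \
  --conf spark.mesos.executor.docker.image=${EXECUTOR_IMAGE} \
  --conf spark.mesos.executor.home=/opt/spark \
  --class com.example.YourApp \
  /path/to/your-app.jar
```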
# TODO

Integrate with pyspark
--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1.0.3
--------------------------------------------------------------------------------
/build:
--------------------------------------------------------------------------------
#!/bin/bash
VERSION=$(cat VERSION)
docker build "$@" -t sparkmesos .
# Tag the freshly built image with the version from the VERSION file.
docker tag sparkmesos:latest sparkmesos:$VERSION
--------------------------------------------------------------------------------
/scripts/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash

SPARK_MASTER=${SPARK_MASTER:-local}
MESOS_EXECUTOR_CORE=${MESOS_EXECUTOR_CORE:-0.1}
SPARK_IMAGE=${SPARK_IMAGE:-sparkmesos:latest}
CURRENT_IP=$(hostname -i)

echo "current ip: $CURRENT_IP"
export LIBPROCESS_IP=$CURRENT_IP

# Substitute the placeholders in spark-defaults.conf with runtime values.
sed -i "s;SPARK_MASTER;$SPARK_MASTER;g" /opt/spark/conf/spark-defaults.conf
sed -i "s;MESOS_EXECUTOR_CORE;$MESOS_EXECUTOR_CORE;g" /opt/spark/conf/spark-defaults.conf
sed -i "s;SPARK_IMAGE;$SPARK_IMAGE;g" /opt/spark/conf/spark-defaults.conf
sed -i "s;CURRENT_IP;$CURRENT_IP;g" /opt/spark/conf/spark-defaults.conf

# Optionally expose extra host volumes to the executor containers.
if [ -n "$ADDITIONAL_VOLUMES" ];
then
    echo "spark.mesos.executor.docker.volumes: $ADDITIONAL_VOLUMES" >> /opt/spark/conf/spark-defaults.conf
fi

exec "$@"
--------------------------------------------------------------------------------
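Because run.sh templates spark-defaults.conf from environment variables before exec'ing the given command, the defaults can be overridden per container. A minimal sketch, assuming a Mesos master on the standard port 5050 (the host is a placeholder):

```bash
docker run -it --rm --net=host \
  -e SPARK_MASTER="mesos://<mesos-master>:5050" \
  -e SPARK_IMAGE="dmitryb/mesos-spark:2.1.1" \
  -e MESOS_EXECUTOR_CORE="0.5" \
  dmitryb/mesos-spark:2.1.1 /opt/spark/bin/spark-shell
```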
/spark_conf/spark-defaults.conf:
--------------------------------------------------------------------------------
spark.master                        SPARK_MASTER
spark.mesos.mesosExecutor.cores     MESOS_EXECUTOR_CORE
spark.mesos.executor.docker.image   SPARK_IMAGE
spark.mesos.executor.home           /opt/spark
spark.driver.host                   CURRENT_IP
spark.executor.extraClassPath       /opt/spark/custom/lib/*
spark.driver.extraClassPath         /opt/spark/custom/lib/*
--------------------------------------------------------------------------------
/spark_conf/spark-env.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

export MESOS_NATIVE_JAVA_LIBRARY=${MESOS_NATIVE_JAVA_LIBRARY:-/usr/lib/libmesos.so}
--------------------------------------------------------------------------------
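Both `extraClassPath` entries point at `/opt/spark/custom/lib`, which is empty in the image, so extra jars are presumably meant to be mounted in. A hedged sketch: mount the jars into the driver container with `-v` and hand the same mapping to the executors via `ADDITIONAL_VOLUMES`, which run.sh forwards as `spark.mesos.executor.docker.volumes` (entries take the form `host_path:container_path[:ro|:rw]`); `/opt/jars` is a hypothetical host directory:

```bash
# /opt/jars is a placeholder for a host directory holding your extra jars.
docker run -it --rm --net=host \
  -v /opt/jars:/opt/spark/custom/lib:ro \
  -e ADDITIONAL_VOLUMES="/opt/jars:/opt/spark/custom/lib:ro" \
  dmitryb/mesos-spark:2.1.1 /opt/spark/bin/spark-shell
```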