├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── VERSION
├── scripts
│   └── run.sh
└── spark-conf
    ├── spark-defaults.conf
    └── spark-env.sh

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Version 0.1
FROM ipython/scipystack

MAINTAINER Bernardo Gomez Palacio "bernardo.gomezpalacio@gmail.com"
ENV REFRESHED_AT 2015-03-19

ENV DEBIAN_FRONTEND noninteractive

RUN apt-get update
RUN apt-get dist-upgrade -y

# RUN echo "deb http://repos.mesosphere.io/ubuntu/ trusty main" > /etc/apt/sources.list.d/mesosphere.list
# RUN apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF
# RUN apt-get -y update
# RUN apt-get -y install mesos=0.26.0-0.2.145.ubuntu1404

# Setup
RUN sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF
RUN export OS_DISTRO=$(lsb_release -is | tr '[:upper:]' '[:lower:]') && \
    export OS_CODENAME=$(lsb_release -cs) && \
    echo "deb http://repos.mesosphere.io/${OS_DISTRO} ${OS_CODENAME} main" | \
    tee /etc/apt/sources.list.d/mesosphere.list && \
    apt-get -y update

RUN apt-get -y install mesos

RUN apt-get install -y python libnss3 curl

# RUN add-apt-repository ppa:webupd8team/java -y && \
#     apt-get install oracle-java8-installer && \
#     apt-get install oracle-java8-set-default

# echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu trusty main" | tee /etc/apt/sources.list.d/webupd8team-java.list
# echo "deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu trusty main" | tee -a /etc/apt/sources.list.d/webupd8team-java.list
# apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys EEA14886
# apt-get update
# apt-get install oracle-java8-installer

RUN curl http://d3kbcqa49mib13.cloudfront.net/spark-1.6.0-bin-hadoop2.6.tgz \
    | tar -xzC /opt && \
    mv /opt/spark* /opt/spark

RUN apt-get clean

# Fix pyspark six error.
RUN pip2 install -U six
RUN pip2 install boto
RUN pip2 install msgpack-python
RUN pip2 install avro

COPY spark-conf/* /opt/spark/conf/
COPY scripts /scripts

ENV SPARK_HOME /opt/spark

ENTRYPOINT ["/scripts/run.sh"]

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
(The MIT License)

Copyright (c) 2012 Bernardo Gomez Palacio

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
REPORTER = dot

# =================================================
# Docker Machine Env Variables
# =================================================

DOCKER_MACHINE_BIN?=docker-machine

DOCKER_MACHINE=$(DOCKER_MACHINE_BIN) $(DOCKER_MACHINE_OPTS)

# =================================================
# OS Specific Configuration.
# =================================================
ifeq ($(OS),Linux)
MD5_SUM="md5sum"
else
MD5_SUM="md5"
endif

# =================================================
# Targets
# =================================================

#all: release-prod

docker-machine-create:
	$(DOCKER_MACHINE) create \
		--driver virtualbox \
		--virtualbox-boot2docker-url "http://mirror.cs.vt.edu/pub/CentOS/7/isos/x86_64/CentOS-7-x86_64-Minimal-1511.iso" \
		--virtualbox-cpu-count "2" \
		--virtualbox-memory "2048" \
		--virtualbox-disk-size "20000" \
		--virtualbox-host-dns-resolver \
		mesos-docker

#	--virtualbox-boot2docker-url "http://lug.mtu.edu/centos/6.7/isos/x86_64/CentOS-6.7-x86_64-minimal.iso" \

.PHONY:

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Running Your Spark Job Executors In Docker Containers

The following tutorial showcases a _Dockerized_ _Apache Spark_ application running in a _Mesos_ cluster.
In the example the _Spark Driver_, as well as the _Spark Executors_, will run in a _Docker Image_ based on Ubuntu with the
addition of the [SciPy][SCIPY] Python packages. If you are already familiar with the reasons for using Docker
as well as Apache Mesos, feel free to skip the next section and jump right into the tutorial; if not, please carry on.

## Rationale

Today it is pretty common to find Engineers and Data Scientists who need to run _Big Data workloads_ inside a
shared infrastructure. In addition, the infrastructure is often used not only for such workloads but
also for other important services required for business operations. All of this amalgamates into a non-trivial infrastructure
and provisioning conundrum.

A very common way to solve this problem is to virtualize the infrastructure and statically partition it so that each development or business group
in the company has its own resources on which to deploy and run its applications. Hopefully the maintainers of such infrastructure and services have a _DevOps_ mentality and
have automated, and continuously work on automating, the configuration and software provisioning tasks on that infrastructure.
The problem is, as [Benjamin Hindman][MESOS_WHY], backed by [studies][MESOS_WP] done at the University of California at Berkeley,
points out, static partitioning can be highly inefficient in terms of infrastructure utilization.
This has prompted the development
of _Resource Schedulers_ that abstract CPU, memory, storage, and other compute resources away from machines, either physical or virtual,
and enable the execution of applications across the infrastructure to achieve, among other things, a higher utilization factor.

The concept of sharing infrastructure resources is not new for applications that entail the analysis of large datasets, in most cases through
algorithms that favor parallelization of workloads. Today the most common frameworks for developing such applications are _Hadoop Map Reduce_ and
_Apache Spark_. _Apache Spark_, in particular, can be deployed in clusters managed by _Resource Schedulers_ such as Hadoop YARN or Apache Mesos.
Now, since different applications run inside a shared infrastructure, it is common to find applications with different requirements
across the software packages, and versions of those packages, that they depend on to function.
As an operations engineer, or infrastructure manager, you could force your users onto a predefined set of software libraries, along with their versions,
that the infrastructure supports. Hopefully, if you follow that path, you also establish a procedure to upgrade those software libraries and add new ones.
This tends to require an investment in time and can be frustrating to Engineers and Data Scientists who are constantly installing new packages and
libraries to facilitate their work. When you decide to upgrade, you may also have to refactor applications that have been running for
a long time but have hard dependencies on previous versions of the packages that are part of the upgrade. All in all, it is not simple.

Linux Containers, and especially Docker, offer an abstraction in which software is packaged into lightweight images that can be executed as containers. The containers run with some level of isolation, provided mainly by kernel features such as _cgroups_ and namespaces. Each image defines the operating system it requires along with its software packages. This provides a
fantastic mechanism for shifting the burden of maintaining software packages and libraries away from infrastructure management and operations and onto the owners of the applications.
With this, the infrastructure and operations teams can run multiple, isolated applications that may have conflicting software libraries within the same infrastructure. _Apache Spark_ can leverage this as long as it is deployed on an _Apache Mesos_ cluster that supports Docker.

In the next sections we will review how we can run Apache Spark applications within Docker containers.

## Tutorial

For this tutorial we will use a CentOS 7.2 minimal image running on [VirtualBox][VBOX]. We will
not cover obtaining such a CentOS image, making it available in _VirtualBox_, or configuring its network interfaces.

In addition to the above we will be using a single node to keep this exercise as simple as possible.
We can later explore deploying a similar setup across a set of nodes in the cloud, but for the sake of simplicity and time
our single node will be running the following services:

* A Mesos Master
* A Mesos Slave
* A Zookeeper Instance
* A Docker Daemon

### Step 1: The Mesos Cluster

To install _Apache Mesos_ in your cluster I suggest you follow the [Mesosphere getting started guidelines][MESOSPHERE_GS].
Since we are using CentOS 7.2 we first install the _Mesosphere YUM repository_ as follows:

    # Add the repository
    sudo rpm -Uvh http://repos.mesosphere.com/el/7/noarch/RPMS/mesosphere-el-repo-7-1.noarch.rpm

We then install _Apache Mesos_ and the _Apache Zookeeper_ packages.

    sudo yum -y install mesos mesosphere-zookeeper

Once the packages are installed we need to configure Zookeeper as well as the Mesos Master and Slave.

#### Zookeeper

For Zookeeper we need to create a Zookeeper Node Identity; we do this by setting the numerical identifier inside
the `/var/lib/zookeeper/myid` file.

    echo "1" > /var/lib/zookeeper/myid

Since by default Zookeeper binds to all interfaces and exposes its services through port `2181`, we do not need to
change the `/etc/zookeeper/conf/zoo.cfg` file. Please refer to the [Mesosphere getting started guidelines][MESOSPHERE_GS]
if you have a Zookeeper ensemble, that is, more than one node running Zookeeper. After that we can start the Zookeeper service.

    sudo service zookeeper restart

#### Mesos Master and Slave

Before we start describing the Mesos configuration we must note that the location of the Mesos configuration files
mentioned below is specific to Mesosphere's Mesos package. If you don't have a strong reason to build
your own Mesos packages I suggest you use the ones that Mesosphere kindly provides. Let's continue.

We need to tell the Mesos Master and Slave the connection string they can use to reach Zookeeper, including the namespace.
By default Zookeeper binds to all interfaces; you might want to change this behaviour.
In our case we will make sure that the IP address used to connect to Zookeeper, the node's public interface IP
`192.168.99.100`, can be resolved within the containers. To do this we run the following:

    echo "zk://192.168.99.100:2181/mesos" > /etc/mesos/zk

Now, since in our setup several network interfaces are associated with the node that will be running the Mesos Master, we
pick an interface that will be reachable from within the Docker containers that will eventually run the Spark Driver and Spark Executors.
Knowing that the IP address we want to bind to is `192.168.99.100`, we do the following:

    echo "192.168.99.100" > /etc/mesos-master/ip

We do a similar thing for the Mesos Slave; again, keep in mind that in our example the Mesos Slave runs on the same node as the
Mesos Master and we are going to bind it to the same network interface.

    echo "192.168.99.100" > /etc/mesos-slave/ip
    echo "192.168.99.100" > /etc/mesos-slave/hostname

The `ip` file defines the IP address that the Mesos Slave is going to bind to, and `hostname` defines the _hostname_ that the Slave will use to report
its availability; it is therefore the value that the _Mesos Frameworks_, in our case _Apache Spark_, will use to connect to it.
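Because the Spark Executors will be launched as Docker containers, the Mesos Slave also needs the Docker containerizer enabled, and it helps to give executors a generous registration timeout so the Slave has time to pull the Spark image the first time one starts. With the Mesosphere packages this is typically configured as sketched below, following the recommendation in the [Spark on Mesos documentation][SPARK_MESOS]; adjust to your Mesos version:

    echo 'docker,mesos' > /etc/mesos-slave/containerizers
    echo '5mins' > /etc/mesos-slave/executor_registration_timeout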
Let's start the services.

    systemctl start mesos-master
    systemctl start mesos-slave

By default the Mesos Master will bind to port `5050` and the Mesos Slave to port `5051`. Let's confirm, assuming you have installed the `net-tools` package.

    netstat -pleno | grep -E "5050|5051"
    tcp 0 0 192.168.99.100:5050 0.0.0.0:* LISTEN 0 127336 22205/mesos-master off (0.00/0/0)
    tcp 0 0 192.168.99.100:5051 0.0.0.0:* LISTEN 0 127453 22242/mesos-slave off (0.00/0/0)

Let's run a test.

    MASTER=$(mesos-resolve `cat /etc/mesos/zk`) \
    LIBPROCESS_IP=192.168.99.100 \
    mesos-execute --master=$MASTER \
        --name="cluster-test" \
        --command="echo 'Hello World' && sleep 5 && echo 'Good Bye'"

### Step 2: Installing Docker

We followed the Docker documentation on [installing Docker on CentOS][DOCKER_COS]. I suggest you
do the same. In a nutshell we executed the following.

    sudo yum update
    sudo tee /etc/yum.repos.d/docker.repo <<-'EOF'
    [dockerrepo]
    name=Docker Repository
    baseurl=https://yum.dockerproject.org/repo/main/centos/$releasever/
    enabled=1
    gpgcheck=1
    gpgkey=https://yum.dockerproject.org/gpg
    EOF
    sudo yum install docker-engine
    sudo service docker start

If the above succeeded you should be able to run `docker ps` as well as `docker search ipython/scipystack` successfully.

### Step 3: Creating a Spark Image

Let's create the Dockerfile that will be used by the Spark Driver and the Spark Executors. For our example we will consider
that the Docker Image should provide the SciPy Stack along with additional Python libraries.
So, in a nutshell, the Docker Image must have the following features:

1. The version of `libmesos` should be compatible with the version of the Mesos Master and Slave, e.g. `/usr/lib/libmesos-0.26.0.so`.
1. It should have a valid JDK.
1. It should have the SciPy Stack as well as the Python packages that we want.
1. It should have a version of Spark; we will choose 1.6.0.

The Dockerfile below satisfies the requirements mentioned above. Note that installing Mesos
from the _Mesosphere_ package repository will also pull in _OpenJDK_, in this case `1.7`.
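The `libmesos` compatibility requirement is the one that most often bites: the `mesos` package that the Mesosphere Ubuntu repository installs inside the image must be compatible with the `0.26.0` build running on the host. Once the image below is built, a quick sanity check along these lines can confirm it (commands are illustrative):

    # On the CentOS host: the Mesos version the cluster is running.
    rpm -q mesos
    # Inside the image: the libmesos the Spark Driver and Executors will load.
    docker run --rm --entrypoint /bin/bash sparkmesos:latest -c 'ls /usr/lib/libmesos*.so'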
Dockerfile:

    # Version 0.1
    FROM ipython/scipystack
    MAINTAINER Bernardo Gomez Palacio "bernardo.gomezpalacio@gmail.com"
    ENV REFRESHED_AT 2015-03-19

    ENV DEBIAN_FRONTEND noninteractive

    RUN apt-get update
    RUN apt-get dist-upgrade -y

    # Setup
    RUN sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF
    RUN export OS_DISTRO=$(lsb_release -is | tr '[:upper:]' '[:lower:]') && \
        export OS_CODENAME=$(lsb_release -cs) && \
        echo "deb http://repos.mesosphere.io/${OS_DISTRO} ${OS_CODENAME} main" | \
        tee /etc/apt/sources.list.d/mesosphere.list && \
        apt-get -y update

    RUN apt-get -y install mesos

    RUN apt-get install -y python libnss3 curl

    RUN curl http://d3kbcqa49mib13.cloudfront.net/spark-1.6.0-bin-hadoop2.6.tgz \
        | tar -xzC /opt && \
        mv /opt/spark* /opt/spark

    RUN apt-get clean

    # Fix pyspark six error.
    RUN pip2 install -U six
    RUN pip2 install msgpack-python
    RUN pip2 install avro

    COPY spark-conf/* /opt/spark/conf/
    COPY scripts /scripts

    ENV SPARK_HOME /opt/spark

    ENTRYPOINT ["/scripts/run.sh"]

Let's go over some very important files that will be available in the Docker Image according to the
Dockerfile above.

The `spark-conf/spark-env.sh` file, as mentioned in the [Spark docs][SPARK_MMASTER], is used to set the
location of the Mesos `libmesos.so`.

    export MESOS_NATIVE_JAVA_LIBRARY=${MESOS_NATIVE_JAVA_LIBRARY:-/usr/lib/libmesos.so}
    export SPARK_LOCAL_IP=${SPARK_LOCAL_IP:-"127.0.0.1"}
    export SPARK_PUBLIC_DNS=${SPARK_PUBLIC_DNS:-"127.0.0.1"}

The `spark-conf/spark-defaults.conf` file serves as the definition of the default configuration for our
Spark jobs within the container; its contents are below.

    spark.master SPARK_MASTER
    spark.mesos.mesosExecutor.cores MESOS_EXECUTOR_CORE
    spark.mesos.executor.docker.image SPARK_IMAGE
    spark.mesos.executor.home /opt/spark
    spark.driver.host CURRENT_IP
    spark.executor.extraClassPath /opt/spark/custom/lib/*
    spark.driver.extraClassPath /opt/spark/custom/lib/*

Note that the placeholders such as `SPARK_MASTER` and `SPARK_IMAGE` are critical, since
they allow us to customize how the Spark application interacts with the Mesos Docker integration.

The image's entry point script, showcased below, populates the `spark-defaults.conf` file from those
placeholders and then hands control over to whatever Spark command we pass to the container,
for example `spark-shell`, `spark-submit` or `pyspark`.

    #!/bin/bash

    SPARK_MASTER=${SPARK_MASTER:-local}
    MESOS_EXECUTOR_CORE=${MESOS_EXECUTOR_CORE:-0.1}
    SPARK_IMAGE=${SPARK_IMAGE:-sparkmesos:latest}
    CURRENT_IP=$(hostname -i)

    sed -i 's;SPARK_MASTER;'$SPARK_MASTER';g' /opt/spark/conf/spark-defaults.conf
    sed -i 's;MESOS_EXECUTOR_CORE;'$MESOS_EXECUTOR_CORE';g' /opt/spark/conf/spark-defaults.conf
    sed -i 's;SPARK_IMAGE;'$SPARK_IMAGE';g' /opt/spark/conf/spark-defaults.conf
    sed -i 's;CURRENT_IP;'$CURRENT_IP';g' /opt/spark/conf/spark-defaults.conf

    export SPARK_LOCAL_IP=${SPARK_LOCAL_IP:-${CURRENT_IP:-"127.0.0.1"}}
    export SPARK_PUBLIC_DNS=${SPARK_PUBLIC_DNS:-${CURRENT_IP:-"127.0.0.1"}}

    if [ $ADDITIONAL_VOLUMES ];
    then
        echo "spark.mesos.executor.docker.volumes: $ADDITIONAL_VOLUMES" >> /opt/spark/conf/spark-defaults.conf
    fi

    exec "$@"
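With the entry point in place, the placeholders are rewritten from environment variables every time a container starts, just before the requested Spark command is exec'd. Once the image is built (next step), one way to eyeball the rendered configuration is to run `cat` through the entry point; a small sketch, with illustrative values:

    docker run --rm \
        -e SPARK_MASTER="mesos://zk://192.168.99.100:2181/mesos" \
        -e SPARK_IMAGE="sparkmesos:latest" \
        -e MESOS_EXECUTOR_CORE=0.2 \
        sparkmesos:latest cat /opt/spark/conf/spark-defaults.conf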
Let's build the image so we can start using it.

    docker build -t sparkmesos . && \
        docker tag -f sparkmesos:latest sparkmesos:latest

### Step 4: Running a Spark Application with Docker

Now that the image is built we just need to run it. We will start by launching the PySpark shell.

    docker run -it --rm \
        -e SPARK_MASTER="mesos://zk://192.168.99.100:2181/mesos" \
        -e SPARK_IMAGE="sparkmesos:latest" \
        -e PYSPARK_DRIVER_PYTHON=ipython2 \
        sparkmesos:latest /opt/spark/bin/pyspark

To make sure that SciPy is working, let's type the following into the PySpark shell:

    from scipy import special, optimize
    import numpy as np

    f = lambda x: -special.jv(3, x)
    sol = optimize.minimize(f, 1.0)
    x = np.linspace(0, 10, 5000)
    x

Now, let's calculate Pi as an example.

    docker run -it --rm \
        -e SPARK_MASTER="mesos://zk://192.168.99.100:2181/mesos" \
        -e SPARK_IMAGE="sparkmesos:latest" \
        -e PYSPARK_DRIVER_PYTHON=ipython2 \
        sparkmesos:latest /opt/spark/bin/spark-submit --driver-memory 500M \
        --executor-memory 500M \
        /opt/spark/examples/src/main/python/pi.py 10

## Conclusion and Further Notes

Although we were able to run a Spark application within Docker containers by leveraging Apache Mesos, there is more work to do.
We need to explore containerized Spark applications that spread across multiple nodes, along with providing
a mechanism that enables network port mapping.

## References

1. Apache Mesos. The Apache Software Foundation, 2015. Web. 27 Jan. 2016.
1. Apache Spark. The Apache Software Foundation, 2015. Web. 27 Jan. 2016.
1. Benjamin Hindman. "Apache Mesos NYC Meetup", August 20, 2013. Web. 27 Jan. 2016.
1. Docker. Docker Inc, 2015. Web. 27 Jan. 2016.
1. Hindman, Konwinski, Zaharia, Ghodsi, D. Joseph, Katz, Shenker, Stoica.
   "Mesos: A Platform for Fine-Grained Resource Sharing in the Data Center".
   Web. 27 Jan. 2016.
1. Mesosphere Inc, 2015. Web. 27 Jan. 2016.
1. SciPy. SciPy developers, 2015. Web. 28 Jan. 2016.
1. VirtualBox, Oracle Inc, 2015. Web. 28 Jan. 2016.
1. Wang Qiang, "Docker Spark Mesos". Web. 28 Jan. 2016.

[DOCKER_COS]: https://docs.docker.com/engine/installation/centos/ "Docker CentOS install."
[MESOS_WHY]: https://speakerdeck.com/benh/apache-mesos-nyc-meetup "Apache Mesos NYC Meetup."
[MESOS_WP]: https://www.cs.berkeley.edu/~alig/papers/mesos.pdf "Mesos: A Platform for Fine-Grained Resource Sharing in the Data Center."
[SCIPY]: http://www.scipy.org/ "SciPy: Python based ecosystem for Math, Science, and Engineering."
[SPARK_GUIDE]: http://spark.apache.org/docs/latest/programming-guide.html "Apache Spark: Programming Guide."
[SPARK_MESOS]: http://spark.apache.org/docs/latest/running-on-mesos.html "Apache Spark: Running On Mesos."
[SPARK_MMASTER]: http://spark.apache.org/docs/latest/running-on-mesos.html "Apache Spark: Using a Mesos Master URL"
[VBOX]: https://www.virtualbox.org/ "VirtualBox"
[MESOSPHERE_GS]: https://open.mesosphere.com/getting-started/install/ "Mesosphere: Setting up a Mesos and Marathon Cluster"

--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
0.1

--------------------------------------------------------------------------------
/scripts/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Defaults for the values substituted into spark-defaults.conf.
SPARK_MASTER=${SPARK_MASTER:-local}
MESOS_EXECUTOR_CORE=${MESOS_EXECUTOR_CORE:-0.1}
SPARK_IMAGE=${SPARK_IMAGE:-sparkmesos:latest}
CURRENT_IP=$(hostname -i)

# Replace the placeholders in spark-defaults.conf with the values above.
sed -i 's;SPARK_MASTER;'$SPARK_MASTER';g' /opt/spark/conf/spark-defaults.conf
sed -i 's;MESOS_EXECUTOR_CORE;'$MESOS_EXECUTOR_CORE';g' /opt/spark/conf/spark-defaults.conf
sed -i 's;SPARK_IMAGE;'$SPARK_IMAGE';g' /opt/spark/conf/spark-defaults.conf
sed -i 's;CURRENT_IP;'$CURRENT_IP';g' /opt/spark/conf/spark-defaults.conf

# Bind Spark to the container's address unless the caller overrides it.
export SPARK_LOCAL_IP=${SPARK_LOCAL_IP:-${CURRENT_IP:-"127.0.0.1"}}
export SPARK_PUBLIC_DNS=${SPARK_PUBLIC_DNS:-${CURRENT_IP:-"127.0.0.1"}}

# Optionally expose extra host volumes to the executor containers.
if [ $ADDITIONAL_VOLUMES ];
then
    echo "spark.mesos.executor.docker.volumes: $ADDITIONAL_VOLUMES" >> /opt/spark/conf/spark-defaults.conf
fi

exec "$@"

--------------------------------------------------------------------------------
/spark-conf/spark-defaults.conf:
--------------------------------------------------------------------------------
spark.master SPARK_MASTER
spark.mesos.mesosExecutor.cores MESOS_EXECUTOR_CORE
spark.mesos.executor.docker.image SPARK_IMAGE
spark.mesos.executor.home /opt/spark
spark.driver.host CURRENT_IP
spark.executor.extraClassPath /opt/spark/custom/lib/*
spark.driver.extraClassPath /opt/spark/custom/lib/*

--------------------------------------------------------------------------------
/spark-conf/spark-env.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

export MESOS_NATIVE_JAVA_LIBRARY=${MESOS_NATIVE_JAVA_LIBRARY:-/usr/lib/libmesos.so}
export SPARK_LOCAL_IP=${SPARK_LOCAL_IP:-"127.0.0.1"}
export SPARK_PUBLIC_DNS=${SPARK_PUBLIC_DNS:-"127.0.0.1"}

--------------------------------------------------------------------------------
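A closing note on two settings that appear in the files above but are not exercised by the tutorial: `spark-defaults.conf` puts `/opt/spark/custom/lib/*` on the driver and executor class paths, and `run.sh` turns an optional `ADDITIONAL_VOLUMES` environment variable into `spark.mesos.executor.docker.volumes`. Combined, they allow extra jars to be mounted from the host instead of being baked into the image; a hedged sketch, where the host path is illustrative:

    docker run -it --rm \
        -e SPARK_MASTER="mesos://zk://192.168.99.100:2181/mesos" \
        -e SPARK_IMAGE="sparkmesos:latest" \
        -e ADDITIONAL_VOLUMES="/opt/jars:/opt/spark/custom/lib:ro" \
        -v /opt/jars:/opt/spark/custom/lib:ro \
        sparkmesos:latest /opt/spark/bin/spark-shell

The `-v` flag covers the driver container started by `docker run`; `ADDITIONAL_VOLUMES` makes the same mount available to the executor containers that Mesos launches.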