├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── VERSION
├── scripts
│   └── run.sh
└── spark-conf
    ├── spark-defaults.conf
    └── spark-env.sh

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Version 0.1
FROM ipython/scipystack

MAINTAINER Bernardo Gomez Palacio "bernardo.gomezpalacio@gmail.com"
ENV REFRESHED_AT 2015-03-19

ENV DEBIAN_FRONTEND noninteractive

RUN apt-get update
RUN apt-get dist-upgrade -y

# RUN echo "deb http://repos.mesosphere.io/ubuntu/ trusty main" > /etc/apt/sources.list.d/mesosphere.list
# RUN apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF
# RUN apt-get -y update
# RUN apt-get -y install mesos=0.26.0-0.2.145.ubuntu1404

# Setup
RUN sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF
RUN export OS_DISTRO=$(lsb_release -is | tr '[:upper:]' '[:lower:]') && \
    export OS_CODENAME=$(lsb_release -cs) && \
    echo "deb http://repos.mesosphere.io/${OS_DISTRO} ${OS_CODENAME} main" | \
    tee /etc/apt/sources.list.d/mesosphere.list && \
    apt-get -y update

RUN apt-get -y install mesos

RUN apt-get install -y python libnss3 curl

# RUN add-apt-repository ppa:webupd8team/java -y && \
#     apt-get install oracle-java8-installer && \
#     apt-get install oracle-java8-set-default

# echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu trusty main" | tee /etc/apt/sources.list.d/webupd8team-java.list
# echo "deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu trusty main" | tee -a /etc/apt/sources.list.d/webupd8team-java.list
# apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys EEA14886
# apt-get update
# apt-get install oracle-java8-installer

RUN curl http://d3kbcqa49mib13.cloudfront.net/spark-1.6.0-bin-hadoop2.6.tgz \
    | tar -xzC /opt && \
    mv /opt/spark* /opt/spark

RUN apt-get clean

# Fix pyspark six error.
RUN pip2 install -U six
RUN pip2 install boto
RUN pip2 install msgpack-python
RUN pip2 install avro

COPY spark-conf/* /opt/spark/conf/
COPY scripts /scripts

ENV SPARK_HOME /opt/spark

ENTRYPOINT ["/scripts/run.sh"]

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
(The MIT License)

Copyright (c) 2012 Bernardo Gomez Palacio

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
REPORTER = dot

# =================================================
# Docker Machine Env Variables
# =================================================

DOCKER_MACHINE_BIN?=docker-machine

DOCKER_MACHINE=$(DOCKER_MACHINE_BIN) $(DOCKER_MACHINE_OPTS)

# =================================================
# OS Specific Configuration.
# =================================================
ifeq ($(OS),Linux)
MD5_SUM="md5sum"
else
MD5_SUM="md5"
endif

# =================================================
# Targets
# =================================================

#all: release-prod

docker-machine-create:
	$(DOCKER_MACHINE) create \
		--driver virtualbox \
		--virtualbox-boot2docker-url "http://mirror.cs.vt.edu/pub/CentOS/7/isos/x86_64/CentOS-7-x86_64-Minimal-1511.iso" \
		--virtualbox-cpu-count "2" \
		--virtualbox-memory "2048" \
		--virtualbox-disk-size "20000" \
		--virtualbox-host-dns-resolver \
		mesos-docker

#	--virtualbox-boot2docker-url "http://lug.mtu.edu/centos/6.7/isos/x86_64/CentOS-6.7-x86_64-minimal.iso" \

.PHONY:

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Running Your Spark Job Executors In Docker Containers

The following tutorial showcases a _Dockerized_ _Apache Spark_ application running in a _Mesos_ cluster.
In the example the _Spark Driver_, as well as the _Spark Executors_, will run in a _Docker Image_ based on Ubuntu with the
addition of the [SciPy][SCIPY] Python packages. If you are already familiar with the reasons for using Docker
as well as Apache Mesos, feel free to skip the next section and jump right into the tutorial; if not, please carry on.

## Rationale

Today it is pretty common to find Engineers and Data Scientists who need to run _Big Data workloads_ inside a
shared infrastructure. In addition, the infrastructure is often used not only for such workloads but
also for other important services required for business operations. All of this amalgamates into a non-trivial infrastructure
and provisioning conundrum.

A very common way to solve this problem is to virtualize the infrastructure and statically partition it so that each development or business group
in the company has its own resources on which to deploy and run its applications. Hopefully the maintainers of such infrastructure and services have a _DevOps_ mentality and
have automated, and continuously work on automating, the configuration and software provisioning tasks on that infrastructure.
The problem is, as [Benjamin Hindman][MESOS_WHY], backed by [studies][MESOS_WP] done at the University of California at Berkeley,
points out, static partitioning can be highly inefficient in terms of infrastructure utilization.
This has prompted the development
of _Resource Schedulers_ that abstract CPU, memory, storage, and other compute resources away from machines, either physical or virtual,
and enable the execution of applications across the infrastructure to achieve, among other things, a higher utilization factor.

The concept of sharing infrastructure resources is not new for applications that entail the analysis of large datasets, in most cases through
algorithms that favor parallelization of workloads. Today the most common frameworks for developing such applications are _Hadoop Map Reduce_ and
_Apache Spark_. _Apache Spark_, in particular, can be deployed in clusters managed by _Resource Schedulers_ such as Hadoop YARN or Apache Mesos.
Now, since different applications run inside a shared infrastructure, it is common to find applications with different requirements
across the software packages, and versions of those packages, that they depend on to function.
As an operations engineer, or infrastructure manager, you could force your users onto a predefined set of software libraries, along with their versions,
that the infrastructure supports. Hopefully, if you follow that path, you also establish a procedure to upgrade those software libraries and add new ones.
This tends to require an investment in time and can be frustrating to Engineers and Data Scientists who are constantly installing new packages and
libraries to facilitate their work. When you decide to upgrade, you may also have to refactor applications that have been running for
a long time but have hard dependencies on previous versions of the packages that are part of the upgrade. All in all, it is not simple.

Linux Containers, and especially Docker, offer an abstraction in which software is packaged into lightweight images that can be executed as containers. The containers run with some level of isolation, provided mainly by kernel features such as _cgroups_ and namespaces. Each image defines the operating system it requires along with its software packages. This provides a
fantastic mechanism for shifting the burden of maintaining software packages and libraries away from infrastructure management and operations and onto the owners of the applications.
With this, the infrastructure and operations teams can run multiple, isolated applications that may have conflicting software libraries within the same infrastructure. _Apache Spark_ can leverage this as long as it is deployed on an _Apache Mesos_ cluster that supports Docker.

In the next sections we will review how we can run Apache Spark applications within Docker containers.

## Tutorial

For this tutorial we will use a CentOS 7.2 minimal image running on [VirtualBox][VBOX]. We will
not cover obtaining such a CentOS image, making it available in _VirtualBox_, or configuring its network interfaces.

In addition to the above we will be using a single node to keep this exercise as simple as possible.
We can later explore deploying a similar setup across a set of nodes in the cloud, but for the sake of simplicity and time
our single node will be running the following services:

* A Mesos Master
* A Mesos Slave
* A Zookeeper Instance
* A Docker Daemon

### Step 1: The Mesos Cluster

To install _Apache Mesos_ in your cluster I suggest you follow the [Mesosphere getting started guidelines][MESOSPHERE_GS].
Since we are using CentOS 7.2 we first install the _Mesosphere YUM repository_ as follows:

    # Add the repository
    sudo rpm -Uvh http://repos.mesosphere.com/el/7/noarch/RPMS/mesosphere-el-repo-7-1.noarch.rpm

We then install _Apache Mesos_ and the _Apache Zookeeper_ packages.

    sudo yum -y install mesos mesosphere-zookeeper

Once the packages are installed we need to configure Zookeeper as well as the Mesos Master and Slave.

#### Zookeeper

For Zookeeper we need to create a Zookeeper Node Identity; we do this by setting the numerical identifier inside
the `/var/lib/zookeeper/myid` file.

    echo "1" > /var/lib/zookeeper/myid

Since by default Zookeeper binds to all interfaces and exposes its services through port `2181`, we do not need to
change the `/etc/zookeeper/conf/zoo.cfg` file. Please refer to the [Mesosphere getting started guidelines][MESOSPHERE_GS]
if you have a Zookeeper ensemble, that is, more than one node running Zookeeper. After that we can start the Zookeeper service.

    sudo service zookeeper restart

#### Mesos Master and Slave

Before we start describing the Mesos configuration we must note that the location of the Mesos configuration files
mentioned below is specific to Mesosphere's Mesos package. If you don't have a strong reason to build
your own Mesos packages I suggest you use the ones that Mesosphere kindly provides. Let's continue.

We need to tell the Mesos Master and Slave the connection string they can use to reach Zookeeper, including the namespace.
By default Zookeeper binds to all interfaces; you might want to change this behaviour.
In our case we will make sure that the IP address used to connect to Zookeeper, the node's public interface IP
`192.168.99.100`, can be resolved within the containers. To do this we run the following:

    echo "zk://192.168.99.100:2181/mesos" > /etc/mesos/zk

Now, since in our setup several network interfaces are associated with the node that will be running the Mesos Master, we
pick an interface that will be reachable from within the Docker containers that will eventually run the Spark Driver and Spark Executors.
Knowing that the IP address we want to bind to is `192.168.99.100`, we do the following:

    echo "192.168.99.100" > /etc/mesos-master/ip

We do a similar thing for the Mesos Slave; again, keep in mind that in our example the Mesos Slave runs on the same node as the
Mesos Master and we are going to bind it to the same network interface.

    echo "192.168.99.100" > /etc/mesos-slave/ip
    echo "192.168.99.100" > /etc/mesos-slave/hostname

The `ip` file defines the IP address that the Mesos Slave is going to bind to, and `hostname` defines the _hostname_ that the Slave will use to report
its availability; it is therefore the value that the _Mesos Frameworks_, in our case _Apache Spark_, will use to connect to it.
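Because the Spark Executors will be launched as Docker containers, the Mesos Slave also needs the Docker containerizer enabled, and it helps to give executors a generous registration timeout so the Slave has time to pull the Spark image the first time one starts. With the Mesosphere packages this is typically configured as sketched below, following the recommendation in the [Spark on Mesos documentation][SPARK_MESOS]; adjust to your Mesos version:

    echo 'docker,mesos' > /etc/mesos-slave/containerizers
    echo '5mins' > /etc/mesos-slave/executor_registration_timeout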
Let's start the services.

    systemctl start mesos-master
    systemctl start mesos-slave

By default the Mesos Master will bind to port `5050` and the Mesos Slave to port `5051`. Let's confirm, assuming you have installed the `net-tools` package.

    netstat -pleno | grep -E "5050|5051"
    tcp 0 0 192.168.99.100:5050 0.0.0.0:* LISTEN 0 127336 22205/mesos-master off (0.00/0/0)
    tcp 0 0 192.168.99.100:5051 0.0.0.0:* LISTEN 0 127453 22242/mesos-slave off (0.00/0/0)

Let's run a test.

    MASTER=$(mesos-resolve `cat /etc/mesos/zk`) \
    LIBPROCESS_IP=192.168.99.100 \
    mesos-execute --master=$MASTER \
        --name="cluster-test" \
        --command="echo 'Hello World' && sleep 5 && echo 'Good Bye'"

### Step 2: Installing Docker

We followed the Docker documentation on [installing Docker on CentOS][DOCKER_COS]. I suggest you
do the same. In a nutshell we executed the following.

    sudo yum update
    sudo tee /etc/yum.repos.d/docker.repo <<-'EOF'
    [dockerrepo]
    name=Docker Repository
    baseurl=https://yum.dockerproject.org/repo/main/centos/$releasever/
    enabled=1
    gpgcheck=1
    gpgkey=https://yum.dockerproject.org/gpg
    EOF
    sudo yum install docker-engine
    sudo service docker start

If the above succeeded you should be able to run `docker ps` as well as `docker search ipython/scipystack` successfully.

### Step 3: Creating a Spark Image

Let's create the Dockerfile that will be used by the Spark Driver and the Spark Executors. For our example we will consider
that the Docker Image should provide the SciPy Stack along with additional Python libraries.
So, in a nutshell, the Docker Image must have the following features:

1. The version of `libmesos` should be compatible with the version of the Mesos Master and Slave, e.g. `/usr/lib/libmesos-0.26.0.so`.
1. It should have a valid JDK.
1. It should have the SciPy Stack as well as the Python packages that we want.
1. It should have a version of Spark; we will choose 1.6.0.

The Dockerfile below satisfies the requirements mentioned above. Note that installing Mesos
from the _Mesosphere_ package repository will also pull in _OpenJDK_, in this case `1.7`.
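The `libmesos` compatibility requirement is the one that most often bites: the `mesos` package that the Mesosphere Ubuntu repository installs inside the image must be compatible with the `0.26.0` build running on the host. Once the image below is built, a quick sanity check along these lines can confirm it (commands are illustrative):

    # On the CentOS host: the Mesos version the cluster is running.
    rpm -q mesos
    # Inside the image: the libmesos the Spark Driver and Executors will load.
    docker run --rm --entrypoint /bin/bash sparkmesos:latest -c 'ls /usr/lib/libmesos*.so'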
Dockerfile:

    # Version 0.1
    FROM ipython/scipystack
    MAINTAINER Bernardo Gomez Palacio "bernardo.gomezpalacio@gmail.com"
    ENV REFRESHED_AT 2015-03-19

    ENV DEBIAN_FRONTEND noninteractive

    RUN apt-get update
    RUN apt-get dist-upgrade -y

    # Setup
    RUN sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF
    RUN export OS_DISTRO=$(lsb_release -is | tr '[:upper:]' '[:lower:]') && \
        export OS_CODENAME=$(lsb_release -cs) && \
        echo "deb http://repos.mesosphere.io/${OS_DISTRO} ${OS_CODENAME} main" | \
        tee /etc/apt/sources.list.d/mesosphere.list && \
        apt-get -y update

    RUN apt-get -y install mesos

    RUN apt-get install -y python libnss3 curl

    RUN curl http://d3kbcqa49mib13.cloudfront.net/spark-1.6.0-bin-hadoop2.6.tgz \
        | tar -xzC /opt && \
        mv /opt/spark* /opt/spark

    RUN apt-get clean

    # Fix pyspark six error.
    RUN pip2 install -U six
    RUN pip2 install msgpack-python
    RUN pip2 install avro

    COPY spark-conf/* /opt/spark/conf/
    COPY scripts /scripts

    ENV SPARK_HOME /opt/spark

    ENTRYPOINT ["/scripts/run.sh"]

Let's go over some very important files that will be available in the Docker Image according to the
Dockerfile above.

The `spark-conf/spark-env.sh` file, as mentioned in the [Spark docs][SPARK_MMASTER], is used to set the
location of the Mesos `libmesos.so`.

    export MESOS_NATIVE_JAVA_LIBRARY=${MESOS_NATIVE_JAVA_LIBRARY:-/usr/lib/libmesos.so}
    export SPARK_LOCAL_IP=${SPARK_LOCAL_IP:-"127.0.0.1"}
    export SPARK_PUBLIC_DNS=${SPARK_PUBLIC_DNS:-"127.0.0.1"}

The `spark-conf/spark-defaults.conf` file serves as the definition of the default configuration for our
Spark jobs within the container; its contents are below.

    spark.master SPARK_MASTER
    spark.mesos.mesosExecutor.cores MESOS_EXECUTOR_CORE
    spark.mesos.executor.docker.image SPARK_IMAGE
    spark.mesos.executor.home /opt/spark
    spark.driver.host CURRENT_IP
    spark.executor.extraClassPath /opt/spark/custom/lib/*
    spark.driver.extraClassPath /opt/spark/custom/lib/*

Note that the placeholders such as `SPARK_MASTER` and `SPARK_IMAGE` are critical, since
they allow us to customize how the Spark application interacts with the Mesos Docker integration.

The image's entry point script, showcased below, populates the `spark-defaults.conf` file from those
placeholders and then hands control over to whatever Spark command we pass to the container,
for example `spark-shell`, `spark-submit` or `pyspark`.

    #!/bin/bash

    SPARK_MASTER=${SPARK_MASTER:-local}
    MESOS_EXECUTOR_CORE=${MESOS_EXECUTOR_CORE:-0.1}
    SPARK_IMAGE=${SPARK_IMAGE:-sparkmesos:latest}
    CURRENT_IP=$(hostname -i)

    sed -i 's;SPARK_MASTER;'$SPARK_MASTER';g' /opt/spark/conf/spark-defaults.conf
    sed -i 's;MESOS_EXECUTOR_CORE;'$MESOS_EXECUTOR_CORE';g' /opt/spark/conf/spark-defaults.conf
    sed -i 's;SPARK_IMAGE;'$SPARK_IMAGE';g' /opt/spark/conf/spark-defaults.conf
    sed -i 's;CURRENT_IP;'$CURRENT_IP';g' /opt/spark/conf/spark-defaults.conf

    export SPARK_LOCAL_IP=${SPARK_LOCAL_IP:-${CURRENT_IP:-"127.0.0.1"}}
    export SPARK_PUBLIC_DNS=${SPARK_PUBLIC_DNS:-${CURRENT_IP:-"127.0.0.1"}}

    if [ $ADDITIONAL_VOLUMES ];
    then
        echo "spark.mesos.executor.docker.volumes: $ADDITIONAL_VOLUMES" >> /opt/spark/conf/spark-defaults.conf
    fi

    exec "$@"
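With the entry point in place, the placeholders are rewritten from environment variables every time a container starts, just before the requested Spark command is exec'd. Once the image is built (next step), one way to eyeball the rendered configuration is to run `cat` through the entry point; a small sketch, with illustrative values:

    docker run --rm \
        -e SPARK_MASTER="mesos://zk://192.168.99.100:2181/mesos" \
        -e SPARK_IMAGE="sparkmesos:latest" \
        -e MESOS_EXECUTOR_CORE=0.2 \
        sparkmesos:latest cat /opt/spark/conf/spark-defaults.conf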
Let's build the image so we can start using it.

    docker build -t sparkmesos . && \
        docker tag -f sparkmesos:latest sparkmesos:latest

### Step 4: Running a Spark Application with Docker

Now that the image is built we just need to run it. We will start by launching the PySpark shell.

    docker run -it --rm \
        -e SPARK_MASTER="mesos://zk://192.168.99.100:2181/mesos" \
        -e SPARK_IMAGE="sparkmesos:latest" \
        -e PYSPARK_DRIVER_PYTHON=ipython2 \
        sparkmesos:latest /opt/spark/bin/pyspark

To make sure that SciPy is working, let's type the following into the PySpark shell:

    from scipy import special, optimize
    import numpy as np

    f = lambda x: -special.jv(3, x)
    sol = optimize.minimize(f, 1.0)
    x = np.linspace(0, 10, 5000)
    x

Now, let's calculate Pi as an example.

    docker run -it --rm \
        -e SPARK_MASTER="mesos://zk://192.168.99.100:2181/mesos" \
        -e SPARK_IMAGE="sparkmesos:latest" \
        -e PYSPARK_DRIVER_PYTHON=ipython2 \
        sparkmesos:latest /opt/spark/bin/spark-submit --driver-memory 500M \
        --executor-memory 500M \
        /opt/spark/examples/src/main/python/pi.py 10

## Conclusion and Further Notes

Although we were able to run a Spark application within Docker containers by leveraging Apache Mesos, there is more work to do.
We need to explore containerized Spark applications that spread across multiple nodes, along with providing
a mechanism that enables network port mapping.

## References

1. Apache Mesos. The Apache Software Foundation, 2015. Web. 27 Jan. 2016.
1. Apache Spark. The Apache Software Foundation, 2015. Web. 27 Jan. 2016.
1. Benjamin Hindman. "Apache Mesos NYC Meetup", August 20, 2013. Web. 27 Jan. 2016.
1. Docker. Docker Inc, 2015. Web. 27 Jan. 2016.
1. Hindman, Konwinski, Zaharia, Ghodsi, D. Joseph, Katz, Shenker, Stoica.
   "Mesos: A Platform for Fine-Grained Resource Sharing in the Data Center".
   Web. 27 Jan. 2016.
1. Mesosphere Inc, 2015. Web. 27 Jan. 2016.
1. SciPy. SciPy developers, 2015. Web. 28 Jan. 2016.
1. VirtualBox, Oracle Inc, 2015. Web. 28 Jan. 2016.
1. Wang Qiang, "Docker Spark Mesos". Web. 28 Jan. 2016.

[DOCKER_COS]: https://docs.docker.com/engine/installation/centos/ "Docker CentOS install."
[MESOS_WHY]: https://speakerdeck.com/benh/apache-mesos-nyc-meetup "Apache Mesos NYC Meetup."
[MESOS_WP]: https://www.cs.berkeley.edu/~alig/papers/mesos.pdf "Mesos: A Platform for Fine-Grained Resource Sharing in the Data Center."
[SCIPY]: http://www.scipy.org/ "SciPy: Python based ecosystem for Math, Science, and Engineering."
[SPARK_GUIDE]: http://spark.apache.org/docs/latest/programming-guide.html "Apache Spark: Programming Guide."
[SPARK_MESOS]: http://spark.apache.org/docs/latest/running-on-mesos.html "Apache Spark: Running On Mesos."
[SPARK_MMASTER]: http://spark.apache.org/docs/latest/running-on-mesos.html "Apache Spark: Using a Mesos Master URL"
[VBOX]: https://www.virtualbox.org/ "VirtualBox"
[MESOSPHERE_GS]: https://open.mesosphere.com/getting-started/install/ "Mesosphere: Setting up a Mesos and Marathon Cluster"

--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
0.1

--------------------------------------------------------------------------------
/scripts/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Defaults for the values substituted into spark-defaults.conf.
SPARK_MASTER=${SPARK_MASTER:-local}
MESOS_EXECUTOR_CORE=${MESOS_EXECUTOR_CORE:-0.1}
SPARK_IMAGE=${SPARK_IMAGE:-sparkmesos:latest}
CURRENT_IP=$(hostname -i)

# Replace the placeholders in spark-defaults.conf with the values above.
sed -i 's;SPARK_MASTER;'$SPARK_MASTER';g' /opt/spark/conf/spark-defaults.conf
sed -i 's;MESOS_EXECUTOR_CORE;'$MESOS_EXECUTOR_CORE';g' /opt/spark/conf/spark-defaults.conf
sed -i 's;SPARK_IMAGE;'$SPARK_IMAGE';g' /opt/spark/conf/spark-defaults.conf
sed -i 's;CURRENT_IP;'$CURRENT_IP';g' /opt/spark/conf/spark-defaults.conf

# Bind Spark to the container's address unless the caller overrides it.
export SPARK_LOCAL_IP=${SPARK_LOCAL_IP:-${CURRENT_IP:-"127.0.0.1"}}
export SPARK_PUBLIC_DNS=${SPARK_PUBLIC_DNS:-${CURRENT_IP:-"127.0.0.1"}}

# Optionally expose extra host volumes to the executor containers.
if [ $ADDITIONAL_VOLUMES ];
then
    echo "spark.mesos.executor.docker.volumes: $ADDITIONAL_VOLUMES" >> /opt/spark/conf/spark-defaults.conf
fi

exec "$@"

--------------------------------------------------------------------------------
/spark-conf/spark-defaults.conf:
--------------------------------------------------------------------------------
spark.master SPARK_MASTER
spark.mesos.mesosExecutor.cores MESOS_EXECUTOR_CORE
spark.mesos.executor.docker.image SPARK_IMAGE
spark.mesos.executor.home /opt/spark
spark.driver.host CURRENT_IP
spark.executor.extraClassPath /opt/spark/custom/lib/*
spark.driver.extraClassPath /opt/spark/custom/lib/*

--------------------------------------------------------------------------------
/spark-conf/spark-env.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

export MESOS_NATIVE_JAVA_LIBRARY=${MESOS_NATIVE_JAVA_LIBRARY:-/usr/lib/libmesos.so}
export SPARK_LOCAL_IP=${SPARK_LOCAL_IP:-"127.0.0.1"}
export SPARK_PUBLIC_DNS=${SPARK_PUBLIC_DNS:-"127.0.0.1"}

--------------------------------------------------------------------------------
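A closing note on two settings that appear in the files above but are not exercised by the tutorial: `spark-defaults.conf` puts `/opt/spark/custom/lib/*` on the driver and executor class paths, and `run.sh` turns an optional `ADDITIONAL_VOLUMES` environment variable into `spark.mesos.executor.docker.volumes`. Combined, they allow extra jars to be mounted from the host instead of being baked into the image; a hedged sketch, where the host path is illustrative:

    docker run -it --rm \
        -e SPARK_MASTER="mesos://zk://192.168.99.100:2181/mesos" \
        -e SPARK_IMAGE="sparkmesos:latest" \
        -e ADDITIONAL_VOLUMES="/opt/jars:/opt/spark/custom/lib:ro" \
        -v /opt/jars:/opt/spark/custom/lib:ro \
        sparkmesos:latest /opt/spark/bin/spark-shell

The `-v` flag covers the driver container started by `docker run`; `ADDITIONAL_VOLUMES` makes the same mount available to the executor containers that Mesos launches.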