├── .gitignore
├── README.md
├── apache-hadoop-hdfs-precise
│   ├── Dockerfile
│   ├── build
│   └── files
│       ├── authorized_keys
│       ├── configure_hadoop.sh
│       ├── core-site.xml
│       ├── hdfs-site.xml
│       └── id_rsa
├── build
│   ├── README.txt
│   ├── build_all.sh
│   ├── push_all.sh
│   └── tag_all.sh
├── deploy
│   ├── deploy.sh
│   ├── kill_all.sh
│   ├── start_nameserver.sh
│   ├── start_shell.sh
│   └── start_spark_cluster.sh
├── dnsmasq-precise
│   ├── Dockerfile
│   ├── build
│   └── files
│       └── default_cmd
├── mesos
│   ├── NOTE.txt
│   ├── build
│   ├── deploy
│   │   ├── deploy
│   │   └── start_mesos_cluster.sh
│   ├── mesos-base
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       └── configure_mesos.sh
│   ├── mesos-master
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       └── run_mesos_master.sh
│   ├── mesos-worker
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       └── run_mesos_worker.sh
│   ├── shark-shell
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       └── default_cmd
│   └── spark-shell
│       ├── Dockerfile
│       ├── build
│       └── files
│           ├── default_cmd
│           └── test.txt
├── shark-0.7.0
│   ├── build
│   ├── shark-base
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── configure_shark.sh
│   │       ├── hive-site.xml
│   │       └── shark-env.sh
│   ├── shark-master
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       └── run_shark_master.sh
│   ├── shark-shell
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       └── test.shark
│   └── shark-worker
│       ├── Dockerfile
│       ├── build
│       └── files
│           ├── default_cmd
│           └── run_shark_worker.sh
├── shark-0.8.0
│   ├── build
│   ├── shark-base
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── configure_shark.sh
│   │       ├── hive-site.xml
│   │       └── shark-env.sh
│   ├── shark-master
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       └── run_shark_master.sh
│   ├── shark-shell
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       └── test.shark
│   └── shark-worker
│       ├── Dockerfile
│       ├── build
│       └── files
│           ├── default_cmd
│           └── run_shark_worker.sh
├── spark-0.7.3
│   ├── build
│   ├── spark-base
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── configure_spark.sh
│   │       ├── log4j.properties
│   │       └── spark-env.sh
│   ├── spark-master
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       └── run_spark_master.sh
│   ├── spark-shell
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       ├── test.spark
│   │       └── test.txt
│   └── spark-worker
│       ├── Dockerfile
│       ├── build
│       └── files
│           ├── default_cmd
│           └── run_spark_worker.sh
├── spark-0.8.0
│   ├── NOTE.txt
│   ├── build
│   ├── spark-base
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── configure_spark.sh
│   │       ├── log4j.properties
│   │       └── spark-env.sh
│   ├── spark-master
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       └── run_spark_master.sh
│   ├── spark-shell
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       ├── test.spark
│   │       └── test.txt
│   └── spark-worker
│       ├── Dockerfile
│       ├── build
│       └── files
│           ├── default_cmd
│           └── run_spark_worker.sh
├── spark-0.9.0
│   ├── NOTE.txt
│   ├── build
│   ├── spark-base
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── configure_spark.sh
│   │       ├── log4j.properties
│   │       └── spark-env.sh
│   ├── spark-master
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       └── run_spark_master.sh
│   ├── spark-shell
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       ├── test.spark
│   │       └── test.txt
│   └── spark-worker
│       ├── Dockerfile
│       ├── build
│       └── files
│           ├── default_cmd
│           └── run_spark_worker.sh
├── spark-0.9.1
│   ├── NOTE.txt
│   ├── build
│   ├── spark-base
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── configure_spark.sh
│   │       ├── log4j.properties
│   │       └── spark-env.sh
│   ├── spark-master
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       └── run_spark_master.sh
│   ├── spark-shell
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       ├── test.spark
│   │       └── test.txt
│   └── spark-worker
│       ├── Dockerfile
│       ├── build
│       └── files
│           ├── default_cmd
│           └── run_spark_worker.sh
├── spark-1.0.0
│   ├── NOTE.txt
│   ├── build
│   ├── spark-base
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── configure_spark.sh
│   │       ├── log4j.properties
│   │       └── spark-env.sh
│   ├── spark-master
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       └── run_spark_master.sh
│   ├── spark-shell
│   │   ├── Dockerfile
│   │   ├── build
│   │   └── files
│   │       ├── default_cmd
│   │       ├── test.spark
│   │       └── test.txt
│   └── spark-worker
│       ├── Dockerfile
│       ├── build
│       └── files
│           ├── default_cmd
│           └── run_spark_worker.sh
└── test
    └── test_all.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | files.hash
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Dockerfiles for Spark and Shark
2 |
3 | ## Contents
4 |
5 | Dockerfiles to build Spark and Shark images for testing and
6 | development.
7 |
8 | ## Requirements
9 |
10 | Tested on Ubuntu 12.04 (Docker version 0.6.4) and Ubuntu 13.10 (Docker 0.7.0 and 0.9.0) with the
11 | virtual switch `lxcbr0` enabled. For running Docker on Mac and Windows see [the docs](http://docs.docker.io).
12 | Also tested inside the Tiny Core Linux VirtualBox VM for Docker on
13 | Mac.
16 |
17 | Note: the earlier version of the scripts had problems with newer
18 | versions of Docker (0.7). If you encounter issues please pull the
19 | latest changes from https://github.com/amplab/docker-scripts.git
20 | master branch.
21 |
22 | ## Tips for running on Mac OS
23 | If you are running on Mac OS with Docker installed as described
24 | [in the Docker installation docs](http://docs.docker.io/en/latest/installation/mac/),
25 | you need to run all commands inside the Docker virtual machine by first ssh-ing into it:
26 |
27 |
28 | $ ./boot2docker ssh
29 | # User: docker
30 | # Pwd: tcuser
31 |
32 |
33 | Then make sure that `python` is installed. Otherwise install it via
34 | `tce-ab` (search for python and install `python.tcz`). Newer versions
35 | of the image that comes with boot2docker also do not have `bash` installed
36 | (install package `bash.tcz`) which is required for the deployment scripts.
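
For example, Tiny Core's non-interactive package loader can fetch and install both in one
step, as an alternative to the interactive `tce-ab` (assuming the packages are available in
the configured mirror):

    $ tce-load -wi python.tcz bash.tcz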
37 |
38 | Further, make sure that your virtual machine running the Docker daemon and
39 | the containers has sufficient memory allocated (at least 2GB for two Spark worker
40 | containers and one master container). This can be done in the VirtualBox
41 | GUI under the properties of the virtual machine.
42 |
43 | Finally, `boot2docker save` is a good way to preserve changes to the image
44 | between restarts of the virtual machine or the host computer,
45 | for example the scripts that come with the cloned git repository (see below).
46 |
47 | ## Testing
48 |
49 | First clone the repository:
50 |
51 | $ git clone https://github.com/amplab/docker-scripts.git
52 |
53 | This repository contains deploy scripts and the sources for the Docker
54 | image files, which can be easily modified. The main deploy script
55 | takes the following options.
56 |
57 |
58 | $ sudo ./deploy/deploy.sh
59 | usage: ./deploy.sh -i <image> [-w <#workers>] [-v <data_directory>] [-c]
60 |
61 | image: spark or shark image from:
62 | amplab/spark:0.9.0 amplab/spark:0.9.1 amplab/spark:1.0.0
63 | amplab/shark:0.8.0
64 |
65 |
66 | The script either starts a standalone Spark cluster or a standalone
67 | Spark/Shark cluster for a given number of worker nodes. Note that
68 | on the first call it may take a while for Docker to download the
69 | various images from the repository.
70 |
71 | In addition to Spark (and Shark) the cluster also runs a Hadoop HDFS
72 | filesystem. When the deploy script is run it generates one container
73 | for the master node, one container for each worker node and one extra
74 | container running a Dnsmasq DNS forwarder. The latter one can also be
75 | used to resolve node names on the host, for example to access the
76 | worker logs via the Spark web UI.
77 |
78 | Optionally one can set the number of workers (default: 2) and a data directory,
79 | which is a local path on the host that is mounted into the master and
80 | worker containers and appears there under /data.
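
For example, to start four workers and mount a host directory (the host path below is just a placeholder):

    $ sudo ./deploy/deploy.sh -i amplab/spark:1.0.0 -w 4 -v /path/to/local/data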
81 |
82 | The Spark and Shark shells each run in a separate container.
83 | This container can be started directly from the deploy script by
84 | passing it the "-c" option.
85 |
86 | Each node (worker and master) also runs an sshd which is
87 | _pre-configured with the given RSA key_. Note that you should change
88 | this key if you plan to expose services running inside the containers.
89 | Since the permissions of the key are likely wrong after cloning the
90 | repository, you need to change them if you intend to log in with ssh:
91 |
92 |
93 | chmod go-rwx apache-hadoop-hdfs-precise/files/id_rsa
94 |
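If you plan to actually expose the sshd, a safer option is to generate a fresh key pair and
rebuild the images with it before deploying (a sketch, not part of the original scripts; it
overwrites the bundled key, and the images must be rebuilt afterwards, see Building below):

    $ rm apache-hadoop-hdfs-precise/files/id_rsa
    $ ssh-keygen -t rsa -N "" -f apache-hadoop-hdfs-precise/files/id_rsa
    $ cp apache-hadoop-hdfs-precise/files/id_rsa.pub apache-hadoop-hdfs-precise/files/authorized_keys
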
95 |
96 | ### Example: Running a Spark cluster
97 |
98 | Starting from the directory in which the repository was cloned do
99 |
100 | #### Deploy the cluster
101 |
102 | $ sudo ./deploy/deploy.sh -i amplab/spark:0.9.0 -w 3
103 |
104 | #### Wait a few seconds
105 |
106 | Wait for the "cluster" to come up. Note that it can take longer to download
107 | the container images the first time but after that the process is fairly quick.
108 | When the cluster comes up you should see something like this:
109 |
110 |
111 | > sudo ./deploy.sh -i amplab/spark:0.9.0 -w 3
112 | *** Starting Spark 0.9.0 ***
113 | starting nameserver container
114 | started nameserver container: 069557913d98a37caf43f8238dfdf181aea5ab30eb42e382db83307e277cfa9e
115 | DNS host->IP file mapped: /tmp/dnsdir_12015/0hosts
116 | NAMESERVER_IP: 172.17.0.8
117 | waiting for nameserver to come up
118 | starting master container
119 | started master container: f50a65d2ef7b17bffed7075ac2de4a7b52c26adff15bdbe14d3280ef4991c9d6
120 | MASTER_IP: 172.17.0.9
121 | waiting for master ........
122 | waiting for nameserver to find master
123 | starting worker container
124 | started worker container: 576d7d223f59a6da7a0e73311d1e082fad27895aef53edf3635264fb00b70258
125 | starting worker container
126 | started worker container: 5672ea896e179b51fe2f1ae5d542c35706528cd3a768ba523324f434bb2b2413
127 | starting worker container
128 | started worker container: 3cdf681f7c99c1e19f7b580ac911e139923e9caca943fd006fb633aac5b20001
129 | waiting for workers to register .....
130 |
131 | ***********************************************************************
132 | start shell via: sudo /home/andre/docker-scripts/deploy/start_shell.sh -i amplab/spark-shell:0.9.0 -n 069557913d98a37caf43f8238dfdf181aea5ab30eb42e382db83307e277cfa9e
133 |
134 | visit Spark WebUI at: http://172.17.0.9:8080/
135 | visit Hadoop Namenode at: http://172.17.0.9:50070
136 | ssh into master via: ssh -i /home/andre/docker-scripts/deploy/../apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@172.17.0.9
137 |
138 | /data mapped:
139 |
140 | kill master via: sudo docker kill f50a65d2ef7b17bffed7075ac2de4a7b52c26adff15bdbe14d3280ef4991c9d6
141 | ***********************************************************************
142 |
143 | to enable cluster name resolution add the following line to _the top_ of your host's /etc/resolv.conf:
144 | nameserver 172.17.0.8
145 |
146 |
147 | #### Start the Spark shell container as shown above, for example:
148 |
149 | $ sudo /home/andre/docker-scripts/deploy/start_shell.sh -i amplab/spark-shell:0.9.0 -n 069557913d98a37caf43f8
150 |
151 | The parameter passed with -n is the ID of the nameserver container.
152 | Then attach to the running shell via the given command, for example:
153 |
154 | $ sudo docker attach 9ac49b09bf18a13c7
155 |
156 | If the screen appears to stay blank just hit return to get to the prompt.
157 |
158 | #### Execute an example:
159 |
160 |
161 | scala> val textFile = sc.textFile("hdfs://master:9000/user/hdfs/test.txt")
162 | scala> textFile.count()
163 | scala> textFile.map({line => line}).collect()
164 |
165 |
166 |
167 | #### Terminate the cluster:
168 |
169 | $ sudo ./deploy/kill_all.sh spark
170 | $ sudo ./deploy/kill_all.sh nameserver
171 |
172 | ### Shark
173 |
174 | Basically the same steps apply, except that the Shark images are chosen instead of the Spark ones
175 | (the former contain the Shark binaries in addition to Spark).
176 |
177 | #### Deploy the cluster
178 |
179 | $ sudo ./deploy/deploy.sh -i amplab/shark:0.8.0 -w 3
180 |
181 | #### Wait a few seconds
182 |
183 | Wait for the "cluster" to come up. Note that it can take longer to download
184 | the container images the first time but after that the process is fairly quick.
185 | When the cluster comes up you should see something like this:
186 |
187 |
188 | *** Starting Shark 0.8.0 + Spark ***
189 | starting nameserver container
190 | started nameserver container: 952d22e085c3b74e829e006ab536d45d31800c463832e43d8679bbf3d703940e
191 | DNS host->IP file mapped: /tmp/dnsdir_30578/0hosts
192 | NAMESERVER_IP: 172.17.0.13
193 | waiting for nameserver to come up
194 | starting master container
195 | started master container: 169f253eaddadb19b6eb28e79f148eef892f20d34602ffb42d3e57625dc61652
196 | MASTER_IP: 172.17.0.14
197 | waiting for master ........
198 | waiting for nameserver to find master
199 | starting worker container
200 | started worker container: 1c6920c96d5ad684a2f591bfb334323c5854cdd7a0da49982baaf77dc4d62ac7
201 | starting worker container
202 | started worker container: 7250dcfb882e2d17441c8c59361d10d8c59afb2b295719ba35f59bc72c6f17a5
203 | starting worker container
204 | started worker container: 26823e188a2a5a5897ed4b9bf0fca711dc7f98674fe62eb78fb49cf031bec79c
205 | waiting for workers to register .......
206 |
207 | ***********************************************************************
208 | start shell via: sudo /home/andre/docker-scripts/deploy/start_shell.sh -i amplab/shark-shell:0.8.0 -n 952d22e085c3b74e829e006ab536d45d31800c463832e43d8679bbf3d703940e
209 |
210 | visit Spark WebUI at: http://172.17.0.14:8080/
211 | visit Hadoop Namenode at: http://172.17.0.14:50070
212 | ssh into master via: ssh -i /home/andre/docker-scripts/deploy/../apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@172.17.0.14
213 |
214 | /data mapped:
215 |
216 | kill master via: sudo docker kill 169f253eaddadb19b6eb28e79f148eef892f20d34602ffb42d3e57625dc61652
217 | ***********************************************************************
218 |
219 | to enable cluster name resolution add the following line to _the top_ of your host's /etc/resolv.conf:
220 | nameserver 172.17.0.13
221 |
222 |
223 | #### Start the Shark shell container as shown above, for example:
224 |
225 | $ sudo /home/andre/docker-scripts/deploy/start_shell.sh -i amplab/shark-shell:0.8.0 -n 952d22e085c3b74e829e00
226 |
227 | The parameter passed with -n is the ID of the nameserver container.
228 | Then attach to the running shell via the given command, for example:
229 |
230 | $ sudo docker attach 9ac49b09bf18a13c7
231 |
232 | If the screen appears to stay blank just hit return to get to the prompt.
233 |
234 | #### Execute an example:
235 |
236 |
237 | shark> CREATE TABLE src(key INT, value STRING);
238 | shark> LOAD DATA LOCAL INPATH '${env:HIVE_HOME}/examples/files/kv1.txt' INTO TABLE src;
239 | shark> SELECT COUNT(1) FROM src;
240 |
241 |
242 | #### Terminate the cluster:
243 |
244 | $ sudo ./deploy/kill_all.sh shark
245 | $ sudo ./deploy/kill_all.sh nameserver
246 |
247 | ## Building
248 |
249 | If you prefer to build the images yourself (or intend to modify them) rather
250 | than downloading them from the Docker repository, you can build
251 | all Spark and Shark images in the correct order via the build script:
252 |
253 | $ ./build/build_all.sh
254 |
255 | The script builds the images in an order that satisfies the chain of
256 | dependencies:
257 |
258 | apache-hadoop-hdfs-precise -> spark-base -> spark-{master, worker, shell}
259 |
260 | apache-hadoop-hdfs-precise -> spark-base -> shark-base -> shark-{master, worker, shell}
261 |
262 | You can always (re-)build single images by cd-ing into the image directory and doing
263 |
264 | $ . build
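
For example, to rebuild only the Spark 1.0.0 base image (IMAGE_PREFIX is exported the same way
build_all.sh does it; any of the image directories works the same way):

    $ cd spark-1.0.0/spark-base
    $ export IMAGE_PREFIX=""
    $ . build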
265 |
266 | ## Best practices for Dockerfiles and startup scripts
267 |
268 | The following are conventions that made the generation of the images easier. They
269 | are not enforced in any way by Docker.
270 |
271 | The images and startup scripts follow a common structure in order to reuse
272 | as much as possible of the image they depend on. There are two types of images,
273 | base images and leaf images. Leaf images, as the name suggests,
274 | are images that are leaves in the dependency tree. For example, spark-base as a base
275 | image depends on apache-hadoop-hdfs-precise. spark-master depends on spark-base as
276 | its base image and is itself a leaf.
277 |
278 | In addition to its Dockerfile, each image has a `files/` subdirectory in its
279 | image directory that contains files (config files, data files) that are copied
280 | to the `/root/<image_name>_files` directory inside the image.
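
In a Dockerfile this is done with a single ADD statement, as in the
apache-hadoop-hdfs-precise image (the target directory name varies per image):

    ADD files /root/hadoop_files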
284 |
285 | ### Base images
286 |
287 | Base images are images that are intended to be extended by other images and therefore do not
288 | have a default command or entry point. They are good for testing though, e.g., by running
289 | `/bin/bash` inside them.
291 |
292 |
293 | For base images such as spark-base, besides data files the `files/`
294 | directory also contains `files/configure_spark.sh`, a script that
295 | defines four functions:
296 |
297 | * `create_spark_directories`: creates required directories such as the working directory
298 | * `deploy_spark_files`: copies files from `/root/<image_name>_files` to the required system path locations
299 | * `configure_spark`: changes settings in config files and takes the IP of the master as argument
300 | * `prepare_spark`: calls the previous three in the given order and takes the IP of the master as argument
309 |
310 |
311 | All of the functions of a __base image__'s configure script (so also those inside
312 | `files/configure_spark.sh`), except __prepare_spark__, first call their corresponding functions
313 | in the image that the spark-base image depends on (apache-hadoop-hdfs-precise in this case). Therefore all of the underlying services get initialized before the top-level service.
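
To make this concrete, a base image's configure script looks roughly like the following minimal
sketch (the function names come from the list above; the Spark directories and the
`/root/spark_files` location are illustrative assumptions, not the actual file contents):

    #!/bin/bash
    # sketch of spark-base's configure script; the parent image's script is added
    # to /root/hadoop_files by the apache-hadoop-hdfs-precise Dockerfile
    source /root/hadoop_files/configure_hadoop.sh

    function create_spark_directories() {
        create_hadoop_directories            # initialize the parent image first
        mkdir -p /opt/spark/work             # illustrative working directory
    }

    function deploy_spark_files() {
        deploy_hadoop_files
        cp /root/spark_files/spark-env.sh /opt/spark/conf/    # illustrative target path
    }

    function configure_spark() {
        configure_hadoop $1                  # $1 is the master's IP
        sed -i s/__MASTER__/$1/ /opt/spark/conf/spark-env.sh  # illustrative placeholder
    }

    function prepare_spark() {
        create_spark_directories
        deploy_spark_files
        configure_spark $1
    }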
314 |
315 | ### Leaf images
316 |
317 | For leaf images such as spark-master, besides data files the `files/`
318 | directory also contains `files/default_cmd`, which is set in the image's Dockerfile
319 | as the default command (or entry point) of the image. This means the command
320 | inside is executed whenever the container is started.
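
In the Dockerfile of a leaf image this amounts to two lines, as in the mesos-master Dockerfile
included later in this repository (the directory name varies per image):

    ADD files /root/mesos_master_files
    CMD ["/root/mesos_master_files/default_cmd"]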
323 |
324 |
325 | The default command script executes the following steps in this order:
326 |
327 | 1. First it calls the prepare function of the configure script of its base image.
328 |    In the case of spark-master, the default command script calls the function `prepare_spark`
329 |    inside `/root/spark-base/configure_spark.sh`, which is the location the configure script
330 |    of spark-base was copied to.
331 | 2. After that, now that the base image's configuration (and the configuration of the images it
332 |    inherits from) has completed, the default command may start services it relies on, such as
333 |    the Hadoop namenode service in the case of spark-master.
334 | 3. Finally, the default command script of spark-master runs a second script, `files/run_spark_master.sh`,
335 |    under user id hdfs (the Hadoop HDFS superuser), which actually starts the master.
340 |
341 | The spark-worker default command proceeds along the same lines but starts a Spark worker with a Hadoop datanode instead.
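
Putting these steps together, a leaf image's default command looks roughly like the sketch below
(it mirrors the mesos-master default_cmd included later in this repository; the Spark-specific
paths are assumptions):

    #!/bin/bash
    # 1. configure this image and everything it inherits from
    source /root/spark-base/configure_spark.sh
    IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
    echo "MASTER_IP=$IP"            # the deploy scripts grep the container logs for this line
    prepare_spark $IP

    # 2. start the services the master relies on
    sudo -u hdfs hadoop namenode -format
    service hadoop-namenode start
    /usr/sbin/sshd

    # 3. run the actual master start script as the hdfs user (path illustrative)
    sudo -u hdfs /root/spark_master_files/run_spark_master.sh $IP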
342 |
343 | ## Tips
344 |
345 | ### Name resolution on host
346 |
347 | In order to resolve names (such as "master", "worker1", etc.) add the IP
348 | of the nameserver container to the top of /etc/resolv.conf on the host.
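
For example, with the nameserver IP from the sample run above (assuming GNU sed on the host;
adjust the IP to your own deploy output):

    $ sudo sed -i '1i nameserver 172.17.0.8' /etc/resolv.conf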
349 |
350 | ### Maintaining local Docker image repository
351 |
352 | After a while of building and debugging images, the local image repository gets
353 | full of intermediate images that serve no real purpose other than
354 | debugging a broken build. To remove these do
355 |
356 | $ sudo docker images | grep "<none>" | awk '{print $3}' | xargs sudo docker rmi
357 |
358 | Also, data from stopped containers tends to accumulate. In order to remove all container data (__only do this when no containers are running__) do
359 |
360 | $ sudo docker rm `sudo docker ps -a -q`
361 |
--------------------------------------------------------------------------------
/apache-hadoop-hdfs-precise/Dockerfile:
--------------------------------------------------------------------------------
1 | # Base Ubuntu Precise 12.04 LTS image
2 | #
3 | FROM ubuntu:precise
4 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
5 |
6 | # Setup a volume for data
7 | VOLUME ["/data"]
8 |
9 | # Set correct source list
10 | RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list
11 | RUN echo "deb http://archive.ubuntu.com/ubuntu precise-updates main universe" >> /etc/apt/sources.list
12 |
13 | # install a few other useful packages plus Open Jdk 7
14 | RUN apt-get update && apt-get upgrade -y && apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server iputils-ping python2.7
15 |
16 | # Install Hadoop
17 | ADD http://mirror.sdunix.com/apache/hadoop/common/hadoop-1.2.1/hadoop_1.2.1-1_x86_64.deb /root/
18 | RUN dpkg -i /root/hadoop_1.2.1-1_x86_64.deb && rm /root/hadoop_1.2.1-1_x86_64.deb
19 |
20 | # Docker messes up /etc/hosts and adds two entries for 127.0.0.1
21 | # we try to recover from that by giving /etc/resolv.conf and therefore
22 | # the nameserver priority
23 | RUN sed -i s/"files dns"/"dns files"/ /etc/nsswitch.conf
24 |
25 | # add Hadoop config file templates
26 | ADD files /root/hadoop_files
27 |
28 | # Set JAVA_HOME
29 | ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64
30 |
--------------------------------------------------------------------------------
/apache-hadoop-hdfs-precise/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}apache-hadoop-hdfs-precise:1.2.1 .
5 |
--------------------------------------------------------------------------------
/apache-hadoop-hdfs-precise/files/authorized_keys:
--------------------------------------------------------------------------------
1 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDp2atNK3bux0z3d2Aojkl231Lf6X7HZUYIBt3XzUs+wnTzzB/eH2ubS5Wdwyy5daA4itsvX6hI1o/LQOfRBdjXqIVl+IFXFdwNQ0saCSNh65O2ynuMwsxUXhBJAGoBg6sTXq1ZPNQk1JqopUBP6+H4jpnKFW3JosON9QopQdkkYIz/frHs3HojfbydQesGNovanKrGYV3QeFVQDPxseufRZtHjrTk1hQ3FEayQCTyqJ8JDE6DMrirNEVBTuuNZ/Z2afPLWcZIKQ46E73p9HhqcaWEph6xQ3Ha/WV9oK0jenfz4b+sGrUItTbzuP8SsUiA4yZrZaN4BubDi4oPALOr/ root@423e412aa505
2 |
--------------------------------------------------------------------------------
/apache-hadoop-hdfs-precise/files/configure_hadoop.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | hadoop_files=( "/root/hadoop_files/core-site.xml" "/root/hadoop_files/hdfs-site.xml" )
4 |
5 | function create_hadoop_directories() {
6 | rm -rf /root/.ssh
7 | mkdir /root/.ssh
8 | chmod go-rx /root/.ssh
9 | mkdir /var/run/sshd
10 | }
11 |
12 | function deploy_hadoop_files() {
13 | for i in "${hadoop_files[@]}";
14 | do
15 | filename=$(basename $i);
16 | cp $i /etc/hadoop/$filename;
17 | done
18 | cp /root/hadoop_files/id_rsa /root/.ssh
19 | chmod go-rwx /root/.ssh/id_rsa
20 | cp /root/hadoop_files/authorized_keys /root/.ssh/authorized_keys
21 | chmod go-wx /root/.ssh/authorized_keys
22 | }
23 |
24 | function configure_hadoop() {
25 | sed -i s/__MASTER__/$1/ /etc/hadoop/core-site.xml
26 | sed -i s/"JAVA_HOME=\/usr\/lib\/jvm\/java-6-sun"/"JAVA_HOME=\/usr\/lib\/jvm\/java-7-openjdk-amd64"/ /etc/hadoop/hadoop-env.sh
27 | }
28 |
29 | function prepare_hadoop() {
30 | create_hadoop_directories
31 | deploy_hadoop_files
32 | configure_hadoop $1
33 | }
34 |
--------------------------------------------------------------------------------
/apache-hadoop-hdfs-precise/files/core-site.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |   <property>
3 |     <name>fs.default.name</name>
4 |     <value>hdfs://__MASTER__:9000</value>
5 |   </property>
6 | </configuration>
7 |
--------------------------------------------------------------------------------
/apache-hadoop-hdfs-precise/files/hdfs-site.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |   <property>
3 |     <name>dfs.replication</name>
4 |     <value>1</value>
5 |   </property>
6 | </configuration>
7 |
--------------------------------------------------------------------------------
/apache-hadoop-hdfs-precise/files/id_rsa:
--------------------------------------------------------------------------------
1 | -----BEGIN RSA PRIVATE KEY-----
2 | MIIEpAIBAAKCAQEA6dmrTSt27sdM93dgKI5Jdt9S3+l+x2VGCAbd181LPsJ088wf
3 | 3h9rm0uVncMsuXWgOIrbL1+oSNaPy0Dn0QXY16iFZfiBVxXcDUNLGgkjYeuTtsp7
4 | jMLMVF4QSQBqAYOrE16tWTzUJNSaqKVAT+vh+I6ZyhVtyaLDjfUKKUHZJGCM/36x
5 | 7Nx6I328nUHrBjaL2pyqxmFd0HhVUAz8bHrn0WbR4605NYUNxRGskAk8qifCQxOg
6 | zK4qzRFQU7rjWf2dmnzy1nGSCkOOhO96fR4anGlhKYesUNx2v1lfaCtI3p38+G/r
7 | Bq1CLU287j/ErFIgOMma2WjeAbmw4uKDwCzq/wIDAQABAoIBAQCBgFZZ/Pj3EI2x
8 | +XzZ2LocR144u7DGsXHP3iWabYj+72ce3+rB8np/3KK1ZDFvXxFkXpk1Ke8irxeg
9 | gogd+/PysdN1/eF6nZNoEN0VRPxALNp3frhe4j2PdyvjkYQi5IynxGWRJpuA7e/b
10 | 9u+fksxn/mhyPd23rRhIk+uVn26lsnccHhCkfqr+Szm/xFsTUhYQ1B8bfrqhA1Le
11 | WRrBa03JXocd2y3TdzeaQ+AtvbpAy9Fc28N7xkDsuh+H1y74jRhFzBXd4WnYuxze
12 | /PAD3hpgtCDGGnGpwE2SMM8fZJ7vLOPAsMUuz1tvLbKcoTTdaUw4fBur/XQHloW7
13 | k7adoW6BAoGBAP0bdE1uynnwZOFDhmpMvdYfodwlv3Far+QZwVroSa64YWBaeAef
14 | v0AO75p/EiQJEGWB9bgOAyrbOFdRqLtUF14lQw4ZLUV7sQu/o2Z0sVMSRCVWuNDf
15 | W8sk74RtH3WB7lutOMP3WyYopOUZtTK1rZrRNxD4+edq7+utAba+DLS/AoGBAOyF
16 | 31hype9DkOHgD/jWU7tNrJprLkNkSHe/Aq5JdKesgw84AOSKO4W1/uXOly4VOt6Z
17 | 54eeW1gt+uKT292GEl66TO8PIxszfsUzpYpTKkSzrl5OsM9hUlitJwpff/D9Mbxw
18 | fZWt0EjKlBQWc83sMBwCe8ZyNh/WueBIKH5HjhnBAoGAEwFRvVK5X2iemo+Qc0Dp
19 | 7D8Zz0cCVgeiN3V7oFDa34S2wx5n7uKe4Ld+ZFJwUUZg9c5JXhWnRTuKwnu+OLq6
20 | unX/z/ox/Qqpo6EzKslOW1d+yHL3k6+B3AIc/guXliI4fKfIIGbdcEMTBqTkhzc/
21 | HuXgxaR8V1UfSMoH2+nvWE8CgYAcw4MP3JF1cYATGA6ZMmdoZd/Rv6sWowF1HpOS
22 | 4nf/VCl0Fll1caIfdqyTAfa8sfRA0fKoOYfeR2k1WMnqPL3LK1jj0bFxQ2ftT4SY
23 | N9jyFe/kpCk4bxt2kUgoKMkEY6ZCxmNfao3j7E7pynk217xaC6tFzOnsIU7liaDz
24 | CnyrgQKBgQDtjairs6ehaqRu8Uk44gQoNIlReJ8qp7YmfPlK8ylFNTALs37c4308
25 | Qbjp+jLt7w+XMYnNaZPSNN1mt6EyWFSqUc+5QbfQpbw1cZRI1UBIQDwJjZUS04Ou
26 | H75Rif72nQxHh9Ly5CMNCEyioin7kq945vQbyAwyEr7+tomhUZaq9g==
27 | -----END RSA PRIVATE KEY-----
28 |
--------------------------------------------------------------------------------
/build/README.txt:
--------------------------------------------------------------------------------
1 | Building and publishing images to the amplab account:
2 | 1. make sure IMAGE_PREFIX="" (see build_all.sh)
3 | 2. build_all.sh
4 | 3. set IMAGE_PREFIX="amplab/"
5 | 4. build_all.sh
6 | 5. tag_all.sh
7 | 6. push_all.sh
8 |
--------------------------------------------------------------------------------
/build/build_all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [[ "$USER" != "root" ]]; then
4 | echo "please run as: sudo $0"
5 | exit 1
6 | fi
7 |
8 | CURDIR=$(pwd)
9 | BASEDIR=$(cd $(dirname $0); pwd)"/.."
10 | dir_list=( "dnsmasq-precise" "apache-hadoop-hdfs-precise" "spark-0.7.3" "shark-0.7.0" "spark-0.8.0" "spark-0.9.0" "shark-0.8.0" )
11 |
12 | export IMAGE_PREFIX=""
13 | #"amplab/"
14 |
15 | # NOTE: the order matters but this is the right one
16 | for i in ${dir_list[@]}; do
17 | echo building $i;
18 | cd ${BASEDIR}/$i
19 | cat build
20 | ./build
21 | done
22 | cd $CURDIR
23 |
--------------------------------------------------------------------------------
/build/push_all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [[ "$USER" != "root" ]]; then
4 | echo "please run as: sudo $0"
5 | exit 1
6 | fi
7 |
8 | image_list=( "apache-hadoop-hdfs-precise" "dnsmasq-precise" "spark-master" "spark-worker" "spark-shell" "shark-master" "shark-worker" "shark-shell" )
9 |
10 | IMAGE_PREFIX="amplab/"
11 |
12 | # NOTE: the order matters but this is the right one
13 | for i in ${image_list[@]}; do
14 | echo docker push ${IMAGE_PREFIX}${i}
15 | docker push ${IMAGE_PREFIX}${i}
16 | done
17 |
--------------------------------------------------------------------------------
/build/tag_all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [[ "$USER" != "root" ]]; then
4 | echo "please run as: sudo $0"
5 | exit 1
6 | fi
7 |
8 | image_list=("spark-master:0.9.0" "spark-worker:0.9.0" "spark-shell:0.9.0" "shark-master:0.8.0" "shark-worker:0.8.0" "shark-shell:0.8.0" )
9 |
10 | IMAGE_PREFIX="amplab/"
11 |
12 | # NOTE: the order matters but this is the right one
13 | for i in ${image_list[@]}; do
14 | image=$(echo $i | awk -F ":" '{print $1}')
15 | echo docker tag ${IMAGE_PREFIX}${i} ${IMAGE_PREFIX}${image}:latest
16 | docker tag ${IMAGE_PREFIX}${i} ${IMAGE_PREFIX}${image}:latest
17 | done
18 |
--------------------------------------------------------------------------------
/deploy/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | DEBUG=0
4 | BASEDIR=$(cd $(dirname $0); pwd)
5 |
6 | spark_images=( "amplab/spark:0.9.0" "amplab/spark:0.9.1" "amplab/spark:1.0.0")
7 | shark_images=( "amplab/shark:0.8.0" )
8 | NAMESERVER_IMAGE="amplab/dnsmasq-precise"
9 |
10 | start_shell=0
11 | VOLUME_MAP=""
12 |
13 | image_type="?"
14 | image_version="?"
15 | NUM_WORKERS=2
16 |
17 | source $BASEDIR/start_nameserver.sh
18 | source $BASEDIR/start_spark_cluster.sh
19 |
20 | function check_root() {
21 | if [[ "$USER" != "root" ]]; then
22 | echo "please run as: sudo $0"
23 | exit 1
24 | fi
25 | }
26 |
27 | function print_help() {
28 | echo "usage: $0 -i [-w <#workers>] [-v ] [-c]"
29 | echo ""
30 | echo " image: spark or shark image from:"
31 | echo -n " "
32 | for i in ${spark_images[@]}; do
33 | echo -n " $i"
34 | done
35 | echo ""
36 | echo -n " "
37 | for i in ${shark_images[@]}; do
38 | echo -n " $i"
39 | done
40 | echo ""
41 | }
42 |
43 | function parse_options() {
44 | while getopts "i:w:cv:h" opt; do
45 | case $opt in
46 | i)
47 | echo "$OPTARG" | grep "spark:" > /dev/null;
48 | if [ "$?" -eq 0 ]; then
49 | image_type="spark"
50 | fi
51 | echo "$OPTARG" | grep "shark:" > /dev/null;
52 | if [ "$?" -eq 0 ]; then
53 | image_type="shark"
54 | fi
55 | image_name=$(echo "$OPTARG" | awk -F ":" '{print $1}')
56 | image_version=$(echo "$OPTARG" | awk -F ":" '{print $2}')
57 | ;;
58 | w)
59 | NUM_WORKERS=$OPTARG
60 | ;;
61 | h)
62 | print_help
63 | exit 0
64 | ;;
65 | c)
66 | start_shell=1
67 | ;;
68 | v)
69 | VOLUME_MAP=$OPTARG
70 | ;;
71 | esac
72 | done
73 |
74 | if [ "$image_type" == "?" ]; then
75 | echo "missing or invalid option: -i "
76 | exit 1
77 | fi
78 |
79 | if [ ! "$VOLUME_MAP" == "" ]; then
80 | echo "data volume chosen: $VOLUME_MAP"
81 | VOLUME_MAP="-v $VOLUME_MAP:/data"
82 | fi
83 | }
84 |
85 | check_root
86 |
87 | if [[ "$#" -eq 0 ]]; then
88 | print_help
89 | exit 1
90 | fi
91 |
92 | parse_options $@
93 |
94 | if [ "$image_type" == "spark" ]; then
95 | SPARK_VERSION="$image_version"
96 | echo "*** Starting Spark $SPARK_VERSION ***"
97 | elif [ "$image_type" == "shark" ]; then
98 | SHARK_VERSION="$image_version"
99 | # note: we currently don't have a Shark 0.9 image but it's safe to set the Spark
100 | # version to Shark's version for all but Shark 0.7.0
101 | if [ "$SHARK_VERSION" == "0.9.0" ] || [ "$SHARK_VERSION" == "0.8.0" ]; then
102 | SPARK_VERSION="$SHARK_VERSION"
103 | else
104 | SPARK_VERSION="0.7.3"
105 | fi
106 | echo "*** Starting Shark $SHARK_VERSION + Spark ***"
107 | else
108 | echo "not starting anything"
109 | exit 0
110 | fi
111 |
112 | start_nameserver $NAMESERVER_IMAGE
113 | wait_for_nameserver
114 | start_master ${image_name}-master $image_version
115 | wait_for_master
116 | if [ "$image_type" == "spark" ]; then
117 | SHELLCOMMAND="sudo $BASEDIR/start_shell.sh -i ${image_name}-shell:$SPARK_VERSION -n $NAMESERVER $VOLUME_MAP"
118 | elif [ "$image_type" == "shark" ]; then
119 | SHELLCOMMAND="sudo $BASEDIR/start_shell.sh -i ${image_name}-shell:$SHARK_VERSION -n $NAMESERVER $VOLUME_MAP"
120 | fi
121 |
122 | start_workers ${image_name}-worker $image_version
123 | get_num_registered_workers
124 | echo -n "waiting for workers to register "
125 | until [[ "$NUM_REGISTERED_WORKERS" == "$NUM_WORKERS" ]]; do
126 | echo -n "."
127 | sleep 1
128 | get_num_registered_workers
129 | done
130 | echo ""
131 | print_cluster_info "$SHELLCOMMAND"
132 | if [[ "$start_shell" -eq 1 ]]; then
133 | SHELL_ID=$($SHELLCOMMAND | tail -n 1 | awk '{print $4}')
134 | sudo docker attach $SHELL_ID
135 | fi
136 |
--------------------------------------------------------------------------------
/deploy/kill_all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | function kill_containers() {
4 | containers=($1)
5 | for i in "${containers[@]}"; do
6 | echo "killing container $i"
7 | sudo docker kill "$i"
8 | done
9 | }
10 |
11 | if [ "$#" -ne "1" ]; then
12 | echo -e "usage:\n $0 spark\n $0 shark\n $0 mesos\n $0 nameserver"
13 | exit 1;
14 | fi
15 |
16 | if [[ "$USER" != "root" ]]; then
17 | echo "please run as: sudo $0"
18 | exit 1
19 | fi
20 |
21 | clustertype=$1
22 |
23 | if [[ "$clustertype" == "nameserver" ]]; then
24 | nameserver=$(sudo docker ps | grep dnsmasq_files | awk '{print $1}' | tr '\n' ' ')
25 | kill_containers "$nameserver"
26 | else
27 | master=$(sudo docker ps | grep ${clustertype}_master | awk '{print $1}' | tr '\n' ' ')
28 | workers=$(sudo docker ps | grep ${clustertype}_worker | awk '{print $1}' | tr '\n' ' ')
29 | shells=$(sudo docker ps | grep ${clustertype}_shell | awk '{print $1}' | tr '\n' ' ')
30 | kill_containers "$master"
31 | kill_containers "$workers"
32 | kill_containers "$shells"
33 | fi
34 |
35 |
--------------------------------------------------------------------------------
/deploy/start_nameserver.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | NAMESERVER=-1
4 | NAMESERVER_IP=
5 | DOMAINNAME=
6 | #".mycluster.com"
7 |
8 | # starts the dnsmasq nameserver
9 | function start_nameserver() {
10 | DNSDIR="/tmp/dnsdir_$RANDOM"
11 | DNSFILE="${DNSDIR}/0hosts"
12 | mkdir $DNSDIR
13 |
14 | echo "starting nameserver container"
15 | if [ "$DEBUG" -gt 0 ]; then
16 | echo sudo docker run -d -h nameserver${DOMAINNAME} -v $DNSDIR:/etc/dnsmasq.d $1
17 | fi
18 | NAMESERVER=$(sudo docker run -d -h nameserver${DOMAINNAME} -v $DNSDIR:/etc/dnsmasq.d $1)
19 |
20 | if [ "$NAMESERVER" = "" ]; then
21 | echo "error: could not start nameserver container from image $1"
22 | exit 1
23 | fi
24 |
25 | echo "started nameserver container: $NAMESERVER"
26 | echo "DNS host->IP file mapped: $DNSFILE"
27 | sleep 2
28 | NAMESERVER_IP=$(sudo docker logs $NAMESERVER 2>&1 | egrep '^NAMESERVER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
29 | echo "NAMESERVER_IP: $NAMESERVER_IP"
30 | echo "address=\"/nameserver/$NAMESERVER_IP\"" > $DNSFILE
31 | }
32 |
33 | # contact nameserver container and resolve IP address (used for checking whether nameserver has registered
34 | # presence of new container). note: only returns exit code
35 | function check_hostname() {
36 | local __resultvar=$1
37 | local val_hostname=$2
38 | local val_expected_ip=$3
39 | if which dig >/dev/null; then
40 | DNSCMD="dig $val_hostname @${NAMESERVER_IP} | grep ANSWER -A1 | grep $val_expected_ip > /dev/null"
41 | else
42 | DNSCMD="nslookup $val_hostname $NAMESERVER_IP | grep Address | tail -n 1 | grep $val_expected_ip > /dev/null"
43 | fi
44 | #echo "DNSCMD: $DNSCMD"
45 | eval $DNSCMD
46 | eval $__resultvar=$?
47 | }
48 |
49 | # contact nameserver container and resolve IP address
50 | function resolve_hostname() {
51 | local __resultvar=$1
52 | local val_hostname=$2
53 | if which dig >/dev/null; then
54 | DNSCMD="dig $val_hostname @${NAMESERVER_IP} | grep ANSWER -A1 | tail -n 1 | awk '{print \$5}'"
55 | else
56 | DNSCMD="nslookup $val_hostname $NAMESERVER_IP | grep Address | tail -n 1 | awk -F":" '{print \$2}' | awk '{print \$1}'"
57 | fi
58 | #echo "DNSCMD: $DNSCMD"
59 | tmpval=$(eval "$DNSCMD")
60 | eval $__resultvar="$tmpval"
61 | }
62 |
63 | function wait_for_nameserver {
64 | echo -n "waiting for nameserver to come up "
65 | # Note: the original scripts assumed the nameserver resolves its own
66 | # hostname to 127.0.0.1
67 | # With newer versions of Docker that is not necessarily the case anymore.
68 | # Thanks to bmustafa (24601 on GitHub) for reporting and proposing a fix!
69 | check_hostname result nameserver "$NAMESERVER_IP"
70 | until [ "$result" -eq 0 ]; do
71 | echo -n "."
72 | sleep 1
73 | check_hostname result nameserver "$NAMESERVER_IP"
74 | done
75 | echo ""
76 | }
77 |
--------------------------------------------------------------------------------
/deploy/start_shell.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | BASEDIR=$(cd $(dirname $0); pwd)
4 | source $BASEDIR/start_nameserver.sh
5 |
6 | SHELL_ID=-1
7 | SHELL_IP=
8 | NAMESERVER_IP=
9 | NAMESERVER_DIR=
10 | NAMESERVER_ID=-1
11 |
12 | image_type="?"
13 |
14 | DEBUG=1
15 |
16 | # TODO: remove redundant image list definition (source from file common to deploy.sh)
17 | spark_shell_images=( "amplab/spark-shell:0.9.0" "amplab/spark-shell:0.9.1" "amplab/spark-shell:1.0.0")
18 | shark_shell_images=( "amplab/shark-shell:0.8.0" )
19 |
20 | # TODO: unify with deploy.sh
21 | function check_root() {
22 | if [[ "$USER" != "root" ]]; then
23 | echo "please run as: sudo $0"
24 | exit 1
25 | fi
26 | }
27 |
28 | function print_help() {
29 | echo "usage: $0 -i -n [-v ]"
30 | echo ""
31 | echo " image: spark or shark image from:"
32 | echo -n " "
33 | for i in ${spark_shell_images[@]}; do
34 | echo -n " $i"
35 | done
36 | echo ""
37 | echo -n " "
38 | for i in ${shark_shell_images[@]}; do
39 | echo -n " $i"
40 | done
41 | echo ""
42 | }
43 |
44 | function parse_options() {
45 | while getopts "i:n:v:h" opt; do
46 | case $opt in
47 | i)
48 | echo "$OPTARG" | grep "spark-shell:" > /dev/null;
49 | if [ "$?" -eq 0 ]; then
50 | image_type="spark"
51 | fi
52 | echo "$OPTARG" | grep "shark-shell:" > /dev/null;
53 | if [ "$?" -eq 0 ]; then
54 | image_type="shark"
55 | fi
56 | image_name=$(echo "$OPTARG" | awk -F ":" '{print $1}')
57 | image_version=$(echo "$OPTARG" | awk -F ":" '{print $2}')
58 | ;;
59 | h)
60 | print_help
61 | exit 0
62 | ;;
63 | v)
64 | VOLUME_MAP=$OPTARG
65 | ;;
66 | n)
67 | NAMESERVER_ID=$OPTARG
68 | ;;
69 | esac
70 | done
71 |
72 | if [ "$image_type" == "?" ]; then
73 | echo "missing or invalid option: -i "
74 | exit 1
75 | fi
76 |
77 | if [ ! "$VOLUME_MAP" == "" ]; then
78 | echo "data volume chosen: $VOLUME_MAP"
79 | VOLUME_MAP="-v $VOLUME_MAP:/data"
80 | fi
81 | }
82 |
83 | # TODO: generalize and refactor this with the code for updating
84 | # master and worker nameserver entries.
85 | function set_nameserver_data() {
86 | IMAGENAME="$image_name:$image_version"
87 | DNSDIR=$(sudo docker inspect $NAMESERVER_ID | \
88 | grep dnsdir | awk '{print $2}' | tr -d '":')
89 | DNSFILE="${DNSDIR}/0hosts"
90 | SHELL_IP=$(docker inspect $SHELL_ID | \
91 | grep IPAddress | awk '{print $2}' | tr -d '":,')
92 |
93 | if [ "$DEBUG" -gt 0 ]; then
94 | echo "NAMESERVER_IP: $NAMESERVER_IP"
95 | echo "DNSFILE: $DNSFILE"
96 | echo "SHELL_IP: $SHELL_IP"
97 | echo "SHELL_HOSTNAME: $SHELL_HOSTNAME"
98 | fi
99 |
100 | echo "address=\"/$SHELL_HOSTNAME/$SHELL_IP\"" | sudo tee -a $DNSFILE > /dev/null
101 | }
102 |
103 | # starts the spark/shark shell container
104 | function start_shell() {
105 | IMAGENAME="$image_name:$image_version"
106 | NAMESERVER_IP=$(docker inspect $NAMESERVER_ID | \
107 | grep IPAddress | awk '{print $2}' | tr -d '":,')
108 |
109 | if [ "$NAMESERVER_IP" = "" ]; then
110 | echo "error: cannot determine nameserver IP"
111 | exit 1
112 | fi
113 |
114 | #MASTER_IP=$(dig master @$NAMESERVER_IP | grep ANSWER -A1 | \
115 | # tail -n 1 | awk '{print $5}')
116 | resolve_hostname MASTER_IP master
117 |
118 | if [ "$MASTER_IP" = "" ]; then
119 | echo "error: cannot determine master IP"
120 | exit 1
121 | fi
122 |
123 | SHELL_HOSTNAME="shell$RANDOM"
124 | echo "starting shell container"
125 | if [ "$DEBUG" -gt 0 ]; then
126 | echo sudo docker run -i -t -d --dns $NAMESERVER_IP -h $SHELL_HOSTNAME $VOLUME_MAP $IMAGENAME $MASTER_IP
127 | fi
128 | SHELL_ID=$(sudo docker run -i -t -d --dns $NAMESERVER_IP -h $SHELL_HOSTNAME $VOLUME_MAP $IMAGENAME $MASTER_IP)
129 |
130 | if [ "$SHELL_ID" = "" ]; then
131 | echo "error: could not start shell container from image $IMAGENAME"
132 | exit 1
133 | fi
134 | }
135 |
136 | check_root
137 |
138 | if [[ "$#" -eq 0 ]]; then
139 | print_help
140 | exit 1
141 | fi
142 |
143 | parse_options $@
144 |
145 | if [ "$image_type" == "spark" ]; then
146 | SPARK_VERSION="$image_version"
147 | echo "*** Starting Spark $SPARK_VERSION Shell ***"
148 | elif [ "$image_type" == "shark" ]; then
149 | SHARK_VERSION="$image_version"
150 | # note: we currently don't have a Shark 0.9 image but it's safe to set the Spark
151 | # version to Shark's version for all but Shark 0.7.0
152 | if [ "$SHARK_VERSION" == "0.9.0" ] || [ "$SHARK_VERSION" == "0.8.0" ]; then
153 | SPARK_VERSION="$SHARK_VERSION"
154 | else
155 | SPARK_VERSION="0.7.3"
156 | fi
157 | echo "*** Starting Shark $SHARK_VERSION + Spark Shell ***"
158 | else
159 | echo "not starting anything"
160 | exit 0
161 | fi
162 |
163 | start_shell
164 |
165 | sleep 2
166 |
167 | set_nameserver_data
168 |
169 | echo -n "waiting for nameserver to find shell "
170 | SHELL_IP=$(docker inspect $SHELL_ID | \
171 | grep IPAddress | awk '{print $2}' | tr -d '":,')
172 |
173 | check_hostname result $SHELL_HOSTNAME $SHELL_IP
174 | until [ "$result" -eq 0 ]; do
175 | echo -n "."
176 | sleep 1
177 | check_hostname result $SHELL_HOSTNAME $SHELL_IP
178 | done
179 |
180 | echo ""
181 | echo "***************************************************************"
182 | echo "connect to shell via:"
183 | echo "sudo docker attach $SHELL_ID"
184 |
185 |
--------------------------------------------------------------------------------
/deploy/start_spark_cluster.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | MASTER=-1
4 | MASTER_IP=
5 | NUM_REGISTERED_WORKERS=0
6 |
7 | # starts the Spark/Shark master container
8 | function start_master() {
9 | echo "starting master container"
10 | if [ "$DEBUG" -gt 0 ]; then
11 | echo sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2
12 | fi
13 | MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2)
14 |
15 | if [ "$MASTER" = "" ]; then
16 | echo "error: could not start master container from image $1:$2"
17 | exit 1
18 | fi
19 |
20 | echo "started master container: $MASTER"
21 | sleep 3
22 | MASTER_IP=$(sudo docker logs $MASTER 2>&1 | egrep '^MASTER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
23 | echo "MASTER_IP: $MASTER_IP"
24 | echo "address=\"/master/$MASTER_IP\"" >> $DNSFILE
25 | }
26 |
27 | # starts a number of Spark/Shark workers
28 | function start_workers() {
29 | for i in `seq 1 $NUM_WORKERS`; do
30 | echo "starting worker container"
31 | hostname="worker${i}${DOMAINNAME}"
32 | if [ "$DEBUG" -gt 0 ]; then
33 | echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP}
34 | fi
35 | WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP})
36 |
37 | if [ "$WORKER" = "" ]; then
38 | echo "error: could not start worker container from image $1:$2"
39 | exit 1
40 | fi
41 |
42 | echo "started worker container: $WORKER"
43 | sleep 3
44 | WORKER_IP=$(sudo docker logs $WORKER 2>&1 | egrep '^WORKER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
45 | echo "address=\"/$hostname/$WORKER_IP\"" >> $DNSFILE
46 | done
47 | }
48 |
49 | # prints out information on the cluster
50 | function print_cluster_info() {
51 | BASEDIR=$(cd $(dirname $0); pwd)"/.."
52 | echo ""
53 | echo "***********************************************************************"
54 | echo "start shell via: $1"
55 | echo ""
56 | echo "visit Spark WebUI at: http://$MASTER_IP:8080/"
57 | echo "visit Hadoop Namenode at: http://$MASTER_IP:50070"
58 | echo "ssh into master via: ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP}"
59 | echo ""
60 | echo "/data mapped: $VOLUME_MAP"
61 | echo ""
62 | echo "kill master via: sudo docker kill $MASTER"
63 | echo "***********************************************************************"
64 | echo ""
65 | echo "to enable cluster name resolution add the following line to _the top_ of your host's /etc/resolv.conf:"
66 | echo "nameserver $NAMESERVER_IP"
67 | }
68 |
69 | function get_num_registered_workers() {
70 | if [[ "$SPARK_VERSION" == "0.7.3" ]]; then
71 | DATA=$( curl --noproxy -s http://$MASTER_IP:8080/?format=json | tr -d '\n' | sed s/\"/\\\\\"/g)
72 | else
73 | # Docker on Mac uses Tiny Core Linux with busybox, which has a limited version of wget (?)
74 | echo $(uname -a) | grep "Linux boot2docker" > /dev/null
75 | if [[ "$?" == "0" ]]; then
76 | DATA=$( wget -Y off -q -O - http://$MASTER_IP:8080/json | tr -d '\n' | sed s/\"/\\\\\"/g)
77 | else
78 | DATA=$( wget --no-proxy -q -O - http://$MASTER_IP:8080/json | tr -d '\n' | sed s/\"/\\\\\"/g)
79 | fi
80 | fi
81 | NUM_REGISTERED_WORKERS=$(python -c "import json; data = \"$DATA\"; value = json.loads(data); print len(value['workers'])")
82 | }
83 |
84 | function wait_for_master {
85 | if [[ "$SPARK_VERSION" == "0.7.3" ]]; then
86 | query_string="INFO HttpServer: akka://sparkMaster/user/HttpServer started"
87 | elif [[ "$SPARK_VERSION" == "1.0.0" ]]; then
88 | query_string="MasterWebUI: Started MasterWebUI"
89 | else
90 | query_string="MasterWebUI: Started Master web UI"
91 | fi
92 | echo -n "waiting for master "
93 | sudo docker logs $MASTER | grep "$query_string" > /dev/null
94 | until [ "$?" -eq 0 ]; do
95 | echo -n "."
96 | sleep 1
97 | sudo docker logs $MASTER | grep "$query_string" > /dev/null;
98 | done
99 | echo ""
100 | echo -n "waiting for nameserver to find master "
101 | check_hostname result master "$MASTER_IP"
102 | until [ "$result" -eq 0 ]; do
103 | echo -n "."
104 | sleep 1
105 | check_hostname result master "$MASTER_IP"
106 | done
107 | echo ""
108 | sleep 3
109 | }
110 |
--------------------------------------------------------------------------------
/dnsmasq-precise/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:precise
2 |
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | VOLUME [ "/etc/dnsmasq.d" ]
6 |
7 | RUN apt-get install -y dnsmasq-base
8 |
9 | RUN echo "user=root" > /etc/dnsmasq.conf
10 | RUN echo "listen-address=__LOCAL_IP__" >> /etc/dnsmasq.conf
11 | RUN echo "resolv-file=/etc/resolv.dnsmasq.conf" >> /etc/dnsmasq.conf
12 | RUN echo "conf-dir=/etc/dnsmasq.d" >> /etc/dnsmasq.conf
13 | RUN echo "domain=cluster.com" >> /etc/dnsmasq.conf
14 |
15 | RUN echo "nameserver 8.8.8.8" >> /etc/resolv.dnsmasq.conf
16 |
17 | ADD files /root/dnsmasq_files
18 |
19 | CMD ["/root/dnsmasq_files/default_cmd"]
20 |
--------------------------------------------------------------------------------
/dnsmasq-precise/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}dnsmasq-precise .
5 |
--------------------------------------------------------------------------------
/dnsmasq-precise/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
4 | echo "NAMESERVER_IP=$IP"
5 |
6 | sed -i s/__LOCAL_IP__/$IP/ /etc/dnsmasq.conf
7 |
8 | dnsmasq
9 |
10 | while [ 1 ];
11 | do
12 | sleep 3
13 | # kill and restart dnsmasq every three seconds
14 | # in case its configuration has changed
15 | pkill dnsmasq
16 | dnsmasq
17 | done
18 |
--------------------------------------------------------------------------------
/mesos/NOTE.txt:
--------------------------------------------------------------------------------
1 | For the build, place a pre-compiled Mesos installation into the file:
2 | mesos/mesos-base/files/mesos.tgz
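3 |
4 | (Note: the mesos-base Dockerfile unpacks this tarball into /opt and expects a
5 | top-level "mesos" directory, so assuming a local Mesos 0.13.0 install tree you
6 | could create it with: tar -czf mesos/mesos-base/files/mesos.tgz -C /path/to/install mesos)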
3 |
--------------------------------------------------------------------------------
/mesos/build:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | mesos_dirs=$(ls -d mesos* spark-shell shark-shell)
4 | dir_list=("$mesos_dirs")
5 |
6 | # NOTE: the order matters but this is the right one
7 | for i in ${dir_list[@]}; do
8 | echo building $i;
9 | cd $i;
10 | cat build;
11 | . build;
12 | cd ..;
13 | done
14 |
--------------------------------------------------------------------------------
/mesos/deploy/deploy:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # determines which Mesos image is chosen
4 | MESOS_VERSION=0.13.0
5 |
6 | # set this value to the number of workers you want
7 | NUM_WORKERS=2
8 |
9 | if [[ "$USER" != "root" ]]; then
10 | echo "please run as: sudo $0"
11 | exit 1
12 | fi
13 |
14 | source ../../dnsmasq-precise/deploy/start_nameserver.sh
15 | source start_mesos_cluster.sh
16 |
17 | echo "*** Starting Mesos $MESOS_VERSION ***"
18 | start_nameserver
19 | sleep 5
20 | start_mesos_master
21 | sleep 40
22 | start_mesos_workers
23 | sleep 3
24 | print_cluster_info
25 |
26 |
--------------------------------------------------------------------------------
/mesos/deploy/start_mesos_cluster.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | MASTER=-1
4 | MASTER_IP=
5 |
6 | # starts the Mesos master container
7 | function start_mesos_master() {
8 | echo "starting Mesos master container"
9 | MASTER=$(sudo docker run -i -t -d -dns $NAMESERVER_IP -h master mesos-master:$MESOS_VERSION)
10 | echo "started master container: $MASTER"
11 | sleep 3
12 | MASTER_IP=$(sudo docker logs $MASTER 2>&1 | egrep '^MASTER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
13 | echo "MASTER_IP: $MASTER_IP"
14 | echo "address=\"/master/$MASTER_IP\"" >> $DNSFILE
15 | }
16 |
17 | # starts a number of Mesos workers
18 | function start_mesos_workers() {
19 | for i in `seq 1 $NUM_WORKERS`; do
20 | echo "starting Mesos worker container"
21 | hostname="worker${i}"
22 | WORKER=$(sudo docker run -d -dns $NAMESERVER_IP -h $hostname mesos-worker:${MESOS_VERSION} ${MASTER_IP} ${MASTER_IP}:5050)
23 | echo "started worker container: $WORKER"
24 | sleep 3
25 | WORKER_IP=$(sudo docker logs $WORKER 2>&1 | egrep '^WORKER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
26 | echo "address=\"/$hostname/$WORKER_IP\"" >> $DNSFILE
27 | done
28 | }
29 |
30 | # prints out information on the cluster
31 | function print_cluster_info() {
32 | echo ""
33 | echo "***********************************************************************"
34 | echo "visit Mesos WebUI at: http://$MASTER_IP:5050/"
35 | echo "visit Hadoop Namenode at: http://$MASTER_IP:50070"
36 | echo ""
37 | echo "start Spark Shell: sudo docker run -i -t -dns $NAMESERVER_IP -h spark-client spark-shell-mesos:0.7.3 $MASTER_IP"
38 | echo "start Shark Shell: sudo docker run -i -t -dns $NAMESERVER_IP -h shark-client shark-shell-mesos:0.7.0 $MASTER_IP"
39 | echo ""
40 | echo "ssh into master via: ssh -i ../../apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP}"
41 | echo ""
42 | echo "kill cluster via: docker/kill_all"
43 | echo "***********************************************************************"
44 | echo ""
45 | echo "to enable cluster name resolution add the following line to _the top_ of your host's /etc/resolv.conf:"
46 | echo "nameserver $NAMESERVER_IP"
47 | }
48 |
49 |
--------------------------------------------------------------------------------
/mesos/mesos-base/Dockerfile:
--------------------------------------------------------------------------------
1 | # Base Ubuntu Precise 12.04 LTS image
2 | #
3 | FROM amplab/shark-base:0.7.0
4 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
5 |
6 | #RUN apt-get install -y libcurl4-openssl-dev
7 | RUN apt-get install -y libcurl3
8 |
9 | # add Hadoop config file templates
10 | # NOTE: we'd rather do this as a single ADD statement
11 | # since we are running into
12 | # Error build: Unable to mount using aufs
13 | # Unable to mount using aufs
14 | # issue. For more information see
15 | # https://github.com/dotcloud/docker/issues/1171
16 | ADD files /root/mesos_files
17 |
18 | RUN (mv /root/mesos_files/mesos.tgz / && cd / && gunzip < mesos.tgz)|(cd /opt && tar -xvf -) && (rm /mesos.tgz && ln -s /opt/mesos /tmp/mesos)
19 |
20 |
--------------------------------------------------------------------------------
/mesos/mesos-base/build:
--------------------------------------------------------------------------------
1 | sudo docker build -t amplab/mesos-base:0.13.0 .
2 |
--------------------------------------------------------------------------------
/mesos/mesos-base/files/configure_mesos.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/shark_files/configure_shark.sh
4 |
5 | function create_mesos_directories() {
6 | create_shark_directories
7 | mkdir /tmp/mesos
8 | chown hdfs.hdfs /tmp/mesos
9 | }
10 |
11 | function deploy_mesos_files() {
12 | deploy_shark_files
13 | }
14 |
15 | function configure_mesos() {
16 | configure_shark $1
17 | sed -i s/"^export MASTER="/"#export MASTER="/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
18 | echo "export MASTER=mesos://$1:5050" >> /opt/spark-$SPARK_VERSION/conf/spark-env.sh
19 | echo "export MESOS_NATIVE_LIBRARY=/opt/mesos/lib/libmesos-0.13.0.so" >> /opt/spark-$SPARK_VERSION/conf/spark-env.sh
20 | echo "export JAVA_LIBRARY_PATH=/opt/mesos/lib/libmesos-0.13.0.so" >> /opt/spark-$SPARK_VERSION/conf/spark-env.sh
21 | }
22 |
23 | function prepare_mesos() {
24 | create_mesos_directories
25 | deploy_mesos_files
26 | configure_mesos $1
27 | }
28 |
--------------------------------------------------------------------------------
/mesos/mesos-master/Dockerfile:
--------------------------------------------------------------------------------
1 | # Mesos
2 | FROM amplab/mesos-base:0.13.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Setup a volume for data
6 | #VOLUME ["/data"]
7 |
8 | ADD files /root/mesos_master_files
9 |
10 | CMD ["/root/mesos_master_files/default_cmd"]
11 |
--------------------------------------------------------------------------------
/mesos/mesos-master/build:
--------------------------------------------------------------------------------
1 | sudo docker build -t amplab/mesos-master:0.13.0 .
2 |
--------------------------------------------------------------------------------
/mesos/mesos-master/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | env
4 |
5 | source /root/mesos_files/configure_mesos.sh
6 |
7 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
8 | echo "MASTER_IP=$IP"
9 |
10 | echo "preparing Mesos"
11 | prepare_mesos $IP
12 |
13 | echo "starting Hadoop Namenode"
14 | sudo -u hdfs hadoop namenode -format
15 | service hadoop-namenode start
16 |
17 | echo "starting sshd"
18 | /usr/sbin/sshd
19 |
20 | sleep 5
21 |
22 | echo "starting Mesos Master"
23 | cp /root/mesos_master_files/run_mesos_master.sh /
24 | chmod a+rx /run_mesos_master.sh
25 | sudo -u hdfs LD_LIBRARY_PATH=$LD_LIBRARY_PATH /run_mesos_master.sh $IP
26 |
--------------------------------------------------------------------------------
/mesos/mesos-master/files/run_mesos_master.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export LD_LIBRARY_PATH=/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/amd64/server
3 | cd /opt/mesos/sbin && ./mesos-master --ip=$1
4 |
--------------------------------------------------------------------------------
/mesos/mesos-worker/Dockerfile:
--------------------------------------------------------------------------------
1 | # Mesos
2 | FROM amplab/mesos-base:0.13.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Setup a volume for data
6 | #VOLUME ["/data"]
7 |
8 | ADD files /root/mesos_worker_files
9 |
10 | # Add the entrypoint script for the worker
11 | CMD ["-h"]
12 | ENTRYPOINT ["/root/mesos_worker_files/default_cmd"]
13 |
--------------------------------------------------------------------------------
/mesos/mesos-worker/build:
--------------------------------------------------------------------------------
1 | sudo docker build -t amplab/mesos-worker:0.13.0 .
2 |
--------------------------------------------------------------------------------
/mesos/mesos-worker/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/mesos_files/configure_mesos.sh
4 |
5 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
6 | echo "WORKER_IP=$IP"
7 |
8 | echo "preparing Mesos"
9 | prepare_mesos $1
10 |
11 | echo "starting Hadoop Datanode"
12 | service hadoop-datanode start
13 |
14 | echo "starting sshd"
15 | /usr/sbin/sshd
16 |
17 | sleep 5
18 |
19 | echo "starting Mesos Worker"
20 | cp /root/mesos_worker_files/run_mesos_worker.sh /
21 | chmod a+rx /run_mesos_worker.sh
22 | sudo -u hdfs HADOOP_HOME=$HADOOP_HOME LD_LIBRARY_PATH=$LD_LIBRARY_PATH /run_mesos_worker.sh $2 $IP
23 |
--------------------------------------------------------------------------------
/mesos/mesos-worker/files/run_mesos_worker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export LD_LIBRARY_PATH=/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/amd64/server
3 | cd /opt/mesos/sbin && ./mesos-slave --master=$1 --ip=$2 --hadoop_home=$HADOOP_HOME
4 |
--------------------------------------------------------------------------------
/mesos/shark-shell/Dockerfile:
--------------------------------------------------------------------------------
1 | # Shark shell (on Mesos)
2 | FROM amplab/mesos-base:0.13.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | ADD files /root/shark_shell_files
6 |
7 | # Add the entrypoint script for the shell
8 | ENTRYPOINT ["/root/shark_shell_files/default_cmd"]
9 |
--------------------------------------------------------------------------------
/mesos/shark-shell/build:
--------------------------------------------------------------------------------
1 | sudo docker build -t amplab/shark-shell-mesos:0.7.0 .
2 |
--------------------------------------------------------------------------------
/mesos/shark-shell/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/mesos_files/configure_mesos.sh
4 |
5 | env
6 |
7 | echo "preparing Mesos"
8 | prepare_mesos $1
9 |
10 | echo "starting Shark Shell"
11 | cd /opt/metastore && sudo -u hdfs /opt/shark-$SHARK_VERSION/bin/shark
12 |
--------------------------------------------------------------------------------
/mesos/spark-shell/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM amplab/mesos-base:0.13.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | ADD files /root/spark_shell_files
6 |
7 | # Add the entrypoint script for the shell
8 | ENTRYPOINT ["/root/spark_shell_files/default_cmd"]
9 |
--------------------------------------------------------------------------------
/mesos/spark-shell/build:
--------------------------------------------------------------------------------
1 | sudo docker build -t amplab/spark-shell-mesos:0.7.3 .
2 |
--------------------------------------------------------------------------------
/mesos/spark-shell/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/mesos_files/configure_mesos.sh
4 |
5 | env
6 |
7 | echo "preparing Mesos"
8 | prepare_mesos $1
9 |
10 | echo "adding test data to HDFS"
11 | cp /root/spark_shell_files/test.txt /tmp
12 | sudo -u hdfs hadoop fs -put /tmp/test.txt hdfs://$1:9000/user/hdfs/test.txt
13 |
14 | echo "starting Spark Shell"
15 | cd $SPARK_HOME
16 | echo SPARK_HOME: `pwd`
17 | echo SHARK_VERSION: $SHARK_VERSION
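# spark-shell sits at the top of the distribution in 0.7.3/0.8.0; later releases moved it under bin/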
18 | if [ "$SPARK_VERSION" == "0.8.0" ] || [ "$SPARK_VERSION" == "0.7.3" ]; then
19 | sudo -u hdfs HDFS_PREFIX=hdfs://${1}:9000 ./spark-shell
20 | else
21 | sudo -u hdfs HDFS_PREFIX=hdfs://${1}:9000 ./bin/spark-shell
22 | fi
23 |
--------------------------------------------------------------------------------
/mesos/spark-shell/files/test.txt:
--------------------------------------------------------------------------------
1 | this is a test
2 | more test
3 | one more line
4 |
--------------------------------------------------------------------------------
/shark-0.7.0/build:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | shark_dirs=$(ls -d shark*)
4 | dir_list=("$shark_dirs")
5 |
6 | # NOTE: the build order matters; ls happens to list the directories in the required order (base image first)
7 | for i in ${dir_list[@]}; do
8 | echo building $i;
9 | cd $i;
10 | cat build;
11 | . build;
12 | cd ..;
13 | done
14 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-base/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark 0.7.3, Shark 0.7.0
2 | # Version 0.7.0
3 | #
4 | # Use spark-base as base
5 | FROM spark-base:0.7.3
6 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
7 |
8 | # note: SPARK_VERSION should be inherited from spark-base
9 | # but for some reason isn't (?)
10 | ENV SPARK_VERSION 0.7.3
11 | ENV SHARK_VERSION 0.7.0
12 | ENV HIVE_VERSION 0.9.0
13 |
14 | # Install Shark
15 | ADD http://spark-project.org/download/shark-${SHARK_VERSION}-hadoop1-bin.tgz /
16 | RUN (cd / && gunzip < shark-${SHARK_VERSION}-hadoop1-bin.tgz)|(cd /opt && tar -xvf -)
17 | RUN rm /shark-${SHARK_VERSION}-hadoop1-bin.tgz
18 |
19 | # Add Shark config files and configure script
20 | ADD files /root/shark_files
21 |
22 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-base/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
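# The loop above writes a git blob hash for every file in the build context to
# files/files.hash, presumably so a built image can be traced back to exact file versions.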
4 | sudo docker build -t ${IMAGE_PREFIX}shark-base:0.7.0 .
5 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-base/files/configure_shark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/spark_files/configure_spark.sh
4 |
5 | shark_files=( "/root/shark_files/shark-env.sh" )
6 | hive_files=( "/root/shark_files/hive-site.xml" "/etc/hadoop/core-site.xml" )
7 |
8 | function create_shark_directories() {
9 | create_spark_directories
10 | rm -rf /opt/metastore
11 | mkdir /opt/metastore
12 | chown hdfs.hdfs /opt/metastore
13 | }
14 |
15 | function deploy_shark_files() {
16 | deploy_spark_files
17 | for i in "${hive_files[@]}";
18 | do
19 | filename=$(basename $i);
20 | cp $i /opt/hive-${HIVE_VERSION}-bin/conf/$filename;
21 | done
22 | for i in "${shark_files[@]}";
23 | do
24 | filename=$(basename $i);
25 | cp $i /opt/shark-${SHARK_VERSION}/conf/$filename;
26 | done
27 | }
28 |
29 | function configure_shark() {
30 | configure_spark $1
31 | # Shark
32 | sed -i s/__SPARK_HOME__/"\/opt\/spark-${SPARK_VERSION}"/ /opt/shark-$SHARK_VERSION/conf/shark-env.sh
33 | sed -i s/__HIVE_HOME__/"\/opt\/hive-${HIVE_VERSION}-bin"/ /opt/shark-$SHARK_VERSION/conf/shark-env.sh
34 | # Hive
35 | sed -i s/__MASTER__/$1/ /opt/hive-0.9.0-bin/conf/hive-site.xml
36 | #sed -i s/__MASTER__/master/ /opt/hive-0.9.0-bin/conf/hive-site.xml
37 | }
38 |
39 | function prepare_shark() {
40 | create_shark_directories
41 | deploy_shark_files
42 | configure_shark $1
43 | }
44 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-base/files/hive-site.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 | <property>
3 |   <name>fs.default.name</name>
4 |   <value>hdfs://__MASTER__:9000/</value>
5 | </property>
6 | <property>
7 |   <name>fs.defaultFS</name>
8 |   <value>hdfs://__MASTER__:9000/</value>
9 | </property>
10 | <property>
11 |   <name>mapred.job.tracker</name>
12 |   <value>NONE</value>
13 | </property>
14 | <property>
15 |   <name>hive.exec.scratchdir</name>
16 |   <value>/tmp/hive-scratch</value>
17 |   <description>Scratch space for Hive jobs</description>
18 | </property>
19 | <property>
20 |   <name>hive.metastore.local</name>
21 |   <value>true</value>
22 | </property>
23 | <property>
24 |   <name>javax.jdo.option.ConnectionURL</name>
25 |   <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
26 | </property>
27 | <property>
28 |   <name>javax.jdo.option.ConnectionDriverName</name>
29 |   <value>org.apache.derby.jdbc.EmbeddedDriver</value>
30 | </property>
31 | <property>
32 |   <name>hive.metastore.metadb.dir</name>
33 |   <value>file:///opt/metastore/metadb/</value>
34 | </property>
35 | <property>
36 |   <name>hive.metastore.uris</name>
37 |   <value>file:///opt/metastore/metadb/</value>
38 | </property>
39 | <property>
40 |   <name>hive.metastore.warehouse.dir</name>
41 |   <value>hdfs://__MASTER__:9000/user/hdfs/warehouse</value>
42 | </property>
43 | </configuration>
44 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-base/files/shark-env.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | . __SPARK_HOME__/conf/spark-env.sh
3 | export SHARK_MASTER_MEM=700m
4 | export HIVE_HOME=__HIVE_HOME__
5 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-master/Dockerfile:
--------------------------------------------------------------------------------
1 | # Shark master
2 | FROM shark-base:0.7.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Add run script
6 | ADD files /root/shark_master_files
7 |
8 | # Add default command for master
9 | CMD ["/root/shark_master_files/default_cmd"]
10 |
11 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-master/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}shark-master:0.7.0 .
5 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-master/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # run this as:
4 | # sudo docker run -i -t -d shark-master:$SHARK_VERSION
5 |
6 | source /root/shark_files/configure_shark.sh
7 |
8 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
9 | echo "MASTER_IP=$IP"
10 |
11 | echo "preparing Shark"
12 | prepare_shark $IP
13 |
14 | echo "starting Hadoop namenode"
15 | sudo -u hdfs hadoop namenode -format > /dev/null 2>&1
16 | service hadoop-namenode start > /dev/null 2>&1
17 |
18 | echo "starting sshd"
19 | /usr/sbin/sshd
20 |
21 | sleep 5
22 |
23 | echo "starting Shark master"
24 | cp /root/shark_master_files/run_shark_master.sh /
25 | chmod a+rx /run_shark_master.sh
26 | sudo -u hdfs /run_shark_master.sh
27 | #$IP
28 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-master/files/run_shark_master.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | . /opt/shark-0.7.0/conf/shark-env.sh
3 | export PATH=$PATH:$SCALA_HOME/bin
4 | export CLASSPATH=$CLASSPATH:$SCALA_HOME/lib/scala-library.jar
5 | #/opt/spark-0.7.3/run spark.deploy.master.Master -i $1
6 | /opt/spark-0.7.3/run spark.deploy.master.Master -i master
7 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-shell/Dockerfile:
--------------------------------------------------------------------------------
1 | # Shark shell
2 | FROM shark-base:0.7.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Add run script
6 | ADD files /root/shark_shell_files
7 |
8 | # Add the default entrypoint for the shell
9 | ENTRYPOINT ["/root/shark_shell_files/default_cmd"]
10 |
11 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-shell/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}shark-shell:0.7.0 .
5 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-shell/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/shark_files/configure_shark.sh
4 | prepare_shark $1
5 | env
6 | sudo -u hdfs hadoop dfsadmin -safemode wait
7 |
8 | # Note: there are issues if the nameserver did not have time to
9 | # refresh its cache with this shell's hostname, so give it time
10 | # to do so.
11 | sleep 3
12 |
13 | cd /opt/metastore && sudo -u hdfs /opt/shark-$SHARK_VERSION/bin/shark
14 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-shell/files/test.shark:
--------------------------------------------------------------------------------
1 | CREATE TABLE src(key INT, value STRING);
2 | LOAD DATA LOCAL INPATH '${env:HIVE_HOME}/examples/files/kv1.txt' INTO TABLE src;
3 | SELECT COUNT(1) FROM src;
4 | exit;
5 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-worker/Dockerfile:
--------------------------------------------------------------------------------
1 | # Shark worker
2 | FROM shark-base:0.7.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Add run script
6 | ADD files /root/shark_worker_files
7 |
8 | # Add the entrypoint script for the worker
9 | ENTRYPOINT ["/root/shark_worker_files/default_cmd"]
10 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-worker/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}shark-worker:0.7.0 .
5 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-worker/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # run this as:
4 | # sudo docker run -d shark-worker:${SHARK_VERSION} ${MASTER_IP} spark://${MASTER_IP}:7077
5 |
6 | source /root/shark_files/configure_shark.sh
7 |
8 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
9 | echo "WORKER_IP=$IP"
10 |
11 | echo "preparing Shark"
12 | prepare_shark $1
13 |
14 | echo "starting Hadoop datanode"
15 | service hadoop-datanode start
16 |
17 | echo "starting sshd"
18 | /usr/sbin/sshd
19 |
20 | sleep 5
21 |
22 | echo "starting Shark worker node"
23 | cp /root/shark_worker_files/run_shark_worker.sh /
24 | chmod a+rx /run_shark_worker.sh
25 | sudo -u hdfs /run_shark_worker.sh
26 | #$2
27 |
--------------------------------------------------------------------------------
/shark-0.7.0/shark-worker/files/run_shark_worker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | . /opt/shark-0.7.0/conf/shark-env.sh
3 | export PATH=$PATH:$SCALA_HOME/bin
4 | export CLASSPATH=$CLASSPATH:$SCALA_HOME/lib/scala-library.jar
5 | #/opt/spark-0.7.3/run spark.deploy.worker.Worker $1
6 | #/opt/spark-0.7.3/run spark.deploy.worker.Worker -i $(hostname) spark://master:7077
7 | ${SPARK_HOME}/run spark.deploy.worker.Worker spark://master:7077
8 |
--------------------------------------------------------------------------------
/shark-0.8.0/build:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | shark_dirs=$(ls -d shark*)
4 | dir_list=("$shark_dirs")
5 |
6 | # NOTE: the build order matters; ls happens to list the directories in the required order (base image first)
7 | for i in ${dir_list[@]}; do
8 | echo building $i;
9 | cd $i;
10 | cat build;
11 | . build;
12 | cd ..;
13 | done
14 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-base/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark 0.8.0, Shark 0.8.0
2 | #
3 | # Use spark-base as base
4 | FROM spark-base:0.8.0
5 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
6 |
7 | # note: SPARK_VERSION should be inherited from spark-base
8 | # but for some reason isn't (?)
9 | ENV SPARK_VERSION 0.8.0
10 | ENV SHARK_VERSION 0.8.0
11 | ENV HIVE_VERSION 0.9.0
12 |
13 | # Install Shark
14 | ADD https://github.com/amplab/shark/releases/download/v${SHARK_VERSION}/shark-${SHARK_VERSION}-bin-hadoop1.tgz /
15 | RUN (cd / && gunzip < shark-${SHARK_VERSION}-bin-hadoop1.tgz)|(cd /opt && tar -xvf -)
16 | RUN (ln -s /opt/shark-${SHARK_VERSION}-bin-hadoop1/shark-${SHARK_VERSION} /opt/shark-${SHARK_VERSION} && ln -s /opt/shark-${SHARK_VERSION}-bin-hadoop1/hive-${HIVE_VERSION}-shark-${SHARK_VERSION}-bin /opt/hive-${HIVE_VERSION}-bin && rm /shark-${SHARK_VERSION}-bin-hadoop1.tgz)
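# The symlinks above expose the unpacked trees at /opt/shark-${SHARK_VERSION} and
# /opt/hive-${HIVE_VERSION}-bin, the paths expected by configure_shark.sh.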
17 |
18 | # Add Shark config files and configure script
19 | ADD files /root/shark_files
20 |
21 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-base/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}shark-base:0.8.0 .
5 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-base/files/configure_shark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/spark_files/configure_spark.sh
4 |
5 | shark_files=( "/root/shark_files/shark-env.sh" )
6 | hive_files=( "/root/shark_files/hive-site.xml" "/etc/hadoop/core-site.xml" )
7 |
8 | function create_shark_directories() {
9 | create_spark_directories
10 | rm -rf /opt/metastore
11 | mkdir /opt/metastore
12 | chown hdfs.hdfs /opt/metastore
13 | }
14 |
15 | function deploy_shark_files() {
16 | deploy_spark_files
17 | for i in "${hive_files[@]}";
18 | do
19 | filename=$(basename $i);
20 | cp $i /opt/hive-${HIVE_VERSION}-bin/conf/$filename;
21 | done
22 | for i in "${shark_files[@]}";
23 | do
24 | filename=$(basename $i);
25 | cp $i /opt/shark-${SHARK_VERSION}/conf/$filename;
26 | done
27 | }
28 |
29 | function configure_shark() {
30 | configure_spark $1
31 | # Shark
32 | sed -i s/__SPARK_HOME__/"\/opt\/spark-${SPARK_VERSION}"/ /opt/shark-$SHARK_VERSION/conf/shark-env.sh
33 | sed -i s/__HIVE_HOME__/"\/opt\/hive-${HIVE_VERSION}-bin"/ /opt/shark-$SHARK_VERSION/conf/shark-env.sh
34 | # Hive
35 | sed -i s/__MASTER__/$1/ /opt/hive-0.9.0-bin/conf/hive-site.xml
36 | }
37 |
38 | function prepare_shark() {
39 | create_shark_directories
40 | deploy_shark_files
41 | configure_shark $1
42 | }
43 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-base/files/hive-site.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 | <property>
3 |   <name>fs.default.name</name>
4 |   <value>hdfs://__MASTER__:9000/</value>
5 | </property>
6 | <property>
7 |   <name>fs.defaultFS</name>
8 |   <value>hdfs://__MASTER__:9000/</value>
9 | </property>
10 | <property>
11 |   <name>mapred.job.tracker</name>
12 |   <value>NONE</value>
13 | </property>
14 | <property>
15 |   <name>hive.exec.scratchdir</name>
16 |   <value>/tmp/hive-scratch</value>
17 |   <description>Scratch space for Hive jobs</description>
18 | </property>
19 | <property>
20 |   <name>hive.metastore.local</name>
21 |   <value>true</value>
22 | </property>
23 | <property>
24 |   <name>javax.jdo.option.ConnectionURL</name>
25 |   <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
26 | </property>
27 | <property>
28 |   <name>javax.jdo.option.ConnectionDriverName</name>
29 |   <value>org.apache.derby.jdbc.EmbeddedDriver</value>
30 | </property>
31 | <property>
32 |   <name>hive.metastore.metadb.dir</name>
33 |   <value>file:///opt/metastore/metadb/</value>
34 | </property>
35 | <property>
36 |   <name>hive.metastore.uris</name>
37 |   <value>file:///opt/metastore/metadb/</value>
38 | </property>
39 | <property>
40 |   <name>hive.metastore.warehouse.dir</name>
41 |   <value>hdfs://__MASTER__:9000/user/hdfs/warehouse</value>
42 | </property>
43 | </configuration>
44 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-base/files/shark-env.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | . __SPARK_HOME__/conf/spark-env.sh
3 | export SHARK_MASTER_MEM=700m
4 | export HIVE_HOME=__HIVE_HOME__
5 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-master/Dockerfile:
--------------------------------------------------------------------------------
1 | # Shark master
2 | FROM shark-base:0.8.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Add run script
6 | ADD files /root/shark_master_files
7 |
8 | # Add default command for master
9 | CMD ["/root/shark_master_files/default_cmd"]
10 |
11 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-master/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}shark-master:0.8.0 .
5 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-master/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # run this as:
4 | # sudo docker run -i -t -d shark-master:$SHARK_VERSION
5 |
6 | source /root/shark_files/configure_shark.sh
7 |
8 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
9 | echo "MASTER_IP=$IP"
10 |
11 | echo "preparing Shark"
12 | prepare_shark $IP
13 |
14 | echo "starting Hadoop namenode"
15 | sudo -u hdfs hadoop namenode -format > /dev/null 2>&1
16 | service hadoop-namenode start > /dev/null 2>&1
17 |
18 | echo "starting sshd"
19 | /usr/sbin/sshd
20 |
21 | sleep 5
22 |
23 | echo "starting Shark master"
24 | cp /root/shark_master_files/run_shark_master.sh /
25 | chmod a+rx /run_shark_master.sh
26 | sudo -u hdfs SPARK_VERSION=$SPARK_VERSION SHARK_VERSION=$SHARK_VERSION /run_shark_master.sh $IP
27 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-master/files/run_shark_master.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | . /opt/shark-0.8.0/conf/shark-env.sh
3 | export PATH=$PATH:$SCALA_HOME/bin
4 | export CLASSPATH=$CLASSPATH:$SCALA_HOME/lib/scala-library.jar
5 |
6 | /opt/spark-0.8.0/bin/start-master.sh
7 |
8 | while [ 1 ];
9 | do
10 | tail -f /opt/spark-${SPARK_VERSION}/logs/*.out
11 | sleep 1
12 | done
13 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-shell/Dockerfile:
--------------------------------------------------------------------------------
1 | # Shark shell
2 | FROM shark-base:0.8.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Add run script
6 | ADD files /root/shark_shell_files
7 |
8 | # Add the default entrypoint for the shell
9 | ENTRYPOINT ["/root/shark_shell_files/default_cmd"]
10 |
11 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-shell/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}shark-shell:0.8.0 .
5 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-shell/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/shark_files/configure_shark.sh
4 | prepare_shark $1
5 | env
6 |
7 | # Note: there are issues if the nameserver did not have time to
8 | # refresh its cache with this shell's hostname, so give it time
9 | # to do so.
10 | sleep 3
11 |
12 | cd /opt/metastore && sudo -u hdfs /opt/shark-$SHARK_VERSION/bin/shark
13 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-shell/files/test.shark:
--------------------------------------------------------------------------------
1 | CREATE TABLE src(key INT, value STRING);
2 | LOAD DATA LOCAL INPATH '${env:HIVE_HOME}/examples/files/kv1.txt' INTO TABLE src;
3 | SELECT COUNT(1) FROM src;
4 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-worker/Dockerfile:
--------------------------------------------------------------------------------
1 | # Shark worker
2 | FROM shark-base:0.8.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Add run script
6 | ADD files /root/shark_worker_files
7 |
8 | # Add the entrypoint script for the worker
9 | ENTRYPOINT ["/root/shark_worker_files/default_cmd"]
10 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-worker/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}shark-worker:0.8.0 .
5 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-worker/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # run this as:
4 | # sudo docker run -d shark-worker:${SHARK_VERSION} ${MASTER_IP} spark://${MASTER_IP}:7077
5 |
6 | source /root/shark_files/configure_shark.sh
7 |
8 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
9 | echo "WORKER_IP=$IP"
10 |
11 | echo "preparing Shark"
12 | prepare_shark $1
13 |
14 | echo "starting Hadoop datanode"
15 | service hadoop-datanode start
16 |
17 | echo "starting sshd"
18 | /usr/sbin/sshd
19 |
20 | sleep 5
21 |
22 | echo "starting Shark worker node"
23 | cp /root/shark_worker_files/run_shark_worker.sh /
24 | chmod a+rx /run_shark_worker.sh
25 | sudo -u hdfs SPARK_VERSION=$SPARK_VERSION SHARK_VERSION=$SHARK_VERSION /run_shark_worker.sh
26 |
--------------------------------------------------------------------------------
/shark-0.8.0/shark-worker/files/run_shark_worker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | . /opt/shark-0.8.0/conf/shark-env.sh
3 | export PATH=$PATH:$SCALA_HOME/bin
4 | export CLASSPATH=$CLASSPATH:$SCALA_HOME/lib/scala-library.jar
5 | /opt/spark-0.8.0/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077
6 |
--------------------------------------------------------------------------------
/spark-0.7.3/build:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | spark_dirs=$(ls -d spark*)
4 | dir_list=("$spark_dirs")
5 |
6 | # NOTE: the build order matters; ls happens to list the directories in the required order (base image first)
7 | for i in ${dir_list[@]}; do
8 | echo building $i;
9 | cd $i;
10 | cat build;
11 | . build;
12 | cd ..;
13 | done
14 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-base/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark 0.7.3
2 | # Version 0.7.3
3 | #
4 | FROM apache-hadoop-hdfs-precise:1.2.1
5 |
6 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
7 |
8 | ENV SCALA_VERSION 2.9.3
9 | ENV SPARK_VERSION 0.7.3
10 | ENV SCALA_HOME /opt/scala-$SCALA_VERSION
11 | ENV SPARK_HOME /opt/spark-$SPARK_VERSION
12 | ENV PATH $SPARK_HOME:$SCALA_HOME/bin:$PATH
13 |
14 | # Install Scala
15 | ADD http://www.scala-lang.org/files/archive/scala-$SCALA_VERSION.tgz /
16 | RUN (cd / && gunzip < scala-$SCALA_VERSION.tgz)|(cd /opt && tar -xvf -)
17 | RUN rm /scala-$SCALA_VERSION.tgz && chown -R hdfs.hdfs /opt/scala-$SCALA_VERSION
18 |
19 | # Install Spark
20 | ADD http://spark-project.org/download/spark-$SPARK_VERSION-prebuilt-hadoop1.tgz /
21 | RUN (cd / && gunzip < spark-$SPARK_VERSION-prebuilt-hadoop1.tgz)|(cd /opt && tar -xvf -)
22 | RUN rm /spark-$SPARK_VERSION-prebuilt-hadoop1.tgz
23 |
24 | # Add Spark config files and configure script
25 | ADD files /root/spark_files
26 |
27 | #RUN cp /root/spark_files/spark-0.7.3_precomp_hadoop1.tar.gz /
28 | #RUN (cd / && gunzip < spark-0.7.3_precomp_hadoop1.tar.gz)|(cd /opt && tar -xvf -)
29 | #RUN rm /spark-0.7.3_precomp_hadoop1.tar.gz
30 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-base/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-base:0.7.3 .
5 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-base/files/configure_spark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/hadoop_files/configure_hadoop.sh
4 |
5 | function create_spark_directories() {
6 | create_hadoop_directories
7 | rm -rf /opt/spark-$SPARK_VERSION/work
8 | mkdir -p /opt/spark-$SPARK_VERSION/work
9 | chown hdfs.hdfs /opt/spark-$SPARK_VERSION/work
10 | mkdir /tmp/spark
11 | chown hdfs.hdfs /tmp/spark
12 | # this one is for Spark shell logging
13 | rm -rf /var/lib/hadoop/hdfs
14 | mkdir -p /var/lib/hadoop/hdfs
15 | chown hdfs.hdfs /var/lib/hadoop/hdfs
16 | }
17 |
18 | function deploy_spark_files() {
19 | deploy_hadoop_files
20 | cp /root/spark_files/spark-env.sh /opt/spark-$SPARK_VERSION/conf/
21 | cp /root/spark_files/log4j.properties /opt/spark-$SPARK_VERSION/conf/
22 | }
23 |
24 | function configure_spark() {
25 | configure_hadoop $1
26 | #sed -i s/__MASTER__/$1/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
27 | #sed -i s/__MASTER__/master/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
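  # fill in the __SPARK_HOME__ and __JAVA_HOME__ placeholders in spark-env.sh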
28 | sed -i s/__SPARK_HOME__/"\/opt\/spark-${SPARK_VERSION}"/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
29 | sed -i s/__JAVA_HOME__/"\/usr\/lib\/jvm\/java-7-openjdk-amd64"/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
30 | }
31 |
32 | function prepare_spark() {
33 | create_spark_directories
34 | deploy_spark_files
35 | configure_spark $1
36 | }
37 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-base/files/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set everything to be logged to the console
2 | log4j.rootCategory=INFO, console
3 | log4j.appender.console=org.apache.log4j.ConsoleAppender
4 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
5 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
6 |
7 | # Ignore messages below warning level from Jetty, because it's a bit verbose
8 | log4j.logger.org.eclipse.jetty=WARN
9 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-base/files/spark-env.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | export SCALA_HOME=/opt/scala-2.9.3
3 | export SPARK_HOME=__SPARK_HOME__
4 | export SPARK_WORKER_CORES=1
5 | export SPARK_MEM=800m
6 | export SPARK_WORKER_MEMORY=1500m
7 | export SPARK_MASTER_MEM=1500m
8 | export SPARK_WORKER_CORES=1
9 | export HADOOP_HOME="/etc/hadoop"
10 | export MASTER="spark://master:7077"
11 | export SPARK_LOCAL_DIR=/tmp/spark
12 | #SPARK_JAVA_OPTS="-Dspark.local.dir=/tmp/spark "
13 | #SPARK_JAVA_OPTS+=" -Dspark.akka.logLifecycleEvents=true "
14 | #SPARK_JAVA_OPTS+="-Dspark.kryoserializer.buffer.mb=10 "
15 | #SPARK_JAVA_OPTS+="-verbose:gc -XX:-PrintGCDetails -XX:+PrintGCTimeStamps "
16 | #export SPARK_JAVA_OPTS
17 | #SPARK_DAEMON_JAVA_OPTS+=" -Dspark.akka.logLifecycleEvents=true "
18 | #export SPARK_DAEMON_JAVA_OPTS
19 | export JAVA_HOME=__JAVA_HOME__
20 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-master/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark 0.7.3
2 | # Version 0.7.3
3 | FROM spark-base:0.7.3
4 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
5 |
6 | ADD files /root/spark_master_files
7 |
8 | CMD ["/root/spark_master_files/default_cmd"]
9 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-master/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-master:0.7.3 .
5 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-master/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | env
4 |
5 | source /root/spark_files/configure_spark.sh
6 |
7 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
8 | echo "MASTER_IP=$IP"
9 |
10 | echo "preparing Spark"
11 | prepare_spark $IP
12 |
13 | echo "starting Hadoop Namenode"
14 | sudo -u hdfs hadoop namenode -format > /dev/null 2>&1
15 | service hadoop-namenode start > /dev/null 2>&1
16 |
17 | echo "starting sshd"
18 | /usr/sbin/sshd
19 |
20 | # note: it seems important to sleep here
21 | sleep 5
22 |
23 | echo "starting Spark Master"
24 | cp /root/spark_master_files/run_spark_master.sh /
25 | chmod a+rx /run_spark_master.sh
26 | sudo -u hdfs /run_spark_master.sh
27 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-master/files/run_spark_master.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | . /opt/spark-0.7.3/conf/spark-env.sh
3 | /opt/spark-0.7.3/run spark.deploy.master.Master -i master
4 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-shell/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark 0.7.3
2 | # Version 0.7.3
3 | FROM spark-base:0.7.3
4 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
5 |
6 | VOLUME [ "/etc/dnsmasq.d" ]
7 |
8 | ADD files /root/spark_shell_files
9 |
10 | # Add the entrypoint script for the shell
11 | ENTRYPOINT ["/root/spark_shell_files/default_cmd"]
12 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-shell/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-shell:0.7.3 .
5 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-shell/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/spark_files/configure_spark.sh
4 |
5 | env
6 |
7 | echo "preparing Spark"
8 | prepare_spark "master"
9 |
10 | echo "adding test data to HDFS"
11 | cp /root/spark_shell_files/test.txt /tmp
12 | sudo -u hdfs hadoop dfsadmin -safemode wait
13 | sudo -u hdfs hadoop fs -put /tmp/test.txt hdfs://master:9000/user/hdfs/test.txt
14 |
15 | # Note: there are issues if the nameserver did not have time to
16 | # refresh its cache with this shell's hostname, so give it time
17 | # to do so.
18 | sleep 3
19 |
20 | echo "starting Spark Shell"
21 | cd $SPARK_HOME
22 | sudo -u hdfs HDFS_PREFIX=hdfs://master:9000 ./spark-shell
23 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-shell/files/test.spark:
--------------------------------------------------------------------------------
1 | val hdfs_prefix = System.getenv("HDFS_PREFIX")
2 | val textFile = sc.textFile(hdfs_prefix+"/user/hdfs/test.txt")
3 | textFile.count()
4 | textFile.map({line => line}).collect()
5 | exit
6 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-shell/files/test.txt:
--------------------------------------------------------------------------------
1 | this is a test
2 | more test
3 | one more line
4 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-worker/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark 0.7.3
2 | # Version 0.7.3
3 | FROM spark-base:0.7.3
4 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
5 |
6 | ADD files /root/spark_worker_files
7 |
8 | # Add the entrypoint script for the worker
9 | CMD ["-h"]
10 | ENTRYPOINT ["/root/spark_worker_files/default_cmd"]
11 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-worker/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-worker:0.7.3 .
5 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-worker/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/spark_files/configure_spark.sh
4 |
5 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
6 | echo "WORKER_IP=$IP"
7 |
8 | echo "preparing Spark"
9 | prepare_spark $1
10 |
11 | echo "starting Hadoop Datanode"
12 | service hadoop-datanode start
13 |
14 | echo "starting sshd"
15 | /usr/sbin/sshd
16 |
17 | sleep 5
18 |
19 | echo "starting Spark Worker"
20 | cp /root/spark_worker_files/run_spark_worker.sh /
21 | chmod a+rx /run_spark_worker.sh
22 | sudo -u hdfs /run_spark_worker.sh
23 |
--------------------------------------------------------------------------------
/spark-0.7.3/spark-worker/files/run_spark_worker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | . /opt/spark-0.7.3/conf/spark-env.sh
3 | /opt/spark-0.7.3/run spark.deploy.worker.Worker spark://master:7077
4 |
--------------------------------------------------------------------------------
/spark-0.8.0/NOTE.txt:
--------------------------------------------------------------------------------
1 | Many of the files here are in fact identical to the ones in the
2 | Spark 0.7.3 directory. However, since Docker does not follow
3 | symbolic links when it builds images we need the duplication.
4 |
--------------------------------------------------------------------------------
/spark-0.8.0/build:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | spark_dirs=$(ls -d spark*)
4 | dir_list=("$spark_dirs")
5 |
6 | # NOTE: the build order matters; ls happens to list the directories in the required order (base image first)
7 | for i in ${dir_list[@]}; do
8 | echo building $i;
9 | cd $i;
10 | cat build;
11 | . build;
12 | cd ..;
13 | done
14 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-base/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark 0.8.0
2 | # Version 0.8.0
3 | #
4 | FROM apache-hadoop-hdfs-precise:1.2.1
5 |
6 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
7 |
8 | ENV SCALA_VERSION 2.9.3
9 | ENV SPARK_VERSION 0.8.0
10 | ENV SCALA_HOME /opt/scala-$SCALA_VERSION
11 | ENV SPARK_HOME /opt/spark-$SPARK_VERSION
12 | ENV PATH $SPARK_HOME:$SCALA_HOME/bin:$PATH
13 |
14 | # Install Scala
15 | ADD http://www.scala-lang.org/files/archive/scala-$SCALA_VERSION.tgz /
16 | RUN (cd / && gunzip < scala-$SCALA_VERSION.tgz)|(cd /opt && tar -xvf -)
17 | RUN rm /scala-$SCALA_VERSION.tgz
18 |
19 | # Install Spark
20 | ADD http://d3kbcqa49mib13.cloudfront.net/spark-$SPARK_VERSION-incubating-bin-hadoop1.tgz /
21 | RUN (cd / && gunzip < spark-$SPARK_VERSION-incubating-bin-hadoop1.tgz)|(cd /opt && tar -xvf -)
22 | RUN (ln -s /opt/spark-$SPARK_VERSION-incubating-bin-hadoop1 /opt/spark-$SPARK_VERSION && rm /spark-$SPARK_VERSION-incubating-bin-hadoop1.tgz)
23 |
24 | # Add Spark config files and configure script
25 | ADD files /root/spark_files
26 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-base/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-base:0.8.0 .
5 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-base/files/configure_spark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/hadoop_files/configure_hadoop.sh
4 |
5 | function create_spark_directories() {
6 | create_hadoop_directories
7 | rm -rf /opt/spark-$SPARK_VERSION/work
8 | mkdir -p /opt/spark-$SPARK_VERSION/work
9 | chown hdfs.hdfs /opt/spark-$SPARK_VERSION/work
10 | mkdir /tmp/spark
11 | chown hdfs.hdfs /tmp/spark
12 | # this one is for Spark shell logging
13 | rm -rf /var/lib/hadoop/hdfs
14 | mkdir -p /var/lib/hadoop/hdfs
15 | chown hdfs.hdfs /var/lib/hadoop/hdfs
16 | rm -rf /opt/spark-$SPARK_VERSION/logs
17 | mkdir -p /opt/spark-$SPARK_VERSION/logs
18 | chown hdfs.hdfs /opt/spark-$SPARK_VERSION/logs
19 | }
20 |
21 | function deploy_spark_files() {
22 | deploy_hadoop_files
23 | cp /root/spark_files/spark-env.sh /opt/spark-$SPARK_VERSION/conf/
24 | cp /root/spark_files/log4j.properties /opt/spark-$SPARK_VERSION/conf/
25 | }
26 |
27 | function configure_spark() {
28 | configure_hadoop $1
29 | #sed -i s/__MASTER__/$1/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
30 | sed -i s/__MASTER__/master/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
31 | sed -i s/__SPARK_HOME__/"\/opt\/spark-${SPARK_VERSION}"/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
32 | sed -i s/__JAVA_HOME__/"\/usr\/lib\/jvm\/java-7-openjdk-amd64"/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
33 | }
34 |
35 | function prepare_spark() {
36 | create_spark_directories
37 | deploy_spark_files
38 | configure_spark $1
39 | }
40 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-base/files/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set everything to be logged to the console
2 | log4j.rootCategory=INFO, console
3 | log4j.appender.console=org.apache.log4j.ConsoleAppender
4 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
5 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
6 |
7 | # Ignore messages below warning level from Jetty, because it's a bit verbose
8 | log4j.logger.org.eclipse.jetty=WARN
9 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-base/files/spark-env.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | export SCALA_HOME=/opt/scala-2.9.3
3 | export SPARK_HOME=__SPARK_HOME__
4 | export SPARK_WORKER_CORES=1
5 | export SPARK_MEM=800m
6 | export SPARK_WORKER_MEMORY=1500m
7 | export SPARK_MASTER_MEM=1500m
8 | export SPARK_MASTER_IP=__MASTER__
9 | export HADOOP_HOME="/etc/hadoop"
10 | export MASTER="spark://__MASTER__:7077"
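# (__MASTER__, __SPARK_HOME__ and __JAVA_HOME__ are placeholders substituted by configure_spark.sh at container start)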
11 | export SPARK_LOCAL_DIR=/tmp/spark
12 | #SPARK_JAVA_OPTS="-Dspark.local.dir=/tmp/spark "
13 | #SPARK_JAVA_OPTS+=" -Dspark.akka.logLifecycleEvents=true "
14 | #SPARK_JAVA_OPTS+="-Dspark.kryoserializer.buffer.mb=10 "
15 | #SPARK_JAVA_OPTS+="-verbose:gc -XX:-PrintGCDetails -XX:+PrintGCTimeStamps "
16 | #export SPARK_JAVA_OPTS
17 | #SPARK_DAEMON_JAVA_OPTS+=" -Dspark.akka.logLifecycleEvents=true "
18 | #export SPARK_DAEMON_JAVA_OPTS
19 | export JAVA_HOME=__JAVA_HOME__
20 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-master/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM spark-base:0.8.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Expose TCP ports 7077 8080
6 | EXPOSE 7077 8080
7 |
8 | ADD files /root/spark_master_files
9 |
10 | CMD ["/root/spark_master_files/default_cmd"]
11 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-master/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-master:0.8.0 .
5 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-master/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | env
4 |
5 | source /root/spark_files/configure_spark.sh
6 |
7 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
8 | echo "MASTER_IP=$IP"
9 |
10 | echo "preparing Spark"
11 | prepare_spark $IP
12 |
13 | echo "starting Hadoop Namenode"
14 | sudo -u hdfs hadoop namenode -format > /dev/null 2>&1
15 | service hadoop-namenode start > /dev/null 2>&1
16 |
17 | echo "starting sshd"
18 | /usr/sbin/sshd
19 |
20 | sleep 5
21 |
22 | echo "starting Spark Master"
23 | cp /root/spark_master_files/run_spark_master.sh /
24 | chmod a+rx /run_spark_master.sh
25 | sudo -u hdfs SPARK_VERSION=$SPARK_VERSION /run_spark_master.sh
26 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-master/files/run_spark_master.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | /opt/spark-0.8.0/bin/start-master.sh
3 |
4 | while [ 1 ];
5 | do
6 | tail -f /opt/spark-${SPARK_VERSION}/logs/*.out
7 | sleep 1
8 | done
9 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-shell/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM spark-base:0.8.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Instead of using a random port, bind the worker to a specific port
6 | ENV SPARK_WORKER_PORT 8888
7 | EXPOSE 8888
8 |
9 | ADD files /root/spark_shell_files
10 |
11 | # Add the entrypoint script for the shell
12 | ENTRYPOINT ["/root/spark_shell_files/default_cmd"]
13 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-shell/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-shell:0.8.0 .
5 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-shell/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/spark_files/configure_spark.sh
4 |
5 | env
6 |
7 | echo "preparing Spark"
8 | prepare_spark "master"
9 |
10 | echo "adding test data to HDFS"
11 | cp /root/spark_shell_files/test.txt /tmp
12 | sudo -u hdfs hadoop dfsadmin -safemode wait
13 | sudo -u hdfs hadoop fs -put /tmp/test.txt hdfs://master:9000/user/hdfs/test.txt
14 |
15 | cp /root/spark_shell_files/test.spark /
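# (test.spark is copied but not run automatically; it can be loaded from the running shell, e.g. with :load /test.spark)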
16 |
17 | # Note: there are issues if the nameserver did not have time to
18 | # refresh its cache with this shell's hostname, so give it time
19 | # to do so.
20 | sleep 3
21 |
22 | echo "starting Spark Shell"
23 |
24 | cd $SPARK_HOME
25 | sudo -u hdfs MASTER=spark://master:7077 HDFS_PREFIX=hdfs://master:9000 ./spark-shell
26 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-shell/files/test.spark:
--------------------------------------------------------------------------------
1 | val hdfs_prefix = System.getenv("HDFS_PREFIX")
2 | val textFile = sc.textFile(hdfs_prefix+"/user/hdfs/test.txt")
3 | textFile.count()
4 | textFile.map({line => line}).collect()
5 | exit
6 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-shell/files/test.txt:
--------------------------------------------------------------------------------
1 | this is a test
2 | more test
3 | one more line
4 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-worker/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM spark-base:0.8.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Instead of using a random port, bind the worker to a specific port
6 | ENV SPARK_WORKER_PORT 8888
7 | EXPOSE 8888
8 |
9 | ADD files /root/spark_worker_files
10 |
11 | # Add the entrypoint script for the worker
12 | CMD ["-h"]
13 | ENTRYPOINT ["/root/spark_worker_files/default_cmd"]
14 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-worker/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-worker:0.8.0 .
5 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-worker/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/spark_files/configure_spark.sh
4 |
5 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
6 | echo "WORKER_IP=$IP"
7 |
8 | echo "preparing Spark"
9 | prepare_spark $1
10 |
11 | echo "starting Hadoop Datanode"
12 | service hadoop-datanode start
13 |
14 | echo "starting sshd"
15 | /usr/sbin/sshd
16 |
17 | sleep 5
18 |
19 | echo "starting Spark Worker"
20 | cp /root/spark_worker_files/run_spark_worker.sh /
21 | chmod a+rx /run_spark_worker.sh
22 | sudo -u hdfs /run_spark_worker.sh
23 |
--------------------------------------------------------------------------------
/spark-0.8.0/spark-worker/files/run_spark_worker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | . /opt/spark-0.8.0/conf/spark-env.sh
3 | ${SPARK_HOME}/spark-class org.apache.spark.deploy.worker.Worker $MASTER
4 |
--------------------------------------------------------------------------------
/spark-0.9.0/NOTE.txt:
--------------------------------------------------------------------------------
1 | Many of the files here are in fact identical to the ones in the
2 | Spark 0.8.0 directory. However, since Docker does not follow
3 | symbolic links when it builds images we need the duplication.
4 |
--------------------------------------------------------------------------------
/spark-0.9.0/build:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | spark_dirs=$(ls -d spark*)
4 | dir_list=("$spark_dirs")
5 |
6 | # NOTE: the build order matters; ls happens to list the directories in the required order (base image first)
7 | for i in ${dir_list[@]}; do
8 | echo building $i;
9 | cd $i;
10 | cat build;
11 | . build;
12 | cd ..;
13 | done
14 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-base/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark 0.9.0
2 | # Version 0.9.0
3 | #
4 | FROM apache-hadoop-hdfs-precise:1.2.1
5 |
6 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
7 |
8 | ENV SCALA_VERSION 2.10.3
9 | ENV SPARK_VERSION 0.9.0
10 | ENV SCALA_HOME /opt/scala-$SCALA_VERSION
11 | ENV SPARK_HOME /opt/spark-$SPARK_VERSION
12 | ENV PATH $SPARK_HOME:$SCALA_HOME/bin:$PATH
13 |
14 | # Install Scala
15 | ADD http://www.scala-lang.org/files/archive/scala-$SCALA_VERSION.tgz /
16 | RUN (cd / && gunzip < scala-$SCALA_VERSION.tgz)|(cd /opt && tar -xvf -)
17 | RUN rm /scala-$SCALA_VERSION.tgz
18 |
19 | # Install Spark
20 | ADD http://d3kbcqa49mib13.cloudfront.net/spark-$SPARK_VERSION-incubating-bin-hadoop1.tgz /
21 | RUN (cd / && gunzip < spark-$SPARK_VERSION-incubating-bin-hadoop1.tgz)|(cd /opt && tar -xvf -)
22 | RUN (ln -s /opt/spark-$SPARK_VERSION-incubating-bin-hadoop1 /opt/spark-$SPARK_VERSION && rm /spark-$SPARK_VERSION-incubating-bin-hadoop1.tgz)
23 |
24 | # Add Spark config files and configure script
25 | ADD files /root/spark_files
26 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-base/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-base:0.9.0 .
5 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-base/files/configure_spark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/hadoop_files/configure_hadoop.sh
4 |
5 | function create_spark_directories() {
6 | create_hadoop_directories
7 | rm -rf /opt/spark-$SPARK_VERSION/work
8 | mkdir -p /opt/spark-$SPARK_VERSION/work
9 | chown hdfs.hdfs /opt/spark-$SPARK_VERSION/work
10 | mkdir /tmp/spark
11 | chown hdfs.hdfs /tmp/spark
12 | # this one is for Spark shell logging
13 | rm -rf /var/lib/hadoop/hdfs
14 | mkdir -p /var/lib/hadoop/hdfs
15 | chown hdfs.hdfs /var/lib/hadoop/hdfs
16 | rm -rf /opt/spark-$SPARK_VERSION/logs
17 | mkdir -p /opt/spark-$SPARK_VERSION/logs
18 | chown hdfs.hdfs /opt/spark-$SPARK_VERSION/logs
19 | }
20 |
21 | function deploy_spark_files() {
22 | deploy_hadoop_files
23 | cp /root/spark_files/spark-env.sh /opt/spark-$SPARK_VERSION/conf/
24 | cp /root/spark_files/log4j.properties /opt/spark-$SPARK_VERSION/conf/
25 | }
26 |
27 | function configure_spark() {
28 | configure_hadoop $1
29 | #sed -i s/__MASTER__/$1/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
30 | sed -i s/__MASTER__/master/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
31 | sed -i s/__SPARK_HOME__/"\/opt\/spark-${SPARK_VERSION}"/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
32 | sed -i s/__JAVA_HOME__/"\/usr\/lib\/jvm\/java-7-openjdk-amd64"/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
33 | }
34 |
35 | function prepare_spark() {
36 | create_spark_directories
37 | deploy_spark_files
38 | configure_spark $1
39 | }
40 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-base/files/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set everything to be logged to the console
2 | log4j.rootCategory=INFO, console
3 | log4j.appender.console=org.apache.log4j.ConsoleAppender
4 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
5 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
6 |
7 | # Ignore messages below warning level from Jetty, because it's a bit verbose
8 | log4j.logger.org.eclipse.jetty=WARN
9 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-base/files/spark-env.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | export SCALA_HOME=/opt/scala-2.10.3
3 | export SPARK_HOME=__SPARK_HOME__
4 | export SPARK_WORKER_CORES=1
5 | export SPARK_MEM=800m
6 | export SPARK_WORKER_MEMORY=1500m
7 | export SPARK_MASTER_MEM=1500m
8 | export SPARK_MASTER_IP=__MASTER__
9 | export HADOOP_HOME="/etc/hadoop"
10 | export MASTER="spark://__MASTER__:7077"
11 | export SPARK_LOCAL_DIR=/tmp/spark
12 | #SPARK_JAVA_OPTS="-Dspark.local.dir=/tmp/spark "
13 | #SPARK_JAVA_OPTS+=" -Dspark.akka.logLifecycleEvents=true "
14 | #SPARK_JAVA_OPTS+="-Dspark.kryoserializer.buffer.mb=10 "
15 | #SPARK_JAVA_OPTS+="-verbose:gc -XX:-PrintGCDetails -XX:+PrintGCTimeStamps "
16 | #export SPARK_JAVA_OPTS
17 | #SPARK_DAEMON_JAVA_OPTS+=" -Dspark.akka.logLifecycleEvents=true "
18 | #export SPARK_DAEMON_JAVA_OPTS
19 | export JAVA_HOME=__JAVA_HOME__
20 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-master/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM spark-base:0.9.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Expose TCP ports 7077 8080
6 | EXPOSE 7077 8080
7 |
8 | ADD files /root/spark_master_files
9 |
10 | CMD ["/root/spark_master_files/default_cmd"]
11 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-master/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-master:0.9.0 .
5 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-master/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | env
4 |
5 | source /root/spark_files/configure_spark.sh
6 |
7 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
8 | echo "MASTER_IP=$IP"
9 |
10 | echo "preparing Spark"
11 | prepare_spark $IP
12 |
13 | echo "starting Hadoop Namenode"
14 | sudo -u hdfs hadoop namenode -format > /dev/null 2>&1
15 | service hadoop-namenode start > /dev/null 2>&1
16 |
17 | echo "starting sshd"
18 | /usr/sbin/sshd
19 |
20 | sleep 5
21 |
22 | echo "starting Spark Master"
23 | cp /root/spark_master_files/run_spark_master.sh /
24 | chmod a+rx /run_spark_master.sh
25 | sudo -u hdfs SPARK_VERSION=$SPARK_VERSION /run_spark_master.sh
26 |
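Note: the perl one-liner just extracts the container's first IPv4 address on eth0. On a typical Docker bridge network, `ip -o -4 addr list eth0` prints a single line along the lines of the sample below, and the regex captures the address in front of the /prefix (the address itself is only an example; Docker assigns it from the bridge pool):

    2: eth0    inet 172.17.0.5/16 brd 172.17.255.255 scope global eth0

so the script would echo MASTER_IP=172.17.0.5.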
--------------------------------------------------------------------------------
/spark-0.9.0/spark-master/files/run_spark_master.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | /opt/spark-0.9.0/sbin/start-master.sh
3 |
4 | while [ 1 ];
5 | do
6 | tail -f /opt/spark-${SPARK_VERSION}/logs/*.out
7 | sleep 1
8 | done
9 |
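Note: start-master.sh only launches the master daemon in the background and returns, so the endless tail is what keeps this script, and with it the container's foreground command, alive while streaming the master log. A behaviour-preserving sketch that also drops the hard-coded version (SPARK_VERSION is passed in by default_cmd via sudo):

    #!/bin/bash
    /opt/spark-${SPARK_VERSION}/sbin/start-master.sh

    # start-master.sh returns immediately; follow the log forever so the
    # container does not exit, restarting tail if it ever drops out.
    while true; do
        tail -f /opt/spark-${SPARK_VERSION}/logs/*.out
        sleep 1
    done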
--------------------------------------------------------------------------------
/spark-0.9.0/spark-shell/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM spark-base:0.9.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Instead of using a random port, bind the worker to a specific port
6 | ENV SPARK_WORKER_PORT 8888
7 | EXPOSE 8888
8 |
9 | ADD files /root/spark_shell_files
10 |
11 | # Add the entrypoint script for the shell
12 | ENTRYPOINT ["/root/spark_shell_files/default_cmd"]
13 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-shell/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-shell:0.9.0 .
5 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-shell/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/spark_files/configure_spark.sh
4 |
5 | env
6 |
7 | echo "preparing Spark"
8 | prepare_spark "master"
9 |
10 | echo "adding test data to HDFS"
11 | cp /root/spark_shell_files/test.txt /tmp
12 | sudo -u hdfs hadoop dfsadmin -safemode wait
13 | sudo -u hdfs hadoop fs -put /tmp/test.txt hdfs://master:9000/user/hdfs/test.txt
14 |
15 | cp /root/spark_shell_files/test.spark /
16 |
17 | # Note: there are issues if the nameserver has not had time to
18 | # refresh its cache with this shell's hostname, so give it a
19 | # moment to do so.
20 | sleep 3
21 |
22 | echo "starting Spark Shell"
23 |
24 | cd $SPARK_HOME
25 | sudo -u hdfs MASTER=spark://master:7077 HDFS_PREFIX=hdfs://master:9000 ./bin/spark-shell
26 |
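Note: the dfsadmin -safemode wait call blocks until the namenode has left safe mode, so the put that follows cannot race the namenode's startup. To confirm the test file actually landed, one could list the target directory (a hedged manual check, not part of the script):

    sudo -u hdfs hadoop fs -ls hdfs://master:9000/user/hdfs/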
--------------------------------------------------------------------------------
/spark-0.9.0/spark-shell/files/test.spark:
--------------------------------------------------------------------------------
1 | val hdfs_prefix = System.getenv("HDFS_PREFIX")
2 | val textFile = sc.textFile(hdfs_prefix+"/user/hdfs/test.txt")
3 | textFile.count()
4 | textFile.map({line => line}).collect()
5 | exit
6 |
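Note: against the three-line test.txt loaded into HDFS by default_cmd, the count should be 3 and the collect should echo the lines back; test_all.sh later greps the captured shell output for exactly that Array(...) string. A rough sketch of the expected REPL tail (exact res numbering and formatting may vary by Spark version):

    scala> textFile.count()
    res0: Long = 3

    scala> textFile.map({line => line}).collect()
    res1: Array[String] = Array(this is a test, more test, one more line)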
--------------------------------------------------------------------------------
/spark-0.9.0/spark-shell/files/test.txt:
--------------------------------------------------------------------------------
1 | this is a test
2 | more test
3 | one more line
4 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-worker/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM spark-base:0.9.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Instead of using a random port, bind the worker to a specific port
6 | ENV SPARK_WORKER_PORT 8888
7 | EXPOSE 8888
8 |
9 | ADD files /root/spark_worker_files
10 |
11 | # Add the entrypoint script for the worker
12 | CMD ["-h"]
13 | ENTRYPOINT ["/root/spark_worker_files/default_cmd"]
14 |
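Note: unlike the master image, the worker uses ENTRYPOINT, so whatever argument is passed to docker run reaches default_cmd as $1 (the master address consumed by prepare_spark), with CMD ["-h"] serving only as the fallback argument. A hedged manual launch, assuming a master already running at 172.17.0.5 and the nameserver wiring handled elsewhere (the deploy scripts remain the supported path):

    sudo docker run -d ${IMAGE_PREFIX}spark-worker:0.9.0 172.17.0.5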
--------------------------------------------------------------------------------
/spark-0.9.0/spark-worker/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-worker:0.9.0 .
5 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-worker/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/spark_files/configure_spark.sh
4 |
5 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
6 | echo "WORKER_IP=$IP"
7 |
8 | echo "preparing Spark"
9 | prepare_spark $1
10 |
11 | echo "starting Hadoop Datanode"
12 | service hadoop-datanode start
13 |
14 | echo "starting sshd"
15 | /usr/sbin/sshd
16 |
17 | sleep 5
18 |
19 | echo "starting Spark Worker"
20 | cp /root/spark_worker_files/run_spark_worker.sh /
21 | chmod a+rx /run_spark_worker.sh
22 | sudo -u hdfs /run_spark_worker.sh
23 |
--------------------------------------------------------------------------------
/spark-0.9.0/spark-worker/files/run_spark_worker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | . /opt/spark-0.9.0/conf/spark-env.sh
3 | ${SPARK_HOME}/bin/spark-class org.apache.spark.deploy.worker.Worker $MASTER
4 |
--------------------------------------------------------------------------------
/spark-0.9.1/NOTE.txt:
--------------------------------------------------------------------------------
1 | Many of the files here are in fact identical to the ones in the
2 | other spark-* directories. However, since Docker does not follow
3 | symbolic links when it builds images, we need the duplication.
4 |
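Note: because docker build copies the context literally and never dereferences symlinks, each new version directory has to start life as a real copy. A hedged sketch of how such a tree might be seeded (hypothetical commands, not part of the repo; the download URL and other version specifics still need a manual pass, e.g. 0.9.0 ships as an -incubating tarball while 0.9.1 does not):

    cp -r spark-0.9.0 spark-0.9.1
    grep -rl '0\.9\.0' spark-0.9.1 | xargs sed -i 's/0\.9\.0/0.9.1/g'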
--------------------------------------------------------------------------------
/spark-0.9.1/build:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | spark_dirs=$(ls -d spark*)
4 | dir_list=("$spark_dirs")
5 |
6 | # NOTE: the build order matters; the ls order above is the right one (spark-base first)
7 | for i in ${dir_list[@]}; do
8 | echo building $i;
9 | cd $i;
10 | cat build;
11 | . build;
12 | cd ..;
13 | done
14 |
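Note: this wrapper simply sources every spark-*/build in ls order, which happens to put spark-base ahead of the master, shell and worker images that FROM it. The docker tags pick up IMAGE_PREFIX from the environment, so a hedged invocation looks like:

    cd spark-0.9.1
    IMAGE_PREFIX=amplab/ bash ./build    # leave IMAGE_PREFIX unset for bare local tags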
--------------------------------------------------------------------------------
/spark-0.9.1/spark-base/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark 0.9.1
2 | # Version 0.9.1
3 | #
4 | FROM apache-hadoop-hdfs-precise:1.2.1
5 |
6 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
7 |
8 | ENV SCALA_VERSION 2.10.3
9 | ENV SPARK_VERSION 0.9.1
10 | ENV SCALA_HOME /opt/scala-$SCALA_VERSION
11 | ENV SPARK_HOME /opt/spark-$SPARK_VERSION
12 | ENV PATH $SPARK_HOME:$SCALA_HOME/bin:$PATH
13 |
14 | # Install Scala
15 | ADD http://www.scala-lang.org/files/archive/scala-$SCALA_VERSION.tgz /
16 | RUN (cd / && gunzip < scala-$SCALA_VERSION.tgz)|(cd /opt && tar -xvf -)
17 | RUN rm /scala-$SCALA_VERSION.tgz
18 |
19 | # Install Spark
20 | ADD http://d3kbcqa49mib13.cloudfront.net/spark-$SPARK_VERSION-bin-hadoop1.tgz /
21 | RUN (cd / && gunzip < spark-$SPARK_VERSION-bin-hadoop1.tgz)|(cd /opt && tar -xvf -)
22 | RUN (ln -s /opt/spark-$SPARK_VERSION-bin-hadoop1 /opt/spark-$SPARK_VERSION && rm /spark-$SPARK_VERSION-bin-hadoop1.tgz)
23 |
24 | # Add Spark config files and configure script
25 | ADD files /root/spark_files
26 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-base/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-base:0.9.1 .
5 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-base/files/configure_spark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/hadoop_files/configure_hadoop.sh
4 |
5 | function create_spark_directories() {
6 | create_hadoop_directories
7 | rm -rf /opt/spark-$SPARK_VERSION/work
8 | mkdir -p /opt/spark-$SPARK_VERSION/work
9 | chown hdfs.hdfs /opt/spark-$SPARK_VERSION/work
10 | mkdir /tmp/spark
11 | chown hdfs.hdfs /tmp/spark
12 | # this one is for Spark shell logging
13 | rm -rf /var/lib/hadoop/hdfs
14 | mkdir -p /var/lib/hadoop/hdfs
15 | chown hdfs.hdfs /var/lib/hadoop/hdfs
16 | rm -rf /opt/spark-$SPARK_VERSION/logs
17 | mkdir -p /opt/spark-$SPARK_VERSION/logs
18 | chown hdfs.hdfs /opt/spark-$SPARK_VERSION/logs
19 | }
20 |
21 | function deploy_spark_files() {
22 | deploy_hadoop_files
23 | cp /root/spark_files/spark-env.sh /opt/spark-$SPARK_VERSION/conf/
24 | cp /root/spark_files/log4j.properties /opt/spark-$SPARK_VERSION/conf/
25 | }
26 |
27 | function configure_spark() {
28 | configure_hadoop $1
29 | #sed -i s/__MASTER__/$1/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
30 | sed -i s/__MASTER__/master/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
31 | sed -i s/__SPARK_HOME__/"\/opt\/spark-${SPARK_VERSION}"/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
32 | sed -i s/__JAVA_HOME__/"\/usr\/lib\/jvm\/java-7-openjdk-amd64"/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
33 | }
34 |
35 | function prepare_spark() {
36 | create_spark_directories
37 | deploy_spark_files
38 | configure_spark $1
39 | }
40 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-base/files/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set everything to be logged to the console
2 | log4j.rootCategory=INFO, console
3 | log4j.appender.console=org.apache.log4j.ConsoleAppender
4 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
5 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
6 |
7 | # Ignore messages below warning level from Jetty, because it's a bit verbose
8 | log4j.logger.org.eclipse.jetty=WARN
9 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-base/files/spark-env.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | export SCALA_HOME=/opt/scala-2.10.3
3 | export SPARK_HOME=__SPARK_HOME__
4 | export SPARK_WORKER_CORES=1
5 | export SPARK_MEM=800m
6 | export SPARK_WORKER_MEMORY=1500m
7 | export SPARK_MASTER_MEM=1500m
8 | export SPARK_MASTER_IP=__MASTER__
9 | export HADOOP_HOME="/etc/hadoop"
10 | export MASTER="spark://__MASTER__:7077"
11 | export SPARK_LOCAL_DIR=/tmp/spark
12 | #SPARK_JAVA_OPTS="-Dspark.local.dir=/tmp/spark "
13 | #SPARK_JAVA_OPTS+=" -Dspark.akka.logLifecycleEvents=true "
14 | #SPARK_JAVA_OPTS+="-Dspark.kryoserializer.buffer.mb=10 "
15 | #SPARK_JAVA_OPTS+="-verbose:gc -XX:-PrintGCDetails -XX:+PrintGCTimeStamps "
16 | #export SPARK_JAVA_OPTS
17 | #SPARK_DAEMON_JAVA_OPTS+=" -Dspark.akka.logLifecycleEvents=true "
18 | #export SPARK_DAEMON_JAVA_OPTS
19 | export JAVA_HOME=__JAVA_HOME__
20 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-master/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM spark-base:0.9.1
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Expose TCP ports 7077 8080
6 | EXPOSE 7077 8080
7 |
8 | ADD files /root/spark_master_files
9 |
10 | CMD ["/root/spark_master_files/default_cmd"]
11 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-master/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-master:0.9.1 .
5 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-master/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | env
4 |
5 | source /root/spark_files/configure_spark.sh
6 |
7 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
8 | echo "MASTER_IP=$IP"
9 |
10 | echo "preparing Spark"
11 | prepare_spark $IP
12 |
13 | echo "starting Hadoop Namenode"
14 | sudo -u hdfs hadoop namenode -format > /dev/null 2>&1
15 | service hadoop-namenode start > /dev/null 2>&1
16 |
17 | echo "starting sshd"
18 | /usr/sbin/sshd
19 |
20 | sleep 5
21 |
22 | echo "starting Spark Master"
23 | cp /root/spark_master_files/run_spark_master.sh /
24 | chmod a+rx /run_spark_master.sh
25 | sudo -u hdfs SPARK_VERSION=$SPARK_VERSION /run_spark_master.sh
26 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-master/files/run_spark_master.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | /opt/spark-0.9.1/sbin/start-master.sh
3 |
4 | while [ 1 ];
5 | do
6 | tail -f /opt/spark-${SPARK_VERSION}/logs/*.out
7 | sleep 1
8 | done
9 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-shell/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM spark-base:0.9.1
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Instead of using a random port, bind the worker to a specific port
6 | ENV SPARK_WORKER_PORT 8888
7 | EXPOSE 8888
8 |
9 | ADD files /root/spark_shell_files
10 |
11 | # Add the entrypoint script for the shell
12 | ENTRYPOINT ["/root/spark_shell_files/default_cmd"]
13 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-shell/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-shell:0.9.1 .
5 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-shell/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/spark_files/configure_spark.sh
4 |
5 | env
6 |
7 | echo "preparing Spark"
8 | prepare_spark "master"
9 |
10 | echo "adding test data to HDFS"
11 | cp /root/spark_shell_files/test.txt /tmp
12 | sudo -u hdfs hadoop dfsadmin -safemode wait
13 | sudo -u hdfs hadoop fs -put /tmp/test.txt hdfs://master:9000/user/hdfs/test.txt
14 |
15 | cp /root/spark_shell_files/test.spark /
16 |
17 | # Note: there are issues if the nameserver has not had time to
18 | # refresh its cache with this shell's hostname, so give it a
19 | # moment to do so.
20 | sleep 3
21 |
22 | echo "starting Spark Shell"
23 |
24 | cd $SPARK_HOME
25 | sudo -u hdfs MASTER=spark://master:7077 HDFS_PREFIX=hdfs://master:9000 ./bin/spark-shell
26 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-shell/files/test.spark:
--------------------------------------------------------------------------------
1 | val hdfs_prefix = System.getenv("HDFS_PREFIX")
2 | val textFile = sc.textFile(hdfs_prefix+"/user/hdfs/test.txt")
3 | textFile.count()
4 | textFile.map({line => line}).collect()
5 | exit
6 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-shell/files/test.txt:
--------------------------------------------------------------------------------
1 | this is a test
2 | more test
3 | one more line
4 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-worker/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM spark-base:0.9.1
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Instead of using a random port, bind the worker to a specific port
6 | ENV SPARK_WORKER_PORT 8888
7 | EXPOSE 8888
8 |
9 | ADD files /root/spark_worker_files
10 |
11 | # Add the entrypoint script for the worker
12 | CMD ["-h"]
13 | ENTRYPOINT ["/root/spark_worker_files/default_cmd"]
14 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-worker/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-worker:0.9.1 .
5 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-worker/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/spark_files/configure_spark.sh
4 |
5 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
6 | echo "WORKER_IP=$IP"
7 |
8 | echo "preparing Spark"
9 | prepare_spark $1
10 |
11 | echo "starting Hadoop Datanode"
12 | service hadoop-datanode start
13 |
14 | echo "starting sshd"
15 | /usr/sbin/sshd
16 |
17 | sleep 5
18 |
19 | echo "starting Spark Worker"
20 | cp /root/spark_worker_files/run_spark_worker.sh /
21 | chmod a+rx /run_spark_worker.sh
22 | sudo -u hdfs /run_spark_worker.sh
23 |
--------------------------------------------------------------------------------
/spark-0.9.1/spark-worker/files/run_spark_worker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | . /opt/spark-0.9.1/conf/spark-env.sh
3 | ${SPARK_HOME}/bin/spark-class org.apache.spark.deploy.worker.Worker $MASTER
4 |
--------------------------------------------------------------------------------
/spark-1.0.0/NOTE.txt:
--------------------------------------------------------------------------------
1 | Many of the files here are in fact identical to the ones in the
2 | other spark-* directories. However, since Docker does not follow
3 | symbolic links when it builds images, we need the duplication.
4 |
--------------------------------------------------------------------------------
/spark-1.0.0/build:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | spark_dirs=$(ls -d spark*)
4 | dir_list=("$spark_dirs")
5 |
6 | # NOTE: the build order matters; the ls order above is the right one (spark-base first)
7 | for i in ${dir_list[@]}; do
8 | echo building $i;
9 | cd $i;
10 | cat build;
11 | . build;
12 | cd ..;
13 | done
14 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-base/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark 1.0.0
2 | # Version 1.0.0
3 | #
4 | FROM apache-hadoop-hdfs-precise:1.2.1
5 |
6 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
7 |
8 | ENV SCALA_VERSION 2.10.3
9 | ENV SPARK_VERSION 1.0.0
10 | ENV SCALA_HOME /opt/scala-$SCALA_VERSION
11 | ENV SPARK_HOME /opt/spark-$SPARK_VERSION
12 | ENV PATH $SPARK_HOME:$SCALA_HOME/bin:$PATH
13 |
14 | # Install Scala
15 | ADD http://www.scala-lang.org/files/archive/scala-$SCALA_VERSION.tgz /
16 | RUN (cd / && gunzip < scala-$SCALA_VERSION.tgz)|(cd /opt && tar -xvf -)
17 | RUN rm /scala-$SCALA_VERSION.tgz
18 |
19 | # Install Spark
20 | ADD http://d3kbcqa49mib13.cloudfront.net/spark-$SPARK_VERSION-bin-hadoop1.tgz /
21 | RUN (cd / && gunzip < spark-$SPARK_VERSION-bin-hadoop1.tgz)|(cd /opt && tar -xvf -)
22 | RUN (ln -s /opt/spark-$SPARK_VERSION-bin-hadoop1 /opt/spark-$SPARK_VERSION && rm /spark-$SPARK_VERSION-bin-hadoop1.tgz)
23 |
24 | # Add Spark config files and configure script
25 | ADD files /root/spark_files
26 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-base/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-base:1.0.0 .
5 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-base/files/configure_spark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/hadoop_files/configure_hadoop.sh
4 |
5 | function create_spark_directories() {
6 | create_hadoop_directories
7 | rm -rf /opt/spark-$SPARK_VERSION/work
8 | mkdir -p /opt/spark-$SPARK_VERSION/work
9 | chown hdfs.hdfs /opt/spark-$SPARK_VERSION/work
10 | mkdir /tmp/spark
11 | chown hdfs.hdfs /tmp/spark
12 | # this one is for Spark shell logging
13 | rm -rf /var/lib/hadoop/hdfs
14 | mkdir -p /var/lib/hadoop/hdfs
15 | chown hdfs.hdfs /var/lib/hadoop/hdfs
16 | rm -rf /opt/spark-$SPARK_VERSION/logs
17 | mkdir -p /opt/spark-$SPARK_VERSION/logs
18 | chown hdfs.hdfs /opt/spark-$SPARK_VERSION/logs
19 | }
20 |
21 | function deploy_spark_files() {
22 | deploy_hadoop_files
23 | cp /root/spark_files/spark-env.sh /opt/spark-$SPARK_VERSION/conf/
24 | cp /root/spark_files/log4j.properties /opt/spark-$SPARK_VERSION/conf/
25 | }
26 |
27 | function configure_spark() {
28 | configure_hadoop $1
29 | #sed -i s/__MASTER__/$1/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
30 | sed -i s/__MASTER__/master/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
31 | sed -i s/__SPARK_HOME__/"\/opt\/spark-${SPARK_VERSION}"/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
32 | sed -i s/__JAVA_HOME__/"\/usr\/lib\/jvm\/java-7-openjdk-amd64"/ /opt/spark-$SPARK_VERSION/conf/spark-env.sh
33 | }
34 |
35 | function prepare_spark() {
36 | create_spark_directories
37 | deploy_spark_files
38 | configure_spark $1
39 | }
40 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-base/files/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set everything to be logged to the console
2 | log4j.rootCategory=INFO, console
3 | log4j.appender.console=org.apache.log4j.ConsoleAppender
4 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
5 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
6 |
7 | # Ignore messages below warning level from Jetty, because it's a bit verbose
8 | log4j.logger.org.eclipse.jetty=WARN
9 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-base/files/spark-env.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | export SCALA_HOME=/opt/scala-2.10.3
3 | export SPARK_HOME=__SPARK_HOME__
4 | export SPARK_WORKER_CORES=1
5 | export SPARK_MEM=800m
6 | export SPARK_WORKER_MEMORY=1500m
7 | export SPARK_MASTER_MEM=1500m
8 | export SPARK_MASTER_IP=__MASTER__
9 | export HADOOP_HOME="/etc/hadoop"
10 | export MASTER="spark://__MASTER__:7077"
11 | export SPARK_LOCAL_DIR=/tmp/spark
12 | #SPARK_JAVA_OPTS="-Dspark.local.dir=/tmp/spark "
13 | #SPARK_JAVA_OPTS+=" -Dspark.akka.logLifecycleEvents=true "
14 | #SPARK_JAVA_OPTS+="-Dspark.kryoserializer.buffer.mb=10 "
15 | #SPARK_JAVA_OPTS+="-verbose:gc -XX:-PrintGCDetails -XX:+PrintGCTimeStamps "
16 | #export SPARK_JAVA_OPTS
17 | #SPARK_DAEMON_JAVA_OPTS+=" -Dspark.akka.logLifecycleEvents=true "
18 | #export SPARK_DAEMON_JAVA_OPTS
19 | export JAVA_HOME=__JAVA_HOME__
20 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-master/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM spark-base:1.0.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Expose TCP ports 7077 8080
6 | EXPOSE 7077 8080
7 |
8 | ADD files /root/spark_master_files
9 |
10 | CMD ["/root/spark_master_files/default_cmd"]
11 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-master/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-master:1.0.0 .
5 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-master/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | env
4 |
5 | source /root/spark_files/configure_spark.sh
6 |
7 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
8 | echo "MASTER_IP=$IP"
9 |
10 | echo "preparing Spark"
11 | prepare_spark $IP
12 |
13 | echo "starting Hadoop Namenode"
14 | sudo -u hdfs hadoop namenode -format > /dev/null 2>&1
15 | service hadoop-namenode start > /dev/null 2>&1
16 |
17 | echo "starting sshd"
18 | /usr/sbin/sshd
19 |
20 | sleep 5
21 |
22 | echo "starting Spark Master"
23 | cp /root/spark_master_files/run_spark_master.sh /
24 | chmod a+rx /run_spark_master.sh
25 | sudo -u hdfs SPARK_VERSION=$SPARK_VERSION /run_spark_master.sh
26 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-master/files/run_spark_master.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | /opt/spark-1.0.0/sbin/start-master.sh
3 |
4 | while [ 1 ];
5 | do
6 | tail -f /opt/spark-${SPARK_VERSION}/logs/*.out
7 | sleep 1
8 | done
9 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-shell/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM spark-base:1.0.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Instead of using a random port, bind the worker to a specific port
6 | ENV SPARK_WORKER_PORT 8888
7 | EXPOSE 8888
8 |
9 | ADD files /root/spark_shell_files
10 |
11 | # Add the entrypoint script for the shell
12 | ENTRYPOINT ["/root/spark_shell_files/default_cmd"]
13 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-shell/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-shell:1.0.0 .
5 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-shell/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/spark_files/configure_spark.sh
4 |
5 | env
6 |
7 | echo "preparing Spark"
8 | prepare_spark "master"
9 |
10 | echo "adding test data to HDFS"
11 | cp /root/spark_shell_files/test.txt /tmp
12 | sudo -u hdfs hadoop dfsadmin -safemode wait
13 | sudo -u hdfs hadoop fs -put /tmp/test.txt hdfs://master:9000/user/hdfs/test.txt
14 |
15 | cp /root/spark_shell_files/test.spark /
16 |
17 | # Note: there are issues if the nameserver has not had time to
18 | # refresh its cache with this shell's hostname, so give it a
19 | # moment to do so.
20 | sleep 3
21 |
22 | echo "starting Spark Shell"
23 |
24 | cd $SPARK_HOME
25 | sudo -u hdfs MASTER=spark://master:7077 HDFS_PREFIX=hdfs://master:9000 ./bin/spark-shell
26 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-shell/files/test.spark:
--------------------------------------------------------------------------------
1 | val hdfs_prefix = System.getenv("HDFS_PREFIX")
2 | val textFile = sc.textFile(hdfs_prefix+"/user/hdfs/test.txt")
3 | textFile.count()
4 | textFile.map({line => line}).collect()
5 | exit
6 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-shell/files/test.txt:
--------------------------------------------------------------------------------
1 | this is a test
2 | more test
3 | one more line
4 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-worker/Dockerfile:
--------------------------------------------------------------------------------
1 | # Spark
2 | FROM spark-base:1.0.0
3 | MAINTAINER amplab amp-docker@eecs.berkeley.edu
4 |
5 | # Instead of using a random port, bind the worker to a specific port
6 | ENV SPARK_WORKER_PORT 8888
7 | EXPOSE 8888
8 |
9 | ADD files /root/spark_worker_files
10 |
11 | # Add the entrypoint script for the worker
12 | CMD ["-h"]
13 | ENTRYPOINT ["/root/spark_worker_files/default_cmd"]
14 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-worker/build:
--------------------------------------------------------------------------------
1 | rm -f files/files.hash
2 | for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
3 | mv /tmp/files.hash files/files.hash
4 | sudo docker build -t ${IMAGE_PREFIX}spark-worker:1.0.0 .
5 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-worker/files/default_cmd:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /root/spark_files/configure_spark.sh
4 |
5 | IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
6 | echo "WORKER_IP=$IP"
7 |
8 | echo "preparing Spark"
9 | prepare_spark $1
10 |
11 | echo "starting Hadoop Datanode"
12 | service hadoop-datanode start
13 |
14 | echo "starting sshd"
15 | /usr/sbin/sshd
16 |
17 | sleep 5
18 |
19 | echo "starting Spark Worker"
20 | cp /root/spark_worker_files/run_spark_worker.sh /
21 | chmod a+rx /run_spark_worker.sh
22 | sudo -u hdfs /run_spark_worker.sh
23 |
--------------------------------------------------------------------------------
/spark-1.0.0/spark-worker/files/run_spark_worker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | . /opt/spark-1.0.0/conf/spark-env.sh
3 | ${SPARK_HOME}/bin/spark-class org.apache.spark.deploy.worker.Worker $MASTER
4 |
--------------------------------------------------------------------------------
/test/test_all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [[ "$USER" != "root" ]]; then
4 | echo "please run as: sudo $0"
5 | exit 1
6 | fi
7 |
8 | BASEDIR=$(cd $(dirname $0); pwd)"/.."
9 | service_list=("spark:0.9.0" "shark:0.8.0" "spark:0.8.0" "spark:0.7.3" "shark:0.7.0" )
10 |
11 | IMAGE_PREFIX=""
12 | #"amplab/"
13 |
14 | START=$(date)
15 | echo "starting tests at $START" > tests.log
16 |
17 | RESULT=0
18 | FAILED=0
19 |
20 | check_screen_session_alive() {
21 | screen -q -ls > /dev/null
22 | if (( $? < 10 )); then
23 | SCREEN_ALIVE=1
24 | fi
25 | }
26 |
27 | function wait_for_prompt() {
28 | service=$1
29 | OUTFILE=$2
30 | SCREEN_ALIVE=0
31 |
32 | if [[ "$service" == "spark" ]]; then
33 | query_string="scala>\s$"
34 | else
35 | query_string="^shark>\s$\|\s\s\s\s\s>\s$"
36 | fi
37 |
38 | tail -n 1 $OUTFILE | tr -d $'\r' | grep "$query_string" > /dev/null
39 | STOP="$?"
40 | until [[ "$STOP" == "0" ]]; do
41 | sleep 1
42 | check_screen_session_alive
43 | if [[ "$SCREEN_ALIVE" == "0" ]]; then
44 | sudo screen -S tmpshell -p 0 -X stuff $'\n'
45 | tail -n 1 $OUTFILE | tr -d $'\r' | grep "$query_string" > /dev/null
46 | STOP="$?"
47 | else
48 | break
49 | fi
50 | done
51 | }
52 |
53 | function check_result() {
54 | service=$1
55 | outfile=$2
56 |
57 | if [[ "$service" == "spark" ]]; then
58 | grep "Array(this is a test, more test, one more line)" $outfile > /dev/null
59 | RESULT="$?"
60 | elif [[ "$service" == "shark" ]]; then
61 | cat $outfile | tr -d $'\r' | grep "^500$" > /dev/null
62 | RESULT="$?"
63 | fi
64 | }
65 |
66 | # NOTE: the order of service_list matters, and this is the right one
67 | for i in ${service_list[@]}; do
68 | service=$(echo $i | awk -F ":" '{print $1}')
69 | version=$(echo $i | awk -F ":" '{print $2}')
70 | dirname=${service}-${version}
71 | LOGFILE=${BASEDIR}/test/${dirname}.log
72 | OUTFILE=${BASEDIR}/test/${dirname}.out
73 | rm -f "$LOGFILE" "$OUTFILE"
74 | START=$(date)
75 | echo "starting tests at $START" > $LOGFILE
76 | $BASEDIR/deploy/deploy.sh -i ${IMAGE_PREFIX}${i} 1>>$LOGFILE 2>&1
77 | NAMESERVER_IP=$(grep NAMESERVER_IP ${dirname}.log | awk '{print $2}')
78 | MASTER_IP=$(grep MASTER_IP ${dirname}.log | awk '{print $2}')
79 |
80 | # we need this to set screen's output logfile
81 | cat << EOF >/tmp/screenrc
82 | logfile $OUTFILE
83 | EOF
84 | cat > cmd.sh < /dev/null 2>&1
108 |
109 | $BASEDIR/deploy/kill_all.sh $service 1>> $LOGFILE 2>&1
110 | $BASEDIR/deploy/kill_all.sh nameserver 1>> $LOGFILE 2>&1
111 | check_result "$service" "$OUTFILE"
112 | echo "RESULT: $RESULT" >> $LOGFILE
113 | END=$(date)
114 | echo "ending tests at $END" >> $LOGFILE
115 | let "FAILED=FAILED+RESULT"
116 | done
117 |
118 | echo "FAILED: $FAILED"
119 |
120 | if [[ "$FAILED" == "0" ]]; then
121 | exit 0
122 | else
123 | exit 1
124 | fi
125 |
--------------------------------------------------------------------------------
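Note: test_all.sh exercises the whole matrix end to end: for each service:version pair it deploys a cluster with deploy/deploy.sh, drives the corresponding shell image inside a screen session until the REPL prompt appears, checks the canned result (the Array(...) line for Spark, the row count 500 for Shark), then tears everything down with kill_all.sh and accumulates failures. A hedged invocation from the repository root:

    sudo ./test/test_all.sh
    echo $?    # 0 if every service/version in service_list passed, 1 otherwise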