├── .gitignore ├── README.md ├── TODO.txt ├── ami-init.sh ├── base-hadoop ├── Dockerfile └── url.txt ├── base-java ├── .gitignore ├── Dockerfile └── url.txt ├── base-novnc └── Dockerfile ├── base-scala ├── Dockerfile ├── ide.txt ├── idea ├── jdk.txt ├── sbt-launch.jar └── system.jwmrc ├── base-ssh ├── Dockerfile └── config ├── build.sh ├── centos6-init.sh ├── cleanup.sh ├── devenv ├── Dockerfile ├── predef.scala └── start-all.sh ├── docker-compose.yml ├── dropbox.sh ├── hadoop ├── Dockerfile ├── core-site.xml ├── hdfs-site.xml ├── mapred-site.xml ├── start-hadoop.sh └── yarn-site.xml ├── sparkjobserver └── Dockerfile └── zeppelin ├── Dockerfile └── url.txt /.gitignore: -------------------------------------------------------------------------------- 1 | id_rsa 2 | id_rsa.pub 3 | _password 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BigDataDevKit 2 | 3 | Big Data Development Kit (Hadoop / Spark / Zeppelin / IntelliJ) in Amazon AWS 4 | 5 | Usage: 6 | 7 | Launch an instance on Amazon WebServices 8 | 9 | Specify in the initialization of the vm 10 | 11 | ``` 12 | #!/bin/bash 13 | wget -O- http://bit.ly/1PjnNB3 |\ 14 | PASSWORD="changeme"\ 15 | TOKEN="duck-dns-token"\ 16 | HOST="duckdnshost"\ 17 | EMAIL="your@email"\ 18 | bash 19 | ``` 20 | NOTE! 
21 | - You need to specify Amazon Linux 64 bit 22 | - You need at least a small instance (2 GB of RAM); 4 GB is better for running all the services 23 | - You need at least 20 GB of disk space 24 | - Create a secondary block device on sdb: the /app folder with all your data will be mounted there, and thus preserved from termination 25 | - change the password (replace the string within the quotes with the password you want) 26 | - register a hostname at www.duckdns.org, get the token, and put them in the HOST and TOKEN variables 27 | - specify your email (used for the Let's Encrypt service) 28 | - if you have a backup of your Let's Encrypt data (for example in Dropbox), set LETGZ="...." to the URL of your backup 29 | - before you launch the instance, add a rule to open the HTTPS ports to the world 30 | 31 | # Spark / Hadoop / Zeppelin devkit 32 | 33 | Docker kit for Hadoop, Spark and Zeppelin 34 | 35 | Devenv with IntelliJ, SBT and Ammonite accessible via web 36 | 37 | ## Usage: 38 | 39 | First, get a docker machine and configure your docker to access it. 40 | Refer to the docker documentation to learn how to do it. 41 | 42 | The script `sh build.sh ` builds the environment. 43 | 44 | Start it with `docker-compose up -d`. 45 | 46 | That is all. 
47 | 48 | ## What is in the kit 49 | 50 | Access the shell with http://yourserver:3000 and the desktop with http://yourserver:6080 51 | 52 | In the kit there is the free edition of IntelliJ, and a terminal with sbt and ammonite 53 | 54 | Inside the kit you also have Zeppelin, internally accessible as 55 | 56 | http://zeppelin.loc:8080, Hadoop accessible as hdfs://hadoop.loc:8020 and the Spark Job Server on http://sparkjobserver.loc:8090 57 | 58 | (to fix) 59 | You can also ssh (without password) on hadoop.loc, spark.loc and zeppelin.loc 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /TODO.txt: -------------------------------------------------------------------------------- 1 | - fix loading of profile 2 | - fix ssh to other servers 3 | - separate dropbox volumes from the rest 4 | -------------------------------------------------------------------------------- /ami-init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #uncomment and set values 3 | #PASSWORD=app user password 4 | #TOKEN=duckdns token 5 | #HOST=duckdns host 6 | #EMAIL=your email here 7 | # change password here 8 | PASSWD=${PASSWORD:?specify the password in PASSWORD envar} 9 | DDTOKEN=${TOKEN:?duckdns token} 10 | DDHOST=${HOST:?duckdns host} 11 | LEEMAIL=${EMAIL:?letsencrypt email} 12 | # end changes 13 | # set app directory 14 | mkdir /app 15 | if test -b /dev/sdb 16 | then if file -sL /dev/sdb | grep "/dev/sdb: data" 17 | then mkfs.ext4 /dev/sdb 18 | fi 19 | echo "/dev/sdb /app ext4 defaults 0 0" >>/etc/fstab 20 | mount -a 21 | fi 22 | groupadd -g 1000 app 23 | useradd -g 1000 -u 1000 -d /app app 24 | cp /etc/skel/.* /app 25 | echo "app ALL=(ALL) NOPASSWD: ALL" >>/etc/sudoers 26 | echo "app:${PASSWD}" | chpasswd 27 | chown -Rvf app:app /app 28 | # install stuff 29 | yum -y update && yum -y install docker tmux git nginx gcc make libffi-devel openssl-devel python-devel 30 | curl -L 
https://github.com/docker/compose/releases/download/1.5.2/docker-compose-`uname -s`-`uname -m` > /usr/bin/docker-compose 31 | chmod +x /usr/bin/docker-compose 32 | pip install butterfly 33 | /sbin/chkconfig nginx on 34 | /sbin/chkconfig docker on 35 | gpasswd -a app docker 36 | service docker start 37 | service nginx stop 38 | # prepare ssh access 39 | sed -i -e 's/PasswordAuthentication.*/PasswordAuthentication yes/' /etc/ssh/sshd_config 40 | # assign duckdns hostname 41 | echo "curl http://www.duckdns.org/update?domains=${DDHOST}\&token=${DDTOKEN}\&ip=" >>/etc/rc.d/rc.local 42 | bash /etc/rc.d/rc.local 43 | # restore a backup of letsencrypt if provided 44 | if test -n "$LETGZ" 45 | then wget -O- "$LETGZ" | tar xzf - -C / 46 | fi 47 | # generate a certificate with letsencrypt 48 | if ! test -d /app/letsencrypt/live 49 | then 50 | mkdir -p /app/letsencrypt 51 | chmod 0755 /app /app/letsencrypt 52 | docker run --rm \ 53 | -p 80:80 -p 443:443 \ 54 | --name letsencrypt \ 55 | -v "/app/letsencrypt:/etc/letsencrypt" \ 56 | -v "/var/lib/letsencrypt:/var/lib/letsencrypt" \ 57 | -e "LETSENCRYPT_EMAIL=${LEEMAIL:?email}" \ 58 | -e "LETSENCRYPT_DOMAIN1=${DDHOST}.duckdns.org" \ 59 | quay.io/letsencrypt/letsencrypt:non-interactive auth \ 60 | --email ${LEEMAIL:?email} -d ${DDHOST}.duckdns.org --agree-tos 61 | fi 62 | # fallback to selfsigned if it did not work 63 | if ! 
test -e /app/letsencrypt/live/${DDHOST}.duckdns.org/fullchain.pem 64 | then 65 | mkdir -p /app/letsencrypt/live/${DDHOST}.duckdns.org 66 | printf "\\n\\n\\n\\n\\n\\n\\n" |\ 67 | openssl req -x509 -newkey rsa:2048 \ 68 | -keyout /app/letsencrypt/live/${DDHOST}.duckdns.org/privkey.pem \ 69 | -out /app/letsencrypt/live/${DDHOST}.duckdns.org/fullchain.pem \ 70 | -days 30000 -nodes 71 | fi 72 | chown -Rvf app:app /app 73 | # nginx configuration: unquoted heredoc, so ${DDHOST} expands now while nginx variables stay escaped as \$var; only TLS is enabled because SSLv2/SSLv3 are insecure 74 | cat <<EOF >/etc/nginx/conf.d/proxies.conf 75 | server { 76 | listen 443; 77 | server_name localhost; 78 | root html; 79 | ssl on; 80 | ssl_certificate /app/letsencrypt/live/${DDHOST}.duckdns.org/fullchain.pem; 81 | ssl_certificate_key /app/letsencrypt/live/${DDHOST}.duckdns.org/privkey.pem; 82 | ssl_session_timeout 5m; 83 | ssl_protocols TLSv1 TLSv1.1 TLSv1.2; 84 | ssl_ciphers HIGH:!aNULL:!MD5; 85 | ssl_prefer_server_ciphers on; 86 | 87 | location ~ ^/(vnc\.html|websockify.*|include/.*|images/.*)$ { 88 | proxy_pass http://127.0.0.1:6080; 89 | proxy_http_version 1.1; 90 | proxy_set_header Upgrade \$http_upgrade; 91 | proxy_set_header Connection "upgrade"; 92 | proxy_read_timeout 43200000; 93 | proxy_set_header X-Real-IP \$remote_addr; 94 | proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; 95 | proxy_set_header Host \$http_host; 96 | proxy_set_header X-NginX-Proxy true; 97 | } 98 | 99 | location ~ ^/(index\.html|static/.*|ws.*)?$ { 100 | proxy_pass http://127.0.0.1:3000; 101 | proxy_http_version 1.1; 102 | proxy_set_header Upgrade \$http_upgrade; 103 | proxy_set_header Connection "upgrade"; 104 | proxy_read_timeout 43200000; 105 | proxy_set_header X-Real-IP \$remote_addr; 106 | proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; 107 | proxy_set_header Host \$http_host; 108 | proxy_set_header X-NginX-Proxy true; 109 | } 110 | } 111 | EOF 112 | echo "/usr/local/bin/butterfly.server.py --unsecure --host=127.0.0.1 --port=3000 &" >>/etc/rc.d/rc.local 113 | service nginx start 114 | service sshd restart 115 | bash 
/etc/rc.d/rc.local 116 | -------------------------------------------------------------------------------- /base-hadoop/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bddk/base-ssh 2 | WORKDIR /app/ 3 | COPY *.txt /app/ 4 | RUN wget --progress=dot:giga $(cat myurl.txt 2>/dev/null || cat url.txt) 5 | USER app 6 | RUN sudo chown -Rvf app /app ; tar xzvf hadoop-2.6.2.tar.gz ; mv /app/hadoop-2.6.2 /app/hadoop ; rm *.tar.gz 7 | -------------------------------------------------------------------------------- /base-hadoop/url.txt: -------------------------------------------------------------------------------- 1 | http://www.us.apache.org/dist/hadoop/common/hadoop-2.6.2/hadoop-2.6.2.tar.gz 2 | -------------------------------------------------------------------------------- /base-java/.gitignore: -------------------------------------------------------------------------------- 1 | authorized_keys 2 | id_rsa 3 | id_rsa.pub 4 | -------------------------------------------------------------------------------- /base-java/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:6.7 2 | RUN yum -y update && yum clean all && yum install -y rsync openssh-server openssh-clients sudo net-tools telnet wget tar gzip which less 3 | COPY *.txt / 4 | RUN wget --progress=dot:giga --no-cookies --no-check-certificate --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com%2F; oraclelicense=accept-securebackup-cookie" "$(cat myurl.txt 2>/dev/null || cat url.txt)" ; rpm -ihv jdk* ; rm -f -v jdk* 5 | ENV JAVA_HOME /usr 6 | ENV PATH $PATH:$JAVA_HOME/bin 7 | RUN groupadd -g 1000 app && useradd -g 1000 -u 1000 -d /app app 8 | RUN echo "app ALL=(ALL) NOPASSWD: ALL" >>/etc/sudoers ; sed -i 's/requiretty/!requiretty/' /etc/sudoers 9 | -------------------------------------------------------------------------------- /base-java/url.txt: 
-------------------------------------------------------------------------------- 1 | http://download.oracle.com/otn-pub/java/jdk/7u79-b15/jdk-7u79-linux-x64.rpm 2 | -------------------------------------------------------------------------------- /base-novnc/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bddk/base-scala 2 | RUN apt-get -y install python-numpy 3 | USER app 4 | WORKDIR /app 5 | RUN git clone https://github.com/kanaka/noVNC noVNC 6 | RUN git clone https://github.com/kanaka/websockify noVNC/utils/websockify 7 | ENV ENV=/app/.shinit 8 | RUN echo 'bash' >/app/.shinit 9 | -------------------------------------------------------------------------------- /base-scala/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:jessie 2 | RUN sed -i -e 's/httpredir.debian.org/ftp.us.debian.org/' /etc/apt/sources.list 3 | RUN groupadd -g 1000 app && useradd -g 1000 -u 1000 -d /app app -m -s /bin/bash ; chown -Rv app:app /app/ 4 | RUN apt-get update && apt-get -y install git wget sudo jwm curl vim net-tools python make gcc g++ ssh iceweasel tightvncserver tmux xterm chromium chromium-l10n 5 | RUN echo "app ALL=(ALL) NOPASSWD: ALL" >>/etc/sudoers ; sed -i 's/requiretty/!requiretty/' /etc/sudoers 6 | RUN curl -sL https://deb.nodesource.com/setup_5.x | bash - ; apt-get -y install nodejs 7 | COPY *.txt / 8 | RUN wget --progress=dot:giga --no-cookies --no-check-certificate --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com%2F; oraclelicense=accept-securebackup-cookie" "$(cat myjdk.txt 2>/dev/null || cat jdk.txt)" 9 | RUN wget --progress=dot:giga $(cat myide.txt || cat ide.txt) 10 | RUN mkdir /usr/java ; tar xzvf jdk-*.tar.gz -C /usr/java ; tar xzvf ideaIC-*.tar.gz -C /usr/java ; rm *.tar.gz 11 | RUN ( echo export JAVA_HOME=$(ls -1d /usr/java/jdk*) ; echo export IDEA_HOME=$(ls -1d /usr/java/idea*) ; echo PATH='$JAVA_HOME/bin:$IDEA_HOME/bin:/usr/local/bin:$PATH' 
)>/etc/profile.d/java.sh 12 | COPY system.jwmrc /etc/jwm/system.jwmrc 13 | COPY sbt-launch.jar idea /usr/bin/ 14 | RUN curl -L -o /usr/bin/amm https://git.io/v0FGO ;\ 15 | curl -s https://raw.githubusercontent.com/paulp/sbt-extras/master/sbt > /usr/bin/sbt ;\ 16 | chmod 0755 /usr/bin/sbt /usr/bin/idea /usr/bin/amm 17 | -------------------------------------------------------------------------------- /base-scala/ide.txt: -------------------------------------------------------------------------------- 1 | https://download.jetbrains.com/idea/ideaIC-15.0.2.tar.gz 2 | -------------------------------------------------------------------------------- /base-scala/idea: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export JAVA_HOME=/usr/java/jdk1.8.0_65 3 | exec /usr/java/idea-IC-143.1184.17/bin/idea.sh 4 | -------------------------------------------------------------------------------- /base-scala/jdk.txt: -------------------------------------------------------------------------------- 1 | http://download.oracle.com/otn-pub/java/jdk/8u65-b17/jdk-8u65-linux-x64.tar.gz 2 | -------------------------------------------------------------------------------- /base-scala/sbt-launch.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sciabarra/BigDataDevKit/4e749a51ed334aab262f050bc58d620bce53658d/base-scala/sbt-launch.jar -------------------------------------------------------------------------------- /base-scala/system.jwmrc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | idea 9 | chromium 10 | iceweasel 11 | xterm -fn 10x20 12 | 13 | 43 | 44 | /etc/jwm/debian-menu 45 | 46 | 47 | 48 | xscreensaver-command -activate 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | Pidgin 57 | 58 | 59 | 60 | 61 | gkrellm 62 | 63 | 64 | 65 | 66 | 67 | xterm 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | root:1 77 | 78 | 
showdesktop 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | xload -nolabel -bg black -fg red -hl white 90 | 91 | 92 | xclock 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | -*-fixed-*-r-*-*-10-*-*-*-*-*-*-* 101 | 4 102 | 20 103 | 104 | 105 | white 106 | 109 | gray30:gray60 110 | black 111 | 1.0 112 | 113 | 114 | 115 | #aaaaaa 116 | #808488:#303438 117 | black 118 | 0.5:0.9:0.1 119 | 120 | 121 | 122 | 123 | 124 | -*-fixed-*-r-*-*-10-*-*-*-*-*-*-* 125 | black 126 | gray80:gray90 127 | black 128 | gray90:gray80 129 | 130 | 131 | 132 | 133 | -*-fixed-*-r-*-*-10-*-*-*-*-*-*-* 134 | gray90 135 | black 136 | 0.75 137 | 138 | 139 | 140 | black 141 | gray90 142 | #808488 143 | #70849d 144 | #2e3a67 145 | 146 | 147 | 148 | -*-fixed-*-r-*-*-10-*-*-*-*-*-*-* 149 | black 150 | gray90 151 | white 152 | #70849d:#2e3a67 153 | 0.85 154 | 155 | 156 | 157 | -*-fixed-*-r-*-*-10-*-*-*-*-*-*-* 158 | black 159 | black 160 | yellow 161 | 162 | 163 | 164 | /usr/share/icons/wm-icons/32x32-gant 165 | 166 | 167 | 168 | 169 | 170 | 174 | /export0/images/formulae.jpg 175 | 176 | 177 | 178 | 400 179 | 180 | 181 | 2 182 | 183 | 184 | sloppy 185 | 186 | 187 | border 188 | 189 | 190 | opaque 191 | 192 | 193 | opaque 194 | 195 | gkrellm 196 | 197 | 198 | up 199 | down 200 | right 201 | left 202 | left 203 | down 204 | up 205 | right 206 | select 207 | escape 208 | 209 | next 210 | close 211 | desktop# 212 | root:1 213 | window 214 | maximize 215 | rdesktop 216 | ldesktop 217 | udesktop 218 | ddesktop 219 | 220 | 221 | 222 | -------------------------------------------------------------------------------- /base-ssh/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bddk/base-java 2 | COPY config id_rsa id_rsa.pub /root/.ssh/ 3 | RUN cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys ;\ 4 | cp -Rvf /root/.ssh /app/.ssh ;\ 5 | chmod 0600 /root/.ssh/* /app/.ssh/* ;\ 6 | mkdir /app/data ;\ 7 | chown app -Rvf /app 8 | RUN /etc/init.d/sshd start ; 
/etc/init.d/sshd stop 9 | WORKDIR /app 10 | USER app 11 | CMD sudo /usr/sbin/sshd ; tail -f /dev/null 12 | EXPOSE 22 13 | -------------------------------------------------------------------------------- /base-ssh/config: -------------------------------------------------------------------------------- 1 | Host 0.0.0.0 2 | StrictHostKeyChecking no 3 | UserKnownHostsFile=/dev/null 4 | Host localhost 5 | StrictHostKeyChecking no 6 | UserKnownHostsFile=/dev/null 7 | Host *.loc 8 | StrictHostKeyChecking no 9 | UserKnownHostsFile=/dev/null 10 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if ! test $(id -u) == "1000" 3 | then echo "Please run this script as 'app' user" ; exit 1 4 | fi 5 | if ! test -e devenv/_password 6 | then echo "Please type password for devenv" 7 | read -r PASSWORD 8 | printf '%s\n' "$PASSWORD" >devenv/_password 9 | fi 10 | test -e base-ssh/id_rsa || ssh-keygen -t rsa -f base-ssh/id_rsa -N '' 11 | cp base-ssh/id_* devenv 12 | for i in \ 13 | base-java base-ssh base-hadoop hadoop \ 14 | zeppelin sparkjobserver \ 15 | base-scala base-novnc devenv 16 | do docker build -t bddk/$i $i || exit 1 17 | done 18 | if ! grep dropbox /etc/rc.d/rc.local >/dev/null 19 | then sudo wget --no-check-certificate -O/usr/bin/dropbox https://www.dropbox.com/download?dl=packages/dropbox.py 20 | sudo chmod +x /usr/bin/dropbox 21 | ( cd $HOME ; wget -O - "http://www.dropbox.com/download?plat=lnx.x86_64" | tar xz ) # subshell: keep the caller's cwd so the relative $(dirname $0) used below still resolves 22 | echo "su - app -c '/usr/bin/dropbox start'" | sudo tee -a /etc/rc.d/rc.local >/dev/null 23 | echo Please configure Dropbox to save your data running $HOME/.dropbox-dist/dropboxd 24 | fi 25 | if ! grep docker-compose /etc/rc.d/rc.local >/dev/null 26 | then echo "Build complete. 
You can start with 'docker-compose up' or reboot to start automatically" 27 | sudo cp "$(dirname $0)/docker-compose.yml" /etc/docker-compose.yml 28 | echo "/usr/bin/docker-compose -f /etc/docker-compose.yml up -d" | sudo tee -a /etc/rc.d/rc.local >/dev/null 29 | fi 30 | 31 | -------------------------------------------------------------------------------- /centos6-init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | PASSWD=${1:?specify the password of the app user} 3 | LEDOMAIN=${2:?fqdn of the host - fixed ip} 4 | LEEMAIL=${3:?your email for letsencrypt} 5 | # end changes 6 | # set app directory 7 | mkdir /app 8 | groupadd -g 1000 app 9 | useradd -g 1000 -u 1000 -d /app app 10 | cp /etc/skel/.* /app 11 | echo "app ALL=(ALL) NOPASSWD: ALL" >>/etc/sudoers 12 | echo "app:${PASSWD}" | chpasswd 13 | chown -Rvf app:app /app 14 | # install stuff 15 | rpm -Uvh https://mirror.webtatic.com/yum/el6/latest.rpm 16 | yum -y update && yum -y install docker-io tmux git nginx18 gcc make libffi-devel openssl-devel python-devel 17 | curl -L https://github.com/docker/compose/releases/download/1.5.2/docker-compose-`uname -s`-`uname -m` > /usr/bin/docker-compose 18 | chmod +x /usr/bin/docker-compose 19 | pip install butterfly 20 | /sbin/chkconfig nginx on 21 | /sbin/chkconfig docker on 22 | gpasswd -a app docker 23 | service docker start 24 | service nginx stop 25 | # generate a certificate with letsencrypt 26 | if ! 
test -d /app/letsencrypt/live 27 | then 28 | mkdir -p /app/letsencrypt 29 | chmod 0755 /app /app/letsencrypt 30 | docker run --rm \ 31 | -p 80:80 -p 443:443 \ 32 | --name letsencrypt \ 33 | -v "/app/letsencrypt:/etc/letsencrypt" \ 34 | -v "/var/lib/letsencrypt:/var/lib/letsencrypt" \ 35 | -e "LETSENCRYPT_EMAIL=${LEEMAIL:?email}" \ 36 | -e "LETSENCRYPT_DOMAIN1=${LEDOMAIN}" \ 37 | quay.io/letsencrypt/letsencrypt:non-interactive auth \ 38 | --email ${LEEMAIL:?email} -d ${LEDOMAIN} --agree-tos 39 | fi 40 | chown -Rvf app:app /app 41 | # nginx configuration: unquoted heredoc, so ${LEDOMAIN} expands now while nginx variables stay escaped as \$var; only TLS is enabled because SSLv2/SSLv3 are insecure 42 | cat <<EOF >/etc/nginx/conf.d/proxies.conf 43 | server { 44 | listen 443; 45 | server_name localhost; 46 | root html; 47 | ssl on; 48 | ssl_certificate /app/letsencrypt/live/${LEDOMAIN}/fullchain.pem; 49 | ssl_certificate_key /app/letsencrypt/live/${LEDOMAIN}/privkey.pem; 50 | ssl_session_timeout 5m; 51 | ssl_protocols TLSv1 TLSv1.1 TLSv1.2; 52 | ssl_ciphers HIGH:!aNULL:!MD5; 53 | ssl_prefer_server_ciphers on; 54 | 55 | location ~ ^/(vnc\.html|websockify.*|include/.*|images/.*)$ { 56 | proxy_pass http://127.0.0.1:6080; 57 | proxy_http_version 1.1; 58 | proxy_set_header Upgrade \$http_upgrade; 59 | proxy_set_header Connection "upgrade"; 60 | proxy_read_timeout 43200000; 61 | proxy_set_header X-Real-IP \$remote_addr; 62 | proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; 63 | proxy_set_header Host \$http_host; 64 | proxy_set_header X-NginX-Proxy true; 65 | } 66 | 67 | location ~ ^/(index\.html|static/.*|ws.*)?$ { 68 | proxy_pass http://127.0.0.1:3000; 69 | proxy_http_version 1.1; 70 | proxy_set_header Upgrade \$http_upgrade; 71 | proxy_set_header Connection "upgrade"; 72 | proxy_read_timeout 43200000; 73 | proxy_set_header X-Real-IP \$remote_addr; 74 | proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; 75 | proxy_set_header Host \$http_host; 76 | proxy_set_header X-NginX-Proxy true; 77 | } 78 | } 79 | EOF 80 | 81 | cat <<EOF >/etc/init.d/butterfly 82 | #!/bin/bash 83 | # chkconfig: - 50 50 84 | # 
description: Start/Stop butterfly 85 | case "\$1" in 86 | start) 87 | if test -e /var/run/butterfly.pid -a -e /proc/\$(cat /var/run/butterfly.pid) 88 | then echo Butterfly is running 89 | else /usr/local/bin/butterfly.server.py --unsecure --host=127.0.0.1 --port=3000 & 90 | echo \$! >/var/run/butterfly.pid 91 | fi 92 | ;; 93 | stop) 94 | if test -e /var/run/butterfly.pid -a -e /proc/\$(cat /var/run/butterfly.pid) 95 | then kill -9 \$(cat /var/run/butterfly.pid) ; rm /var/run/butterfly.pid 96 | else echo Butterfly is not running 97 | fi 98 | ;; 99 | status) 100 | if test -e /var/run/butterfly.pid -a -e /proc/\$(cat /var/run/butterfly.pid) 101 | then echo Butterfly is running 102 | else echo Butterfly is not running 103 | fi 104 | ;; 105 | *) 106 | echo "Usage: butterfly {start|stop|status}" 107 | exit 1 108 | ;; 109 | esac 110 | exit \$? 111 | EOF 112 | chmod +x /etc/init.d/butterfly 113 | /sbin/chkconfig --add butterfly && /sbin/chkconfig butterfly on 114 | service butterfly start 115 | service nginx start 116 | -------------------------------------------------------------------------------- /cleanup.sh: -------------------------------------------------------------------------------- 1 | docker ps -a -q | xargs docker rm 2 | docker images | grep '<none>' | awk '{print $3}' | xargs docker rmi -f # only untagged (dangling) images, not every image 3 | -------------------------------------------------------------------------------- /devenv/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bddk/base-novnc 2 | USER app 3 | WORKDIR /app 4 | COPY _password /app/_password 5 | COPY start-all.sh /app/bin/ 6 | RUN echo "app:$(cat /app/_password)" | sudo chpasswd ;\ 7 | mkdir .vnc ;\ 8 | printf '%s\n%s\nn\n' "$(cat /app/_password)" "$(cat /app/_password)" | vncpasswd -f >.vnc/passwd ;\ 9 | chmod 0600 .vnc/passwd ;\ 10 | rm /app/_password ;\ 11 | mkdir .ammonite .ssh ;\ 12 | echo 'source /etc/profile' >.bashrc 13 | COPY predef.scala .ammonite/ 14 | COPY id_* .ssh/ 15 | EXPOSE 6080 5900 16 | VOLUME /app/Dropbox 17 | CMD bash 
/app/bin/start-all.sh 18 | -------------------------------------------------------------------------------- /devenv/predef.scala: -------------------------------------------------------------------------------- 1 | load.ivy("com.lihaoyi" %% "ammonite-shell" % ammonite.Constants.version) 2 | @ 3 | val shellSession = ammonite.shell.ShellSession() 4 | import shellSession._ 5 | import ammonite.shell.PPrints._ 6 | import ammonite.ops._ 7 | import ammonite.shell._ 8 | ammonite.shell.Configure(repl, wd) 9 | -------------------------------------------------------------------------------- /devenv/start-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sudo /etc/init.d/ssh start 3 | export USER=app 4 | rm -f /tmp/.X0-lock /tmp/.X11-unix/X0 /app/.ssh/known_hosts 5 | vncserver -geometry 1280x720 :0 6 | cd /app/ ; ./noVNC/utils/launch.sh 7 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | hadoop: 2 | hostname: hadoop 3 | domainname: loc 4 | container_name: hadoop 5 | image: bddk/hadoop 6 | ports: 7 | - "8020" 8 | volumes: 9 | - "/app/Dropbox:/app/Dropbox" 10 | zeppelin: 11 | hostname: zeppelin 12 | domainname: loc 13 | container_name: zeppelin 14 | image: bddk/zeppelin 15 | links: 16 | - hadoop:hadoop.loc 17 | ports: 18 | - "8080" 19 | volumes: 20 | - "/app/Dropbox:/app/Dropbox" 21 | sparkjobserver: 22 | hostname: sparkjobserver 23 | domainname: loc 24 | container_name: sparkjobserver 25 | image: bddk/sparkjobserver 26 | links: 27 | - hadoop:hadoop.loc 28 | ports: 29 | - "8090:8090" 30 | volumes: 31 | - "/app/Dropbox:/app/Dropbox" 32 | command: "--executor-memory 256m" 33 | devenv: 34 | hostname: devenv 35 | domainname: loc 36 | container_name: devenv 37 | image: bddk/devenv 38 | links: 39 | - hadoop:hadoop.loc 40 | - zeppelin:zeppelin.loc 41 | - 
sparkjobserver:sparkjobserver.loc 42 | ports: 43 | - "6080:6080" 44 | - "5900:5900" 45 | volumes: 46 | - "/app/Dropbox:/app/Dropbox" 47 | -------------------------------------------------------------------------------- /dropbox.sh: -------------------------------------------------------------------------------- 1 | wget --no-check-certificate -O/usr/bin/dropbox https://www.dropbox.com/download?dl=packages/dropbox.py 2 | chmod +x /usr/bin/dropbox 3 | wget --no-check-certificate -O - "https://www.dropbox.com/download?plat=lnx.x86_64" | tar xzf - 4 | R 5 | -------------------------------------------------------------------------------- /hadoop/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bddk/base-hadoop 2 | WORKDIR /app/ 3 | USER app 4 | VOLUME /app/Dropbox 5 | COPY start-hadoop.sh /app/hadoop/ 6 | RUN echo "export JAVA_HOME=/usr" >>/app/hadoop/etc/hadoop/hadoop-env.sh 7 | COPY *.xml /app/hadoop/etc/hadoop/ 8 | RUN /app/hadoop/bin/hdfs namenode -format 9 | EXPOSE 50010 50020 50070 50075 50090 10 | EXPOSE 19888 11 | EXPOSE 8020 8030 8031 8032 8033 8040 8042 8088 12 | EXPOSE 49707 2122 13 | CMD /app/hadoop/start-hadoop.sh ; \ 14 | tail -f /dev/null 15 | -------------------------------------------------------------------------------- /hadoop/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | fs.defaultFS 6 | hdfs://hadoop.loc/ 7 | 8 | 9 | -------------------------------------------------------------------------------- /hadoop/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | dfs.replication 6 | 1 7 | 8 | 9 | -------------------------------------------------------------------------------- /hadoop/mapred-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | mapreduce.framework.name 7 | yarn 8 | 9 | 10 | 
-------------------------------------------------------------------------------- /hadoop/start-hadoop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "*** Starting SSHD" 3 | sudo /etc/init.d/sshd start 4 | echo "*** Starting DFS" 5 | /app/hadoop/sbin/start-dfs.sh 6 | echo "*** Starting YARN" 7 | /app/hadoop/sbin/start-yarn.sh 8 | echo "*** Starting HistoryServer" 9 | /app/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver 10 | -------------------------------------------------------------------------------- /hadoop/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | yarn.resourcemanager.hostname 5 | hadoop.loc 6 | 7 | 8 | yarn.nodemanager.aux-services 9 | mapreduce_shuffle 10 | 11 | 12 | -------------------------------------------------------------------------------- /sparkjobserver/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM velvia/spark-jobserver:0.5.2 2 | VOLUME /app/Dropbox 3 | -------------------------------------------------------------------------------- /zeppelin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bddk/base-java 2 | WORKDIR /app/ 3 | USER app 4 | RUN sudo yum install -y which 5 | COPY *.txt /app/ 6 | RUN wget --progress=dot:giga $(cat myurl.txt 2>/dev/null || cat url.txt) 7 | RUN sudo chown -Rvf app /app ; tar xzvf zeppelin-0.5.5-incubating-bin-all.tgz ; mv /app/zeppelin-0.5.5-incubating-bin-all /app/zeppelin ; rm *.tgz 8 | EXPOSE 8080 9 | VOLUME /app/Dropbox 10 | ENTRYPOINT /app/zeppelin/bin/zeppelin.sh 11 | -------------------------------------------------------------------------------- /zeppelin/url.txt: -------------------------------------------------------------------------------- 1 | http://www.us.apache.org/dist/incubator/zeppelin/0.5.5-incubating/zeppelin-0.5.5-incubating-bin-all.tgz 2 | 
--------------------------------------------------------------------------------