├── Dockerfile ├── LICENSE ├── README.md ├── addhost.bash ├── docker_cdh.sh └── docker_files ├── cdh_installer.sh ├── cdh_startup_script.sh ├── cloudera.pref ├── hadoop-env.sh ├── hbase-site.xml ├── install_cloudera_repositories.sh ├── solr ├── spark-defaults.conf ├── spark-env.sh └── yarn-site.xml /Dockerfile: -------------------------------------------------------------------------------- 1 | # VERSION 0.1 2 | 3 | FROM ubuntu:14.04 4 | MAINTAINER Caio Quirino 5 | 6 | ADD docker_files/cdh_installer.sh /tmp/cdh_installer.sh 7 | ADD docker_files/install_cloudera_repositories.sh /tmp/install_cloudera_repositories.sh 8 | 9 | ADD docker_files/cdh_startup_script.sh /usr/bin/cdh_startup_script.sh 10 | ADD docker_files/cloudera.pref /etc/apt/preferences.d/cloudera.pref 11 | ADD docker_files/hadoop-env.sh /etc/profile.d/hadoop-env.sh 12 | ADD docker_files/spark-env.sh /etc/profile.d/spark-env.sh 13 | ADD docker_files/spark-defaults.conf /etc/spark/conf/spark-defaults.conf 14 | 15 | ENV TERM xterm 16 | 17 | #The solr config file needs to be added after installation or it fails. 18 | ADD docker_files/solr /etc/default/solr.docker 19 | 20 | RUN \ 21 | chmod +x /tmp/cdh_installer.sh && \ 22 | chmod +x /usr/bin/cdh_startup_script.sh && \ 23 | bash /tmp/cdh_installer.sh 24 | 25 | ADD docker_files/yarn-site.xml /etc/hadoop/conf/yarn-site.xml 26 | ADD docker_files/hbase-site.xml /etc/hbase/conf.dist/hbase-site.xml 27 | 28 | # private and public mapping 29 | EXPOSE 2181:2181 30 | EXPOSE 8020:8020 31 | EXPOSE 8888:8888 32 | EXPOSE 11000:11000 33 | EXPOSE 11443:11443 34 | EXPOSE 9090:9090 35 | EXPOSE 8088:8088 36 | EXPOSE 19888:19888 37 | EXPOSE 9092:9092 38 | EXPOSE 8983:8983 39 | EXPOSE 16000:16000 40 | EXPOSE 16001:16001 41 | EXPOSE 42222:22 42 | EXPOSE 8042:8042 43 | EXPOSE 60010:60010 44 | 45 | # For Spark 46 | EXPOSE 8080:8080 47 | EXPOSE 7077:7077 48 | 49 | # private only 50 | #EXPOSE 80 51 | 52 | # Define default command. 53 | #CMD ["/usr/bin/cdh_startup_script.sh && bash"] 54 | #CMD ["bash /usr/bin/cdh_startup_script.sh && bash"] 55 | CMD ["cdh_startup_script.sh"] 56 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 |
203 |
204 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | docker-cloudera-quickstart
2 | ==========================
3 |
4 | Docker Cloudera Quick Start Image
5 |
6 | Cloudera Hadoop 5 (CDH5)
7 |
8 |
9 | Now you can run the Cloudera Quick Start image without all the overhead of a virtual machine. Just use the docker-cloudera-quickstart image.
10 |
11 |
12 | Based on Ubuntu 14.04 (Trusty LTS)
13 |
14 | Works with Cloudera CDH 5
15 |
16 | *UPDATED FOR LATEST VERSION - CDH5.3.2
17 |
18 |
19 | *Under development.
20 |
21 |
22 | # Instructions
23 |
24 | ## Install
25 | To install docker-cloudera-quickstart from Docker Hub, simply use the following command:
26 | ```
27 | docker pull caioquirino/docker-cloudera-quickstart
28 | ```
29 | ## Use
30 | To start an instance in the BACKGROUND (as a daemon):
31 | ```
32 | docker run -i -t -d caioquirino/docker-cloudera-quickstart
33 | ```
34 | To start an instance in the FOREGROUND:
35 | ```
36 | docker run -i -t caioquirino/docker-cloudera-quickstart
37 | ```
38 | To open more terminal sessions in the running instance:
39 | ```
40 | docker ps
41 | docker exec -i -t CONTAINER_ID bash -l
42 | ```
43 |
44 | # Links
45 |
46 | [Pull the image on Docker Hub](https://registry.hub.docker.com/u/caioquirino/docker-cloudera-quickstart/)
47 |
48 | [GitHub page](https://github.com/caioquirino/docker-cloudera-quickstart)
49 |
50 |
51 | # Checklist of components:
52 |
53 | Apache Hadoop (Common, HDFS, MapReduce, YARN)
54 |
55 | Apache HBase
56 |
57 | Apache ZooKeeper
58 |
59 | Apache Oozie
60 |
61 | Apache Hive
62 |
63 | Hue (Apache licensed)
64 |
65 | Apache Flume
66 |
67 | Cloudera Impala (Apache licensed)
68 |
69 | Apache Sentry
70 |
71 | Apache Sqoop
72 |
73 | Cloudera Search (Apache licensed)
74 |
75 | Apache Spark
76 |
77 | [Cloudera Documentation](http://www.cloudera.com/content/cloudera/en/documentation/core/latest/)
78 |
79 | # Debugging In Docker
80 |
81 | ## Changing Debug Delay
82 | If a Samza job dies, its log files get deleted immediately. To allow them to hang around
83 | for ten minutes, add the following to /etc/hadoop/conf/yarn-site.xml:
84 |
85 | <property>
86 |   <description>seconds after app finishes before app's files and logs deleted</description>
87 |   <name>yarn.nodemanager.delete.debug-delay-sec</name>
88 |   <value>600</value>
89 | </property>
90 |
91 | Then you can find the logs, e.g.: `sudo -E ./yarnlogs.bash Matcher`
92 | # Please report any issues or feedback.
93 |
--------------------------------------------------------------------------------
/addhost.bash:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | echo "" >> /etc/hosts
4 | echo `docker-machine inspect default | grep "IPAddress" | awk '{gsub("[^0-9\.]", "", $2); print $2;}' | grep "\."` $1 >> /etc/hosts
--------------------------------------------------------------------------------
/docker_cdh.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -x
3 |
4 | docker rm -f cdh
5 | #sudo docker rmi cdh
6 |
7 | set -e
8 | docker build --rm -t cdh .
9 | #sudo docker build --no-cache=true --rm -t cdh .
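# Optional, untested sketch: the run command below gives the container the hostname
# "dockerhost". To resolve that name from the host machine as well, the bundled
# addhost.bash helper appends the docker-machine "default" VM IP to /etc/hosts
# (this assumes a docker-machine setup; skip or adapt it otherwise), e.g.:
#   sudo ./addhost.bash dockerhost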
10 | # Use dockerhost as the hostname 11 | docker run --name cdh -i -t -h dockerhost \ 12 | --expose=1024-65535 \ 13 | -p 2181:2181 \ 14 | -p 8020:8020 \ 15 | -p 8888:8888 \ 16 | -p 11000:11000 \ 17 | -p 11443:11443 \ 18 | -p 9090:9090 \ 19 | -p 8088:8088 \ 20 | -p 19888:19888 \ 21 | -p 9092:9092 \ 22 | -p 8983:8983 \ 23 | -p 16000:16000 \ 24 | -p 16001:16001 \ 25 | -p 42222:22 \ 26 | -p 8042:8042 \ 27 | -p 60010:60010 \ 28 | cdh 29 | 30 | 31 | -------------------------------------------------------------------------------- /docker_files/cdh_installer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export DEBIAN_FRONTEND=noninteractive 3 | 4 | function die() { 5 | echo $* 6 | exit 1 7 | } 8 | 9 | echo debconf shared/accepted-oracle-license-v1-1 select true | debconf-set-selections 10 | echo debconf shared/accepted-oracle-license-v1-1 seen true | debconf-set-selections 11 | 12 | # Add a local apt-cacher-ng proxy per https://docs.docker.com/examples/apt-cacher-ng/ 13 | # dockerhost=`/sbin/ip route|awk '/default/ { print $3 }'` 14 | # echo "Acquire::HTTP::Proxy \"http://${dockerhost}:3142\";" >> /etc/apt/apt.conf.d/01proxy 15 | # echo 'Acquire::HTTPS::Proxy "false";' >> /etc/apt/apt.conf.d/01proxy 16 | # echo 'Acquire::http::Proxy { download.oracle.com DIRECT; };' >> /etc/apt/apt.conf.d/01proxy 17 | 18 | apt-get update || die 19 | apt-get install -y --no-install-recommends software-properties-common || die 20 | #add-apt-repository ppa:webupd8team/java || die 21 | #apt-get update || die 22 | #apt-get install -y --force-yes --no-install-recommends oracle-java8-set-default || die 23 | 24 | apt-get install -y -q wget dialog curl sudo lsof vim axel telnet || die 25 | wget --no-cookies --progress=bar:force --no-check-certificate --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com%2F; oraclelicense=accept-securebackup-cookie" "http://download.oracle.com/otn-pub/java/jdk/8u40-b25/jdk-8u40-linux-x64.tar.gz" -P /tmp || die "Failed to download JDK" 26 | mkdir /usr/lib/jvm || die "Failed to create Java directory" 27 | 28 | tar -xzf /tmp/jdk-8u40-linux-x64.tar.gz -C /usr/lib/jvm || die "Failed to extract JDK" 29 | update-alternatives --install /usr/bin/java java /usr/lib/jvm/jdk1.8.0_40/bin/java 100 || die "Unable to install JDK" 30 | update-alternatives --install /usr/bin/javac javac /usr/lib/jvm/jdk1.8.0_40/bin/javac 100 || die "Unable to install JDK" 31 | update-alternatives --set java /usr/lib/jvm/jdk1.8.0_40/bin/java || die "Unable to set JDK version" 32 | update-alternatives --set javac /usr/lib/jvm/jdk1.8.0_40/bin/javac || die "Unable to set JDK version" 33 | 34 | 35 | export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_40 || die 36 | export PATH=$PATH:$JAVA_HOME/bin || die 37 | echo 'JAVA_HOME="/usr/lib/jvm/jdk1.8.0_40"' >> /etc/environment 38 | source /etc/environment 39 | 40 | if [ -f /tmp/install_cloudera_repositories.sh ]; then 41 | . 
/tmp/install_cloudera_repositories.sh || die 42 | fi 43 | 44 | apt-get update || die 45 | 46 | echo "Install Zookeeper" 47 | apt-get -y install zookeeper-server || die "Unable to install zookeeper-server" 48 | 49 | echo "Removing Max Client Connections Limit for Zookeeper" 50 | sed -i '/maxClientCnxns/s/=.*/=0/' /etc/zookeeper/conf/zoo.cfg 51 | 52 | echo "Start Zookeeper" 53 | service zookeeper-server init || die "Unable to init zookeeper-server" 54 | service zookeeper-server start || die "Unable to start zookeeper-server" 55 | service zookeeper-server stop || die "Unable to stop zookeeper-server" 56 | 57 | apt-get -y install hadoop-conf-pseudo impala impala-server impala-state-store impala-catalog impala-shell || die 58 | 59 | #CDH5-Installation-Guide Step 1 - Format the NameNode 60 | echo "Step 1 - Format the NameNode" 61 | sudo -E -u hdfs hdfs namenode -format || die 62 | 63 | #CDH5-Installation-Guide Step 2 - Start HDFS 64 | echo "Step 2 - Start HDFS" 65 | bash -c 'for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo -E service $x start ; done' || die 66 | 67 | #CDH5-Installation-Guide Step 3 - Create the directories needed for Hadoop processes 68 | echo "Step 3 - Create the directories needed for Hadoop processes" 69 | /usr/lib/hadoop/libexec/init-hdfs.sh || die 70 | 71 | #CDH5-Installation-Guide Step 4: Verify the HDFS File Structure 72 | echo "Step 4: Verify the HDFS File Structure" 73 | sudo -E -u hdfs hadoop fs -ls -R / || die 74 | 75 | #CDH5-Installation-Guide Step 5 - Start Yarn 76 | echo "Step 5 - Start Yarn" 77 | service hadoop-yarn-resourcemanager start || die 78 | service hadoop-yarn-nodemanager start || die 79 | service hadoop-mapreduce-historyserver start || die 80 | 81 | #CDH5-Installation-Guide Step 6 - Create User Directories 82 | echo "Step 6 - Create User Directories" 83 | sudo -E -u hdfs hdfs dfs -mkdir -p /user/hadoop || die 84 | sudo -E -u hdfs hdfs dfs -chown hadoop /user/hadoop || die 85 | hadoop fs -mkdir -p /tmp || die 86 | sudo -E -u hive hdfs dfs -mkdir -p /user/hive/warehouse || die 87 | hadoop fs -chmod g+w /tmp || die 88 | sudo -E -u hive hdfs dfs -chmod g+w /user/hive/warehouse || die 89 | sudo -E -u hdfs hdfs dfs -mkdir -p /hbase || die 90 | sudo -E -u hdfs hdfs dfs -chown hbase /hbase || die 91 | 92 | # For samza logs 93 | mkdir /var/log/samza 94 | chown yarn:yarn /var/log/samza 95 | 96 | echo "Add an HDH user and include as sudoer" 97 | useradd hdh 98 | echo "hdh:hdh" | chpasswd 99 | usermod -a -G sudo hdh 100 | mkdir -p /home/hdh 101 | chown hdh:hdh /home/hdh 102 | 103 | echo "Add folder for HDH" 104 | sudo -E -u hdfs hdfs dfs -mkdir -p /HDH || die "Unable to make HDFS directory /HDH" 105 | sudo -E -u hdfs hdfs dfs -chown hdh /HDH || die "Unable to change owner of HDFS directory /HDH" 106 | 107 | #CDH5-Installation-Guide Install HBase 108 | echo "Install Cloudera Components" 109 | #apt-get -y install hadoop-kms hadoop-kms-server hive hbase hbase-thrift hbase-master pig hue oozie oozie-client spark-core spark-master spark-worker spark-history-server spark-python || die 110 | apt-get -y install hadoop-kms hadoop-kms-server hive hbase hbase-thrift hbase-master hbase-regionserver pig hue oozie oozie-client || die 111 | 112 | #Use standalone Zookeeper 113 | echo "export HBASE_MANAGES_ZK=true" >> /etc/hbase/conf.dist/hbase-env.sh 114 | 115 | #Configure Oozie 116 | update-alternatives --set oozie-tomcat-conf /etc/oozie/tomcat-conf.http || die 117 | sudo -E -u hdfs hadoop fs -chown oozie:oozie /user/oozie || die 118 | sudo oozie-setup sharelib create -fs 
hdfs://localhost -locallib /usr/lib/oozie/oozie-sharelib-yarn || die 119 | #Initiate Oozie Database 120 | oozie-setup db create -run || die 121 | 122 | 123 | #Create HUE Secret Key 124 | sed -i 's/secret_key=/secret_key=_S@s+D=h;B,s$C%k#H!dMjPmEsSaJR/g' /etc/hue/conf/hue.ini || die 125 | 126 | apt-get -y install solr-server hue-search 127 | sudo -E -u hdfs hadoop fs -mkdir -p /solr 128 | sudo -E -u hdfs hadoop fs -chown solr /solr 129 | mv /etc/default/solr.docker /etc/default/solr 130 | service hbase-master start || die 131 | solrctl init 132 | 133 | # Install Kafka per https://www.digitalocean.com/community/tutorials/how-to-install-apache-kafka-on-ubuntu-14-04 134 | echo "Install Kafka" 135 | useradd kafka -m || die 136 | echo "kafka:kafka" | chpasswd || die # Note, this is not secure 137 | adduser kafka sudo || die 138 | 139 | KAFKA_HOME=/home/kafka 140 | sudo -E -u kafka mkdir -p ${KAFKA_HOME}/Downloads || die 141 | sudo -E -u kafka wget "http://mirror.cc.columbia.edu/pub/software/apache/kafka/0.8.2.1/kafka_2.10-0.8.2.1.tgz" -O ${KAFKA_HOME}/Downloads/kafka.tgz || die 142 | sudo -E -u kafka mkdir -p ${KAFKA_HOME}/kafka || die 143 | sudo -E -u kafka tar --strip 1 -xvzf ${KAFKA_HOME}/Downloads/kafka.tgz -C ${KAFKA_HOME}/kafka || die 144 | sudo -E -u kafka echo "auto.create.topics.enable = true" >> ${KAFKA_HOME}/kafka/config/server.properties || die 145 | sudo -E -u kafka echo "delete.topic.enable = true" >> ${KAFKA_HOME}/kafka/config/server.properties || die 146 | 147 | echo "Install Spark" 148 | apt-get -y install spark-core spark-master spark-worker spark-history-server spark-python || die 149 | 150 | -------------------------------------------------------------------------------- /docker_files/cdh_startup_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_40 4 | 5 | echo "Start Zookeeper" 6 | service zookeeper-server start 7 | 8 | echo "Start HDFS" 9 | bash -c 'for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo service $x start ; done' 10 | 11 | echo "Start Yarn" 12 | service hadoop-yarn-resourcemanager start 13 | service hadoop-yarn-nodemanager start 14 | chmod -R 777 /var/log/hadoop-mapreduce 15 | service hadoop-mapreduce-historyserver start 16 | 17 | echo "Start Oozie" 18 | export OOZIE_URL=http://localhost:11000/oozie 19 | service oozie start 20 | 21 | echo "Start Spark" 22 | service spark-master start 23 | service spark-worker start 24 | 25 | echo "Start Kafka" 26 | KAFKA_HOME=/home/kafka 27 | sudo -u kafka nohup ${KAFKA_HOME}/kafka/bin/kafka-server-start.sh ${KAFKA_HOME}/kafka/config/server.properties > ${KAFKA_HOME}/kafka/kafka.log 2>&1 & 28 | 29 | echo "Start Components" 30 | service hue start 31 | 32 | service solr-server start 33 | 34 | nohup hiveserver2 & 35 | 36 | bash -c 'for x in `cd /etc/init.d ; ls impala-*` ; do sudo service $x start ; done' 37 | 38 | service hbase-master start 39 | #service hbase-regionserver start 40 | service hbase-thrift start 41 | 42 | echo "Start KMS" 43 | service hadoop-kms-server start 44 | 45 | echo "Press Ctrl+P and Ctrl+Q to background this process." 46 | echo 'Use exec command to open a new bash instance for this instance (Eg. "docker exec -i -t CONTAINER_ID bash"). Container ID can be obtained using "docker ps" command.' 47 | echo "Start Terminal" 48 | bash 49 | echo "Press Ctrl+C to stop instance." 
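# When the interactive shell above exits (or the container is started without a TTY),
# fall through to an endless sleep so this startup script, the container's main
# process, never returns and Docker keeps the container running.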
50 | sleep infinity
51 |
--------------------------------------------------------------------------------
/docker_files/cloudera.pref:
--------------------------------------------------------------------------------
1 | Package: *
2 | Pin: release o=Cloudera, l=Cloudera
3 | Pin-Priority: 501
--------------------------------------------------------------------------------
/docker_files/hadoop-env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | [ -d /etc/hadoop/conf ] && export HADOOP_CONF_DIR=/etc/hadoop/conf
4 |
5 |
--------------------------------------------------------------------------------
/docker_files/hbase-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3 | <configuration>
4 |   <property>
5 |     <name>hbase.cluster.distributed</name>
6 |     <value>false</value>
7 |   </property>
8 |   <property>
9 |     <name>hbase.zookeeper.property.clientPort</name>
10 |     <value>2182</value>
11 |   </property>
12 |   <property>
13 |     <name>hbase.zookeeper.property.maxClientCnxns</name>
14 |     <value>300</value>
15 |   </property>
16 |   <property>
17 |     <name>hbase.zookeeper.session.timeout</name>
18 |     <value>1800000</value>
19 |   </property>
20 |   <property>
21 |     <name>hbase.coprocessor.abortonerror</name>
22 |     <value>false</value>
23 |   </property>
24 | </configuration>
--------------------------------------------------------------------------------
/docker_files/install_cloudera_repositories.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | echo 'Installing Cloudera repositories...'
4 |
5 | curl -s http://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh/archive.key | apt-key add -
6 |
7 | echo 'deb [arch=amd64] http://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh trusty-cdh5 contrib' > /etc/apt/sources.list.d/cloudera.list
8 | echo 'deb-src http://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh trusty-cdh5 contrib' >> /etc/apt/sources.list.d/cloudera.list
9 |
10 | echo 'Cloudera repositories Installed.'
11 |
--------------------------------------------------------------------------------
/docker_files/solr:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
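# Quickstart defaults: the Solr index is kept in HDFS under /solr (created and
# chown'd to the solr user by cdh_installer.sh) and SolrCloud state lives under
# the /solr chroot of the standalone ZooKeeper on localhost:2181.
# A first test collection could then be created with solrctl, e.g. (sketch only,
# the collection name "demo" is a placeholder):
#   solrctl instancedir --generate /tmp/demo
#   solrctl instancedir --create demo /tmp/demo
#   solrctl collection --create demo -s 1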
15 | SOLR_PORT=8983
16 | SOLR_ADMIN_PORT=8984
17 | SOLR_LOG=/var/log/solr
18 | SOLR_ZK_ENSEMBLE=localhost:2181/solr
19 | SOLR_HDFS_HOME=hdfs://localhost:8020/solr
20 | SOLR_HDFS_CONFIG=/etc/hadoop/conf
21 | # SOLR_KERBEROS_ENABLED=true
22 | # SOLR_KERBEROS_KEYTAB=/etc/solr/conf/solr.keytab
23 | # SOLR_KERBEROS_PRINCIPAL=solr/localhost@LOCALHOST
24 | SOLR_AUTHENTICATION_TYPE=simple
25 | SOLR_AUTHENTICATION_SIMPLE_ALLOW_ANON=true
26 | # SOLR_AUTHENTICATION_KERBEROS_KEYTAB=/etc/solr/conf/solr.keytab
27 | # SOLR_AUTHENTICATION_KERBEROS_PRINCIPAL=HTTP/localhost@LOCALHOST
28 | # SOLR_AUTHENTICATION_KERBEROS_NAME_RULES=DEFAULT
29 | # SOLR_AUTHENTICATION_JAAS_CONF=/etc/solr/conf/jaas.conf
30 | SOLR_SECURITY_ALLOWED_PROXYUSERS=hue
31 | SOLR_SECURITY_PROXYUSER_hue_HOSTS=*
32 | SOLR_SECURITY_PROXYUSER_hue_GROUPS=*
33 | # SOLR_AUTHORIZATION_SENTRY_SITE=/etc/solr/conf/sentry-site.xml
34 | # SOLR_AUTHORIZATION_SUPERUSER=solr
35 | SOLRD_WATCHDOG_TIMEOUT=30
36 |
37 |
--------------------------------------------------------------------------------
/docker_files/spark-defaults.conf:
--------------------------------------------------------------------------------
1 | # Default system properties included when running spark-submit.
2 | # This is useful for setting default environmental settings.
3 |
4 | # Example:
5 | # spark.master                     spark://master:7077
6 | # spark.eventLog.enabled           true
7 | # spark.eventLog.dir               hdfs://namenode:8021/directory
8 | # spark.serializer                 org.apache.spark.serializer.KryoSerializer
9 | # spark.driver.memory              5g
10 | # spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
11 | spark.eventLog.dir=/user/spark/applicationHistory
12 | spark.eventLog.enabled=true
--------------------------------------------------------------------------------
/docker_files/spark-env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | STANDALONE_SPARK_MASTER_HOST=`hostname`
3 | SPARK_MASTER_PORT=7077
4 |
5 |
--------------------------------------------------------------------------------
/docker_files/yarn-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3 | <configuration>
4 |   <property>
5 |     <name>yarn.nodemanager.aux-services</name>
6 |     <value>mapreduce_shuffle</value>
7 |   </property>
8 |   <property>
9 |     <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
10 |     <value>org.apache.hadoop.mapred.ShuffleHandler</value>
11 |   </property>
12 |   <property>
13 |     <name>yarn.log-aggregation-enable</name>
14 |     <value>true</value>
15 |   </property>
16 |   <property>
17 |     <name>yarn.dispatcher.exit-on-error</name>
18 |     <value>true</value>
19 |   </property>
20 |   <property>
21 |     <description>seconds after app finishes before app's files and logs deleted</description>
22 |     <name>yarn.nodemanager.delete.debug-delay-sec</name>
23 |     <value>600</value>
24 |   </property>
25 |   <property>
26 |     <description>List of directories to store localized files in.</description>
27 |     <name>yarn.nodemanager.local-dirs</name>
28 |     <value>/var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
29 |   </property>
30 |   <property>
31 |     <description>Where to store container logs.</description>
32 |     <name>yarn.nodemanager.log-dirs</name>
33 |     <value>/var/log/hadoop-yarn/containers</value>
34 |   </property>
35 |   <property>
36 |     <description>Where to aggregate logs to.</description>
37 |     <name>yarn.nodemanager.remote-app-log-dir</name>
38 |     <value>/var/log/hadoop-yarn/apps</value>
39 |   </property>
40 |   <property>
41 |     <description>Classpath for typical applications.</description>
42 |     <name>yarn.application.classpath</name>
43 |     <value>
44 |       $HADOOP_CONF_DIR,
45 |       $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
46 |       $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
47 |       $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
48 |       $HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*
49 |     </value>
50 |   </property>
51 |   <property>
52 |     <name>yarn.resourcemanager.scheduler.class</name>
53 |     <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler</value>
54 |   </property>
55 |   <property>
56 |     <name>yarn.nodemanager.vmem-check-enabled</name>
57 |     <value>false</value>
58 |   </property>
59 |   <property>
60 |     <name>yarn.scheduler.minimum-allocation-mb</name>
61 |     <value>384</value>
62 |     <description>Minimum limit of memory to allocate to each container request at the Resource Manager.</description>
63 |   </property>
64 |   <property>
65 |     <name>yarn.scheduler.maximum-allocation-mb</name>
66 |     <value>512</value>
67 |     <description>Maximum limit of memory to allocate to each container request at the Resource Manager.</description>
68 |   </property>
69 |   <property>
70 |     <name>yarn.nodemanager.resource.cpu-vcores</name>
71 |     <value>16</value>
72 |   </property>
73 |   <property>
74 |     <name>yarn.nodemanager.resource.memory-mb</name>
75 |     <value>8192</value>
76 |   </property>
77 |   <property>
78 |     <name>yarn.nodemanager.pmem-check-enabled</name>
79 |     <value>false</value>
80 |   </property>
81 | </configuration>
82 |
83 |
84 |
--------------------------------------------------------------------------------
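Once the container is up, a quick smoke test (a sketch, not part of the repository: the examples jar path is the usual CDH package location and the application id is a placeholder) is to run a sample MapReduce job and then pull its aggregated logs, which is what the log-aggregation and debug-delay settings in yarn-site.xml above are there for:
```
# inside the container: docker exec -i -t CONTAINER_ID bash -l
sudo -E -u hdfs hadoop fs -ls /                      # HDFS is formatted and running
sudo -E -u hdfs hadoop jar \
  /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 2 10
yarn application -list -appStates FINISHED           # note the application id
sudo -E -u hdfs yarn logs -applicationId application_XXXXXXXXXXXXX_0001
```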