├── .gitignore
├── README.md
├── hadoop-base
│   ├── Dockerfile
│   └── config
│       ├── bashrc
│       ├── hadoop-env.sh
│       └── ssh_config
├── hadoop-dn
│   ├── Dockerfile
│   └── config
│       ├── core-site.xml
│       ├── hdfs-site.xml
│       ├── mapred-site.xml
│       ├── service
│       │   ├── serf
│       │   │   └── run
│       │   └── sshd
│       │       └── run
│       └── yarn-site.xml
├── hadoop-nn-dn
│   ├── Dockerfile
│   ├── config
│   │   ├── core-site.xml
│   │   ├── hdfs-site.xml
│   │   ├── mapred-site.xml
│   │   ├── masters
│   │   ├── service
│   │   │   ├── serf
│   │   │   │   └── run
│   │   │   └── sshd
│   │   │       └── run
│   │   ├── slaves
│   │   ├── start-hadoop.sh
│   │   └── yarn-site.xml
│   └── notes-etc.txt
└── start-cluster.sh

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Operating System Files

*.DS_Store
Thumbs.db

# Build Files #

bin
target
build/
.gradle

# Eclipse Project Files #

.classpath
.project
.settings

# IntelliJ IDEA Files #

*.iml
*.ipr
*.iws
*.idea

# Spring Bootstrap artifacts

dependency-reduced-pom.xml
README.html

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Hadoop (YARN) Multinode Cluster with Docker

The purpose of this project is to help developers quickly start a multinode Hadoop cluster with Docker containers on their laptop.

There are many better ways to solve this problem, and since I am not a DevOps guy, please feel free to suggest, advise, or contribute.

The first Docker image, **hadoop-base**, provisions Ubuntu with Java and Hadoop and does most of the legwork to set up the master and slave containers. **hadoop-base** extends **docker-serf**; see [DNSMASQ/SERF](https://github.com/alvinhenrick/docker-serf).

The second image, **hadoop-dn**, extends hadoop-base and installs the slave-specific Hadoop configuration. It also installs daemontools to run sshd, serf, and dnsmasq, so that when the container is started in daemon mode it keeps running instead of exiting immediately after startup.

The third image, **hadoop-nn-dn**, extends hadoop-base and installs the master-specific Hadoop configuration. It likewise installs daemontools to run sshd, serf, and dnsmasq.

The master container is started in foreground mode, so after a successful startup it drops you at a bash shell prompt.

The example used for the demonstration is a 2-node cluster. The master node is also configured as a slave node, so there are 2 slave nodes and 1 master node.
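For reference, the `start-cluster.sh` script at the root of this repository wires the two containers together roughly as shown below: the slave is started first so that its IP address can be passed to the master as `JOIN_IP`, which serf uses to join the two nodes into one cluster.

```bash
# Start the slave in the background; daemontools keeps sshd and serf running.
docker run -d -t --dns 127.0.0.1 -e NODE_TYPE=s -P --name slave1 -h slave1.mycorp.kom alvinhenrick/hadoop-dn

# Grab the slave's IP, then start the master in the foreground and join the serf cluster.
FIRST_IP=$(docker inspect --format="{{.NetworkSettings.IPAddress}}" slave1)
docker run -i -t --dns 127.0.0.1 -e NODE_TYPE=m -e JOIN_IP=$FIRST_IP -P --name master -h master.mycorp.kom alvinhenrick/hadoop-nn-dn
```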
Prerequisite
------------

1. Docker must be installed on the host computer / laptop.
2. `git clone https://github.com/alvinhenrick/docker-serf`
3. Change directory to where you cloned the repository above.
4. Run `docker build -t alvinhenrick/serf .`


Build Multinode Hadoop Cluster
------------------------------

`git clone https://github.com/alvinhenrick/hadoop-mutinode`

* Build the hadoop-base container
    * Change directory to hadoop-mutinode/hadoop-base.
    * Run `docker build -t alvinhenrick/hadoop-base .`
    * This will take a while to build; go grab a cup of coffee or whatever drink you like :)

* Build the hadoop-dn slave container (DataNode / NodeManager)
    * Change directory to hadoop-mutinode/hadoop-dn.
    * Run `docker build -t alvinhenrick/hadoop-dn .`

* Build the hadoop-nn-dn master container (NameNode / DataNode / ResourceManager / NodeManager)
    * Change directory to hadoop-mutinode/hadoop-nn-dn.
    * Run `docker build -t alvinhenrick/hadoop-nn-dn .`

**Start the containers.**

* Change directory to hadoop-mutinode.
* Run `./start-cluster.sh`
* At the bash shell prompt, run `/usr/local/hadoop/bin/start-hadoop.sh`
* After startup, type `su - hduser` at the prompt.
* Run `jps`
* Run `hdfs dfs -ls /` (a fuller smoke test is sketched below)
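Once the daemons are up, you can exercise the cluster with the bundled MapReduce example jar. This is a sketch based on notes-etc.txt in this repo; the `hdfs dfs -put` step that seeds the input directory with the Hadoop config files, and the final `cat` of the output, are illustrative assumptions rather than part of the original notes.

```bash
# Run as hduser; $HADOOP_INSTALL/bin and sbin are on the PATH via config/bashrc.
cd /usr/local/hadoop
hdfs dfs -mkdir -p /user/hduser/input
hdfs dfs -put etc/hadoop/*.xml /user/hduser/input    # illustrative sample input (assumption)
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.3.0.jar grep input output 'dfs[a-z.]+'
hdfs dfs -cat output/*                               # inspect the result
```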
--------------------------------------------------------------------------------
/hadoop-base/Dockerfile:
--------------------------------------------------------------------------------
FROM alvinhenrick/serf
MAINTAINER Alvin Henrick

# Update Ubuntu
RUN apt-get update && apt-get upgrade -y

RUN apt-get install -y maven llvm-gcc build-essential zlib1g-dev make cmake pkg-config libssl-dev automake autoconf

# Add oracle java 7 repository
RUN apt-get -y install software-properties-common
RUN add-apt-repository ppa:webupd8team/java
RUN apt-get -y update

# Accept the Oracle Java license
RUN echo "oracle-java7-installer shared/accepted-oracle-license-v1-1 boolean true" | debconf-set-selections

# Install Oracle Java
RUN apt-get -y install oracle-java7-installer

RUN update-alternatives --display java

ENV JAVA_HOME /usr/lib/jvm/java-7-oracle/
ENV PATH $PATH:$JAVA_HOME/bin

RUN addgroup hadoop
RUN useradd -d /home/hduser -m -s /bin/bash -G hadoop hduser

RUN apt-get install -y openssh-server
RUN mkdir /var/run/sshd
RUN su hduser -c "ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ''"
RUN su hduser -c "cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys"
ADD config/ssh_config ./ssh_config
RUN mv ./ssh_config /home/hduser/.ssh/config

RUN wget http://apache.mirror.anlx.net/hadoop/core/hadoop-2.3.0/hadoop-2.3.0-src.tar.gz
RUN tar -xvf hadoop-2.3.0-src.tar.gz

RUN wget https://protobuf.googlecode.com/files/protobuf-2.5.0.tar.gz
RUN tar -xvf protobuf-2.5.0.tar.gz
RUN cd protobuf-2.5.0 && ./configure
RUN cd protobuf-2.5.0 && make
RUN cd protobuf-2.5.0 && make check
RUN cd protobuf-2.5.0 && make install
RUN cd protobuf-2.5.0 && ldconfig
RUN protoc --version
#ENV MAVEN_OPTS -Xms512m -XX:MaxPermSize=256 -Xmx512m
RUN cd hadoop-2.3.0-src && mvn package -Pdist,native -DskipTests -Dtar

RUN cd hadoop-2.3.0-src && tar -xvf hadoop-dist/target/hadoop-2.3.0.tar.gz -C /usr/local/
RUN ln -s /usr/local/hadoop-2.3.0 /usr/local/hadoop
RUN chown -R hduser:hadoop /usr/local/hadoop-2.3.0

ADD config/bashrc /home/hduser/.bashrc

RUN rm -f /usr/local/hadoop/etc/hadoop/hadoop-env.sh
ADD config/hadoop-env.sh /usr/local/hadoop/etc/hadoop/hadoop-env.sh

EXPOSE 22

--------------------------------------------------------------------------------
/hadoop-base/config/bashrc:
--------------------------------------------------------------------------------
export JAVA_HOME=/usr/lib/jvm/java-7-oracle/
export HADOOP_INSTALL=/usr/local/hadoop
export HADOOP_HOME=$HADOOP_INSTALL
export PATH=$PATH:$HADOOP_INSTALL/bin
export PATH=$PATH:$HADOOP_INSTALL/sbin
export HADOOP_MAPRED_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_HOME=$HADOOP_INSTALL
export HADOOP_HDFS_HOME=$HADOOP_INSTALL
export HADOOP_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
export YARN_HOME=$HADOOP_INSTALL
export YARN_CONF_DIR=$HADOOP_INSTALL/etc/hadoop

--------------------------------------------------------------------------------
/hadoop-base/config/hadoop-env.sh:
--------------------------------------------------------------------------------
# Copyright 2011 The Apache Software Foundation
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME.  All others are
# optional.  When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use.
export JAVA_HOME=/usr/lib/jvm/java-7-oracle/

# The jsvc implementation to use. Jsvc is required to run secure datanodes.
#export JSVC_HOME=${JSVC_HOME}

export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

# Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options.  Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"

export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored.  $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
#       the user that will run the hadoop daemons.  Otherwise there is the
#       potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
--------------------------------------------------------------------------------
/hadoop-base/config/ssh_config:
--------------------------------------------------------------------------------
Host localhost
  StrictHostKeyChecking no

Host 0.0.0.0
  StrictHostKeyChecking no

Host *.mycorp.kom
  StrictHostKeyChecking no
  UserKnownHostsFile=/dev/null

--------------------------------------------------------------------------------
/hadoop-dn/Dockerfile:
--------------------------------------------------------------------------------
FROM alvinhenrick/hadoop-base

MAINTAINER Alvin Henrick

RUN apt-get install -y iputils-ping daemontools

ENV HADOOP_INSTALL /usr/local/hadoop

RUN mkdir $HADOOP_INSTALL/logs

RUN mkdir -p /etc/service/serf
RUN mkdir -p /etc/service/sshd

ADD config/service /etc/service

RUN chmod +x /etc/service/serf/run
RUN chmod +x /etc/service/sshd/run

ADD config/hdfs-site.xml $HADOOP_INSTALL/etc/hadoop/hdfs-site.xml
ADD config/core-site.xml $HADOOP_INSTALL/etc/hadoop/core-site.xml
ADD config/mapred-site.xml $HADOOP_INSTALL/etc/hadoop/mapred-site.xml
ADD config/yarn-site.xml $HADOOP_INSTALL/etc/hadoop/yarn-site.xml

RUN chown -R hduser:hadoop $HADOOP_INSTALL/logs

# SSH and SERF ports
EXPOSE 22 7373 7946

# HDFS ports
EXPOSE 9000 50010 50020 50070 50075 50090 50475

# YARN ports
EXPOSE 8030 8031 8032 8033 8040 8042 8060 8088 50060

#ENTRYPOINT ["/bin/bash", "/usr/local/hadoop/bin/start-hadoop.sh"]
ENTRYPOINT ["/usr/bin/svscan", "/etc/service/"]
--------------------------------------------------------------------------------
/hadoop-dn/config/core-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master.mycorp.kom:9000/</value>
    <description>NameNode URI</description>
  </property>
</configuration>

--------------------------------------------------------------------------------
/hadoop-dn/config/hdfs-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///home/hduser/hdfs/datanode</value>
    <description>DataNode directory</description>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///home/hduser/hdfs/namenode</value>
    <description>NameNode directory for namespace and transaction logs storage.</description>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.datanode.use.datanode.hostname</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
    <value>false</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/hadoop-dn/config/mapred-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/hadoop-dn/config/service/serf/run:
--------------------------------------------------------------------------------
#!/bin/bash

echo "Starting serf"
exec /etc/serf/start-serf-agent.sh

--------------------------------------------------------------------------------
/hadoop-dn/config/service/sshd/run:
--------------------------------------------------------------------------------
#!/bin/bash

echo "Starting sshd"
exec /usr/sbin/sshd -D

--------------------------------------------------------------------------------
/hadoop-dn/config/yarn-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>master.mycorp.kom:8025</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>master.mycorp.kom:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>master.mycorp.kom:8040</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/hadoop-nn-dn/Dockerfile:
--------------------------------------------------------------------------------
FROM alvinhenrick/hadoop-base

MAINTAINER Alvin Henrick

RUN apt-get install -y iputils-ping daemontools runit

ENV HADOOP_INSTALL /usr/local/hadoop

RUN mkdir -p /home/hduser/hdfs/namenode
RUN mkdir -p /home/hduser/hdfs/datanode

RUN mkdir $HADOOP_INSTALL/logs

RUN mkdir -p /etc/service/serf
RUN mkdir -p /etc/service/sshd

ADD config/service /etc/service

RUN chmod +x /etc/service/serf/run
RUN chmod +x /etc/service/sshd/run

ADD config/hdfs-site.xml $HADOOP_INSTALL/etc/hadoop/hdfs-site.xml
ADD config/core-site.xml $HADOOP_INSTALL/etc/hadoop/core-site.xml
ADD config/mapred-site.xml $HADOOP_INSTALL/etc/hadoop/mapred-site.xml
ADD config/yarn-site.xml $HADOOP_INSTALL/etc/hadoop/yarn-site.xml
ADD config/slaves $HADOOP_INSTALL/etc/hadoop/slaves

RUN chown -R hduser:hadoop /home/hduser/hdfs/namenode
RUN chown -R hduser:hadoop /home/hduser/hdfs/datanode
RUN chown -R hduser:hadoop $HADOOP_INSTALL/logs
RUN chmod 1777 /tmp

# Format namenode
RUN su hduser -c "/usr/local/hadoop/bin/hdfs namenode -format"

ADD config/start-hadoop.sh $HADOOP_INSTALL/bin/start-hadoop.sh
RUN chmod 755 $HADOOP_INSTALL/bin/start-hadoop.sh

# SSH and SERF ports
EXPOSE 22 7373 7946

# HDFS ports
EXPOSE 9000 50010 50020 50070 50075 50090 50475

# YARN ports
EXPOSE 8030 8031 8032 8033 8040 8042 8060 8088 50060

#ENTRYPOINT ["/bin/bash", "/usr/local/hadoop/bin/start-hadoop.sh"]
CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/hadoop-nn-dn/config/core-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master.mycorp.kom:9000/</value>
    <description>NameNode URI</description>
  </property>
</configuration>

--------------------------------------------------------------------------------
/hadoop-nn-dn/config/hdfs-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///home/hduser/hdfs/datanode</value>
    <description>DataNode directory</description>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///home/hduser/hdfs/namenode</value>
    <description>NameNode directory for namespace and transaction logs storage.</description>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.datanode.use.datanode.hostname</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
    <value>false</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/hadoop-nn-dn/config/mapred-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/hadoop-nn-dn/config/masters:
--------------------------------------------------------------------------------
master.mycorp.kom

--------------------------------------------------------------------------------
/hadoop-nn-dn/config/service/serf/run:
--------------------------------------------------------------------------------
#!/bin/bash

echo "Starting serf"
exec /etc/serf/start-serf-agent.sh

--------------------------------------------------------------------------------
/hadoop-nn-dn/config/service/sshd/run:
--------------------------------------------------------------------------------
#!/bin/bash

echo "Starting sshd"
exec /usr/sbin/sshd -D

--------------------------------------------------------------------------------
/hadoop-nn-dn/config/slaves:
--------------------------------------------------------------------------------
master.mycorp.kom
slave1.mycorp.kom

--------------------------------------------------------------------------------
/hadoop-nn-dn/config/start-hadoop.sh:
--------------------------------------------------------------------------------
#!/bin/bash

/usr/bin/svscan /etc/service/ &
sleep 4
if [ "$NODE_TYPE" = "m" ]; then
  su hduser -c "$HADOOP_INSTALL/sbin/start-dfs.sh"
  su hduser -c "$HADOOP_INSTALL/sbin/start-yarn.sh"
fi
#tail -f $HADOOP_INSTALL/logs/*
--------------------------------------------------------------------------------
/hadoop-nn-dn/config/yarn-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>master.mycorp.kom:8025</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>master.mycorp.kom:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>master.mycorp.kom:8040</value>
  </property>
  <property>
    <name>yarn.nodemanager.address</name>
    <value>master:8050</value>
  </property>
  <property>
    <name>yarn.nodemanager.localizer.address</name>
    <value>master:8060</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/hadoop-nn-dn/notes-etc.txt:
--------------------------------------------------------------------------------
#RUN sed -i -r 's/DEFAULT_FORWARD_POLICY=.*/DEFAULT_FORWARD_POLICY="ACCEPT"/' /etc/default/ufw
#RUN ufw reload

#docker run -i -t --dns 127.0.0.1 -P --name master -h master.mycorp.kom alvinhenrick/serf
#docker run -i -t -e JOIN_IP=172.17.0.64 --dns 127.0.0.1 -P --name slave1 -h slave1.mycorp.kom alvinhenrick/serf /bin/bash
#docker run -i -t --dns 127.0.0.1 -e NODE_TYPE=s -P --name slave1 -h slave1.mycorp.kom alvinhenrick/hadoop-dn /bin/bash
#docker run -i -t --dns 127.0.0.1 -e NODE_TYPE=m -e JOIN_IP=172.17.0.5 -P --name master -h master.mycorp.kom alvinhenrick/hadoop-nn-dn-1

#CD HADOOP_INSTALL
#hdfs dfs -mkdir -p /user/hduser/input
#bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.3.0.jar grep input output 'dfs[a-z.]+'

--------------------------------------------------------------------------------
/start-cluster.sh:
--------------------------------------------------------------------------------
#!/bin/bash

docker run -d -t --dns 127.0.0.1 -e NODE_TYPE=s -P --name slave1 -h slave1.mycorp.kom alvinhenrick/hadoop-dn
FIRST_IP=$(docker inspect --format="{{.NetworkSettings.IPAddress}}" slave1)
docker run -i -t --dns 127.0.0.1 -e NODE_TYPE=m -e JOIN_IP=$FIRST_IP -P --name master -h master.mycorp.kom alvinhenrick/hadoop-nn-dn

--------------------------------------------------------------------------------