├── .gitignore
├── README.md
├── base
│   ├── README.md
│   ├── dockerfile
│   ├── pip.conf
│   ├── requirements.txt
│   └── sources.list
├── data
│   └── README.md
├── flume
│   ├── README.md
│   ├── conf
│   │   ├── flume-avro.conf
│   │   ├── flume-exec.conf
│   │   └── flume-netcat.conf
│   └── dockerfile
├── hadoop
│   ├── README.md
│   ├── conf
│   │   ├── core-site.xml
│   │   ├── hdfs-site.xml
│   │   ├── mapred-site.xml
│   │   ├── masters
│   │   ├── slaves
│   │   └── yarn-site.xml
│   ├── docker-compose.yml
│   ├── dockerfile
│   └── hadoop-entrypoint.sh
├── hbase
│   ├── README.md
│   ├── conf
│   │   ├── hbase-site.xml
│   │   └── regionservers
│   ├── docker-compose.yml
│   └── dockerfile
├── hive
│   ├── README.md
│   ├── conf
│   │   └── hive-site.xml
│   └── dockerfile
├── kafka
│   ├── README.md
│   ├── conf
│   │   └── server.propertie
│   └── dockerfile
├── mysql
│   ├── README.md
│   └── dockerfile
├── redis
│   ├── README.md
│   └── redis.dockerfile
├── spark
│   ├── README.md
│   ├── compose.sh
│   ├── conf
│   │   ├── slaves
│   │   └── spark-env.sh
│   ├── docker-compose.yml
│   └── dockerfile
├── storm
│   ├── README.md
│   ├── conf
│   │   └── storm.yaml
│   └── dockerfile
└── zookeeper
    ├── README.md
    ├── conf
    │   └── zoo.cfg
    ├── docker-compose.yml
    └── dockerfile
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.vscode/
**/.DS_Store
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# YangDocker

A self-built hadoop + spark + kafka + zookeeper + storm + hbase + hive + flume + mysql + redis cluster: one master node and two slaves.

## Usage

Clone the project, enter the directory, and run:

```bash
bash ./image.sh
```

Then wait while all of the related images are built.

## Operating system

ubuntu:16.04

## Packages

Hadoop 2.6: [download](http://archive.apache.org/dist/hadoop/common/hadoop-2.6.0/hadoop-2.6.0.tar.gz)

Hive 1.2.2: [download](http://mirror.bit.edu.cn/apache/hive/hive-1.2.2/apache-hive-1.2.2-bin.tar.gz)

kafka 2.11: [download](http://mirrors.tuna.tsinghua.edu.cn/apache/kafka/0.10.2.1/kafka_2.11-0.10.2.1.tgz)

JDK 1.8: [download](http://download.oracle.com/otn-pub/java/jdk/8u172-b11/a58eab1ec242421181065cdc37240b08/jdk-8u172-linux-x64.tar.gz)

scala 2.11.4: [download](https://downloads.lightbend.com/scala/2.11.4/scala-2.11.4.tgz)

spark 1.6.0: [download](https://archive.apache.org/dist/spark/spark-1.6.0/spark-1.6.0-bin-hadoop2.6.tgz)

zookeeper 3.4.5: [download](http://archive.apache.org/dist/zookeeper/zookeeper-3.4.5/zookeeper-3.4.5.tar.gz)

hbase 0.98.6: [download](http://archive.apache.org/dist/hbase/hbase-0.98.6/hbase-0.98.6-hadoop2-bin.tar.gz)

flume 1.6.0: [download](http://archive.apache.org/dist/flume/1.6.0/apache-flume-1.6.0-bin.tar.gz)

MySQL 5.1.41: [download](http://dev.mysql.com/get/Downloads/MySQL-5.1/mysql-5.1.41.tar.gz)

redis 2.8.3: [download](http://download.redis.io/releases/redis-2.8.3.tar.gz)

storm 0.9.3: [download](https://archive.apache.org/dist/storm/apache-storm-0.9.3/apache-storm-0.9.3.tar.gz)

## Note

All environments and packages are downloaded at build time, so the build is slow and the resulting images are large.
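The `image.sh` script itself is not included in this listing. Judging by the `FROM` lines in the dockerfiles, the images have to be built in dependency order; the sketch below is a hedged reconstruction, not the actual script (the `cluster-storm`, `cluster-flume` and `cluster-redis` names are assumptions, the others are the names referenced by the dockerfiles and compose files):

```bash
# dependency chains: cluster-base -> cluster-hadoop -> cluster-spark -> cluster-hbase,
#                    cluster-hadoop -> cluster-kafka, cluster-base -> cluster-mysql -> cluster-hive
docker build -t cluster-base      -f ./base/dockerfile      ./base
docker build -t cluster-hadoop    -f ./hadoop/dockerfile    ./hadoop
docker build -t cluster-spark     -f ./spark/dockerfile     ./spark
docker build -t cluster-hbase     -f ./hbase/dockerfile     ./hbase
docker build -t cluster-kafka     -f ./kafka/dockerfile     ./kafka
docker build -t cluster-mysql     -f ./mysql/dockerfile     ./mysql
docker build -t cluster-hive      -f ./hive/dockerfile      ./hive
docker build -t cluster-zookeeper -f ./zookeeper/dockerfile ./zookeeper
# these three build from plain ubuntu:16.04; image names here are placeholders
docker build -t cluster-storm     -f ./storm/dockerfile     ./storm
docker build -t cluster-flume     -f ./flume/dockerfile     ./flume
docker build -t cluster-redis     -f ./redis/redis.dockerfile ./redis
```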
## Others

Issues are welcome.

## References

1. [利用Docker搭建大数据处理集群](https://blog.csdn.net/iigeoxiaoyang/article/details/53020066)
--------------------------------------------------------------------------------
/base/README.md:
--------------------------------------------------------------------------------
# base

## apt source

[sources.list](./sources.list): use the Aliyun mirror to speed up downloads.

- Installs java, scala and python3.
- Sets up passwordless SSH login between the nodes.
- zsh with the ys theme as the login shell.

## pip source

[pip.conf](./pip.conf): use the Tsinghua mirror to speed up Python package downloads.

## Python packages

[requirements.txt](./requirements.txt)

## Build

```shell
docker build -t cluster-base ./
```

## Start the cluster

```bash
docker network create --subnet=172.20.0.0/16 cluster-network
docker run -itd --name master --hostname=master --net cluster-network --ip 172.20.0.2 ubuntu:16.04 bash
docker run -itd --name slave1 --hostname=slave1 --net cluster-network --ip 172.20.0.3 ubuntu:16.04 bash
docker run -itd --name slave2 --hostname=slave2 --net cluster-network --ip 172.20.0.4 ubuntu:16.04 bash
```
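The docker-compose files elsewhere in this project add the hostname mappings via `extra_hosts`; when containers are started by hand as above they have to be added manually, and the passwordless SSH baked into the base image can then be checked. A rough sketch, assuming the three containers were started from the `cluster-base` image built above rather than plain `ubuntu:16.04`:

```bash
# append hostname mappings on every node
for node in master slave1 slave2; do
  docker exec "$node" bash -c 'cat >> /etc/hosts <<EOF
172.20.0.2 master
172.20.0.3 slave1
172.20.0.4 slave2
EOF'
done

# all containers share the key pair generated in the base image, so this should not prompt
docker exec master ssh -o BatchMode=yes slave1 hostname
docker exec master ssh -o BatchMode=yes slave2 hostname
```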
--------------------------------------------------------------------------------
/base/dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:16.04

# Switch to a faster apt mirror.
ADD ./sources.list /etc/apt/

# Switch to a faster pip mirror.
RUN mkdir /root/.pip/
ADD ./pip.conf /root/.pip/

# install apt & pip3 packages.
RUN apt-get update && apt-get upgrade -y \
    && apt-get install -y apt-utils wget git zsh openssh-server openssh-client python3 python3-pip \
    # install java, env path: /usr/bin/java
    && apt-get install -y software-properties-common python-software-properties \
    && add-apt-repository ppa:webupd8team/java && apt-get update \
    && echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | debconf-set-selections \
    && apt-get install -y oracle-java8-installer && update-java-alternatives -s java-8-oracle \
    # install scala, env path: /usr/bin/scala
    && apt-get install -y scala \
    # install python3 packages
    && pip3 install ipython -i https://pypi.douban.com/simple/ \
    && rm -rf /var/lib/apt/lists/* && apt-get clean \
    && rm -rf ~/.cache/pip/

# oh-my-zsh with the ys theme, and make zsh the default shell instead of bash.
RUN git clone https://github.com/robbyrussell/oh-my-zsh.git ~/.oh-my-zsh \
    && cp ~/.oh-my-zsh/templates/zshrc.zsh-template ~/.zshrc \
    && cp ~/.zshrc ~/.zshrc.orig \
    && chsh -s /bin/zsh \
    && sed -ri 's/^ZSH_THEME="robbyrussell"/ZSH_THEME="ys"/' /root/.zshrc

RUN mkdir /var/run/sshd
RUN echo 'root:root' | chpasswd

# ssh.
RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -i 's/.*StrictHostKeyChecking ask/StrictHostKeyChecking no/' /etc/ssh/ssh_config

# Set up passwordless SSH login here so the later steps can use it.
RUN ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa && \
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && \
    chmod 700 ~/.ssh && \
    chmod 600 ~/.ssh/id_rsa && \
    chmod 644 ~/.ssh/authorized_keys

EXPOSE 22

CMD ["/usr/sbin/sshd", "-D"]
--------------------------------------------------------------------------------
/base/pip.conf:
--------------------------------------------------------------------------------
[global]
index-url=https://pypi.tuna.tsinghua.edu.cn/simple
[install]
trusted-host=pypi.tuna.tsinghua.edu.cn
--------------------------------------------------------------------------------
/base/requirements.txt:
--------------------------------------------------------------------------------
numpy
ipython
scipy
pandas
--------------------------------------------------------------------------------
/base/sources.list:
--------------------------------------------------------------------------------
# deb cdrom:[Ubuntu 16.04 LTS _Xenial Xerus_ - Release amd64 (20160420.1)]/ xenial main restricted
deb-src http://archive.ubuntu.com/ubuntu xenial main restricted #Added by software-properties
deb http://mirrors.aliyun.com/ubuntu/ xenial main restricted
deb-src http://mirrors.aliyun.com/ubuntu/ xenial main restricted multiverse universe #Added by software-properties
deb http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted multiverse universe #Added by software-properties
deb http://mirrors.aliyun.com/ubuntu/ xenial universe
deb http://mirrors.aliyun.com/ubuntu/ xenial-updates universe
deb http://mirrors.aliyun.com/ubuntu/ xenial multiverse
deb http://mirrors.aliyun.com/ubuntu/ xenial-updates multiverse
deb http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse #Added by software-properties
deb http://archive.canonical.com/ubuntu xenial partner
deb-src http://archive.canonical.com/ubuntu xenial partner
deb http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted multiverse universe #Added by software-properties
deb http://mirrors.aliyun.com/ubuntu/ xenial-security universe
deb http://mirrors.aliyun.com/ubuntu/ xenial-security multiverse
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
# DATA
--------------------------------------------------------------------------------
/flume/README.md:
--------------------------------------------------------------------------------
# flume
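The files under [conf](./conf) all define a single agent named `agent`. A minimal sketch of launching one of them against the flume install from the dockerfile (`/usr/local/flume`); the conf-file path assumes you run this from inside the `flume/` folder (for example, with it mounted into the container):

```bash
# start the netcat source / logger sink agent defined in conf/flume-netcat.conf
/usr/local/flume/bin/flume-ng agent \
  --conf /usr/local/flume/conf \
  --conf-file ./conf/flume-netcat.conf \
  --name agent \
  -Dflume.root.logger=INFO,console

# in another shell inside the same container, feed it an event
echo "hello flume" | nc 127.0.0.1 44444
```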
--------------------------------------------------------------------------------
/flume/conf/flume-avro.conf:
--------------------------------------------------------------------------------
# Define a memory channel called c1 on agent
agent.channels.c1.type=memory

# Define an avro source called r1 on agent and tell it
agent.sources.r1.channels=c1
agent.sources.r1.type=avro
agent.sources.r1.bind=127.0.0.1
agent.sources.r1.port=44444

# Describe/configure the HDFS sink
agent.sinks.k1.type=hdfs
agent.sinks.k1.channel=c1
agent.sinks.k1.hdfs.path=hdfs://master:9000/flume_data_pool
agent.sinks.k1.hdfs.filePrefix=events-
agent.sinks.k1.hdfs.fileType=DataStream
agent.sinks.k1.hdfs.writeFormat=Text
agent.sinks.k1.hdfs.rollSize=0
agent.sinks.k1.hdfs.rollCount=600000
agent.sinks.k1.hdfs.rollInterval=600

agent.channels=c1
agent.sources=r1
agent.sinks=k1
--------------------------------------------------------------------------------
/flume/conf/flume-exec.conf:
--------------------------------------------------------------------------------
# Name the components on this agent
agent.sources=r1
agent.sinks=k1
agent.channels=c1

# Describe/configure the source
agent.sources.r1.type=exec
agent.sources.r1.command=tail -f /data/hadoop/flume/test.txt

# Describe the sink
agent.sinks.k1.type=logger

# Use a channel which buffers events in memory
agent.channels.c1.type=memory
agent.channels.c1.capacity=1000
agent.channels.c1.transactionCapacity=100

# Bind the source and sink to the channel
agent.sources.r1.channels=c1
agent.sinks.k1.channel=c1
--------------------------------------------------------------------------------
/flume/conf/flume-netcat.conf:
--------------------------------------------------------------------------------
# Name the components on this agent
agent.sources=r1
agent.sinks=k1
agent.channels=c1

# Describe/configure the source
agent.sources.r1.type=netcat
agent.sources.r1.bind=127.0.0.1
agent.sources.r1.port=44444

# Describe the sink
agent.sinks.k1.type=logger

# Use a channel which buffers events in memory
agent.channels.c1.type=memory
agent.channels.c1.capacity=1000
agent.channels.c1.transactionCapacity=100

# Bind the source and sink to the channel
agent.sources.r1.channels=c1
agent.sinks.k1.channel=c1
--------------------------------------------------------------------------------
/flume/dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:16.04

RUN apt-get update && apt-get install -y wget \
    && wget http://archive.apache.org/dist/flume/1.6.0/apache-flume-1.6.0-bin.tar.gz \
    && tar -zxvf /apache-flume-1.6.0-bin.tar.gz -C /usr/local/ \
    && mv /usr/local/apache-flume-1.6.0-bin /usr/local/flume \
    && rm /apache-flume-1.6.0-bin.tar.gz
--------------------------------------------------------------------------------
/hadoop/README.md:
--------------------------------------------------------------------------------
# hadoop

## Network

- master: 172.17.0.2
- slave1: 172.17.0.3
- slave2: 172.17.0.4

## cluster-hadoop

A single-machine hadoop cluster, built on top of cluster-base.

## Start the containers

```bash
docker network create --subnet=172.18.0.0/16 hadoop-network
docker run -itd --name hadoop-master --hostname=hadoop-master --net hadoop-network --ip 172.18.0.2 cluster-hadoop zsh
docker run -itd --name hadoop-slave1 --hostname=hadoop-slave1 --net hadoop-network --ip 172.18.0.3 cluster-hadoop zsh
docker run -itd --name hadoop-slave2 --hostname=hadoop-slave2 --net hadoop-network --ip 172.18.0.4 cluster-hadoop zsh
```

Quick single-container runs, without a custom network:

```bash
docker run -ti --name master --hostname=master ubuntu:16.04 zsh
docker run -ti --name slave1 --hostname=slave1 ubuntu:16.04 zsh
docker run -ti --name slave2 --hostname=slave2 ubuntu:16.04 zsh
```
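Whether the containers come from the commands above or from `docker-compose up -d` with the compose file in this folder, a quick way to confirm the daemons came up is the following sketch (container names follow docker-compose.yml; `jps` assumes the JDK tools are on the PATH, as in the base image):

```bash
# HDFS / YARN daemons
docker exec hadoop-master jps     # expect NameNode, SecondaryNameNode, ResourceManager
docker exec hadoop-slave1 jps     # expect DataNode, NodeManager

# datanode registration as seen by the namenode
docker exec hadoop-master /usr/local/hadoop/bin/hdfs dfsadmin -report
```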
--------------------------------------------------------------------------------
/hadoop/conf/core-site.xml:
--------------------------------------------------------------------------------
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
    </property>

    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/hadoop/tmp</value>
    </property>
</configuration>
--------------------------------------------------------------------------------
/hadoop/conf/hdfs-site.xml:
--------------------------------------------------------------------------------
<configuration>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>master:9001</value>
    </property>

    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/usr/local/hadoop/name/</value>
    </property>

    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/usr/local/hadoop/data/</value>
    </property>

    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
</configuration>
--------------------------------------------------------------------------------
/hadoop/conf/mapred-site.xml:
--------------------------------------------------------------------------------
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
--------------------------------------------------------------------------------
/hadoop/conf/masters:
--------------------------------------------------------------------------------
master
--------------------------------------------------------------------------------
/hadoop/conf/slaves:
--------------------------------------------------------------------------------
slave1
slave2
--------------------------------------------------------------------------------
/hadoop/conf/yarn-site.xml:
--------------------------------------------------------------------------------
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>

    <property>
        <name>yarn.resourcemanager.address</name>
        <value>master:8032</value>
    </property>

    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>master:8030</value>
    </property>

    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>master:8035</value>
    </property>

    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>master:8033</value>
    </property>

    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>master:8088</value>
    </property>
</configuration>
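With `mapreduce.framework.name=yarn` and the resourcemanager addresses above, the examples jar bundled with Hadoop 2.6.0 gives a quick end-to-end check of HDFS and YARN. A sketch, run inside the master container once the daemons are up:

```bash
# estimate pi with 2 mappers x 10 samples; exercises job submission, YARN scheduling and HDFS staging
/usr/local/hadoop/bin/hadoop jar \
  /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar pi 2 10

# list the registered NodeManagers
/usr/local/hadoop/bin/yarn node -list
```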
--------------------------------------------------------------------------------
/hadoop/docker-compose.yml:
--------------------------------------------------------------------------------
version: '3'
services:
  master:
    image: cluster-hadoop
    container_name: hadoop-master
    networks:
      cluster-network:
        ipv4_address: 172.20.0.2
        aliases:
          - hadoop-master
    volumes:
      - ../data/:/data
      - ./:/hadoop
    extra_hosts:
      - "slave1:172.20.0.3"
      - "slave2:172.20.0.4"
    hostname: master
    environment:
      ROLE: master

  slave1:
    image: cluster-hadoop
    container_name: hadoop-slave1
    networks:
      cluster-network:
        ipv4_address: 172.20.0.3
        aliases:
          - hadoop-slave1
    volumes:
      - ../data/:/data
      - ./:/hadoop
    extra_hosts:
      - "master:172.20.0.2"
      - "slave2:172.20.0.4"
    hostname: slave1
    environment:
      ROLE: slave

  slave2:
    image: cluster-hadoop
    container_name: hadoop-slave2
    networks:
      cluster-network:
        ipv4_address: 172.20.0.4
        aliases:
          - hadoop-slave2
    volumes:
      - ../data/:/data
      - ./:/hadoop
    extra_hosts:
      - "master:172.20.0.2"
      - "slave1:172.20.0.3"
    hostname: slave2
    environment:
      ROLE: slave

networks:
  cluster-network:
    driver: bridge
    driver_opts:
      com.docker.network.enable_ipv6: "false"
    ipam:
      driver: default
      config:
        - subnet: 172.20.0.0/16
--------------------------------------------------------------------------------
/hadoop/dockerfile:
--------------------------------------------------------------------------------
FROM cluster-base

# Install the hadoop distribution.
RUN wget http://archive.apache.org/dist/hadoop/common/hadoop-2.6.0/hadoop-2.6.0.tar.gz \
    && tar -zxvf /hadoop-2.6.0.tar.gz -C /usr/local/ \
    && mv /usr/local/hadoop-2.6.0 /usr/local/hadoop \
    && mkdir /usr/local/hadoop/tmp/ \
    && mkdir /usr/local/hadoop/data/ \
    && mkdir /usr/local/hadoop/name/ \
    && rm /hadoop-2.6.0.tar.gz

# java8 environment.
ENV JAVA_HOME=/usr/local/jdk1.8
ENV PATH $JAVA_HOME/bin:$PATH

# hadoop environment.
ENV HADOOP_HOME=/usr/local/hadoop
ENV PATH $HADOOP_HOME/bin:$PATH
ENV HADOOP_PREFIX=$HADOOP_HOME

RUN cd $HADOOP_HOME \
    && echo "export JAVA_HOME=$JAVA_HOME" >> etc/hadoop/hadoop-env.sh \
    && echo "export HADOOP_PREFIX=$HADOOP_PREFIX" >> etc/hadoop/hadoop-env.sh \
    && echo "export JAVA_HOME=$JAVA_HOME" >> etc/hadoop/yarn-env.sh

COPY ./conf/core-site.xml $HADOOP_HOME/etc/hadoop
COPY ./conf/hdfs-site.xml $HADOOP_HOME/etc/hadoop
COPY ./conf/mapred-site.xml $HADOOP_HOME/etc/hadoop
COPY ./conf/yarn-site.xml $HADOOP_HOME/etc/hadoop
COPY ./conf/masters $HADOOP_HOME/etc/hadoop
COPY ./conf/slaves $HADOOP_HOME/etc/hadoop

WORKDIR /hadoop

EXPOSE 22

CMD ["/usr/sbin/sshd", "-D"]
--------------------------------------------------------------------------------
/hadoop/hadoop-entrypoint.sh:
--------------------------------------------------------------------------------
#!/bin/bash

startMaster() {
    /usr/local/hadoop/sbin/start-all.sh
}

stopMaster() {
    /usr/local/hadoop/sbin/stop-all.sh
}

main() {
    service ssh restart
    service sshd restart

    sleep 5

    if [ "${ROLE}" = "master" ]
    then
        hdfs namenode -format
        startMaster
    fi
}

main
--------------------------------------------------------------------------------
/hbase/README.md:
--------------------------------------------------------------------------------
# hbase
--------------------------------------------------------------------------------
/hbase/conf/hbase-site.xml:
--------------------------------------------------------------------------------
<configuration>
    <property>
        <name>hbase.tmp.dir</name>
        <value>/usr/local/hbase/tmp</value>
    </property>

    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://master:9000/hbase</value>
    </property>

    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>

    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>master,slave1,slave2</value>
    </property>

    <property>
        <name>hbase.zookeeper.property.dataDir</name>
        <value>/usr/local/hbase/zookeeper</value>
    </property>

    <property>
        <name>hbase.master.info.port</name>
        <value>60010</value>
    </property>
</configuration>
--------------------------------------------------------------------------------
/hbase/conf/regionservers:
--------------------------------------------------------------------------------
master
slave1
slave2
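With `hbase.rootdir` pointing at the HDFS namenode and the quorum above, HBase is started from the master once HDFS is reachable. A sketch, assuming the containers from the compose file in this folder are running:

```bash
# start HMaster on master and HRegionServers on the hosts listed in regionservers
docker exec hbase-master /usr/local/hbase/bin/start-hbase.sh

# confirm the processes and cluster status
docker exec hbase-master jps   # expect HMaster (plus HQuorumPeer if HBase manages zookeeper itself)
echo status | docker exec -i hbase-master /usr/local/hbase/bin/hbase shell
```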
--------------------------------------------------------------------------------
/hbase/docker-compose.yml:
--------------------------------------------------------------------------------
version: '2'
services:
  master:
    image: cluster-hbase
    container_name: hbase-master
    networks:
      default:
        ipv4_address: 172.17.0.2
    extra_hosts:
      - "slave1:172.17.0.3"
      - "slave2:172.17.0.4"
    hostname: master
    environment:
      ZK_ID: 1
      ROLE: master
    tty: true
    stdin_open: true

  slave1:
    image: cluster-hbase
    container_name: hbase-slave1
    networks:
      default:
        ipv4_address: 172.17.0.3
    extra_hosts:
      - "master:172.17.0.2"
      - "slave2:172.17.0.4"
    hostname: slave1
    environment:
      ZK_ID: 2
      ROLE: slave
    tty: true
    stdin_open: true

  slave2:
    image: cluster-hbase
    container_name: hbase-slave2
    networks:
      default:
        ipv4_address: 172.17.0.4
    extra_hosts:
      - "master:172.17.0.2"
      - "slave1:172.17.0.3"
    hostname: slave2
    environment:
      ZK_ID: 3
      ROLE: slave
    tty: true
    stdin_open: true

networks:
  default:
    driver: bridge
    driver_opts:
      com.docker.network.enable_ipv6: "false"
    ipam:
      driver: default
      config:
        - subnet: 172.17.0.0/17
--------------------------------------------------------------------------------
/hbase/dockerfile:
--------------------------------------------------------------------------------
FROM cluster-spark

RUN wget http://archive.apache.org/dist/hbase/hbase-0.98.6/hbase-0.98.6-hadoop2-bin.tar.gz \
    && tar -zxvf /hbase-0.98.6-hadoop2-bin.tar.gz -C /usr/local/ \
    && mv /usr/local/hbase-0.98.6-hadoop2 /usr/local/hbase \
    && rm /hbase-0.98.6-hadoop2-bin.tar.gz \
    && mkdir /usr/local/hbase/tmp

ENV JAVA_HOME=/usr/local/jdk1.8
ENV CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib

ENV HBASE_HOME=/usr/local/hbase
ENV HBASE_CLASSPATH=$HBASE_HOME/conf
ENV HBASE_LOG_DIR=$HBASE_HOME/logs
ENV PATH=$PATH:$HBASE_HOME/bin
--------------------------------------------------------------------------------
/hive/README.md:
--------------------------------------------------------------------------------
# hive
--------------------------------------------------------------------------------
/hive/conf/hive-site.xml:
--------------------------------------------------------------------------------
<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>hadoop</value>
    </property>
</configuration>
--------------------------------------------------------------------------------
/hive/dockerfile:
--------------------------------------------------------------------------------
FROM cluster-mysql

RUN wget http://mirror.bit.edu.cn/apache/hive/hive-1.2.2/apache-hive-1.2.2-bin.tar.gz \
    && tar -zxvf /apache-hive-1.2.2-bin.tar.gz -C /usr/local/ \
    && mv /usr/local/apache-hive-1.2.2-bin /usr/local/hive \
    && rm /apache-hive-1.2.2-bin.tar.gz

ENV HIVE_HOME=/usr/local/hive
ENV PATH=$HIVE_HOME/bin:$PATH
--------------------------------------------------------------------------------
/kafka/README.md:
--------------------------------------------------------------------------------
1 | # Kafka
2 | 
3 | If you want to run Kafka as a cluster, run this in the current folder:
4 | 
5 | ```bash
6 | docker-compose up
7 | ```
8 | 
9 | After that, run:
10 | 
11 | ```bash
12 | docker ps
13 | ```
14 | 
15 | You will find three running containers named kafka-master, 
kafka-slave1, kafka-slave2 16 | 17 | Then go inside the Master node, which is kafka-master. 18 | 19 | ```bash 20 | docker exec -it kafka-master zsh 21 | jps 22 | ``` 23 | 24 | If you want to stop kafka, in current folder run: 25 | 26 | ```bash 27 | docker-compose down 28 | ``` 29 | -------------------------------------------------------------------------------- /kafka/conf/server.propertie: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # see kafka.server.KafkaConfig for additional details and defaults 17 | 18 | ############################# Server Basics ############################# 19 | 20 | 21 | # Switch to enable topic deletion or not, default value is false 22 | #delete.topic.enable=true 23 | 24 | ############################# Socket Server Settings ############################# 25 | 26 | # The address the socket server listens on. It will get the value returned from 27 | # java.net.InetAddress.getCanonicalHostName() if not configured. 28 | # FORMAT: 29 | # listeners = listener_name://host_name:port 30 | # EXAMPLE: 31 | # listeners = PLAINTEXT://your.host.name:9092 32 | #listeners=PLAINTEXT://:9092 33 | 34 | # Hostname and port the broker will advertise to producers and consumers. If not set, 35 | # it uses the value for "listeners" if configured. Otherwise, it will use the value 36 | # returned from java.net.InetAddress.getCanonicalHostName(). 37 | #advertised.listeners=PLAINTEXT://your.host.name:9092 38 | 39 | # Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details 40 | #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL 41 | 42 | # The number of threads handling network requests 43 | num.network.threads=3 44 | 45 | # The number of threads doing disk I/O 46 | num.io.threads=8 47 | 48 | # The send buffer (SO_SNDBUF) used by the socket server 49 | socket.send.buffer.bytes=102400 50 | 51 | # The receive buffer (SO_RCVBUF) used by the socket server 52 | socket.receive.buffer.bytes=102400 53 | 54 | # The maximum size of a request that the socket server will accept (protection against OOM) 55 | socket.request.max.bytes=104857600 56 | 57 | 58 | ############################# Log Basics ############################# 59 | 60 | # A comma seperated list of directories under which to store log files 61 | log.dirs=/usr/local/kafka/kafka-logs 62 | zookeeper.connect=master:2181, slave1:2181, slave2:2181 63 | 64 | # The default number of log partitions per topic. More partitions allow greater 65 | # parallelism for consumption, but this will also result in more files across 66 | # the brokers. 
67 | num.partitions=1 68 | 69 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 70 | # This value is recommended to be increased for installations with data dirs located in RAID array. 71 | num.recovery.threads.per.data.dir=1 72 | 73 | ############################# Log Flush Policy ############################# 74 | 75 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 76 | # the OS cache lazily. The following configurations control the flush of data to disk. 77 | # There are a few important trade-offs here: 78 | # 1. Durability: Unflushed data may be lost if you are not using replication. 79 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 80 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 81 | # The settings below allow one to configure the flush policy to flush data after a period of time or 82 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 83 | 84 | # The number of messages to accept before forcing a flush of data to disk 85 | #log.flush.interval.messages=10000 86 | 87 | # The maximum amount of time a message can sit in a log before we force a flush 88 | #log.flush.interval.ms=1000 89 | 90 | ############################# Log Retention Policy ############################# 91 | 92 | # The following configurations control the disposal of log segments. The policy can 93 | # be set to delete segments after a period of time, or after a given size has accumulated. 94 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 95 | # from the end of the log. 96 | 97 | # The minimum age of a log file to be eligible for deletion due to age 98 | log.retention.hours=168 99 | 100 | # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining 101 | # segments don't drop below log.retention.bytes. Functions independently of log.retention.hours. 102 | #log.retention.bytes=1073741824 103 | 104 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 105 | log.segment.bytes=1073741824 106 | 107 | # The interval at which log segments are checked to see if they can be deleted according 108 | # to the retention policies 109 | log.retention.check.interval.ms=300000 110 | 111 | ############################# Zookeeper ############################# 112 | 113 | # Zookeeper connection string (see zookeeper docs for details). 114 | # This is a comma separated host:port pairs, each corresponding to a zk 115 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 116 | # You can also append an optional chroot string to the urls to specify the 117 | # root directory for all kafka znodes. 
118 | 
119 | # Timeout in ms for connecting to zookeeper
120 | zookeeper.connection.timeout.ms=6000
121 | 
--------------------------------------------------------------------------------
/kafka/dockerfile:
--------------------------------------------------------------------------------
FROM cluster-hadoop

RUN wget http://mirrors.tuna.tsinghua.edu.cn/apache/kafka/0.10.2.1/kafka_2.11-0.10.2.1.tgz \
    && tar -zxvf /kafka_2.11-0.10.2.1.tgz -C /usr/local/ \
    && mv /usr/local/kafka_2.11-0.10.2.1 /usr/local/kafka \
    && rm /kafka_2.11-0.10.2.1.tgz \
    && mkdir -p /usr/local/kafka/kafka-logs

COPY ./conf/server.propertie /usr/local/kafka/config/server.properties

ENV KAFKA_HOME=/usr/local/kafka
ENV PATH $KAFKA_HOME/bin:$PATH
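A quick produce/consume round trip once the brokers are up — a sketch, run inside a broker container, assuming the cluster from the kafka README is running and the zookeeper ensemble from `server.properties` (master/slave1/slave2) is reachable; the topic name is just a placeholder:

```bash
# create a topic (Kafka 0.10 still manages topics through zookeeper)
kafka-topics.sh --create --zookeeper master:2181 \
  --replication-factor 1 --partitions 1 --topic smoke-test

# produce a message, then read it back
echo "hello kafka" | kafka-console-producer.sh --broker-list master:9092 --topic smoke-test
kafka-console-consumer.sh --bootstrap-server master:9092 --topic smoke-test \
  --from-beginning --max-messages 1
```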
--------------------------------------------------------------------------------
/mysql/README.md:
--------------------------------------------------------------------------------
# mysql
--------------------------------------------------------------------------------
/mysql/dockerfile:
--------------------------------------------------------------------------------
FROM cluster-base

RUN wget --no-check-certificate --no-cookies http://dev.mysql.com/get/Downloads/MySQL-5.1/mysql-5.1.41.tar.gz \
    && tar -zxvf /mysql-5.1.41.tar.gz -C /usr/local \
    && mv /usr/local/mysql-5.1.41 /usr/local/mysql \
    && rm /mysql-5.1.41.tar.gz
--------------------------------------------------------------------------------
/redis/README.md:
--------------------------------------------------------------------------------
# redis
--------------------------------------------------------------------------------
/redis/redis.dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:16.04

RUN apt-get update && apt-get install -y wget \
    && wget http://download.redis.io/releases/redis-2.8.3.tar.gz \
    && tar -zxvf /redis-2.8.3.tar.gz -C /usr/local/ \
    && mv /usr/local/redis-2.8.3 /usr/local/redis \
    && rm /redis-2.8.3.tar.gz
--------------------------------------------------------------------------------
/spark/README.md:
--------------------------------------------------------------------------------
# spark

This project builds a distributed cluster from Docker images: one master node and two slave nodes.

## Using spark

> All of the functionality is wrapped in the scripts below.

### spark scripts

- `bash ./initial-spark.sh`: initialize the spark environment. It can only be initialized once; delete the spark environment before re-initializing.
- `bash ./start-spark.sh`: start the spark environment in the background (no output is expected).
- `bash ./stop-spark.sh`: stop the spark environment.
- `bash ./delete-spark.sh`: delete the spark environment.
- `bash ./pyspark-shell.sh`: after initializing and starting the spark environment, run this to open an interactive pyspark shell.

### Interactive pyspark

1. In a terminal, enter the directory containing this folder.
2. Run `bash ./initial-spark.sh` to initialize the environment **(the first run downloads packages and takes a while)**, then `bash ./start-spark.sh` to start the spark environment, and finally `bash ./pyspark-shell.sh` to drop straight into the interactive pyspark shell. It is configured for `python3` and uses the `ipython` front end.
3. When you are done, run `bash ./stop-spark.sh` to stop the spark environment, or `bash ./delete-spark.sh` to delete it.

Since `pyspark` is an interactive shell, the `SparkContext` session is already created at startup, so `sc` can be used directly.

Inside `ipython`, `tab` completion is available.

### Data files

Put the data files you need under this project's `/data` directory; inside the `pyspark` session they are available under `/data` as well.
--------------------------------------------------------------------------------
/spark/compose.sh:
--------------------------------------------------------------------------------
#!/bin/bash

function main() {
    service ssh restart
    service sshd restart
    if [ 'master' = "${ROLE}" ]
    then
        hdfs namenode -format
    fi
}

main
--------------------------------------------------------------------------------
/spark/conf/slaves:
--------------------------------------------------------------------------------
slave1
slave2
--------------------------------------------------------------------------------
/spark/conf/spark-env.sh:
--------------------------------------------------------------------------------
export SCALA_HOME=/usr/local/scala
export JAVA_HOME=/usr/local/jdk1.8
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop

SPARK_MASTER_IP=master
SPARK_LOCAL_DIRS=/usr/local/spark
SPARK_DRIVER_MEMORY=1G           # memory allocated to the driver; adjust to your machine
export SPARK_WORKER_INSTANCES=1  # number of worker instances per machine
export SPARK_MASTER_PORT=7077
export SPARK_WORKER_CORES=2      # CPU cores used by each worker
export SPARK_WORKER_MEMORY=512m
--------------------------------------------------------------------------------
/spark/docker-compose.yml:
--------------------------------------------------------------------------------
version: '2'
services:
  master:
    image: cluster-spark
    container_name: spark-master
    volumes:
      - ../data/:/data
      - ./:/spark
    networks:
      cluster-spark-network:
        ipv4_address: 172.88.0.2
    extra_hosts:
      - "slave1:172.88.0.3"
      - "slave2:172.88.0.4"
    hostname: master
    environment:
      ROLE: master
    command: ['bash','/spark/compose.sh']

  slave1:
    image: cluster-spark
    container_name: spark-slave1
    volumes:
      - ../data/:/data
      - ./:/spark
    depends_on:
      - master
    networks:
      cluster-spark-network:
        ipv4_address: 172.88.0.3
    extra_hosts:
      - "master:172.88.0.2"
      - "slave2:172.88.0.4"
    hostname: slave1
    environment:
      ROLE: slave

  slave2:
    image: cluster-spark
    container_name: spark-slave2
    volumes:
      - ../data/:/data
      - ./:/spark
    networks:
      cluster-spark-network:
        ipv4_address: 172.88.0.4
    extra_hosts:
      - "master:172.88.0.2"
      - "slave1:172.88.0.3"
    depends_on:
      - master
    hostname: slave2
    environment:
      ROLE: slave

networks:
  cluster-spark-network:
    driver: bridge
    driver_opts:
      com.docker.network.enable_ipv6: "false"
    ipam:
      driver: default
      config:
        - subnet: 172.88.0.0/16
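Once the compose cluster is up and the standalone master/workers have been started (for example via `start-spark.sh` or spark's own `sbin/start-all.sh`), the bundled example job gives a quick sanity check. A sketch:

```bash
docker exec spark-master jps    # expect Master here and Worker on the slaves

# run SparkPi against the standalone master rather than local mode
docker exec spark-master bash -c \
  'MASTER=spark://master:7077 /usr/local/spark/bin/run-example SparkPi 10'
```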
--------------------------------------------------------------------------------
/spark/dockerfile:
--------------------------------------------------------------------------------
FROM cluster-hadoop

RUN wget https://archive.apache.org/dist/spark/spark-1.6.0/spark-1.6.0-bin-hadoop2.6.tgz \
    && tar -zxvf /spark-1.6.0-bin-hadoop2.6.tgz -C /usr/local/ \
    && mv /usr/local/spark-1.6.0-bin-hadoop2.6 /usr/local/spark \
    && rm /spark-1.6.0-bin-hadoop2.6.tgz

COPY ./conf/slaves /usr/local/spark/conf/
COPY ./conf/spark-env.sh /usr/local/spark/conf/

# Point pyspark at python3 and use ipython as the interactive front end.
ENV PYSPARK_PYTHON=python3
ENV PYSPARK_DRIVER_PYTHON=ipython
ENV SPARK_HOME=/usr/local/spark

WORKDIR /spark

EXPOSE 22

CMD ["/usr/sbin/sshd", "-D"]
--------------------------------------------------------------------------------
/storm/README.md:
--------------------------------------------------------------------------------
# storm
--------------------------------------------------------------------------------
/storm/conf/storm.yaml:
--------------------------------------------------------------------------------
storm.zookeeper.servers:
  - "master"
  - "slave1"
  - "slave2"

nimbus.host: "master"
supervisor.slots.ports:
  - 6700
  - 6701
  - 6702
  - 6703
  - 6704
  - 6705
--------------------------------------------------------------------------------
/storm/dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:16.04

RUN apt-get update && apt-get install -y wget \
    && wget https://archive.apache.org/dist/storm/apache-storm-0.9.3/apache-storm-0.9.3.tar.gz \
    && tar -zxvf /apache-storm-0.9.3.tar.gz -C /usr/local \
    && mv /usr/local/apache-storm-0.9.3 /usr/local/storm \
    && rm /apache-storm-0.9.3.tar.gz

ENV STORM_HOME=/usr/local/storm
ENV PATH=$PATH:$STORM_HOME/bin

RUN chmod +x /usr/local/storm/bin/stop-storm.sh
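With `storm.zookeeper.servers` and `nimbus.host` configured as above, the daemons are started per node. A rough sketch only: it assumes `conf/storm.yaml` has been copied into `/usr/local/storm/conf/`, that a JRE and Python are available in the container, and that the zookeeper ensemble from the next section is running:

```bash
# on master: nimbus and the web UI (defaults to port 8080)
storm nimbus &
storm ui &

# on each worker node
storm supervisor &

# check: lists running topologies (empty on a fresh cluster)
storm list
```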
--------------------------------------------------------------------------------
/zookeeper/README.md:
--------------------------------------------------------------------------------
# zookeeper
--------------------------------------------------------------------------------
/zookeeper/conf/zoo.cfg:
--------------------------------------------------------------------------------
dataDir=/usr/local/zookeeper/data
dataLogDir=/usr/local/zookeeper/log
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
tickTime=2000
clientPort=2181
initLimit=5
syncLimit=2
--------------------------------------------------------------------------------
/zookeeper/docker-compose.yml:
--------------------------------------------------------------------------------
version: '2'
services:
  master:
    image: cluster-zookeeper
    container_name: zk-master
    networks:
      default:
        ipv4_address: 172.17.0.2
    extra_hosts:
      - "slave1:172.17.0.3"
      - "slave2:172.17.0.4"
    hostname: master
    environment:
      ZK_ID: 1
    tty: true

  slave1:
    image: cluster-zookeeper
    container_name: zk-slave1
    networks:
      default:
        ipv4_address: 172.17.0.3
    extra_hosts:
      - "master:172.17.0.2"
      - "slave2:172.17.0.4"
    hostname: slave1
    environment:
      ZK_ID: 2
    tty: true

  slave2:
    image: cluster-zookeeper
    container_name: zk-slave2
    networks:
      default:
        ipv4_address: 172.17.0.4
    extra_hosts:
      - "master:172.17.0.2"
      - "slave1:172.17.0.3"
    hostname: slave2
    environment:
      ZK_ID: 3
    tty: true

networks:
  default:
    driver: bridge
    driver_opts:
      com.docker.network.enable_ipv6: "false"
    ipam:
      driver: default
      config:
        - subnet: 172.17.0.0/17
--------------------------------------------------------------------------------
/zookeeper/dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:16.04

# Install the zookeeper distribution.
RUN apt-get update && apt-get install -y wget \
    && wget http://mirror.bit.edu.cn/apache/zookeeper/zookeeper-3.4.10/zookeeper-3.4.10.tar.gz \
    && tar -zxvf /zookeeper-3.4.10.tar.gz -C /usr/local/ \
    && mv /usr/local/zookeeper-3.4.10 /usr/local/zookeeper \
    && rm /zookeeper-3.4.10.tar.gz \
    && mkdir /usr/local/zookeeper/data \
    && mkdir /usr/local/zookeeper/log

COPY ./conf/zoo.cfg /usr/local/zookeeper/conf

ENV ZOOKEEPER_HOME=/usr/local/zookeeper
ENV PATH $ZOOKEEPER_HOME/bin:$PATH
--------------------------------------------------------------------------------
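The compose file gives each container a distinct `ZK_ID`, but nothing shown here writes it into the `myid` file that the `server.N` entries in zoo.cfg require, so that step and the server start have to happen on each node. A sketch, assuming a JRE is available inside the container:

```bash
# on every node: write the id expected by zoo.cfg, then start the server
echo "${ZK_ID}" > /usr/local/zookeeper/data/myid
zkServer.sh start

# check the ensemble: one node should report "leader", the others "follower"
zkServer.sh status
zkCli.sh -server master:2181 ls /
```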