├── .gitignore
├── README.md
├── build.sh
├── centos-java
│   └── Dockerfile
├── conf
│   ├── configuration.xsl
│   ├── log4j.properties
│   └── zoo.cfg
├── docker-compose-hadoop.yml
├── docker-compose-hbase.yml
├── docker-compose-hive.yml
├── docker-compose-mysql.yml
├── docker-compose-spark.yml
├── docker-compose-zk.yml
├── docker-hadoop
│   ├── Dockerfile
│   └── conf
│       ├── core-site.xml
│       ├── hadoop-env.sh
│       ├── hdfs-site.xml
│       ├── log4j.properteis
│       ├── mapred-env.sh
│       ├── mapred-site.xml
│       ├── masters
│       ├── slaves
│       ├── yarn-env.sh
│       └── yarn-site.xml
├── docker-hbase
│   ├── Dockerfile
│   └── conf
│       ├── backup-masters
│       ├── hbase-env.sh
│       ├── hbase-site.xml
│       └── regionservers
├── docker-hive
│   ├── Dockerfile
│   └── conf
│       ├── hive-env.sh
│       ├── hive-exec-log4j.properties
│       ├── hive-log4j.properties
│       └── hive-site.xml
├── docker-mysql
│   ├── Dockerfile
│   └── my.cnf
├── docker-spark
│   ├── Dockerfile
│   └── conf
│       ├── hive-site.xml
│       ├── log4j.properties
│       ├── slaves
│       ├── spark-defaults.conf
│       └── spark-env.sh
└── docker-zk
    ├── Dockerfile
    ├── conf
    │   ├── configuration.xsl
    │   ├── log4j.properties
    │   └── zoo.cfg
    ├── zoo1
    │   └── myid
    ├── zoo2
    │   └── myid
    ├── zoo3
    │   └── myid
    ├── zoo4
    │   └── myid
    └── zoo5
        └── myid

/.gitignore:
--------------------------------------------------------------------------------
data
**/version-2
**/zookeeper_server.pid
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Bigdata-Docker: Build a Big Data Learning and Development Environment with Docker

### Introduction

##### 1. Image environment

* OS: CentOS 7
* Java: Java 7
* Zookeeper: 3.4.6
* Hadoop: 2.7.1
* MySQL: 5.6.29
* Hive: 1.2.1
* Spark: 1.6.2
* HBase: 1.1.2

##### 2. Image overview

* tonywell/centos-java: openssh and Java 7; the base image
* tonywell/docker-zk: built on tonywell/centos-java; adds ZooKeeper, used to start the ZooKeeper cluster
* tonywell/docker-hadoop: built on tonywell/centos-java; adds Hadoop, used to start the Hadoop cluster
* tonywell/docker-mysql: openssh and MySQL; starts the MySQL container that backs the Hive metastore
* tonywell/docker-hive: built on tonywell/docker-hadoop; contains Hadoop and Hive, used to start a Hadoop + Hive cluster
* tonywell/docker-spark: built on tonywell/docker-hive; contains Hadoop, Hive and Spark, used to start a Hadoop + Hive + Spark cluster
* tonywell/docker-hbase: built on tonywell/docker-spark; contains Hadoop, Hive, Spark and HBase, used to start a Hadoop + Hive + Spark + HBase cluster



### Quick Start

#### 1. Build the images

```
$ sh build.sh
```

You can comment out the images you do not need in build.sh.

#### 2. Create the cluster network

```
$ docker network create zoo
```

#### 3. Start the ZooKeeper cluster

```
$ docker-compose -f docker-compose-zk.yml up -d
```

You can add or remove nodes in the compose template as needed; remember to adjust the myid configuration accordingly.

#### 4. Start the MySQL container

If you only want to use the Hadoop cluster, you can skip this step.

```
$ docker-compose -f docker-compose-mysql.yml up -d
```

Next, change the root password and enable remote access to MySQL:

```
$ docker exec -it hadoop-mysql bash
$ cd /usr/local/mysql-5.6.29/bin
$ ./mysql -u root -p
# the default password is empty, just press Enter
$ mysql> use mysql;
$ mysql> UPDATE user SET Password=PASSWORD('new password') where USER='root';
$ mysql> FLUSH PRIVILEGES;
# grant remote access
$ mysql> grant ALL PRIVILEGES ON *.* to root@"%" identified by "root" WITH GRANT OPTION;
$ mysql> FLUSH PRIVILEGES;
# set the character set, which avoids the following error when Hive later creates tables:
# FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. MetaException(message:For direct MetaStore DB connections, we don't support retries at the client level.)
$ mysql> alter database hive character set latin1;
```

OK, the MySQL container is now configured.
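If the `hive` metastore database does not exist yet, it can also be created up front with the right character set instead of being altered afterwards. The following is a minimal sketch, not part of the original setup steps (the database name `hive` matches the `alter database` statement above, and the client path is the one used earlier in this README):

```
$ docker exec -it hadoop-mysql bash
$ cd /usr/local/mysql-5.6.29/bin
# create the metastore database with the latin1 character set (only if it does not exist yet)
$ ./mysql -u root -p -e "CREATE DATABASE IF NOT EXISTS hive DEFAULT CHARACTER SET latin1;"
# verify that the remote-access grant for root took effect (look for the "%" host entry)
$ ./mysql -u root -p -e "SELECT host, user FROM mysql.user;"
```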
#### 5. Big data clusters

##### a) Start the Hadoop cluster

```
$ docker-compose -f docker-compose-hadoop.yml up -d
```

Once the cluster is up, format the namenode:

```
$ docker exec -it hadoop-master bash
$ cd /usr/local/hadoop/bin
$ hdfs namenode -format
```

Then start HDFS and YARN:

```
$ cd /usr/local/hadoop/sbin
$ ./start-all.sh
```

Visit http://localhost:50070 to check whether the cluster started successfully.

##### b) Start the Hive cluster

This depends on the MySQL container.

```
$ docker-compose -f docker-compose-hive.yml up -d
```

Starting the Hadoop cluster inside it works exactly as in a) above.

##### c) Start the Spark cluster

This also depends on the MySQL container.

```
$ docker-compose -f docker-compose-spark.yml up -d
```

Start the Hadoop cluster as in a), then start the Spark cluster:

```
$ sh /usr/local/spark/sbin/start-all.sh
```

Use the Pi-calculation example that ships with Spark to verify the setup:

```
/usr/local/spark/bin/spark-submit --master spark://hadoop-master:7077 --class org.apache.spark.examples.SparkPi /usr/local/spark/lib/spark-examples-1.6.2-hadoop2.2.0.jar 1000
```

The output from starting the Spark cluster looks like the following (a successful SparkPi run additionally prints the estimated value of Pi in its job output):

```
starting org.apache.spark.deploy.master.Master, logging to /usr/local/spark/logs/spark--org.apache.spark.deploy.master.Master-1-1bdfd98bccc7.out
hadoop-slave2: starting org.apache.spark.deploy.worker.Worker, logging to /usr/local/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-9dd7e2ebbf13.out
hadoop-slave3: starting org.apache.spark.deploy.worker.Worker, logging to /usr/local/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-97a87730dd03.out
hadoop-slave1: starting org.apache.spark.deploy.worker.Worker, logging to /usr/local/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-adb07707f15b.out
```
--------------------------------------------------------------------------------
/centos-java/Dockerfile:
--------------------------------------------------------------------------------
# root password
RUN echo 'root:!23$QweR' | chpasswd

# Work around the "Header V3 RSA/SHA256 Signature, key ID f4a80eb5: NOKEY" error
RUN rpm --import /etc/pki/rpm-gpg/RPM*

RUN \
  yum -y install \
  openssh openssh-server openssh-clients \
  sudo passwd wget &&\
  yum clean all

RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key
RUN ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key
RUN ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa
RUN cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys

# Configure sshd
RUN sshd-keygen
RUN sed -i "s/#UsePrivilegeSeparation.*/UsePrivilegeSeparation no/g" /etc/ssh/sshd_config
RUN sed -i "s/UsePAM.*/UsePAM no/g" /etc/ssh/sshd_config

RUN mkdir /var/run/sshd

RUN wget http://119.254.110.32:8081/download/jdk1.7.0_60.tar.gz && \
    tar -zxvf jdk1.7.0_60.tar.gz -C /usr/local/ && \
    rm -rf jdk1.7.0_60.tar.gz

#ADD ./jdk1.7.0_60.tar.gz /usr/local/

RUN mv /usr/local/jdk1.7.0_60 /usr/local/jdk1.7

ENV JAVA_HOME /usr/local/jdk1.7
ENV PATH $JAVA_HOME/bin:$PATH

EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]
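To sanity-check this base image on its own (build.sh normally builds it for you), it can be built and run directly; this is only an illustrative sketch, and the container name `centos-java-test` is made up for the example:

```
$ docker build -t tonywell/centos-java ./centos-java
$ docker run -d --name centos-java-test tonywell/centos-java
# the JDK installed above should be on the PATH
$ docker exec centos-java-test java -version
# sshd should be running as the foreground process started by CMD
$ docker top centos-java-test
$ docker rm -f centos-java-test
```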
--------------------------------------------------------------------------------
/conf/configuration.xsl:
--------------------------------------------------------------------------------
(XSL stylesheet; its markup was stripped in this dump. It is the standard configuration.xsl that renders configuration properties as an HTML table with name / value / description columns.)
25 | -------------------------------------------------------------------------------- /conf/log4j.properties: -------------------------------------------------------------------------------- 1 | ome default values that can be overridden by system properties 2 | zookeeper.root.logger=INFO, CONSOLE 3 | zookeeper.console.threshold=INFO 4 | zookeeper.log.dir=/opt/log 5 | zookeeper.log.file=zookeeper.log 6 | zookeeper.log.threshold=DEBUG 7 | zookeeper.tracelog.dir=/opt/log 8 | zookeeper.tracelog.file=zookeeper_trace.log 9 | # 10 | # ZooKeeper Logging Configuration 11 | # 12 | # Format is " (, )+ 13 | # DEFAULT: console appender only 14 | log4j.rootLogger=${zookeeper.root.logger} 15 | # Example with rolling log file 16 | #log4j.rootLogger=DEBUG, CONSOLE, ROLLINGFILE 17 | # Example with rolling log file and tracing 18 | #log4j.rootLogger=TRACE, CONSOLE, ROLLINGFILE, TRACEFILE 19 | # 20 | # Log INFO level and above messages to the console 21 | # 22 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 23 | log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold} 24 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 26 | # 27 | # Add ROLLINGFILE to rootLogger to get log file output 28 | # Log DEBUG level and above messages to a log file 29 | log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender 30 | log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold} 31 | log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file} 32 | # Max log file size of 10MB 33 | log4j.appender.ROLLINGFILE.MaxFileSize=10MB 34 | # uncomment the next line to limit number of backup files 35 | #log4j.appender.ROLLINGFILE.MaxBackupIndex=10 36 | log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout 37 | log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 38 | # 39 | # Add TRACEFILE to rootLogger to get log file output 40 | # Log DEBUG level and above messages to a log file 41 | log4j.appender.TRACEFILE=org.apache.log4j.FileAppender 42 | log4j.appender.TRACEFILE.Threshold=TRACE 43 | log4j.appender.TRACEFILE.File=${zookeeper.tracelog.dir}/${zookeeper.tracelog.file} 44 | log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout 45 | ### Notice we are including log4j's NDC here (%x) 46 | log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L][%x] - %m%n 47 | -------------------------------------------------------------------------------- /conf/zoo.cfg: -------------------------------------------------------------------------------- 1 | clientPort=2181 2 | tickTime=2000 3 | syncLimit=5 4 | initLimit=10 5 | dataDir=/opt/data 6 | dataLogDir=/opt/log 7 | server.1=zk1:2888:3888 8 | server.2=zk2:2888:3888 9 | server.3=zk3:2888:3888 10 | server.4=zk4:2888:3888 11 | server.5=zk5:2888:3888 12 | 13 | -------------------------------------------------------------------------------- /docker-compose-hadoop.yml: -------------------------------------------------------------------------------- 1 | hadoop-slave1: 2 | image: tonywell/docker-hadoop 3 | container_name: hadoop-slave1 4 | volumes: 5 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 6 | net: zoo 7 | hadoop-slave2: 8 | image: tonywell/docker-hadoop 9 | container_name: hadoop-slave2 10 | volumes: 11 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 12 | net: zoo 13 | hadoop-slave3: 14 | image: 
tonywell/docker-hadoop 15 | container_name: hadoop-slave3 16 | volumes: 17 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 18 | net: zoo 19 | hadoop-master: 20 | image: tonywell/docker-hadoop 21 | container_name: hadoop-master 22 | volumes: 23 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 24 | net: zoo 25 | ports: 26 | - "50070:50070" 27 | - "8088:8088" 28 | links: 29 | - hadoop-slave1 30 | - hadoop-slave2 31 | - hadoop-slave3 32 | -------------------------------------------------------------------------------- /docker-compose-hbase.yml: -------------------------------------------------------------------------------- 1 | hadoop-slave1: 2 | image: tonywell/docker-hbase 3 | container_name: hadoop-slave1 4 | volumes: 5 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 6 | - ./docker-hive/conf:/usr/local/hive/conf 7 | - ./docker-spark/conf:/usr/local/spark/conf 8 | - ./docker-hbase/conf:/usr/local/hbase/conf 9 | net: zoo 10 | hadoop-slave2: 11 | image: tonywell/docker-hbase 12 | container_name: hadoop-slave2 13 | volumes: 14 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 15 | - ./docker-hive/conf:/usr/local/hive/conf 16 | - ./docker-spark/conf:/usr/local/spark/conf 17 | - ./docker-hbase/conf:/usr/local/hbase/conf 18 | net: zoo 19 | hadoop-slave3: 20 | image: tonywell/docker-hbase 21 | container_name: hadoop-slave3 22 | volumes: 23 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 24 | - ./docker-hive/conf:/usr/local/hive/conf 25 | - ./docker-spark/conf:/usr/local/spark/conf 26 | - ./docker-hbase/conf:/usr/local/hbase/conf 27 | net: zoo 28 | hadoop-master: 29 | image: tonywell/docker-hbase 30 | container_name: hadoop-master 31 | volumes: 32 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 33 | - ./docker-hive/conf:/usr/local/hive/conf 34 | - ./docker-spark/conf:/usr/local/spark/conf 35 | - ./docker-hbase/conf:/usr/local/hbase/conf 36 | net: zoo 37 | #command: bash /usr/local/hadoop/bin/hdfs namenode -format -y && sh /usr/local/hadoop/sbin/start-all.sh -yes && sh /usr/local/spark/sbin/start-all.sh && sh /usr/local/hbase/bin/start-hbase.sh && ping localhost > /dev/null 38 | ports: 39 | - "50070:50070" 40 | - "9083:9083" 41 | - "8088:8088" 42 | - "8080:8080" 43 | - "8042:8042" 44 | - "16010:16010" 45 | links: 46 | - hadoop-slave1 47 | - hadoop-slave2 48 | - hadoop-slave3 49 | -------------------------------------------------------------------------------- /docker-compose-hive.yml: -------------------------------------------------------------------------------- 1 | hadoop-slave1: 2 | image: tonywell/docker-hive 3 | container_name: hadoop-slave1 4 | volumes: 5 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 6 | - ./docker-hive/conf:/usr/local/hive/conf 7 | net: zoo 8 | hadoop-slave2: 9 | image: tonywell/docker-hive 10 | container_name: hadoop-slave2 11 | volumes: 12 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 13 | - ./docker-hive/conf:/usr/local/hive/conf 14 | net: zoo 15 | hadoop-slave3: 16 | image: tonywell/docker-hive 17 | container_name: hadoop-slave3 18 | volumes: 19 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 20 | - ./docker-hive/conf:/usr/local/hive/conf 21 | net: zoo 22 | hadoop-master: 23 | image: tonywell/docker-hive 24 | container_name: hadoop-master 25 | volumes: 26 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 27 | - ./docker-hive/conf:/usr/local/hive/conf 28 | net: zoo 29 | ports: 30 | - "50070:50070" 31 | - "9083:9083" 32 | - "8088:8088" 33 | links: 34 | - hadoop-slave1 35 | - hadoop-slave2 36 | - hadoop-slave3 37 | 
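These compose files attach every container to the `zoo` network created in the README via the `net:` key. A quick way to confirm that the containers actually joined that network after `up -d` is sketched below (assuming a Docker version whose `docker network inspect` supports `--format`):

```
$ docker-compose -f docker-compose-hive.yml up -d
$ docker network inspect zoo --format '{{range .Containers}}{{.Name}} {{end}}'
# expected to list hadoop-master and hadoop-slave1..3, plus any zk/mysql containers already running
```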
-------------------------------------------------------------------------------- /docker-compose-mysql.yml: -------------------------------------------------------------------------------- 1 | hadoop-mysql: 2 | image: tonywell/docker-mysql 3 | container_name: hadoop-mysql 4 | volumes: 5 | - ./data:/usr/local/mysql-5.6.29/data 6 | net: zoo 7 | command: /usr/local/mysql-5.6.29/bin/mysqld_safe --user=mysql 8 | ports: 9 | - "3306:3306" 10 | -------------------------------------------------------------------------------- /docker-compose-spark.yml: -------------------------------------------------------------------------------- 1 | hadoop-slave1: 2 | image: tonywell/docker-spark 3 | container_name: hadoop-slave1 4 | volumes: 5 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 6 | - ./docker-hive/conf:/usr/local/hive/conf 7 | - ./docker-spark/conf:/usr/local/spark/conf 8 | net: zoo 9 | hadoop-slave2: 10 | image: tonywell/docker-spark 11 | container_name: hadoop-slave2 12 | volumes: 13 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 14 | - ./docker-hive/conf:/usr/local/hive/conf 15 | - ./docker-spark/conf:/usr/local/spark/conf 16 | net: zoo 17 | hadoop-slave3: 18 | image: tonywell/docker-spark 19 | container_name: hadoop-slave3 20 | volumes: 21 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 22 | - ./docker-hive/conf:/usr/local/hive/conf 23 | - ./docker-spark/conf:/usr/local/spark/conf 24 | net: zoo 25 | hadoop-master: 26 | image: tonywell/docker-spark 27 | container_name: hadoop-master 28 | volumes: 29 | - ./docker-hadoop/conf:/usr/local/hadoop/etc/hadoop 30 | - ./docker-hive/conf:/usr/local/hive/conf 31 | - ./docker-spark/conf:/usr/local/spark/conf 32 | net: zoo 33 | ports: 34 | - "50070:50070" 35 | - "9083:9083" 36 | - "8088:8088" 37 | - "8080:8080" 38 | - "8042:8042" 39 | links: 40 | - hadoop-slave1 41 | - hadoop-slave2 42 | - hadoop-slave3 43 | -------------------------------------------------------------------------------- /docker-compose-zk.yml: -------------------------------------------------------------------------------- 1 | zoo1: 2 | image: tonywell/docker-zk 3 | restart: always 4 | net: zoo 5 | container_name: zk1 6 | volumes: 7 | - ./docker-zk/zoo1:/opt/data 8 | - ./docker-zk/conf:/opt/zookeeper/conf 9 | ports: 10 | - "2181:2181" 11 | expose: 12 | - "2888" 13 | - "3888" 14 | zoo2: 15 | image: tonywell/docker-zk 16 | restart: always 17 | net: zoo 18 | container_name: zk2 19 | volumes: 20 | - ./docker-zk/zoo2:/opt/data 21 | - ./docker-zk/conf:/opt/zookeeper/conf 22 | ports: 23 | - "2182:2181" 24 | expose: 25 | - "2888" 26 | - "3888" 27 | zoo3: 28 | image: tonywell/docker-zk 29 | restart: always 30 | container_name: zk3 31 | net: zoo 32 | volumes: 33 | - ./docker-zk/zoo3:/opt/data 34 | - ./docker-zk/conf:/opt/zookeeper/conf 35 | ports: 36 | - "2183:2181" 37 | expose: 38 | - "2888" 39 | - "3888" 40 | zoo4: 41 | image: tonywell/docker-zk 42 | restart: always 43 | container_name: zk4 44 | net: zoo 45 | volumes: 46 | - ./docker-zk/zoo4:/opt/data 47 | - ./docker-zk/conf:/opt/zookeeper/conf 48 | ports: 49 | - "2184:2181" 50 | expose: 51 | - "2888" 52 | - "3888" 53 | zoo5: 54 | image: tonywell/docker-zk 55 | restart: always 56 | container_name: zk5 57 | net: zoo 58 | volumes: 59 | - ./docker-zk/zoo5:/opt/data 60 | - ./docker-zk/conf:/opt/zookeeper/conf 61 | ports: 62 | - "2185:2181" 63 | expose: 64 | - "2888" 65 | - "3888" 66 | -------------------------------------------------------------------------------- /docker-hadoop/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM tonywell/centos-java 2 | MAINTAINER tonywell 3 | 4 | RUN yum -y install which && yum clean all 5 | 6 | #下载Hadoop 7 | RUN wget http://www.eu.apache.org/dist/hadoop/common/hadoop-2.7.1/hadoop-2.7.1.tar.gz && \ 8 | tar -zvxf hadoop-2.7.1.tar.gz -C /usr/local/ && \ 9 | mv /usr/local/hadoop-2.7.1 /usr/local/hadoop && \ 10 | rm -rf hadoop-2.7.1.tar.gz 11 | 12 | ENV HADOOP_HOME /usr/local/hadoop 13 | ENV HADOOP_PREFIX /usr/local/hadoop 14 | ENV HADOOP_COMMON_HOME /usr/local/hadoop 15 | ENV HADOOP_HDFS_HOME /usr/local/hadoop 16 | ENV HADOOP_MAPRED_HOME /usr/local/hadoop 17 | ENV HADOOP_YARN_HOME /usr/local/hadoop 18 | ENV HADOOP_CONF_DIR /usr/local/hadoop/etc/hadoop 19 | ENV YARN_CONF_DIR $HADOOP_PREFIX/etc/hadoop 20 | 21 | ENV PATH $HADOOP_HOME/bin:$PATH 22 | -------------------------------------------------------------------------------- /docker-hadoop/conf/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | fs.default.name 4 | hdfs://hadoop-master:54310 5 | 6 | 7 | fs.trash.interval 8 | 3600 9 | 10 | 11 | 12 | ha.failover-controller.active-standby-elector.zk.op.retries 13 | 120 14 | 15 | 16 | 17 | ha.zookeeper.quorum 18 | zk1:2181,zk2:2181,zk3:2181,zk4:2181,zk5:2181 19 | 20 | 21 | 22 | hadoop.http.authentication.simple.anonymous.allowed 23 | true 24 | 25 | 26 | -------------------------------------------------------------------------------- /docker-hadoop/conf/hadoop-env.sh: -------------------------------------------------------------------------------- 1 | # or more contributor license agreements. See the NOTICE file 2 | # distributed with this work for additional information 3 | # regarding copyright ownership. The ASF licenses this file 4 | # to you under the Apache License, Version 2.0 (the 5 | # "License"); you may not use this file except in compliance 6 | # with the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Set Hadoop-specific environment variables here. 17 | 18 | # The only required environment variable is JAVA_HOME. All others are 19 | # optional. When running a distributed configuration it is best to 20 | # set JAVA_HOME in this file, so that it is correctly defined on 21 | # remote nodes. 22 | 23 | # The java implementation to use. 24 | export JAVA_HOME=/usr/local/jdk1.7 25 | 26 | # The jsvc implementation to use. Jsvc is required to run secure datanodes. 27 | #export JSVC_HOME=${JSVC_HOME} 28 | 29 | #export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"} 30 | 31 | # Extra Java CLASSPATH elements. Automatically insert capacity-scheduler. 32 | for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do 33 | if [ "$HADOOP_CLASSPATH" ]; then 34 | export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f 35 | else 36 | export HADOOP_CLASSPATH=$f 37 | fi 38 | done 39 | 40 | # The maximum amount of heap to use, in MB. Default is 1000. 41 | #export HADOOP_HEAPSIZE= 42 | #export HADOOP_NAMENODE_INIT_HEAPSIZE="" 43 | 44 | # Extra Java runtime options. Empty by default. 
45 | export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true" 46 | 47 | # Command specific options appended to HADOOP_OPTS when specified 48 | export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS" 49 | export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS" 50 | 51 | export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS" 52 | 53 | export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS" 54 | export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS" 55 | 56 | # The following applies to multiple commands (fs, dfs, fsck, distcp etc) 57 | export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS" 58 | #HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS" 59 | 60 | # On secure datanodes, user to run the datanode as after dropping privileges 61 | export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER} 62 | 63 | # Where log files are stored. $HADOOP_HOME/logs by default. 64 | #export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER 65 | 66 | # Where log files are stored in the secure data environment. 67 | export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER} 68 | 69 | # The directory where pid files are stored. /tmp by default. 70 | # NOTE: this should be set to a directory that can only be written to by 71 | # the user that will run the hadoop daemons. Otherwise there is the 72 | # potential for a symlink attack. 73 | export HADOOP_PID_DIR=${HADOOP_PID_DIR} 74 | export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR} 75 | 76 | # A string representing this instance of hadoop. $USER by default. 77 | export HADOOP_IDENT_STRING=$USER 78 | -------------------------------------------------------------------------------- /docker-hadoop/conf/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | dfs.namenode.name.dir 4 | file:/works/dfs/name 5 | 6 | 7 | 8 | dfs.datanode.data.dir 9 | file:/works/dfs/data 10 | 11 | 12 | 13 | dfs.namenode.checkpoint.dir 14 | file:/works/dfs/namesecondary 15 | 16 | 17 | 18 | dfs.namenode.secondary.http-address 19 | hadoop-master:50090 20 | 21 | 22 | 23 | dfs.replication 24 | 2 25 | 26 | 27 | 28 | dfs.permission 29 | false 30 | 31 | 32 | -------------------------------------------------------------------------------- /docker-hadoop/conf/log4j.properteis: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # Define some default values that can be overridden by system properties 18 | hadoop.root.logger=WARN,console 19 | hadoop.log.dir=. 20 | hadoop.log.file=hadoop.log 21 | 22 | # Define the root logger to the system property "hadoop.root.logger". 23 | log4j.rootLogger=${hadoop.root.logger}, EventCounter 24 | 25 | # Logging Threshold 26 | log4j.threshold=ALL 27 | 28 | # Null Appender 29 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 30 | 31 | # 32 | # Rolling File Appender - cap space usage at 5gb. 33 | # 34 | hadoop.log.maxfilesize=256MB 35 | hadoop.log.maxbackupindex=20 36 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 37 | log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} 38 | 39 | log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize} 40 | log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex} 41 | 42 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 43 | 44 | # Pattern format: Date LogLevel LoggerName LogMessage 45 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 46 | # Debugging Pattern format 47 | #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 48 | 49 | 50 | # 51 | # Daily Rolling File Appender 52 | # 53 | 54 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 55 | log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} 56 | 57 | # Rollver at midnight 58 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 59 | 60 | # 30-day backup 61 | #log4j.appender.DRFA.MaxBackupIndex=30 62 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 63 | 64 | # Pattern format: Date LogLevel LoggerName LogMessage 65 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 66 | # Debugging Pattern format 67 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 68 | 69 | 70 | # 71 | # console 72 | # Add "console" to rootlogger above if you want to use this 73 | # 74 | 75 | log4j.appender.console=org.apache.log4j.ConsoleAppender 76 | log4j.appender.console.target=System.err 77 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 78 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 79 | 80 | # 81 | # TaskLog Appender 82 | # 83 | 84 | #Default values 85 | hadoop.tasklog.taskid=null 86 | hadoop.tasklog.iscleanup=false 87 | hadoop.tasklog.noKeepSplits=4 88 | hadoop.tasklog.totalLogFileSize=100 89 | hadoop.tasklog.purgeLogSplits=true 90 | hadoop.tasklog.logsRetainHours=12 91 | 92 | log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender 93 | log4j.appender.TLA.taskId=${hadoop.tasklog.taskid} 94 | log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup} 95 | log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize} 96 | 97 | log4j.appender.TLA.layout=org.apache.log4j.PatternLayout 98 | log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 99 | 100 | # 101 | # HDFS block state change log from block manager 102 | # 103 | # Uncomment the following to suppress normal block state change 104 | # messages from BlockManager in NameNode. 
105 | #log4j.logger.BlockStateChange=WARN 106 | 107 | # 108 | #Security appender 109 | # 110 | hadoop.security.logger=INFO,NullAppender 111 | hadoop.security.log.maxfilesize=256MB 112 | hadoop.security.log.maxbackupindex=20 113 | log4j.category.SecurityLogger=${hadoop.security.logger} 114 | hadoop.security.log.file=SecurityAuth-${user.name}.audit 115 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender 116 | log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} 117 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout 118 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 119 | log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize} 120 | log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex} 121 | 122 | # 123 | # Daily Rolling Security appender 124 | # 125 | log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender 126 | log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} 127 | log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout 128 | log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 129 | log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd 130 | 131 | # 132 | # hadoop configuration logging 133 | # 134 | 135 | # Uncomment the following line to turn off configuration deprecation warnings. 136 | # log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN 137 | 138 | # 139 | # hdfs audit logging 140 | # 141 | hdfs.audit.logger=INFO,NullAppender 142 | hdfs.audit.log.maxfilesize=256MB 143 | hdfs.audit.log.maxbackupindex=20 144 | log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} 145 | log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false 146 | log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender 147 | log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log 148 | log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout 149 | log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 150 | log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize} 151 | log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex} 152 | 153 | # 154 | # mapred audit logging 155 | # 156 | mapred.audit.logger=INFO,NullAppender 157 | mapred.audit.log.maxfilesize=256MB 158 | mapred.audit.log.maxbackupindex=20 159 | log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger} 160 | log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false 161 | log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender 162 | log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log 163 | log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout 164 | log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 165 | log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize} 166 | log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex} 167 | 168 | # Custom Logging levels 169 | 170 | #log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG 171 | #log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG 172 | #log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG 173 | 174 | # Jets3t library 175 | log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR 176 | 177 | # 178 | # Event Counter Appender 179 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 
180 | # 181 | log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter 182 | 183 | # 184 | # Job Summary Appender 185 | # 186 | # Use following logger to send summary to separate file defined by 187 | # hadoop.mapreduce.jobsummary.log.file : 188 | # hadoop.mapreduce.jobsummary.logger=INFO,JSA 189 | # 190 | hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger} 191 | hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log 192 | hadoop.mapreduce.jobsummary.log.maxfilesize=256MB 193 | hadoop.mapreduce.jobsummary.log.maxbackupindex=20 194 | log4j.appender.JSA=org.apache.log4j.RollingFileAppender 195 | log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file} 196 | log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize} 197 | log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex} 198 | log4j.appender.JSA.layout=org.apache.log4j.PatternLayout 199 | log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 200 | log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger} 201 | log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false 202 | 203 | # 204 | # Yarn ResourceManager Application Summary Log 205 | # 206 | # Set the ResourceManager summary log filename 207 | yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log 208 | # Set the ResourceManager summary log level and appender 209 | yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger} 210 | #yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY 211 | 212 | # To enable AppSummaryLogging for the RM, 213 | # set yarn.server.resourcemanager.appsummary.logger to 214 | # ,RMSUMMARY in hadoop-env.sh 215 | 216 | # Appender for ResourceManager Application Summary Log 217 | # Requires the following properties to be set 218 | # - hadoop.log.dir (Hadoop Log directory) 219 | # - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename) 220 | # - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender) 221 | 222 | log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger} 223 | log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false 224 | log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender 225 | log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file} 226 | log4j.appender.RMSUMMARY.MaxFileSize=256MB 227 | log4j.appender.RMSUMMARY.MaxBackupIndex=20 228 | log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout 229 | log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 230 | -------------------------------------------------------------------------------- /docker-hadoop/conf/mapred-env.sh: -------------------------------------------------------------------------------- 1 | censed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # export JAVA_HOME=/home/y/libexec/jdk1.6.0/ 17 | 18 | export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000 19 | 20 | export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA 21 | 22 | #export HADOOP_JOB_HISTORYSERVER_OPTS= 23 | #export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored. $HADOOP_MAPRED_HOME/logs by default. 24 | #export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger. 25 | #export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default. 26 | #export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default 27 | #export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0. 28 | -------------------------------------------------------------------------------- /docker-hadoop/conf/mapred-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 14 | 15 | 16 | 17 | 18 | 19 | mapreduce.jobhistory.address 20 | 0.0.0.0:10020 21 | 22 | 23 | 24 | mapreduce.jobhistory.webapp.address 25 | 0.0.0.0:19888 26 | 27 | 28 | 29 | mapreduce.framework.name 30 | yarn 31 | 32 | 33 | 34 | mapreduce.job.tracker 35 | hdfs://hadoop-master:9001 36 | true 37 | 38 | 39 | 40 | mapreduce.map.memory.mb 41 | 1024 42 | 43 | 44 | 45 | mapreduce.map.java.opts 46 | -Xmx1024M 47 | 48 | 49 | 50 | mapreduce.reduce.memory.mb 51 | 1024 52 | 53 | 54 | 55 | mapreduce.reduce.java.opts 56 | -Xmx1024M 57 | 58 | 59 | 60 | mapreduce.task.io.sort.mb 61 | 512 62 | 63 | 64 | 65 | mapreduce.task.io.sort.factor 66 | 100 67 | 68 | 69 | 70 | mapreduce.reduce.shuffle.parallelcopies 71 | 50 72 | 73 | 74 | 75 | mapreduce.jobtracker.jobhistory.location 76 | /root/hadoop-2.7.2/logs/history 77 | 78 | 79 | 80 | mapred.system.dir 81 | file:/works/hadoop/mapred/system 82 | true 83 | 84 | 85 | 86 | mapred.local.dir 87 | file:/works/hadoop/mapred/local 88 | true 89 | 90 | 91 | -------------------------------------------------------------------------------- /docker-hadoop/conf/masters: -------------------------------------------------------------------------------- 1 | doop-master 2 | -------------------------------------------------------------------------------- /docker-hadoop/conf/slaves: -------------------------------------------------------------------------------- 1 | hadoop-slave1 2 | hadoop-slave2 3 | hadoop-slave3 4 | -------------------------------------------------------------------------------- /docker-hadoop/conf/yarn-env.sh: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # User for YARN daemons 17 | export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn} 18 | 19 | # resolve links - $0 may be a softlink 20 | export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}" 21 | 22 | # some Java parameters 23 | # export JAVA_HOME=/home/y/libexec/jdk1.6.0/ 24 | if [ "$JAVA_HOME" != "" ]; then 25 | #echo "run java in $JAVA_HOME" 26 | JAVA_HOME=$JAVA_HOME 27 | fi 28 | 29 | if [ "$JAVA_HOME" = "" ]; then 30 | echo "Error: JAVA_HOME is not set." 31 | exit 1 32 | fi 33 | 34 | JAVA=$JAVA_HOME/bin/java 35 | JAVA_HEAP_MAX=-Xmx1000m 36 | 37 | # For setting YARN specific HEAP sizes please use this 38 | # Parameter and set appropriately 39 | # YARN_HEAPSIZE=1000 40 | 41 | # check envvars which might override default args 42 | if [ "$YARN_HEAPSIZE" != "" ]; then 43 | JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m" 44 | fi 45 | 46 | # Resource Manager specific parameters 47 | 48 | # Specify the max Heapsize for the ResourceManager using a numerical value 49 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set 50 | # the value to 1000. 51 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS 52 | # and/or YARN_RESOURCEMANAGER_OPTS. 53 | # If not specified, the default value will be picked from either YARN_HEAPMAX 54 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. 55 | #export YARN_RESOURCEMANAGER_HEAPSIZE=1000 56 | 57 | # Specify the max Heapsize for the timeline server using a numerical value 58 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set 59 | # the value to 1000. 60 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS 61 | # and/or YARN_TIMELINESERVER_OPTS. 62 | # If not specified, the default value will be picked from either YARN_HEAPMAX 63 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. 64 | #export YARN_TIMELINESERVER_HEAPSIZE=1000 65 | 66 | # Specify the JVM options to be used when starting the ResourceManager. 67 | # These options will be appended to the options specified as YARN_OPTS 68 | # and therefore may override any similar flags set in YARN_OPTS 69 | #export YARN_RESOURCEMANAGER_OPTS= 70 | 71 | # Node Manager specific parameters 72 | 73 | # Specify the max Heapsize for the NodeManager using a numerical value 74 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set 75 | # the value to 1000. 76 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS 77 | # and/or YARN_NODEMANAGER_OPTS. 78 | # If not specified, the default value will be picked from either YARN_HEAPMAX 79 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. 80 | #export YARN_NODEMANAGER_HEAPSIZE=1000 81 | 82 | # Specify the JVM options to be used when starting the NodeManager. 
83 | # These options will be appended to the options specified as YARN_OPTS 84 | # and therefore may override any similar flags set in YARN_OPTS 85 | #export YARN_NODEMANAGER_OPTS= 86 | 87 | # so that filenames w/ spaces are handled correctly in loops below 88 | IFS= 89 | 90 | 91 | # default log directory & file 92 | if [ "$YARN_LOG_DIR" = "" ]; then 93 | YARN_LOG_DIR="$HADOOP_YARN_HOME/logs" 94 | fi 95 | if [ "$YARN_LOGFILE" = "" ]; then 96 | YARN_LOGFILE='yarn.log' 97 | fi 98 | 99 | # default policy file for service-level authorization 100 | if [ "$YARN_POLICYFILE" = "" ]; then 101 | YARN_POLICYFILE="hadoop-policy.xml" 102 | fi 103 | 104 | # restore ordinary behaviour 105 | unset IFS 106 | 107 | 108 | YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR" 109 | YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR" 110 | YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE" 111 | YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE" 112 | YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME" 113 | YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING" 114 | YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" 115 | YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" 116 | if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then 117 | YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH" 118 | fi 119 | YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE" 120 | -------------------------------------------------------------------------------- /docker-hadoop/conf/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | yarn.resourcemanager.hostname 6 | hadoop-master 7 | 8 | 9 | 10 | yarn.nodemanager.vmem-check-enabled 11 | false 12 | 13 | 14 | 15 | yarn.nodemanager.hostname 16 | 0.0.0.0 17 | 18 | 19 | 20 | yarn.nodemanager.aux-services 21 | mapreduce_shuffle 22 | 23 | 24 | 25 | yarn.nodemanager.aux-services.mapreduce_shuffle.class 26 | org.apache.hadoop.mapred.ShuffleHandler 27 | 28 | 29 | 30 | yarn.resourcemanager.scheduler.class 31 | org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /docker-hbase/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tonywell/docker-spark 2 | 3 | MAINTAINER tonywell 4 | 5 | ENV HBASE_HOME=/usr/local/hbase 6 | ENV PATH=$PATH:$HBASE_HOME/bin 7 | 8 | #RUN wget http://archive.apache.org/dist/hbase/1.1.2/hbase-1.1.2-bin.tar.gz && \ 9 | RUN wget ftp://temp:temp@192.168.50.104/centos7/hbase/hbase-1.1.2-bin.tar.gz && \ 10 | tar -xzvf hbase-1.1.2-bin.tar.gz -C /usr/local/ && \ 11 | mv /usr/local/hbase-1.1.2 $HBASE_HOME && \ 12 | rm -rf hbase-1.1.2-bin.tar.gz 13 | 14 | # Hdfs ports 15 | EXPOSE 9000 50010 50020 50070 50075 50090 16 | # See https://issues.apache.org/jira/browse/HDFS-9427 17 | EXPOSE 9871 9870 9820 9869 9868 9867 9866 9865 9864 18 | # Mapred ports 19 | EXPOSE 19888 20 | #Yarn ports 21 | EXPOSE 8030 8031 8032 8033 8040 8042 8088 8188 22 | #Other ports 23 | EXPOSE 49707 2122 24 | -------------------------------------------------------------------------------- /docker-hbase/conf/backup-masters: -------------------------------------------------------------------------------- 1 | hadoop-slave1 2 | -------------------------------------------------------------------------------- /docker-hbase/conf/hbase-env.sh: 
-------------------------------------------------------------------------------- 1 | # 2 | #/** 3 | # * Licensed to the Apache Software Foundation (ASF) under one 4 | # * or more contributor license agreements. See the NOTICE file 5 | # * distributed with this work for additional information 6 | # * regarding copyright ownership. The ASF licenses this file 7 | # * to you under the Apache License, Version 2.0 (the 8 | # * "License"); you may not use this file except in compliance 9 | # * with the License. You may obtain a copy of the License at 10 | # * 11 | # * http://www.apache.org/licenses/LICENSE-2.0 12 | # * 13 | # * Unless required by applicable law or agreed to in writing, software 14 | # * distributed under the License is distributed on an "AS IS" BASIS, 15 | # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # * See the License for the specific language governing permissions and 17 | # * limitations under the License. 18 | # */ 19 | 20 | # Set environment variables here. 21 | 22 | # This script sets variables multiple times over the course of starting an hbase process, 23 | # so try to keep things idempotent unless you want to take an even deeper look 24 | # into the startup scripts (bin/hbase, etc.) 25 | 26 | # The java implementation to use. Java 1.7+ required. 27 | # export JAVA_HOME=/usr/java/jdk1.6.0/ 28 | export JAVA_HOME=/usr/local/jdk1.7 29 | 30 | # Extra Java CLASSPATH elements. Optional. 31 | # export HBASE_CLASSPATH= 32 | 33 | # The maximum amount of heap to use. Default is left to JVM default. 34 | # export HBASE_HEAPSIZE=1G 35 | 36 | # Uncomment below if you intend to use off heap cache. For example, to allocate 8G of 37 | # offheap, set the value to "8G". 38 | # export HBASE_OFFHEAPSIZE=1G 39 | 40 | # Extra Java runtime options. 41 | # Below are what we set by default. May only work with SUN JVM. 42 | # For more on why as well as other possible settings, 43 | # see http://wiki.apache.org/hadoop/PerformanceTuning 44 | export HBASE_OPTS="-XX:+UseConcMarkSweepGC" 45 | 46 | # Uncomment one of the below three options to enable java garbage collection logging for the server-side processes. 47 | 48 | # This enables basic gc logging to the .out file. 49 | # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps" 50 | 51 | # This enables basic gc logging to its own file. 52 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 53 | # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:" 54 | 55 | # This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+. 56 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 57 | # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc: -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M" 58 | 59 | # Uncomment one of the below three options to enable java garbage collection logging for the client processes. 60 | 61 | # This enables basic gc logging to the .out file. 62 | # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps" 63 | 64 | # This enables basic gc logging to its own file. 65 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 
66 | # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:" 67 | 68 | # This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+. 69 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 70 | # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc: -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M" 71 | 72 | # See the package documentation for org.apache.hadoop.hbase.io.hfile for other configurations 73 | # needed setting up off-heap block caching. 74 | 75 | # Uncomment and adjust to enable JMX exporting 76 | # See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access. 77 | # More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html 78 | # NOTE: HBase provides an alternative JMX implementation to fix the random ports issue, please see JMX 79 | # section in HBase Reference Guide for instructions. 80 | 81 | # export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false" 82 | # export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10101" 83 | # export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10102" 84 | # export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10103" 85 | # export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10104" 86 | # export HBASE_REST_OPTS="$HBASE_REST_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10105" 87 | 88 | # File naming hosts on which HRegionServers will run. $HBASE_HOME/conf/regionservers by default. 89 | # export HBASE_REGIONSERVERS=${HBASE_HOME}/conf/regionservers 90 | 91 | # Uncomment and adjust to keep all the Region Server pages mapped to be memory resident 92 | #HBASE_REGIONSERVER_MLOCK=true 93 | #HBASE_REGIONSERVER_UID="hbase" 94 | 95 | # File naming hosts on which backup HMaster will run. $HBASE_HOME/conf/backup-masters by default. 96 | # export HBASE_BACKUP_MASTERS=${HBASE_HOME}/conf/backup-masters 97 | 98 | # Extra ssh options. Empty by default. 99 | # export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR" 100 | 101 | # Where log files are stored. $HBASE_HOME/logs by default. 102 | # export HBASE_LOG_DIR=${HBASE_HOME}/logs 103 | 104 | # Enable remote JDWP debugging of major HBase processes. Meant for Core Developers 105 | # export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8070" 106 | # export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8071" 107 | # export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8072" 108 | # export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8073" 109 | 110 | # A string representing this instance of hbase. $USER by default. 111 | # export HBASE_IDENT_STRING=$USER 112 | 113 | # The scheduling priority for daemon processes. See 'man nice'. 114 | # export HBASE_NICENESS=10 115 | 116 | # The directory where pid files are stored. /tmp by default. 
117 | # export HBASE_PID_DIR=/var/hadoop/pids 118 | 119 | # Seconds to sleep between slave commands. Unset by default. This 120 | # can be useful in large clusters, where, e.g., slave rsyncs can 121 | # otherwise arrive faster than the master can service them. 122 | # export HBASE_SLAVE_SLEEP=0.1 123 | 124 | # Tell HBase whether it should manage it's own instance of Zookeeper or not. 125 | export HBASE_MANAGES_ZK=false 126 | 127 | # The default log rolling policy is RFA, where the log file is rolled as per the size defined for the 128 | # RFA appender. Please refer to the log4j.properties file to see more details on this appender. 129 | # In case one needs to do log rolling on a date change, one should set the environment property 130 | # HBASE_ROOT_LOGGER to ",DRFA". 131 | # For example: 132 | # HBASE_ROOT_LOGGER=INFO,DRFA 133 | # The reason for changing default to RFA is to avoid the boundary case of filling out disk space as 134 | # DRFA doesn't put any cap on the log size. Please refer to HBase-5655 for more context. 135 | -------------------------------------------------------------------------------- /docker-hbase/conf/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | hbase.rootdir 26 | hdfs://hadoop-master:54310/hbase 27 | 28 | 29 | hbase.zookeeper.quorum 30 | zk1,zk2,z3 31 | 32 | 33 | hbase.cluster.distributed 34 | true 35 | 36 | 37 | hbase.tmp.dir 38 | /hworks/hbase/tmp 39 | 40 | 41 | -------------------------------------------------------------------------------- /docker-hbase/conf/regionservers: -------------------------------------------------------------------------------- 1 | hadoop-master 2 | hadoop-slave1 3 | hadoop-slave2 4 | hadoop-slave3 5 | -------------------------------------------------------------------------------- /docker-hive/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tonywell/docker-hadoop 2 | 3 | MAINTAINER tonywell 4 | 5 | ENV HIVE_HOME=/usr/local/hive 6 | 7 | RUN wget http://mirror.bit.edu.cn/apache/hive/hive-1.2.1/apache-hive-1.2.1-bin.tar.gz && \ 8 | tar -zvxf apache-hive-1.2.1-bin.tar.gz -C /usr/local/ && \ 9 | mv /usr/local/apache-hive-1.2.1-bin /usr/local/hive && \ 10 | rm apache-hive-1.2.1-bin.tar.gz 11 | 12 | RUN wget http://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-5.1.39.tar.gz && \ 13 | tar -zvxf mysql-connector-java-5.1.39.tar.gz -C /usr/local/ && \ 14 | mv /usr/local/mysql-connector-java-5.1.39/mysql-connector-java-5.1.39-bin.jar $HIVE_HOME/lib/ && \ 15 | rm -rf /usr/local/mysql-connector-java-5.1.39 16 | 17 | RUN mkdir -p /usr/hive/warehouse && mkdir -p /usr/hive/log 18 | 19 | ENV PATH=$PATH:$HIVE_HOME/bin:. 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /docker-hive/conf/hive-env.sh: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Set Hive and Hadoop environment variables here. These variables can be used 18 | # to control the execution of Hive. It should be used by admins to configure 19 | # the Hive installation (so that users do not have to set environment variables 20 | # or set command line parameters to get correct behavior). 21 | # 22 | # The hive service being invoked (CLI/HWI etc.) is available via the environment 23 | # variable SERVICE 24 | 25 | 26 | # Hive Client memory usage can be an issue if a large number of clients 27 | # are running at the same time. The flags below have been useful in 28 | # reducing memory usage: 29 | # 30 | # if [ "$SERVICE" = "cli" ]; then 31 | # if [ -z "$DEBUG" ]; then 32 | # export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit" 33 | # else 34 | # export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit" 35 | # fi 36 | # fi 37 | 38 | # The heap size of the jvm stared by hive shell script can be controlled via: 39 | # 40 | # export HADOOP_HEAPSIZE=1024 41 | # 42 | # Larger heap size may be required when running queries over large number of files or partitions. 43 | # By default hive shell scripts use a heap size of 256 (MB). Larger heap size would also be 44 | # appropriate for hive server (hwi etc). 45 | 46 | # Set HADOOP_HOME to point to a specific hadoop install directory 47 | HADOOP_HOME=/usr/local/hadoop 48 | 49 | # Hive Configuration Directory can be controlled by: 50 | export HIVE_CONF_DIR=/usr/local/hive/conf 51 | 52 | # Folder containing extra ibraries required for hive compilation/execution can be controlled by: 53 | export HIVE_AUX_JARS_PATH=/usr/local/hive/lib 54 | -------------------------------------------------------------------------------- /docker-hive/conf/hive-exec-log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # Define some default values that can be overridden by system properties 18 | hive.log.threshold=ALL 19 | hive.root.logger=INFO,FA 20 | hive.log.dir=${java.io.tmpdir}/${user.name} 21 | hive.query.id=hadoop 22 | hive.log.file=${hive.query.id}.log 23 | 24 | # Define the root logger to the system property "hadoop.root.logger". 25 | log4j.rootLogger=${hive.root.logger}, EventCounter 26 | 27 | # Logging Threshold 28 | log4j.threshhold=${hive.log.threshold} 29 | 30 | # 31 | # File Appender 32 | # 33 | 34 | log4j.appender.FA=org.apache.log4j.FileAppender 35 | log4j.appender.FA.File=${hive.log.dir}/${hive.log.file} 36 | log4j.appender.FA.layout=org.apache.log4j.PatternLayout 37 | 38 | # Pattern format: Date LogLevel LoggerName LogMessage 39 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 40 | # Debugging Pattern format 41 | log4j.appender.FA.layout.ConversionPattern=%d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n 42 | 43 | 44 | # 45 | # console 46 | # Add "console" to rootlogger above if you want to use this 47 | # 48 | 49 | log4j.appender.console=org.apache.log4j.ConsoleAppender 50 | log4j.appender.console.target=System.err 51 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 52 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} [%t] %p %c{2}: %m%n 53 | 54 | #custom logging levels 55 | #log4j.logger.xxx=DEBUG 56 | 57 | # 58 | # Event Counter Appender 59 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 60 | # 61 | log4j.appender.EventCounter=org.apache.hadoop.hive.shims.HiveEventCounter 62 | 63 | 64 | log4j.category.DataNucleus=ERROR,FA 65 | log4j.category.Datastore=ERROR,FA 66 | log4j.category.Datastore.Schema=ERROR,FA 67 | log4j.category.JPOX.Datastore=ERROR,FA 68 | log4j.category.JPOX.Plugin=ERROR,FA 69 | log4j.category.JPOX.MetaData=ERROR,FA 70 | log4j.category.JPOX.Query=ERROR,FA 71 | log4j.category.JPOX.General=ERROR,FA 72 | log4j.category.JPOX.Enhancer=ERROR,FA 73 | 74 | 75 | # Silence useless ZK logs 76 | log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN,FA 77 | log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO=WARN,FA 78 | -------------------------------------------------------------------------------- /docker-hive/conf/hive-log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Define some default values that can be overridden by system properties 18 | hive.log.threshold=ALL 19 | hive.root.logger=INFO,DRFA 20 | hive.log.dir=${java.io.tmpdir}/${user.name} 21 | hive.log.file=hive.log 22 | 23 | # Define the root logger to the system property "hadoop.root.logger". 
24 | log4j.rootLogger=${hive.root.logger}, EventCounter 25 | 26 | # Logging Threshold 27 | log4j.threshold=${hive.log.threshold} 28 | 29 | # 30 | # Daily Rolling File Appender 31 | # 32 | # Use the PidDailyerRollingFileAppend class instead if you want to use separate log files 33 | # for different CLI session. 34 | # 35 | # log4j.appender.DRFA=org.apache.hadoop.hive.ql.log.PidDailyRollingFileAppender 36 | 37 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 38 | 39 | log4j.appender.DRFA.File=${hive.log.dir}/${hive.log.file} 40 | 41 | # Rollver at midnight 42 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 43 | 44 | # 30-day backup 45 | #log4j.appender.DRFA.MaxBackupIndex=30 46 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 47 | 48 | # Pattern format: Date LogLevel LoggerName LogMessage 49 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 50 | # Debugging Pattern format 51 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n 52 | 53 | 54 | # 55 | # console 56 | # Add "console" to rootlogger above if you want to use this 57 | # 58 | 59 | log4j.appender.console=org.apache.log4j.ConsoleAppender 60 | log4j.appender.console.target=System.err 61 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 62 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n 63 | log4j.appender.console.encoding=UTF-8 64 | 65 | #custom logging levels 66 | #log4j.logger.xxx=DEBUG 67 | 68 | # 69 | # Event Counter Appender 70 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 71 | # 72 | log4j.appender.EventCounter=org.apache.hadoop.hive.shims.HiveEventCounter 73 | 74 | 75 | log4j.category.DataNucleus=ERROR,DRFA 76 | log4j.category.Datastore=ERROR,DRFA 77 | log4j.category.Datastore.Schema=ERROR,DRFA 78 | log4j.category.JPOX.Datastore=ERROR,DRFA 79 | log4j.category.JPOX.Plugin=ERROR,DRFA 80 | log4j.category.JPOX.MetaData=ERROR,DRFA 81 | log4j.category.JPOX.Query=ERROR,DRFA 82 | log4j.category.JPOX.General=ERROR,DRFA 83 | log4j.category.JPOX.Enhancer=ERROR,DRFA 84 | 85 | 86 | # Silence useless ZK logs 87 | log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN,DRFA 88 | log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO=WARN,DRFA 89 | -------------------------------------------------------------------------------- /docker-mysql/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos 2 | 3 | MAINTAINER tonywell 4 | 5 | # root password 6 | RUN echo 'root:!23$QweR' | chpasswd 7 | 8 | # 为解决Header V3 RSA/SHA256 Signature, key ID f4a80eb5: NOKEY问题 9 | RUN rpm --import /etc/pki/rpm-gpg/RPM* 10 | 11 | RUN \ 12 | yum -y install \ 13 | openssh openssh-server openssh-clients gcc gcc-c++ make autoconf bison ncurses-devel which \ 14 | sudo passwd wget &&\ 15 | yum clean all 16 | 17 | # 设置sshd 18 | RUN sshd-keygen 19 | RUN sed -i "s/#UsePrivilegeSeparation.*/UsePrivilegeSeparation no/g" /etc/ssh/sshd_config 20 | RUN sed -i "s/UsePAM.*/UsePAM no/g" /etc/ssh/sshd_config 21 | 22 | RUN mkdir /var/run/sshd 23 | 24 | RUN mkdir /opt/software 25 | 26 | #解决Could not find (the correct version of) boost 问题 27 | #RUN wget http://120.52.72.22/nchc.dl.sourceforge.net/c3pr90ntc0td/project/boost/boost/1.59.0/boost_1_59_0.tar.gz && \ 28 | # tar -zvxf boost_1_59_0.tar.gz && cd boost_1_59_0 && \ 29 | # ./bootstrap.sh && \ 30 | # ./b2 stage threading=multi link=shared && \ 31 | # ./b2 install threading=multi link=shared && \ 32 
| # cd ../ && rm -rf boost_1_59_0* 33 | 34 | RUN wget http://www.cmake.org/files/v3.0/cmake-3.0.1.tar.gz && \ 35 | tar zxvf cmake-3.0.1.tar.gz && \ 36 | cd cmake-3.0.1 && ./configure --prefix=/usr/local/cmake && gmake && \ 37 | make && make install && \ 38 | cd ../ && rm -rf cmake-3.0.1* 39 | 40 | 41 | 42 | #源码安装mysql 43 | RUN wget ftp://temp:temp@192.168.50.104/centos7/mysql-5.6.29.tar.gz && \ 44 | tar -zxvf mysql-5.6.29.tar.gz -C /opt/software/ && \ 45 | rm mysql-5.6.29.tar.gz && \ 46 | cd /opt/software/mysql-5.6.29 && \ 47 | /usr/local/cmake/bin/cmake . -DCMAKE_INSTALL_PREFIX=/usr/local/mysql-5.6.29 -DMYSQL_DATADIR=/usr/local/mysql-5.6.29/data -DSYSCONFDIR=/etc -DWITH_INNOBASE_STORAGE_ENGINE=1 -DWITH_ARCHIVE_STORAGE_ENGINE=1 -DWITH_BLACKHOLE_STORAGE_ENGINE=1 -DWITH_PARTITION_STORAGE_ENGINE=1 -DWITH_PERFSCHEMA_STORAGE_ENGINE=1 -DWITHOUT_EXAMPLE_STORAGE_ENGINE=1 -DWITHOUT_FEDERATED_STORAGE_ENGINE=1 -DDEFAULT_CHARSET=utf8 -DDEFAULT_COLLATION=utf8_general_ci -DWITH_EXTRA_CHARSETS=all -DENABLED_LOCAL_INFILE=1 -DWITH_READLINE=1 -DMYSQL_UNIX_ADDR=/usr/local/mysql-5.6.29/mysql.sock -DMYSQL_TCP_PORT=3306 -DMYSQL_USER=mysql -DCOMPILATION_COMMENT="lq-edition" -DENABLE_DTRACE=0 -DOPTIMIZER_TRACE=1 -DWITH_DEBUG=1 && \ 48 | make && make install 49 | 50 | # 添加测试用户mysql,密码mysql,并且将此用户添加到sudoers里 51 | RUN useradd mysql 52 | RUN echo "mysql:mysql" | chpasswd 53 | RUN echo "mysql ALL=(ALL) ALL" >> /etc/sudoers 54 | 55 | RUN cd /usr/local/mysql-5.6.29 && chown -R mysql:mysql ./ 56 | 57 | COPY my.cnf /etc/my.cnf 58 | RUN chown mysql:mysql /etc/my.cnf 59 | 60 | RUN cd /usr/local/mysql-5.6.29 && ./scripts/mysql_install_db --user=mysql --basedir=/usr/local/mysql-5.6.29 --datadir=/usr/local/mysql-5.6.29/data/ 61 | 62 | ENV MYSQL_HOME /usr/local/mysql-5.6.29 63 | 64 | # 容器需要开放MySQL 3306端口 65 | EXPOSE 3306 66 | 67 | CMD ["/usr/sbin/sshd", "-D"] 68 | 69 | -------------------------------------------------------------------------------- /docker-mysql/my.cnf: -------------------------------------------------------------------------------- 1 | [client] 2 | port = 3306 3 | socket = /usr/local/mysql-5.6.29/data/mysql.sock 4 | 5 | [mysqld] 6 | 7 | # Remove leading # and set to the amount of RAM for the most important data 8 | # cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%. 9 | # innodb_buffer_pool_size = 128M 10 | 11 | # Remove leading # to turn on a very important data integrity option: logging 12 | # changes to the binary log between backups. 13 | # log_bin 14 | 15 | # These are commonly set, remove the # and set as required. 16 | basedir = /usr/local/mysql-5.6.29 17 | datadir = /usr/local/mysql-5.6.29/data 18 | port = 3306 19 | socket = /usr/local/mysql-5.6.29/data/mysql.sock 20 | 21 | # Remove leading # to set options mainly useful for reporting servers. 22 | # The server defaults are faster for transactions and fast SELECTs. 23 | # Adjust sizes as needed, experiment to find the optimal values. 24 | # join_buffer_size = 128M 25 | # sort_buffer_size = 2M 26 | # read_rnd_buffer_size = 2M 27 | 28 | sql_mode=NO_ENGINE_SUBSTITUTION,STRICT_TRANS_TABLES 29 | -------------------------------------------------------------------------------- /docker-spark/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tonywell/docker-hive 2 | 3 | MAINTAINER tonywell 4 | 5 | ENV SPARK_HOME=/usr/local/spark 6 | ENV PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin:. 
7 | 8 | ENV JAVA_HOME /usr/local/jdk1.7 9 | ENV PATH $JAVA_HOME/bin:$PATH 10 | 11 | RUN wget http://apache.fayea.com/spark/spark-1.6.2/spark-1.6.2-bin-without-hadoop.tgz && \ 12 | tar -xzvf spark-1.6.2-bin-without-hadoop.tgz -C /usr/local/ && \ 13 | mv /usr/local/spark-1.6.2-bin-without-hadoop /usr/local/spark && \ 14 | rm -rf spark-1.6.2-bin-without-hadoop.tgz 15 | -------------------------------------------------------------------------------- /docker-spark/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=WARN, console 3 | log4j.appender.console=org.apache.log4j.ConsoleAppender 4 | log4j.appender.console.target=System.err 5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 7 | 8 | # Settings to quiet third party logs that are too verbose 9 | log4j.logger.org.eclipse.jetty=WARN 10 | log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR 11 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 12 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 13 | -------------------------------------------------------------------------------- /docker-spark/conf/slaves: -------------------------------------------------------------------------------- 1 | # A Spark Worker will be started on each of the machines listed below. 2 | hadoop-slave1 3 | hadoop-slave2 4 | hadoop-slave3 5 | -------------------------------------------------------------------------------- /docker-spark/conf/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # Default system properties included when running spark-submit. 2 | # This is useful for setting default environmental settings. 3 | 4 | # Example: 5 | # spark.master spark://master:7077 6 | # spark.eventLog.enabled true 7 | # spark.eventLog.dir hdfs://namenode:8021/directory 8 | # spark.serializer org.apache.spark.serializer.KryoSerializer 9 | # spark.driver.memory 5g 10 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 11 | 12 | 13 | spark.executor.extraClassPath /usr/local/hive/lib/mysql-connector-java-5.1.39-bin.jar:/usr/local/hive/lib/guava-14.0.1.jar 14 | spark.driver.extraClassPath /usr/local/hive/lib/mysql-connector-java-5.1.39-bin.jar:/usr/local/hive/lib/guava-14.0.1.jar 15 | -------------------------------------------------------------------------------- /docker-spark/conf/spark-env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This file is sourced when running various Spark programs. 4 | # Copy it as spark-env.sh and edit that to configure Spark for your site. 
5 | 6 | # Options read when launching programs locally with 7 | # ./bin/run-example or ./bin/spark-submit 8 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 9 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 10 | # - SPARK_PUBLIC_DNS, to set the public dns name of the driver program 11 | # - SPARK_CLASSPATH, default classpath entries to append 12 | 13 | # Options read by executors and drivers running inside the cluster 14 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 15 | # - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program 16 | # - SPARK_CLASSPATH, default classpath entries to append 17 | # - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data 18 | # - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos 19 | export JAVA_HOME=/usr/local/jdk1.7 20 | export HADOOP_HOME=/usr/local/hadoop 21 | export SPARK_DIST_CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath) 22 | export SPARK_LOCAL_DIRS=/works/spark 23 | export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop 24 | 25 | # Options read in YARN client mode 26 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 27 | # - SPARK_EXECUTOR_INSTANCES, Number of workers to start (Default: 2) 28 | # - SPARK_EXECUTOR_CORES, Number of cores for the workers (Default: 1). 29 | # - SPARK_EXECUTOR_MEMORY, Memory per Worker (e.g. 1000M, 2G) (Default: 1G) 30 | # - SPARK_DRIVER_MEMORY, Memory for Master (e.g. 1000M, 2G) (Default: 512 Mb) 31 | # - SPARK_YARN_APP_NAME, The name of your application (Default: Spark) 32 | # - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: ‘default’) 33 | # - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job. 34 | # - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job. 35 | 36 | # Options for the daemons used in the standalone deploy mode 37 | # - SPARK_MASTER_IP, to bind the master to a different IP address or hostname 38 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master 39 | # - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") 40 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine 41 | # - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) 42 | export HIVE_HOME=/usr/local/hive 43 | export SPARK_WORKER_MEMORY=2g 44 | export SPARK_MASTER_IP=hadoop-master 45 | export HIVE_CONF_DIR=$HIVE_HOME/conf 46 | 47 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker 48 | # - SPARK_WORKER_INSTANCES, to set the number of worker processes per node 49 | # - SPARK_WORKER_DIR, to set the working directory of worker processes 50 | # - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") 51 | # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") 52 | # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") 53 | # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers 54 | 55 | # Generic options for the daemons used in the standalone deploy mode 56 | # - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) 57 | # - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) 58 | # - SPARK_PID_DIR Where the pid file is stored. 
(Default: /tmp) 59 | # - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) 60 | # - SPARK_NICENESS The scheduling priority for daemons. (Default: 0) 61 | -------------------------------------------------------------------------------- /docker-zk/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tonywell/centos-java 2 | 3 | RUN wget http://apache.fayea.com/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz && \ 4 | tar -zvxf zookeeper-3.4.6.tar.gz -C /opt/ && \ 5 | mv /opt/zookeeper-3.4.6 /opt/zookeeper && \ 6 | rm zookeeper-3.4.6.tar.gz 7 | 8 | RUN mkdir -p /opt/data 9 | RUN mkdir -p /opt/log 10 | 11 | ENV ZOO_HOME /opt/zookeeper 12 | ENV PATH $PATH:$ZOO_HOME/bin 13 | 14 | ENV TZ "Asia/Shanghai" 15 | EXPOSE 2181 2888 3888 16 | CMD ["zkServer.sh", "start-foreground"] 17 | -------------------------------------------------------------------------------- /docker-zk/conf/configuration.xsl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
name | value | description
21 | 22 | 23 |
24 |
25 | -------------------------------------------------------------------------------- /docker-zk/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | ome default values that can be overridden by system properties 2 | zookeeper.root.logger=INFO, CONSOLE 3 | zookeeper.console.threshold=INFO 4 | zookeeper.log.dir=/opt/log 5 | zookeeper.log.file=zookeeper.log 6 | zookeeper.log.threshold=DEBUG 7 | zookeeper.tracelog.dir=/opt/log 8 | zookeeper.tracelog.file=zookeeper_trace.log 9 | # 10 | # ZooKeeper Logging Configuration 11 | # 12 | # Format is " (, )+ 13 | # DEFAULT: console appender only 14 | log4j.rootLogger=${zookeeper.root.logger} 15 | # Example with rolling log file 16 | #log4j.rootLogger=DEBUG, CONSOLE, ROLLINGFILE 17 | # Example with rolling log file and tracing 18 | #log4j.rootLogger=TRACE, CONSOLE, ROLLINGFILE, TRACEFILE 19 | # 20 | # Log INFO level and above messages to the console 21 | # 22 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 23 | log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold} 24 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 26 | # 27 | # Add ROLLINGFILE to rootLogger to get log file output 28 | # Log DEBUG level and above messages to a log file 29 | log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender 30 | log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold} 31 | log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file} 32 | # Max log file size of 10MB 33 | log4j.appender.ROLLINGFILE.MaxFileSize=10MB 34 | # uncomment the next line to limit number of backup files 35 | #log4j.appender.ROLLINGFILE.MaxBackupIndex=10 36 | log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout 37 | log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 38 | # 39 | # Add TRACEFILE to rootLogger to get log file output 40 | # Log DEBUG level and above messages to a log file 41 | log4j.appender.TRACEFILE=org.apache.log4j.FileAppender 42 | log4j.appender.TRACEFILE.Threshold=TRACE 43 | log4j.appender.TRACEFILE.File=${zookeeper.tracelog.dir}/${zookeeper.tracelog.file} 44 | log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout 45 | ### Notice we are including log4j's NDC here (%x) 46 | log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L][%x] - %m%n 47 | -------------------------------------------------------------------------------- /docker-zk/conf/zoo.cfg: -------------------------------------------------------------------------------- 1 | clientPort=2181 2 | tickTime=2000 3 | syncLimit=5 4 | initLimit=10 5 | dataDir=/opt/data 6 | dataLogDir=/opt/log 7 | server.1=zk1:2888:3888 8 | server.2=zk2:2888:3888 9 | server.3=zk3:2888:3888 10 | server.4=zk4:2888:3888 11 | server.5=zk5:2888:3888 12 | 13 | -------------------------------------------------------------------------------- /docker-zk/zoo1/myid: -------------------------------------------------------------------------------- 1 | 1 2 | -------------------------------------------------------------------------------- /docker-zk/zoo2/myid: -------------------------------------------------------------------------------- 1 | 2 2 | -------------------------------------------------------------------------------- /docker-zk/zoo3/myid: 
-------------------------------------------------------------------------------- 1 | 3 2 | -------------------------------------------------------------------------------- /docker-zk/zoo4/myid: -------------------------------------------------------------------------------- 1 | 4 2 | -------------------------------------------------------------------------------- /docker-zk/zoo5/myid: -------------------------------------------------------------------------------- 1 | 5 2 | --------------------------------------------------------------------------------
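
The zoo.cfg above defines a five-node ensemble (server.1 through server.5), with each node's identity supplied by the matching myid file. A minimal sketch for checking the quorum from the host, under the assumption that the compose file names the containers zk1 through zk5 (matching the hostnames in zoo.cfg) and that this conf directory is visible at the default location inside the image:

```
# Hypothetical check: assumes containers zk1..zk5 exist and that zoo.cfg is
# readable at /opt/zookeeper/conf/zoo.cfg inside each container.
for i in 1 2 3 4 5; do
  echo "== zk$i =="
  # zkServer.sh is on PATH in the docker-zk image; "Mode: leader" or
  # "Mode: follower" in the output means the node has joined the quorum.
  docker exec "zk$i" zkServer.sh status
done
```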