├── image
│   ├── hbase.png
│   ├── hive.png
│   ├── cluster.png
│   ├── namenode.png
│   └── hbase-region.png
├── datanode
│   ├── Dockerfile
│   └── start.sh
├── cluster
│   ├── Dockerfile
│   ├── hive-site.xml
│   └── cluster.sh
├── deploy-datanode.yml
├── README.md
└── deploy-namenode.yml
/image/hbase.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cclient/kubernetes-hadoop/HEAD/image/hbase.png
--------------------------------------------------------------------------------
/image/hive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cclient/kubernetes-hadoop/HEAD/image/hive.png
--------------------------------------------------------------------------------
/image/cluster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cclient/kubernetes-hadoop/HEAD/image/cluster.png
--------------------------------------------------------------------------------
/image/namenode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cclient/kubernetes-hadoop/HEAD/image/namenode.png
--------------------------------------------------------------------------------
/image/hbase-region.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cclient/kubernetes-hadoop/HEAD/image/hbase-region.png
--------------------------------------------------------------------------------
/datanode/Dockerfile:
--------------------------------------------------------------------------------
FROM izone/hadoop:datanode
ADD start.sh /etc/start.sh
# refresh the package index first; the base image's apt cache may be empty
RUN apt-get update && apt-get install -y iputils-ping vim
--------------------------------------------------------------------------------
/datanode/start.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# start sshd so the namenode can reach this node over SSH
/etc/init.d/ssh start

if [[ "$1" == "bash" ]]; then
  echo " "
  echo -e "\e[01;32m*\e[00m `date` \e[01;32mShell Bash\e[00m"
  /bin/bash
fi

# keep the container alive (~one year) so the pod stays Running
sleep 31557600
--------------------------------------------------------------------------------
/cluster/Dockerfile:
--------------------------------------------------------------------------------
FROM izone/hadoop:cluster
ADD cluster.sh /etc/cluster.sh
# refresh the package index first; the base image's apt cache may be empty
RUN apt-get update && apt-get install -y iputils-ping vim
# fetch the MySQL JDBC driver needed by the Hive metastore
RUN curl -fsSL https://cdn.mysql.com/archives/mysql-connector-java-5.1/mysql-connector-java-5.1.49.tar.gz -o /tmp/mysql.tgz && \
    tar -zxf /tmp/mysql.tgz -C /tmp/ && \
    mv /tmp/mysql-connector-java-5.1.49/mysql-connector-java-5.1.49-bin.jar /opt/hive/lib/ && \
    rm -rf /tmp/mysql*
ADD hive-site.xml /opt/hive/conf/
--------------------------------------------------------------------------------
/deploy-datanode.yml:
--------------------------------------------------------------------------------
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-node
spec:
  serviceName: "hadoop-node"
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-node
  template:
    metadata:
      labels:
        app: hadoop-node
    spec:
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchExpressions:
                  - key: "app"
                    operator: In
                    values:
                      - hadoop-node
              topologyKey: "kubernetes.io/hostname"
      containers:
        - name: hadoop-node
          image: cclient/hadoop:2.8.3-datanode
          # imagePullPolicy: Always
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 22
              name: ssh
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-node
  labels:
    app: hadoop-node
spec:
  ports:
    - name: ssh
      port: 22
      targetPort: 22
    - name: nm-localizer # YARN NodeManager localizer IPC
      port: 8040
      targetPort: 8040
    - name: nm-webapp # YARN NodeManager web UI
      port: 8042
      targetPort: 8042
    - name: dn-data # HDFS DataNode data transfer
      port: 50010
      targetPort: 50010
    - name: dn-ipc # HDFS DataNode IPC
      port: 50020
      targetPort: 50020
    - name: dn-http # HDFS DataNode web UI
      port: 50075
      targetPort: 50075
  selector:
    app: hadoop-node
--------------------------------------------------------------------------------
/cluster/hive-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://hadoop-hive-mysql/hive?createDatabaseIfNotExist=true</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
    <description>username to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hadoop</value>
    <description>password to use against metastore database</description>
  </property>
  <property>
    <name>hive.server2.enable.doAs</name>
    <value>FALSE</value>
    <description>Setting this property to true will have HiveServer2 execute Hive operations as the user making the calls to it.</description>
  </property>

  <property>
    <name>hive.server2.support.dynamic.service.discovery</name>
    <value>true</value>
    <description>Whether HiveServer2 supports dynamic service discovery for its clients. To support this, each instance of HiveServer2 currently uses ZooKeeper to register itself, when it is brought up. JDBC/ODBC clients should use the ZooKeeper ensemble: hive.zookeeper.quorum in their connection string.</description>
  </property>
</configuration>
--------------------------------------------------------------------------------
/cluster/cluster.sh:
--------------------------------------------------------------------------------
#!/bin/bash

/etc/init.d/ssh restart

nodesSSH(){
    rm -f /etc/machines
    for NODE in ${NODES}
    do
        echo "Configuring SSH $NODE"
        for f in $HOME/.ssh/id_rsa.pub $HOME/.ssh/id_dsa.pub
        do
            sshpass -p $RPASS ssh-copy-id -i $f $NODE # Copy key to the nodes
        done &>/dev/null
        echo $NODE >>/etc/machines # Create "machines" file with the names of nodes
    done
}; nodesSSH

confFiles(){
    rm -f $HADOOP_HOME/etc/hadoop/slaves
    echo $HOSTNAME >$HADOOP_HOME/etc/hadoop/slaves
    cat /etc/machines >>$HADOOP_HOME/etc/hadoop/slaves
    cat /etc/machines >>$SPARK_HOME/conf/slaves
    sed -i "s/NAMENODE/$HOSTNAME/" $HADOOP_HOME/etc/hadoop/core-site.xml
    sed -i "s/NAMENODE/$HOSTNAME/" $HADOOP_HOME/etc/hadoop/yarn-site.xml
    cat $HADOOP_HOME/etc/hadoop/slaves >$HBASE_HOME/conf/regionservers
    sed -i "s/NAMENODE/$HOSTNAME/" $HBASE_HOME/conf/hbase-site.xml
    sed -i "s/NAMENODE/$HOSTNAME/" $HBASE_HOME/conf/hbase-site_slave.xml
    # sed -i "s/QUORUM/$(echo `cat /opt/hadoop/etc/hadoop/slaves` | sed 's/ /,/g')/" $HBASE_HOME/conf/hbase-site.xml
    # sed -i "s/QUORUM/$HOSTNAME/" $HBASE_HOME/conf/hbase-site.xml
}; confFiles

hostsNodes(){
    HADOOP=$HADOOP_HOME/etc/hadoop
    SPARK=$SPARK_HOME/conf
    HBASE=$HBASE_HOME/conf

    # Collect each node's own hosts entry so the master can resolve it
    for NODE in ${NODES}
    do
        # sed -i "/${NODE}/d" /etc/hosts
        echo "Get Node $NODE IP"
        nodeHostInfo=$(ssh $NODE "grep '${NODE}' /etc/hosts")
        echo "${nodeHostInfo}" >> /etc/hosts
    done

    TEMP=$(grep hadoop /etc/hosts)
    for NODE in ${NODES}
    do
        echo "Configuring files $NODE"
        ssh $NODE "echo '${TEMP}' >> /etc/hosts"
        for f in ${HADOOP}/hadoop-env.sh \
            ${HADOOP}/hdfs-site.xml \
            ${HADOOP}/core-site.xml \
            ${HADOOP}/mapred-site.xml \
            ${HADOOP}/yarn-site.xml \
            ${SPARK}/spark-env.sh \
            ${HBASE}/hbase-site_slave.xml \
            ${HBASE}/hbase-env.sh
        do
            scp $f $NODE:$f
        done &>/dev/null
        # Rename the slave copy into place once, after all files have been copied
        ssh $NODE mv ${HBASE}/hbase-site_slave.xml ${HBASE}/hbase-site.xml &>/dev/null
    done
}; hostsNodes
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# k8s hadoop simple cluster

A simple Hadoop cluster on Kubernetes, built on the image [izone/hadoop](https://hub.docker.com/r/izone/hadoop/).

cluster/cluster.sh [origin](https://github.com/luvres/hadoop/blob/master/cluster/cluster.sh)

datanode/start.sh [origin](https://github.com/luvres/hadoop/blob/master/cluster/datanode/start.sh)

## images

### datanode

```bash
cd ../datanode

docker build -t cclient/hadoop:2.8.3-datanode ./

# or, with any proxy settings inherited from the Docker daemon cleared:
docker build --build-arg http_proxy= --build-arg https_proxy= -t cclient/hadoop:2.8.3-datanode ./
```

### namenode

```bash
cd ../cluster

docker build -t cclient/hadoop:2.8.3-namenode ./

# or, with proxy settings cleared:
docker build --build-arg http_proxy= --build-arg https_proxy= -t cclient/hadoop:2.8.3-namenode ./
```

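The manifests pull `cclient/hadoop:2.8.3-*` with `imagePullPolicy: IfNotPresent`, so the images must either sit in each node's local image cache or be pushed to a registry the nodes can reach. A minimal sketch, assuming you have push rights to the `cclient` namespace (otherwise retag under your own):

```bash
docker push cclient/hadoop:2.8.3-datanode
docker push cclient/hadoop:2.8.3-namenode
```
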
## deploy

The datanode pods must be running before the namenode is deployed: on startup, the namenode connects to each datanode over SSH to push configuration (see the sketch below).

### datanode

```bash
kubectl apply -f deploy-datanode.yml
```

### namenode

```bash
kubectl apply -f deploy-namenode.yml
```

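Between the two `kubectl apply` steps, you can block until the datanode pod reports Ready; a minimal sketch (the pod name follows the StatefulSet's `<name>-<ordinal>` convention):

```bash
# wait for the first datanode pod to become Ready before deploying the namenode
kubectl wait --for=condition=Ready pod/hadoop-node-0 --timeout=300s
```
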
### start hive

```bash
kubectl exec -it hadoop-master-0 -- bash
# init the metastore schema (run once)
cd /opt/hive/bin
/opt/hive/bin/hive --service schemaTool -initSchema -dbType mysql
# start HiveServer2
nohup /opt/hive/bin/hive --service hiveserver2 &
# connect
/opt/hive/bin/beeline -u jdbc:hive2://127.0.0.1:10000 -n root -p hadoop

0: jdbc:hive2://127.0.0.1:10000> show databases;
+----------------+
| database_name  |
+----------------+
| default        |
+----------------+
1 row selected (1.929 seconds)
```

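HiveServer2 is also published on NodePort 32000 (`hiveserver2-jdbc` in deploy-namenode.yml), so you can connect from outside the cluster as well; a minimal sketch, assuming beeline is installed locally and a node IP is reachable:

```bash
# connect to HiveServer2 through the NodePort on any cluster node
beeline -u jdbc:hive2://<k8s-node-ip>:32000 -n root -p hadoop
```
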
## show

```bash
$ kubectl get pods
NAME              READY   STATUS    RESTARTS   AGE
hadoop-master-0   1/1     Running   0          10h
hadoop-node-0     1/1     Running   0          10h

$ kubectl get svc
NAME            TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)                                                                                                                                                                                               AGE
hadoop-master   NodePort    10.96.64.27      <none>        22:32620/TCP,8088:32688/TCP,8042:31507/TCP,50030:32630/TCP,50070:32670/TCP,8888:32488/TCP,4040:31798/TCP,8787:30829/TCP,9000:30760/TCP,60010:32510/TCP,60030:32530/TCP,10000:32000/TCP,10002:32002/TCP   11m
hadoop-node     ClusterIP   10.100.139.149   <none>        22/TCP                                                                                                                                                                                                11m
```

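If the NodePorts are not reachable from your workstation, `kubectl port-forward` is an alternative; for example, to reach the namenode web UI locally:

```bash
# forward the namenode web UI to localhost:50070
kubectl port-forward svc/hadoop-master 50070:50070
```
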
## view demo

### hadoop

cluster overview: http://k8s-node-ip:32688



namenode: http://k8s-node-ip:32670



### hbase

hbase-master: http://k8s-node-ip:32510



hbase-regionserver: http://k8s-node-ip:32530



### hive

hiveserver2 web UI: http://k8s-node-ip:32002



### jupyter

notebook: http://k8s-node-ip:32488
--------------------------------------------------------------------------------
/deploy-namenode.yml:
--------------------------------------------------------------------------------
---
# MySQL backing database for the Hive metastore
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: hadoop-hive-mysql
  name: hadoop-hive-mysql
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-hive-mysql
  template:
    metadata:
      labels:
        app: hadoop-hive-mysql
    spec:
      containers:
        - env:
            - name: MYSQL_ROOT_PASSWORD
              value: hadoop
            - name: HOST
              value: 0.0.0.0
            - name: TZ
              value: Asia/Shanghai
          image: mysql:5.7.33
          name: hadoop-hive-mysql
          # volumeMounts:
          #   - mountPath: /var/lib/mysql
          #     name: data
          ports:
            - containerPort: 3306
      # volumes:
      #   - hostPath:
      #       path: /data/pv/hadoop-hive-mysql
      #       type: ""
      #     name: data
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hive-mysql
  namespace: default
spec:
  ports:
    - name: mysql
      port: 3306
      protocol: TCP
      targetPort: 3306
  selector:
    app: hadoop-hive-mysql
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-master
spec:
  serviceName: "hadoop-master"
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-master
  template:
    metadata:
      labels:
        app: hadoop-master
    spec:
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchExpressions:
                  - key: "app"
                    operator: In
                    values:
                      - hadoop-master
              topologyKey: "kubernetes.io/hostname"
      containers:
        - name: hadoop-master
          image: cclient/hadoop:2.8.3-namenode
          # imagePullPolicy: Always
          imagePullPolicy: IfNotPresent
          env:
            - name: NODES
              value: hadoop-node
            - name: HOSTNODE
              value: hadoop-master
          ports:
            - containerPort: 22
              name: ssh
            - containerPort: 8088
            - containerPort: 8042
            - containerPort: 50070
            - containerPort: 8888
            - containerPort: 8080
            - containerPort: 4040
            - containerPort: 8787
            - containerPort: 9000
            - containerPort: 60010
            - containerPort: 60030
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-master
  labels:
    app: hadoop-master
spec:
  type: NodePort
  ports:
    - name: ssh
      port: 22
      nodePort: 32620
      targetPort: 22
    - name: hadoop-cluster-info
      port: 8088
      nodePort: 32688
      targetPort: 8088
    - name: nm-webapp # YARN NodeManager web UI
      port: 8042
      # nodePort: 8342
      targetPort: 8042
    - name: port-50030 # legacy JobTracker-style web UI exposed by the base image
      port: 50030
      nodePort: 32630
      targetPort: 50030
    - name: hadoop-namenode-info
      port: 50070
      nodePort: 32670
      targetPort: 50070
    - name: jupyter-notebook
      port: 8888
      nodePort: 32488
      targetPort: 8888
    - name: spark-ui # Spark application web UI
      port: 4040
      # nodePort: 8340
      targetPort: 4040
    - name: port-8787 # exposed by the base image
      port: 8787
      # nodePort: 8387
      targetPort: 8787
    - name: hdfs-rpc # HDFS NameNode RPC (fs.defaultFS)
      port: 9000
      targetPort: 9000
    - name: hbase-master-info
      port: 60010
      nodePort: 32510
      targetPort: 60010
    - name: hbase-regionserver-info
      port: 60030
      nodePort: 32530
      targetPort: 60030
    - name: hiveserver2-jdbc
      port: 10000
      nodePort: 32000
      targetPort: 10000
    - name: hiveserver2-web
      port: 10002
      nodePort: 32002
      targetPort: 10002
  selector:
    app: hadoop-master
--------------------------------------------------------------------------------