├── README.md
├── cluster
│   ├── Dockerfile
│   ├── cluster.sh
│   └── hive-site.xml
├── datanode
│   ├── Dockerfile
│   └── start.sh
├── deploy-datanode.yml
├── deploy-namenode.yml
└── image
    ├── cluster.png
    ├── hbase-region.png
    ├── hbase.png
    ├── hive.png
    └── namenode.png

/README.md:
--------------------------------------------------------------------------------
# k8s hadoop simple cluster

Based on the image [izone/hadoop](https://hub.docker.com/r/izone/hadoop/).

cluster/cluster.sh [origin](https://github.com/luvres/hadoop/blob/master/cluster/cluster.sh)

datanode/start.sh [origin](https://github.com/luvres/hadoop/blob/master/cluster/datanode/start.sh)

## images

### datanode

```bash
cd ../datanode

docker build -t cclient/hadoop:2.8.3-datanode ./

# or, behind a proxy:
docker build --build-arg http_proxy= --build-arg https_proxy= -t cclient/hadoop:2.8.3-datanode ./
```

### namenode

```bash
cd ../cluster

docker build -t cclient/hadoop:2.8.3-namenode ./

# or, behind a proxy:
docker build --build-arg http_proxy= --build-arg https_proxy= -t cclient/hadoop:2.8.3-namenode ./
```

## deploy

The datanodes must be running before the namenode is deployed (the namenode SSHes into each datanode to push configuration; a readiness-gating sketch follows deploy-datanode.yml below).

### datanode

```bash
kubectl apply -f deploy-datanode.yml
```

### namenode

```bash
kubectl apply -f deploy-namenode.yml
```

### start hive

```
kubectl exec -it hadoop-master-0 -- bash
# init the metastore schema
cd /opt/hive/bin
/opt/hive/bin/hive --service schemaTool -initSchema -dbType mysql
# start hiveserver2
nohup /opt/hive/bin/hive --service hiveserver2 &
# connect
/opt/hive/bin/beeline -u jdbc:hive2://127.0.0.1:10000 -n root -p hadoop

0: jdbc:hive2://127.0.0.1:10000> show databases;
+----------------+
| database_name  |
+----------------+
| default        |
+----------------+
1 row selected (1.929 seconds)
```

## show

```bash
$ kubectl get pods
NAME              READY   STATUS    RESTARTS   AGE
hadoop-master-0   1/1     Running   0          10h
hadoop-node-0     1/1     Running   0          10h

$ kubectl get svc
NAME            TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)                                                                                                                                                                                         AGE
hadoop-master   NodePort    10.96.64.27      <none>        22:32620/TCP,8088:32688/TCP,8042:31507/TCP,50030:32630/TCP,50070:32670/TCP,8888:32488/TCP,4040:31798/TCP,8787:30829/TCP,9000:30760/TCP,60010:32510/TCP,60030:32530/TCP,10000:32000/TCP,10002:32002/TCP   11m
hadoop-node     ClusterIP   10.100.139.149   <none>        22/TCP                                                                                                                                                                                          11m
```

## view demo

### hadoop

cluster http://k8s-node-ip:32688

![cluster.png](./image/cluster.png)

namenode http://k8s-node-ip:32670

![namenode.png](./image/namenode.png)

### hbase

hbase-master http://k8s-node-ip:32510

![hbase.png](./image/hbase.png)

hbase-regionserver http://k8s-node-ip:32530

![hbase-region.png](./image/hbase-region.png)

### hive

hive http://k8s-node-ip:32002

![hive.png](./image/hive.png)

### jupyter

notebook http://k8s-node-ip:32488
--------------------------------------------------------------------------------
/cluster/Dockerfile:
--------------------------------------------------------------------------------
FROM izone/hadoop:cluster
ADD cluster.sh /etc/cluster.sh
RUN apt-get update && apt-get install -y iputils-ping vim
# Fetch the MySQL JDBC driver so Hive can reach the MySQL metastore
RUN curl https://cdn.mysql.com/archives/mysql-connector-java-5.1/mysql-connector-java-5.1.49.tar.gz -o /tmp/mysql.tgz && \
    tar -zxf /tmp/mysql.tgz -C /tmp/ && \
    mv /tmp/mysql-connector-java-5.1.49/mysql-connector-java-5.1.49-bin.jar /opt/hive/lib/ && \
    rm -rf /tmp/mysql*
ADD hive-site.xml /opt/hive/conf/
--------------------------------------------------------------------------------
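After building, a quick sanity check that the JDBC driver actually landed in Hive's lib directory — a hedged sketch, assuming the image was tagged as in the README and that its entrypoint allows running ad-hoc commands (use `--entrypoint` otherwise):

```bash
# If the jar is missing, schemaTool and hiveserver2 will fail to load
# com.mysql.jdbc.Driver at runtime.
docker run --rm cclient/hadoop:2.8.3-namenode \
  ls -l /opt/hive/lib/ | grep mysql-connector
```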
/cluster/cluster.sh:
--------------------------------------------------------------------------------
#!/bin/bash

/etc/init.d/ssh restart

nodesSSH(){
  rm -f /etc/machines
  for NODE in ${NODES}
  do
    echo "Configuring SSH $NODE"
    for f in $HOME/.ssh/id_rsa.pub $HOME/.ssh/id_dsa.pub
    do
      sshpass -p $RPASS ssh-copy-id -i $f $NODE # Copy key to the nodes
    done &>/dev/null
    echo $NODE >>/etc/machines # Create "machines" file with the names of nodes
  done
}; nodesSSH

confFiles(){
  rm -f $HADOOP_HOME/etc/hadoop/slaves
  echo $HOSTNAME >$HADOOP_HOME/etc/hadoop/slaves
  cat /etc/machines >>$HADOOP_HOME/etc/hadoop/slaves
  cat /etc/machines >>$SPARK_HOME/conf/slaves
  sed -i "s/NAMENODE/$HOSTNAME/" $HADOOP_HOME/etc/hadoop/core-site.xml
  sed -i "s/NAMENODE/$HOSTNAME/" $HADOOP_HOME/etc/hadoop/yarn-site.xml
  cat $HADOOP_HOME/etc/hadoop/slaves >$HBASE_HOME/conf/regionservers
  sed -i "s/NAMENODE/$HOSTNAME/" $HBASE_HOME/conf/hbase-site.xml
  sed -i "s/NAMENODE/$HOSTNAME/" $HBASE_HOME/conf/hbase-site_slave.xml
  # sed -i "s/QUORUM/$(echo `cat /opt/hadoop/etc/hadoop/slaves` | sed 's/ /,/g')/" $HBASE_HOME/conf/hbase-site.xml
  # sed -i "s/QUORUM/$HOSTNAME/" $HBASE_HOME/conf/hbase-site.xml
}; confFiles

hostsNodes(){
  HADOOP=$HADOOP_HOME/etc/hadoop
  SPARK=$SPARK_HOME/conf
  HBASE=$HBASE_HOME/conf

  for NODE in ${NODES}
  do
    # sed -i "/${NODE}/d" /etc/hosts
    echo "Get Node $NODE IP"
    nodeHostInfo=$(ssh $NODE "grep '${NODE}' /etc/hosts")
    echo "${nodeHostInfo}" >> /etc/hosts
  done

  TEMP=$(grep hadoop /etc/hosts)
  for NODE in ${NODES}
  do
    echo "Configuring files $NODE"
    ssh $NODE "echo '${TEMP}' >> /etc/hosts"
    for f in ${HADOOP}/hadoop-env.sh \
      ${HADOOP}/hdfs-site.xml \
      ${HADOOP}/core-site.xml \
      ${HADOOP}/mapred-site.xml \
      ${HADOOP}/yarn-site.xml \
      ${SPARK}/spark-env.sh \
      ${HBASE}/hbase-site_slave.xml \
      ${HBASE}/hbase-env.sh
    do
      scp $f $NODE:$f
    done &>/dev/null
    # Rename the slave-side hbase config once, after all files are copied
    ssh $NODE mv ${HBASE}/hbase-site_slave.xml ${HBASE}/hbase-site.xml
  done
}; hostsNodes
--------------------------------------------------------------------------------
/cluster/hive-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://hadoop-hive-mysql/hive?createDatabaseIfNotExist=true</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
    <description>username to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hadoop</value>
    <description>password to use against metastore database</description>
  </property>
  <property>
    <name>hive.server2.enable.doAs</name>
    <value>FALSE</value>
    <description>Setting this property to true will have HiveServer2 execute Hive operations as the user making the calls to it.</description>
  </property>
  <property>
    <name>hive.server2.support.dynamic.service.discovery</name>
    <value>true</value>
    <description>Whether HiveServer2 supports dynamic service discovery for its clients.
      To support this, each instance of HiveServer2 currently uses ZooKeeper to register itself,
      when it is brought up. JDBC/ODBC clients should use the ZooKeeper ensemble:
      hive.zookeeper.quorum in their connection string.</description>
  </property>
</configuration>
--------------------------------------------------------------------------------
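Since the ConnectionURL above points at the `hadoop-hive-mysql` service, one can confirm the metastore schema exists after running schemaTool — a hedged sketch, assuming the `hadoop-hive-mysql` Deployment from deploy-namenode.yml is running and using the root/hadoop credentials from this file:

```bash
# Lists the metastore tables (DBS, TBLS, ...) that schemaTool -initSchema creates.
kubectl exec deploy/hadoop-hive-mysql -- \
  mysql -uroot -phadoop -e 'SHOW TABLES;' hive
```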
/datanode/Dockerfile:
--------------------------------------------------------------------------------
FROM izone/hadoop:datanode
ADD start.sh /etc/start.sh
RUN apt-get update && apt-get install -y iputils-ping vim
--------------------------------------------------------------------------------
/datanode/start.sh:
--------------------------------------------------------------------------------
#!/bin/bash

/etc/init.d/ssh start

if [[ $1 == "bash" ]]; then
  echo " "
  echo -e "\e[01;32m*\e[00m `date` \e[01;32mShell Bash\e[00m"
  /bin/bash
fi

# Keep the container alive (~1 year) so the namenode can reach it over SSH
sleep 31557600
--------------------------------------------------------------------------------
/deploy-datanode.yml:
--------------------------------------------------------------------------------
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-node
spec:
  serviceName: "hadoop-node"
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-node
  template:
    metadata:
      labels:
        app: hadoop-node
    spec:
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: "app"
                operator: In
                values:
                - hadoop-node
            topologyKey: "kubernetes.io/hostname"
      containers:
      - name: hadoop-node
        image: cclient/hadoop:2.8.3-datanode
        # imagePullPolicy: Always
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 22
          name: ssh
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-node
  labels:
    app: hadoop-node
spec:
  ports:
  - name: ssh
    port: 22
    targetPort: 22
  - name: "8040"
    port: 8040
    targetPort: 8040
  - name: "8042"
    port: 8042
    targetPort: 8042
  - name: "50010"
    port: 50010
    targetPort: 50010
  - name: "50020"
    port: 50020
    targetPort: 50020
  - name: "50075"
    port: 50075
    targetPort: 50075
  selector:
    app: hadoop-node
--------------------------------------------------------------------------------
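Because the namenode SSHes into the datanodes at startup (see the README's deploy note), the second apply can be gated on datanode readiness — a minimal sketch using the `app=hadoop-node` label from the manifest above; the timeout is an assumption:

```bash
kubectl apply -f deploy-datanode.yml
# Block until every datanode pod reports Ready (up to 5 minutes),
# then deploy the namenode, which configures the datanodes over SSH.
kubectl wait --for=condition=ready pod -l app=hadoop-node --timeout=300s
kubectl apply -f deploy-namenode.yml
```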
/deploy-namenode.yml:
--------------------------------------------------------------------------------
---
# hive metastore
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: hadoop-hive-mysql
  name: hadoop-hive-mysql
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-hive-mysql
  template:
    metadata:
      labels:
        app: hadoop-hive-mysql
    spec:
      containers:
      - env:
        - name: MYSQL_ROOT_PASSWORD
          value: hadoop
        - name: HOST
          value: 0.0.0.0
        - name: TZ
          value: Asia/Shanghai
        image: mysql:5.7.33
        name: hadoop-hive-mysql
        # volumeMounts:
        # - mountPath: /var/lib/mysql
        #   name: data
        ports:
        - containerPort: 3306
      # volumes:
      # - hostPath:
      #     path: /data/pv/hadoop-hive-mysql
      #     type: ""
      #   name: data
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hive-mysql
  namespace: default
spec:
  ports:
  - name: mysql
    port: 3306
    protocol: TCP
    targetPort: 3306
  selector:
    app: hadoop-hive-mysql
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-master
spec:
  serviceName: "hadoop-master"
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-master
  template:
    metadata:
      labels:
        app: hadoop-master
    spec:
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: "app"
                operator: In
                values:
                - hadoop-master
            topologyKey: "kubernetes.io/hostname"
      containers:
      - name: hadoop-master
        image: cclient/hadoop:2.8.3-namenode
        # imagePullPolicy: Always
        imagePullPolicy: IfNotPresent
        env:
        - name: NODES
          value: hadoop-node
        - name: HOSTNODE
          value: hadoop-master
        ports:
        - containerPort: 22
          name: ssh
        - containerPort: 8088
        - containerPort: 8042
        - containerPort: 50070
        - containerPort: 8888
        - containerPort: 8080
        - containerPort: 4040
        - containerPort: 8787
        - containerPort: 9000
        - containerPort: 60010
        - containerPort: 60030
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-master
  labels:
    app: hadoop-master
spec:
  type: NodePort
  ports:
  - name: ssh
    port: 22
    nodePort: 32620
    targetPort: 22
  - name: hadoop-cluster-info
    port: 8088
    nodePort: 32688
    targetPort: 8088
  - name: nodemanager-info
    port: 8042
    # nodePort: 8342
    targetPort: 8042
  - name: port-50030
    port: 50030
    nodePort: 32630
    targetPort: 50030
  - name: hadoop-namenode-info
    port: 50070
    nodePort: 32670
    targetPort: 50070
  - name: jupyter-notebook
    port: 8888
    nodePort: 32488
    targetPort: 8888
  - name: spark-ui
    port: 4040
    # nodePort: 8340
    targetPort: 4040
  - name: port-8787
    port: 8787
    # nodePort: 8387
    targetPort: 8787
  - name: hdfs-namenode-rpc
    port: 9000
    targetPort: 9000
  - name: hbase-master-info
    port: 60010
    nodePort: 32510
    targetPort: 60010
  - name: hbase-regionserver-info
    port: 60030
    nodePort: 32530
    targetPort: 60030
  - name: hiveserver2-jdbc
    port: 10000
    nodePort: 32000
    targetPort: 10000
  - name: hiveserver2-web
    port: 10002
    nodePort: 32002
    targetPort: 10002
  selector:
    app: hadoop-master
--------------------------------------------------------------------------------
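Once both manifests are applied, a hedged way to confirm the datanodes registered with the namenode (the `/opt/hadoop` path is assumed from the HADOOP_HOME layout referenced in cluster.sh):

```bash
# Run an HDFS report inside the master pod; it should list one live
# datanode per hadoop-node replica.
kubectl exec -it hadoop-master-0 -- /opt/hadoop/bin/hdfs dfsadmin -report
# The YARN ResourceManager UI is exposed on NodePort 32688 (see the README).
```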
/image/cluster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cclient/kubernetes-hadoop/d0aaa02c5374737444ae99e7a87fc6bc29ae9dee/image/cluster.png
--------------------------------------------------------------------------------
/image/hbase-region.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cclient/kubernetes-hadoop/d0aaa02c5374737444ae99e7a87fc6bc29ae9dee/image/hbase-region.png
--------------------------------------------------------------------------------
/image/hbase.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cclient/kubernetes-hadoop/d0aaa02c5374737444ae99e7a87fc6bc29ae9dee/image/hbase.png
--------------------------------------------------------------------------------
/image/hive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cclient/kubernetes-hadoop/d0aaa02c5374737444ae99e7a87fc6bc29ae9dee/image/hive.png
--------------------------------------------------------------------------------
/image/namenode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cclient/kubernetes-hadoop/d0aaa02c5374737444ae99e7a87fc6bc29ae9dee/image/namenode.png
--------------------------------------------------------------------------------