├── .gitignore ├── README.md ├── SogouQ.sample.txt ├── copy-jar.sh ├── docker-compose.yml ├── hadoop-hive.env ├── mysql-connector-java-5.1.28.jar ├── run.sh ├── sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz └── stop.sh /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | example/SparkWriteApplication.jar 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # docker-hadoop-spark-hive 2 | 3 | docker-hadoop-spark-hive 快速构建你的大数据环境 4 | 5 | 这是一个 基于docker 构建的 一键启停 大数据 学习平台 6 | 7 | - hadoop 2.8 8 | - hive 2.1.0 9 | - spark 2.1.0 10 | 11 | 要用到docker 你可能需要使用虚拟环境 这里 我用的是 virtualbox 12 | 13 | 我研究了一段时间,才配置成功了各项参数。 14 | 15 | 所有软件访问需要预先 配置的端口,都已经 一一映射! 16 | 17 | 其中 hadoop , spark , hive 均可以在 我的windows 连接哦。很nice 18 | 19 | 接下来 介绍一下 如何使用 20 | 21 | ## 首先你需要安装docker 22 | 23 | ``` 24 | sudo apt install docker.io 25 | ``` 26 | 27 | ## 安装 docker-compose 28 | 29 | ``` 30 | sudo apt install docker-compose 31 | ``` 32 | 33 | ## 开始你的表演 34 | 35 | ``` 36 | # 进入 docker-compose.yml 的目录 37 | ./run.sh 38 | 39 | # 如果发现存在没有成功启动的 容器 (状态为 exited ) 40 | docker ps -a 41 | 42 | # 手动重启下 43 | docker-compose up -d 44 | 45 | # 如果需要停掉 46 | ./stop.sh 47 | 48 | # 如果需要设置一些 环境变量 49 | 50 | vim *.env 51 | 52 | 这里需要注意,环境变量 基本不需要设置,如果你玩大了,别给我提issue 。有能耐你自己解决啊 (傲娇脸) 53 | 54 | # 如果需要修改 docker 容器 运行的一些选项 55 | vim docker-compose.yml 56 | 57 | ``` 58 | -------------------------------------------------------------------------------- /copy-jar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /opt/hadoop-2.8.0/share/hadoop/yarn/lib/ && cp jersey-core-1.9.jar jersey-client-1.9.jar /spark/jars/ && rm -rf /spark/jars/jersey-client-2.22.2.jar 4 | 5 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | namenode: 4 | image: bde2020/hadoop-namenode:1.1.0-hadoop2.8-java8 5 | container_name: namenode 6 | volumes: 7 | - ./data/namenode:/hadoop/dfs/name 8 | environment: 9 | - CLUSTER_NAME=test 10 | env_file: 11 | - ./hadoop-hive.env 12 | ports: 13 | - 50070:50070 14 | - 8020:8020 15 | 16 | resourcemanager: 17 | image: bde2020/hadoop-resourcemanager:1.1.0-hadoop2.8-java8 18 | container_name: resourcemanager 19 | environment: 20 | - CLUSTER_NAME=test 21 | env_file: 22 | - ./hadoop-hive.env 23 | ports: 24 | - 8088:8088 25 | 26 | historyserver: 27 | image: bde2020/hadoop-historyserver:1.1.0-hadoop2.8-java8 28 | container_name: historyserver 29 | environment: 30 | - CLUSTER_NAME=test 31 | env_file: 32 | - ./hadoop-hive.env 33 | ports: 34 | - 8188:8188 35 | 36 | datanode: 37 | image: bde2020/hadoop-datanode:1.1.0-hadoop2.8-java8 38 | depends_on: 39 | - namenode 40 | volumes: 41 | - ./data/datanode:/hadoop/dfs/data 42 | env_file: 43 | - ./hadoop-hive.env 44 | ports: 45 | - 50075:50075 46 | 47 | nodemanager: 48 | image: bde2020/hadoop-nodemanager:1.1.0-hadoop2.8-java8 49 | container_name: nodemanager 50 | hostname: nodemanager 51 | environment: 52 | - CLUSTER_NAME=test 53 | env_file: 54 | - ./hadoop-hive.env 55 | ports: 56 | - 8042:8042 57 | 58 | hive-server: 59 | image: bde2020/hive:2.1.0-postgresql-metastore 60 | container_name: hive-server 61 | env_file: 62 | - ./hadoop-hive.env 63 | environment: 64 | - "HIVE_CORE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore/metastore" 65 | ports: 66 | - "10000:10000" 67 | 68 | hive-metastore: 69 | image: bde2020/hive:2.1.0-postgresql-metastore 70 | container_name: hive-metastore 71 | env_file: 72 | - ./hadoop-hive.env 73 | command: /opt/hive/bin/hive --service metastore 74 | ports: 75 | - 9083:9083 76 | 77 | hive-metastore-postgresql: 78 | image: bde2020/hive-metastore-postgresql:2.1.0 79 | ports: 80 | - 5432:5432 81 | volumes: 82 | - ./data/postgresql/:/var/lib/postgresql/data 83 | 84 | spark-master: 85 | image: bde2020/spark-master:2.1.0-hadoop2.8-hive-java8 86 | container_name: spark-master 87 | hostname: spark-master 88 | volumes: 89 | - ./copy-jar.sh:/copy-jar.sh 90 | ports: 91 | - 8080:8080 92 | - 7077:7077 93 | env_file: 94 | - ./hadoop-hive.env 95 | 96 | 97 | spark-worker: 98 | image: bde2020/spark-worker:2.1.0-hadoop2.8-hive-java8 99 | depends_on: 100 | - spark-master 101 | environment: 102 | - SPARK_MASTER=spark://spark-master:7077 103 | ports: 104 | - "8081:8081" 105 | env_file: 106 | - ./hadoop-hive.env 107 | mysql-server: 108 | image: mysql:5.7 109 | container_name: mysql-server 110 | ports: 111 | - "3306:3306" 112 | environment: 113 | - MYSQL_ROOT_PASSWORD=zhangyang517 114 | volumes: 115 | - ./data/mysql:/var/lib/mysql 116 | 117 | elasticsearch: 118 | image: elasticsearch:6.5.3 119 | environment: 120 | - discovery.type=single-node 121 | ports: 122 | - "9200:9200" 123 | - "9300:9300" 124 | networks: 125 | - es_network 126 | kibana: 127 | image: kibana:6.5.3 128 | ports: 129 | - "5601:5601" 130 | networks: 131 | - es_network 132 | 133 | networks: 134 | es_network: 135 | external: true 136 | -------------------------------------------------------------------------------- /hadoop-hive.env: -------------------------------------------------------------------------------- 1 | HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore 2 | HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver 3 | HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive 4 | HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive 5 | HIVE_SITE_CONF_datanucleus_autoCreateSchema=false 6 | HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083 7 | HIVE_SITE_CONF_hive_metastore_warehouse_dir=hdfs://namenode:8020/user/hive/warehouse 8 | 9 | CORE_CONF_fs_defaultFS=hdfs://namenode:8020 10 | CORE_CONF_hadoop_http_staticuser_user=root 11 | CORE_CONF_hadoop_proxyuser_hue_hosts=* 12 | CORE_CONF_hadoop_proxyuser_hue_groups=* 13 | 14 | HDFS_CONF_dfs_webhdfs_enabled=true 15 | HDFS_CONF_dfs_permissions_enabled=false 16 | 17 | YARN_CONF_yarn_log___aggregation___enable=true 18 | YARN_CONF_yarn_resourcemanager_recovery_enabled=true 19 | YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore 20 | YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate 21 | YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs 22 | YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ 23 | YARN_CONF_yarn_timeline___service_enabled=true 24 | YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true 25 | YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true 26 | YARN_CONF_yarn_resourcemanager_hostname=resourcemanager 27 | YARN_CONF_yarn_timeline___service_hostname=historyserver 28 | YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 29 | YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 30 | YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031 31 | 32 | -------------------------------------------------------------------------------- /mysql-connector-java-5.1.28.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibywind/docker-hadoop-spark-hive/48218fe2ac144407657cf6b2f46587d2740cdc67/mysql-connector-java-5.1.28.jar -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker-compose -f docker-compose.yml up -d namenode hive-metastore-postgresql 4 | docker-compose -f docker-compose.yml up -d datanode hive-metastore 5 | docker-compose -f docker-compose.yml up -d resourcemanager 6 | docker-compose -f docker-compose.yml up -d nodemanager 7 | docker-compose -f docker-compose.yml up -d historyserver 8 | sleep 5 9 | docker-compose -f docker-compose.yml up -d hive-server 10 | docker-compose -f docker-compose.yml up -d spark-master spark-worker 11 | docker-compose -f docker-compose.yml up -d mysql-server 12 | docker-compose -f docker-compose.yml up -d elasticsearch 13 | docker-compose -f docker-compose.yml up -d kibana 14 | my_ip=`ip route get 1|awk '{print $NF;exit}'` 15 | echo "Namenode: http://${my_ip}:50070" 16 | echo "Datanode: http://${my_ip}:50075" 17 | echo "Spark-master: http://${my_ip}:8080" 18 | docker-compose exec spark-master bash -c "./copy-jar.sh && exit" 19 | 20 | -------------------------------------------------------------------------------- /sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibywind/docker-hadoop-spark-hive/48218fe2ac144407657cf6b2f46587d2740cdc67/sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz -------------------------------------------------------------------------------- /stop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker-compose stop 3 | --------------------------------------------------------------------------------