├── .gitignore ├── Makefile ├── README.md ├── airflow ├── code-for-dag │ ├── hive-script.sql │ └── pysparkscript.py └── dags │ ├── __pycache__ │ └── hdfs-hive-spark.cpython-38.pyc │ └── hdfs-hive-spark.py ├── base ├── Dockerfile ├── bde-spark.css ├── entrypoint.sh ├── execute-step.sh ├── finish-step.sh └── wait-for-step.sh ├── codeScript └── pysparkscript.py ├── conf ├── beeline-log4j2.properties ├── hive-env.sh ├── hive-exec-log4j2.properties ├── hive-log4j2.properties ├── hive-site.xml ├── ivysettings.xml └── llap-daemon-log4j2.properties ├── dataForProject ├── breweries.csv ├── dvdrental.rar ├── image │ ├── dvdrental-schema.png │ ├── dvdrental │ │ ├── 3055.dat │ │ ├── 3057.dat │ │ ├── 3059.dat │ │ ├── 3061.dat │ │ ├── 3062.dat │ │ ├── 3063.dat │ │ ├── 3065.dat │ │ ├── 3067.dat │ │ ├── 3069.dat │ │ ├── 3071.dat │ │ ├── 3073.dat │ │ ├── 3075.dat │ │ ├── 3077.dat │ │ ├── 3079.dat │ │ ├── 3081.dat │ │ ├── restore.sql │ │ └── toc.dat │ ├── dvdrentalDW-schema.png │ ├── postgreConf.png │ ├── projectBigdata.jpg │ ├── reportDVD.png │ └── superset.png └── script-init │ ├── createDW-when-db-have-init.sql │ ├── hive-dw-init.txt │ ├── install-jdbc.txt │ └── read-postgres-to-hdfs.txt ├── datanode ├── Dockerfile └── run.sh ├── docker-compose.yaml ├── entrypoint.sh ├── hadoop-hive.env ├── hadoop.env ├── historyserver ├── Dockerfile └── run.sh ├── master ├── Dockerfile ├── README.md └── master.sh ├── namenode ├── Dockerfile └── run.sh ├── nginx ├── Dockerfile ├── bde-hadoop.css ├── default.conf └── materialize.min.css ├── nodemanager ├── Dockerfile └── run.sh ├── postgresql-42.6.0.jar ├── resourcemanager ├── Dockerfile └── run.sh ├── spark_in_action.MD ├── startup.sh ├── superset ├── Dockerfile ├── superset-init.sh └── superset_config.py ├── template ├── java │ ├── Dockerfile │ ├── README.md │ └── template.sh ├── python │ ├── Dockerfile │ ├── README.md │ └── template.sh └── scala │ ├── Dockerfile │ ├── README.md │ ├── build.sbt │ ├── plugins.sbt │ └── template.sh └── worker ├── Dockerfile ├── README.md └── worker.sh /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | airflow/logs 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/README.md -------------------------------------------------------------------------------- /airflow/code-for-dag/hive-script.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/airflow/code-for-dag/hive-script.sql -------------------------------------------------------------------------------- /airflow/code-for-dag/pysparkscript.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/airflow/code-for-dag/pysparkscript.py -------------------------------------------------------------------------------- /airflow/dags/__pycache__/hdfs-hive-spark.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/airflow/dags/__pycache__/hdfs-hive-spark.cpython-38.pyc -------------------------------------------------------------------------------- /airflow/dags/hdfs-hive-spark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/airflow/dags/hdfs-hive-spark.py -------------------------------------------------------------------------------- /base/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/base/Dockerfile -------------------------------------------------------------------------------- /base/bde-spark.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/base/bde-spark.css -------------------------------------------------------------------------------- /base/entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/base/entrypoint.sh -------------------------------------------------------------------------------- /base/execute-step.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/base/execute-step.sh -------------------------------------------------------------------------------- /base/finish-step.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/base/finish-step.sh -------------------------------------------------------------------------------- /base/wait-for-step.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/base/wait-for-step.sh -------------------------------------------------------------------------------- /codeScript/pysparkscript.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/codeScript/pysparkscript.py -------------------------------------------------------------------------------- /conf/beeline-log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/conf/beeline-log4j2.properties -------------------------------------------------------------------------------- /conf/hive-env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/conf/hive-env.sh -------------------------------------------------------------------------------- /conf/hive-exec-log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/conf/hive-exec-log4j2.properties -------------------------------------------------------------------------------- /conf/hive-log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/conf/hive-log4j2.properties -------------------------------------------------------------------------------- /conf/hive-site.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/conf/hive-site.xml -------------------------------------------------------------------------------- /conf/ivysettings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/conf/ivysettings.xml -------------------------------------------------------------------------------- /conf/llap-daemon-log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/conf/llap-daemon-log4j2.properties -------------------------------------------------------------------------------- /dataForProject/breweries.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/breweries.csv -------------------------------------------------------------------------------- /dataForProject/dvdrental.rar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/dvdrental.rar -------------------------------------------------------------------------------- /dataForProject/image/dvdrental-schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental-schema.png -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3055.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3055.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3057.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3057.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3059.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3059.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3061.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3061.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3062.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3062.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3063.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3063.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3065.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3065.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3067.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3067.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3069.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3069.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3071.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3071.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3073.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3073.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3075.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3075.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3077.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3077.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3079.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3079.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/3081.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/3081.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/restore.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/restore.sql -------------------------------------------------------------------------------- /dataForProject/image/dvdrental/toc.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrental/toc.dat -------------------------------------------------------------------------------- /dataForProject/image/dvdrentalDW-schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/dvdrentalDW-schema.png -------------------------------------------------------------------------------- /dataForProject/image/postgreConf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/postgreConf.png -------------------------------------------------------------------------------- /dataForProject/image/projectBigdata.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/projectBigdata.jpg -------------------------------------------------------------------------------- /dataForProject/image/reportDVD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/reportDVD.png -------------------------------------------------------------------------------- /dataForProject/image/superset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/image/superset.png -------------------------------------------------------------------------------- /dataForProject/script-init/createDW-when-db-have-init.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/script-init/createDW-when-db-have-init.sql -------------------------------------------------------------------------------- /dataForProject/script-init/hive-dw-init.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/script-init/hive-dw-init.txt -------------------------------------------------------------------------------- /dataForProject/script-init/install-jdbc.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/script-init/install-jdbc.txt -------------------------------------------------------------------------------- /dataForProject/script-init/read-postgres-to-hdfs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/dataForProject/script-init/read-postgres-to-hdfs.txt -------------------------------------------------------------------------------- /datanode/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/datanode/Dockerfile -------------------------------------------------------------------------------- /datanode/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/datanode/run.sh -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/docker-compose.yaml -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/entrypoint.sh -------------------------------------------------------------------------------- /hadoop-hive.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/hadoop-hive.env -------------------------------------------------------------------------------- /hadoop.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/hadoop.env -------------------------------------------------------------------------------- /historyserver/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/historyserver/Dockerfile -------------------------------------------------------------------------------- /historyserver/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/historyserver/run.sh -------------------------------------------------------------------------------- /master/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/master/Dockerfile -------------------------------------------------------------------------------- /master/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/master/README.md -------------------------------------------------------------------------------- /master/master.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/master/master.sh -------------------------------------------------------------------------------- /namenode/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/namenode/Dockerfile -------------------------------------------------------------------------------- /namenode/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/namenode/run.sh -------------------------------------------------------------------------------- /nginx/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/nginx/Dockerfile -------------------------------------------------------------------------------- /nginx/bde-hadoop.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/nginx/bde-hadoop.css -------------------------------------------------------------------------------- /nginx/default.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/nginx/default.conf -------------------------------------------------------------------------------- /nginx/materialize.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/nginx/materialize.min.css -------------------------------------------------------------------------------- /nodemanager/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/nodemanager/Dockerfile -------------------------------------------------------------------------------- /nodemanager/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/nodemanager/run.sh -------------------------------------------------------------------------------- /postgresql-42.6.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/postgresql-42.6.0.jar -------------------------------------------------------------------------------- /resourcemanager/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/resourcemanager/Dockerfile -------------------------------------------------------------------------------- /resourcemanager/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/resourcemanager/run.sh -------------------------------------------------------------------------------- /spark_in_action.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/spark_in_action.MD -------------------------------------------------------------------------------- /startup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/startup.sh -------------------------------------------------------------------------------- /superset/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/superset/Dockerfile -------------------------------------------------------------------------------- /superset/superset-init.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/superset/superset-init.sh -------------------------------------------------------------------------------- /superset/superset_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/superset/superset_config.py -------------------------------------------------------------------------------- /template/java/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/template/java/Dockerfile -------------------------------------------------------------------------------- /template/java/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/template/java/README.md -------------------------------------------------------------------------------- /template/java/template.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/template/java/template.sh -------------------------------------------------------------------------------- /template/python/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/template/python/Dockerfile -------------------------------------------------------------------------------- /template/python/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/template/python/README.md -------------------------------------------------------------------------------- /template/python/template.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sh /submit.sh 4 | -------------------------------------------------------------------------------- /template/scala/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/template/scala/Dockerfile -------------------------------------------------------------------------------- /template/scala/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/template/scala/README.md -------------------------------------------------------------------------------- /template/scala/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/template/scala/build.sbt -------------------------------------------------------------------------------- /template/scala/plugins.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/template/scala/plugins.sbt -------------------------------------------------------------------------------- /template/scala/template.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/template/scala/template.sh -------------------------------------------------------------------------------- /worker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/worker/Dockerfile -------------------------------------------------------------------------------- /worker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/worker/README.md -------------------------------------------------------------------------------- /worker/worker.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DucAnhNTT/bigdata-ETL-pipeline/HEAD/worker/worker.sh --------------------------------------------------------------------------------