├── .gitignore ├── Dockerfile-airflow ├── Dockerfile-hadoop ├── Readme.md ├── airflow ├── Dockerfile ├── airflow.cfg ├── entrypoint.sh └── run_airflow.sh ├── conf ├── core-site.xml ├── hdfs-site.xml ├── mapred-site.xml └── yarn-site.xml ├── dags ├── __pycache__ │ └── docker_exec.cpython-37.pyc ├── avg_product_price.py └── lowest_rated_movies.py ├── data ├── Sales.csv ├── benda.txt ├── u.data ├── u.item └── words.txt ├── datanode ├── Dockerfile └── run.sh ├── docker-compose-airflow-hadoop.yml ├── docker-compose-airflow.yml ├── docker-compose-hadoop.yml ├── entrypoint.sh ├── hadoop.env ├── historyserver ├── Dockerfile └── run.sh ├── map_reduce ├── analyze_map_reduce.py ├── ratings_breakdown.py ├── simple_mrjob.py ├── spark │ ├── LowestRatedMovieDataFrame.py │ ├── LowestRatedPopularMovieDataFrame.py │ ├── LowestRatedPopularMovieSpark.py │ ├── MovieRecommendationsALS.py │ ├── average_price.py │ └── lowest_rated_movies_spark.py └── word_count │ ├── mapper.py │ ├── reducer.py │ └── word_freq_mrjob.py ├── namenode ├── Dockerfile └── run.sh ├── nodemanager ├── Dockerfile └── run.sh ├── poetry.lock ├── pyproject.toml ├── requirements.txt ├── resourcemanager ├── Dockerfile └── run.sh ├── run_cluster.sh ├── spark ├── Dockerfile ├── docker-compose.yml ├── start-cluster.sh └── start-spark.sh ├── src └── hadoop-spark-airflow-architecture.png └── stop_cluster.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/.gitignore -------------------------------------------------------------------------------- /Dockerfile-airflow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/Dockerfile-airflow -------------------------------------------------------------------------------- /Dockerfile-hadoop: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/Dockerfile-hadoop -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/Readme.md -------------------------------------------------------------------------------- /airflow/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/airflow/Dockerfile -------------------------------------------------------------------------------- /airflow/airflow.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/airflow/airflow.cfg -------------------------------------------------------------------------------- /airflow/entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/airflow/entrypoint.sh -------------------------------------------------------------------------------- /airflow/run_airflow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker build -t airflow-hadoop-base:3.3.6 . 4 | -------------------------------------------------------------------------------- /conf/core-site.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/conf/core-site.xml -------------------------------------------------------------------------------- /conf/hdfs-site.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/conf/hdfs-site.xml -------------------------------------------------------------------------------- /conf/mapred-site.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/conf/mapred-site.xml -------------------------------------------------------------------------------- /conf/yarn-site.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/conf/yarn-site.xml -------------------------------------------------------------------------------- /dags/__pycache__/docker_exec.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/dags/__pycache__/docker_exec.cpython-37.pyc -------------------------------------------------------------------------------- /dags/avg_product_price.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/dags/avg_product_price.py -------------------------------------------------------------------------------- /dags/lowest_rated_movies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/dags/lowest_rated_movies.py -------------------------------------------------------------------------------- /data/Sales.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/data/Sales.csv -------------------------------------------------------------------------------- /data/benda.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/data/benda.txt -------------------------------------------------------------------------------- /data/u.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/data/u.data -------------------------------------------------------------------------------- /data/u.item: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/data/u.item -------------------------------------------------------------------------------- /data/words.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/data/words.txt -------------------------------------------------------------------------------- /datanode/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/datanode/Dockerfile -------------------------------------------------------------------------------- /datanode/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/datanode/run.sh -------------------------------------------------------------------------------- /docker-compose-airflow-hadoop.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/docker-compose-airflow-hadoop.yml -------------------------------------------------------------------------------- /docker-compose-airflow.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/docker-compose-airflow.yml -------------------------------------------------------------------------------- /docker-compose-hadoop.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/docker-compose-hadoop.yml -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/entrypoint.sh -------------------------------------------------------------------------------- /hadoop.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/hadoop.env -------------------------------------------------------------------------------- /historyserver/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/historyserver/Dockerfile -------------------------------------------------------------------------------- /historyserver/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/historyserver/run.sh -------------------------------------------------------------------------------- /map_reduce/analyze_map_reduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/map_reduce/analyze_map_reduce.py -------------------------------------------------------------------------------- /map_reduce/ratings_breakdown.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/map_reduce/ratings_breakdown.py -------------------------------------------------------------------------------- /map_reduce/simple_mrjob.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/map_reduce/simple_mrjob.py -------------------------------------------------------------------------------- /map_reduce/spark/LowestRatedMovieDataFrame.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/map_reduce/spark/LowestRatedMovieDataFrame.py -------------------------------------------------------------------------------- /map_reduce/spark/LowestRatedPopularMovieDataFrame.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/map_reduce/spark/LowestRatedPopularMovieDataFrame.py -------------------------------------------------------------------------------- /map_reduce/spark/LowestRatedPopularMovieSpark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/map_reduce/spark/LowestRatedPopularMovieSpark.py -------------------------------------------------------------------------------- /map_reduce/spark/MovieRecommendationsALS.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/map_reduce/spark/MovieRecommendationsALS.py -------------------------------------------------------------------------------- /map_reduce/spark/average_price.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/map_reduce/spark/average_price.py -------------------------------------------------------------------------------- /map_reduce/spark/lowest_rated_movies_spark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/map_reduce/spark/lowest_rated_movies_spark.py -------------------------------------------------------------------------------- /map_reduce/word_count/mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/map_reduce/word_count/mapper.py -------------------------------------------------------------------------------- /map_reduce/word_count/reducer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/map_reduce/word_count/reducer.py -------------------------------------------------------------------------------- /map_reduce/word_count/word_freq_mrjob.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/map_reduce/word_count/word_freq_mrjob.py -------------------------------------------------------------------------------- /namenode/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/namenode/Dockerfile -------------------------------------------------------------------------------- /namenode/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/namenode/run.sh -------------------------------------------------------------------------------- /nodemanager/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/nodemanager/Dockerfile -------------------------------------------------------------------------------- /nodemanager/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/nodemanager/run.sh -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/poetry.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/requirements.txt -------------------------------------------------------------------------------- /resourcemanager/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/resourcemanager/Dockerfile -------------------------------------------------------------------------------- /resourcemanager/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/resourcemanager/run.sh -------------------------------------------------------------------------------- /run_cluster.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/run_cluster.sh -------------------------------------------------------------------------------- /spark/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/spark/Dockerfile -------------------------------------------------------------------------------- /spark/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/spark/docker-compose.yml -------------------------------------------------------------------------------- /spark/start-cluster.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/spark/start-cluster.sh -------------------------------------------------------------------------------- /spark/start-spark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/spark/start-spark.sh -------------------------------------------------------------------------------- /src/hadoop-spark-airflow-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/src/hadoop-spark-airflow-architecture.png -------------------------------------------------------------------------------- /stop_cluster.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FirasKahlaoui/hadoop-docker-bd/HEAD/stop_cluster.sh --------------------------------------------------------------------------------