├── .gitignore ├── Dockerfile ├── README.md ├── dev.env ├── docker-compose.yml └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .idea 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | .DS_Store 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # data files 133 | mage_ai/server/data/files/** 134 | default_repo 135 | instance_metadata.json 136 | ~/.mage_data 137 | 138 | # test notebook 139 | test.ipynb 140 | 141 | # test files 142 | testfiles/ 143 | 144 | # Docker specific files 145 | .bash_history 146 | .jupyter/ 147 | .local/ 148 | .npm/ 149 | .python_history 150 | mage_ai/frontend/package-lock.json 151 | docker-compose.override.yml 152 | 153 | # DB files 154 | *.db 155 | 156 | # Terraform files 157 | **/.terraform* 158 | *.tfstate* 159 | *.zip 160 | 161 | # vscode 162 | .vscode/ 163 | 164 | # front-end 165 | node_modules 166 | .npmrc 167 | 168 | # Scratch files 169 | scratch* 170 | /mage_data* 171 | *secrets*.json 172 | magic-zoomcamp 173 | *.json 174 | 175 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mageai/mageai:latest 2 | 3 | ARG USER_CODE_PATH=/home/src/${PROJECT_NAME} 4 | 5 | # Note: this overwrites the requirements.txt file in your new project on first run. 
6 | # You can delete this line for the second run :) 7 | COPY requirements.txt ${USER_CODE_PATH}requirements.txt 8 | 9 | RUN pip3 install -r ${USER_CODE_PATH}requirements.txt 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | 5 | ## Data Engineering Zoomcamp - Week 2 6 | 7 | Welcome to DE Zoomcamp with Mage! 8 | 9 | Mage is an open-source, hybrid framework for transforming and integrating data. ✨ 10 | 11 | In this module, you'll learn how to use the Mage platform to author and share _magical_ data pipelines. This will all be covered in the course, but if you'd like to learn a bit more about Mage, check out our docs [here](https://docs.mage.ai/introduction/overview). 12 | 13 | [Get Started](https://github.com/mage-ai/mage-zoomcamp?tab=readme-ov-file#lets-get-started) 14 | [Assistance](https://github.com/mage-ai/mage-zoomcamp?tab=readme-ov-file#assistance) 15 | 16 | ## Let's get started 17 | 18 | This repo contains a Docker Compose template for getting started with a new Mage project. It requires Docker to be installed locally. If Docker is not installed, please follow the instructions [here](https://docs.docker.com/get-docker/). 19 | 20 | You can start by cloning the repo: 21 | 22 | ```bash 23 | git clone https://github.com/mage-ai/mage-zoomcamp.git mage-zoomcamp 24 | ``` 25 | 26 | Navigate to the repo: 27 | 28 | ```bash 29 | cd mage-zoomcamp 30 | ``` 31 | 32 | Rename `dev.env` to simply `.env` — this will _ensure_ the file is not committed to Git by accident, since it _will_ contain credentials in the future. 33 | 34 | Now, let's build the container: 35 | 36 | ```bash 37 | docker compose build 38 | ``` 39 | 40 | Finally, start the Docker container: 41 | 42 | ```bash 43 | docker compose up 44 | ``` 45 | 46 | Now, navigate to http://localhost:6789 in your browser! Voila! You're ready to get started with the course. 47 | 48 | ### What just happened? 49 | 50 | We just initialized a new Mage repository. It will be present in your project under the name `magic-zoomcamp`. If you changed the variable `PROJECT_NAME` in the `.env` file, it will be named whatever you set it to. 51 | 52 | This repository should have the following structure: 53 | 54 | ``` 55 | . 
56 | ├── mage_data 57 | │ └── magic-zoomcamp 58 | ├── magic-zoomcamp 59 | │ ├── __pycache__ 60 | │ ├── charts 61 | │ ├── custom 62 | │ ├── data_exporters 63 | │ ├── data_loaders 64 | │ ├── dbt 65 | │ ├── extensions 66 | │ ├── interactions 67 | │ ├── pipelines 68 | │ ├── scratchpads 69 | │ ├── transformers 70 | │ ├── utils 71 | │ ├── __init__.py 72 | │ ├── io_config.yaml 73 | │ ├── metadata.yaml 74 | │ └── requirements.txt 75 | ├── Dockerfile 76 | ├── README.md 77 | ├── dev.env 78 | ├── docker-compose.yml 79 | └── requirements.txt 80 | ``` 81 | 82 | ## Assistance 83 | 84 | 1. [Mage Docs](https://docs.mage.ai/introduction/overview): a good place to understand Mage functionality or concepts. 85 | 2. [Mage Slack](https://www.mage.ai/chat): a good place to ask questions or get help from the Mage team. 86 | 3. [DTC Zoomcamp](https://github.com/DataTalksClub/data-engineering-zoomcamp/tree/main/week_2_workflow_orchestration): a good place to get help from the community on course-specific inquiries. 87 | 4. [Mage GitHub](https://github.com/mage-ai/mage-ai): a good place to open issues or feature requests. 88 | -------------------------------------------------------------------------------- /dev.env: -------------------------------------------------------------------------------- 1 | PROJECT_NAME=magic-zoomcamp 2 | POSTGRES_DBNAME=postgres 3 | POSTGRES_SCHEMA=magic 4 | POSTGRES_USER=postgres 5 | POSTGRES_PASSWORD=postgres 6 | POSTGRES_HOST=postgres 7 | POSTGRES_PORT=5432 8 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | magic: 4 | image: mageai/mageai:latest 5 | command: mage start ${PROJECT_NAME} 6 | env_file: 7 | - .env 8 | build: 9 | context: . 
10 | dockerfile: Dockerfile 11 | environment: 12 | USER_CODE_PATH: /home/src/${PROJECT_NAME} 13 | POSTGRES_DBNAME: ${POSTGRES_DBNAME} 14 | POSTGRES_SCHEMA: ${POSTGRES_SCHEMA} 15 | POSTGRES_USER: ${POSTGRES_USER} 16 | POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} 17 | POSTGRES_HOST: ${POSTGRES_HOST} 18 | POSTGRES_PORT: ${POSTGRES_PORT} 19 | ports: 20 | - 6789:6789 21 | volumes: 22 | - .:/home/src/ 23 | - ~/Documents/secrets/personal-gcp.json:/home/src/personal-gcp.json 24 | restart: on-failure:5 25 | postgres: 26 | image: postgres:14 27 | restart: on-failure 28 | container_name: ${PROJECT_NAME}-postgres 29 | env_file: 30 | - .env 31 | environment: 32 | POSTGRES_DB: ${POSTGRES_DBNAME} 33 | POSTGRES_USER: ${POSTGRES_USER} 34 | POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} 35 | ports: 36 | - "${POSTGRES_PORT}:5432" 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mage-ai/mage-zoomcamp/ff811d08342a18da022ae80843943d10b8758de9/requirements.txt --------------------------------------------------------------------------------