├── .gitignore
├── Dockerfile
├── README.md
├── dev.env
├── docker-compose.yml
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | .idea
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | .DS_Store
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 |
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 |
118 | # Rope project settings
119 | .ropeproject
120 |
121 | # mkdocs documentation
122 | /site
123 |
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 |
129 | # Pyre type checker
130 | .pyre/
131 |
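132 | # data files (note: git does not expand `~`, so `~/.mage_data` only matches a directory literally named `~` in the repo root)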
133 | mage_ai/server/data/files/**
134 | default_repo
135 | instance_metadata.json
136 | ~/.mage_data
137 |
138 | # test notebook
139 | test.ipynb
140 |
141 | # test files
142 | testfiles/
143 |
144 | # Docker specific files
145 | .bash_history
146 | .jupyter/
147 | .local/
148 | .npm/
149 | .python_history
150 | mage_ai/frontend/package-lock.json
151 | docker-compose.override.yml
152 |
153 | # DB files
154 | *.db
155 |
156 | # Terraform files
157 | **/.terraform*
158 | *.tfstate*
159 | *.zip
160 |
161 | # vscode
162 | .vscode/
163 |
164 | # front-end
165 | node_modules
166 | .npmrc
167 |
168 | # Scratch files, local Mage data, the generated project directory, and all JSON files (e.g. credentials)
169 | scratch*
170 | /mage_data*
171 | *secrets*.json
172 | magic-zoomcamp
173 | *.json
174 |
175 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM mageai/mageai:latest
2 |
3 | # PROJECT_NAME has to be declared as a build argument before it can be
4 | # referenced below; an undeclared name silently expands to an empty string.
5 | # When no --build-arg is supplied it is still empty, so USER_CODE_PATH
6 | # resolves to /home/src/ exactly as before.
7 | ARG PROJECT_NAME
8 | ARG USER_CODE_PATH=/home/src/${PROJECT_NAME}
9 |
10 | # Note: this overwrites the requirements.txt file in your new project on first run.
11 | # You can delete this line for the second run :)
12 | # The explicit "/" keeps the destination valid when USER_CODE_PATH does not
13 | # end in a slash (i.e. when PROJECT_NAME is set).
14 | COPY requirements.txt ${USER_CODE_PATH}/requirements.txt
15 |
16 | RUN pip3 install -r ${USER_CODE_PATH}/requirements.txt
17 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |

3 |
4 |
5 | ## Data Engineering Zoomcamp - Week 2
6 |
7 | Welcome to DE Zoomcamp with Mage!
8 |
9 | Mage is an open-source, hybrid framework for transforming and integrating data. ✨
10 |
11 | In this module, you'll learn how to use the Mage platform to author and share _magical_ data pipelines. This will all be covered in the course, but if you'd like to learn a bit more about Mage, check out our docs [here](https://docs.mage.ai/introduction/overview).
12 |
13 | [Get Started](https://github.com/mage-ai/mage-zoomcamp?tab=readme-ov-file#lets-get-started)
14 | [Assistance](https://github.com/mage-ai/mage-zoomcamp?tab=readme-ov-file#assistance)
15 |
16 | ## Let's get started
17 |
18 | This repo contains a Docker Compose template for getting started with a new Mage project. It requires Docker to be installed locally. If Docker is not installed, please follow the instructions [here](https://docs.docker.com/get-docker/).
19 |
20 | You can start by cloning the repo:
21 |
22 | ```bash
23 | git clone https://github.com/mage-ai/mage-zoomcamp.git mage-zoomcamp
24 | ```
25 |
26 | Navigate to the repo:
27 |
28 | ```bash
29 | cd mage-zoomcamp
30 | ```
31 |
32 | Rename `dev.env` to simply `.env` — this will _ensure_ the file is not committed to Git by accident (`.env` is listed in `.gitignore`), since it _will_ contain credentials in the future.
33 |
34 | Now, let's build the container:
35 |
36 | ```bash
37 | docker compose build
38 | ```
39 |
40 | Finally, start the Docker container:
41 |
42 | ```bash
43 | docker compose up
44 | ```
45 |
46 | Now, navigate to http://localhost:6789 in your browser! Voila! You're ready to get started with the course.
47 |
48 | ### What just happened?
49 |
50 | We just initialized a new mage repository. It will be present in your project under the name `magic-zoomcamp`. If you changed the variable `PROJECT_NAME` in the `.env` file, it will be named whatever you set it to.
51 |
52 | This repository should have the following structure:
53 |
54 | ```
55 | .
56 | ├── mage_data
57 | │ └── magic-zoomcamp
58 | ├── magic-zoomcamp
59 | │ ├── __pycache__
60 | │ ├── charts
61 | │ ├── custom
62 | │ ├── data_exporters
63 | │ ├── data_loaders
64 | │ ├── dbt
65 | │ ├── extensions
66 | │ ├── interactions
67 | │ ├── pipelines
68 | │ ├── scratchpads
69 | │ ├── transformers
70 | │ ├── utils
71 | │ ├── __init__.py
72 | │ ├── io_config.yaml
73 | │ ├── metadata.yaml
74 | │ └── requirements.txt
75 | ├── Dockerfile
76 | ├── README.md
77 | ├── dev.env
78 | ├── docker-compose.yml
79 | └── requirements.txt
80 | ```
81 |
82 | ## Assistance
83 |
84 | 1. [Mage Docs](https://docs.mage.ai/introduction/overview): a good place to understand Mage functionality or concepts.
85 | 2. [Mage Slack](https://www.mage.ai/chat): a good place to ask questions or get help from the Mage team.
86 | 3. [DTC Zoomcamp](https://github.com/DataTalksClub/data-engineering-zoomcamp/tree/main/week_2_workflow_orchestration): a good place to get help from the community on course-specific inquiries.
87 | 4. [Mage GitHub](https://github.com/mage-ai/mage-ai): a good place to open issues or feature requests.
88 |
--------------------------------------------------------------------------------
/dev.env:
--------------------------------------------------------------------------------
1 | PROJECT_NAME=magic-zoomcamp
2 | POSTGRES_DBNAME=postgres
3 | POSTGRES_SCHEMA=magic
4 | POSTGRES_USER=postgres
5 | POSTGRES_PASSWORD=postgres
6 | POSTGRES_HOST=postgres
7 | POSTGRES_PORT=5432
8 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 |   # Mage server. The image is built from the local Dockerfile (context ".").
4 |   # NOTE(review): with both `image` and `build` set, Compose tags the locally
5 |   # built image with the `image` name — confirm that reusing the upstream tag
6 |   # mageai/mageai:latest for the local build is intended.
7 |   magic:
8 |     image: mageai/mageai:latest
9 |     command: mage start ${PROJECT_NAME}
10 |     env_file:
11 |       - .env
12 |     build:
13 |       context: .
14 |       dockerfile: Dockerfile
15 |     environment:
16 |       USER_CODE_PATH: /home/src/${PROJECT_NAME}
17 |       POSTGRES_DBNAME: ${POSTGRES_DBNAME}
18 |       POSTGRES_SCHEMA: ${POSTGRES_SCHEMA}
19 |       POSTGRES_USER: ${POSTGRES_USER}
20 |       POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
21 |       POSTGRES_HOST: ${POSTGRES_HOST}
22 |       # NOTE(review): POSTGRES_PORT is also the *published host port* of the
23 |       # postgres service below, whose container port is fixed at 5432.
24 |       # Changing it away from 5432 may break this service's connection — confirm.
25 |       POSTGRES_PORT: ${POSTGRES_PORT}
26 |     ports:
27 |       - "6789:6789"
28 |     volumes:
29 |       - .:/home/src/
30 |       # Host path of the GCP service-account key. Set GCP_CREDENTIALS_PATH
31 |       # (e.g. in .env) to point at your own key file; the previous hard-coded
32 |       # location is kept as the default.
33 |       - ${GCP_CREDENTIALS_PATH:-~/Documents/secrets/personal-gcp.json}:/home/src/personal-gcp.json
34 |     restart: on-failure:5
35 |   # PostgreSQL database; credentials and database name come from .env.
36 |   postgres:
37 |     image: postgres:14
38 |     restart: on-failure
39 |     container_name: ${PROJECT_NAME}-postgres
40 |     env_file:
41 |       - .env
42 |     environment:
43 |       POSTGRES_DB: ${POSTGRES_DBNAME}
44 |       POSTGRES_USER: ${POSTGRES_USER}
45 |       POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
46 |     ports:
47 |       - "${POSTGRES_PORT}:5432"
48 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mage-ai/mage-zoomcamp/ff811d08342a18da022ae80843943d10b8758de9/requirements.txt
--------------------------------------------------------------------------------