├── .dockerignore
├── .github
└── workflows
│ └── ci_streamlit.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .prefectignore
├── CHANGELOG.md
├── Dockerfile
├── README.md
├── api
├── Dockerfile
├── api.go
├── go.mod
└── go.sum
├── components
├── __init__.py
├── about_section.py
├── connections.py
├── fixtures_section.py
├── highlights_section.py
├── injuries_section.py
├── league_form_section.py
├── news_section.py
├── point_progression_section.py
├── point_slider_section.py
├── social_media_section.py
├── squads_section.py
├── stadiums_map_section.py
├── stock_section.py
├── top_scorers_section.py
└── top_teams_section.py
├── dbt_prod
└── models
│ ├── schema.yml
│ └── stocks.sql
├── etl
├── README.md
├── bigquery
│ ├── __init__.py
│ ├── current_round.py
│ ├── highlights.py
│ ├── injuries.py
│ ├── news.py
│ ├── squads.py
│ ├── stadiums.py
│ ├── standings.py
│ ├── teams.py
│ └── top_scorers.py
├── cloud_functions
│ ├── standings_transfer.py
│ └── top_scorers_transfer.py
├── firestore
│ └── fixtures.py
├── postgres
│ └── stock.py
└── requirements-data.txt
├── monitoring
├── docker-compose.yml
├── postgres_exporter.yml
└── prometheus.yml
├── prefect
├── data_quality_scan.py
├── deployments
│ ├── current_round-deployment.yaml
│ ├── highlights-deployment.yaml
│ ├── injuries-deployment.yaml
│ ├── news-deployment.yaml
│ ├── run_soda_scan-deployment.yaml
│ ├── squads-deployment.yaml
│ ├── statistics-deployment.yaml
│ └── stocks-deployment.yaml
└── flows.py
├── pyproject.toml
├── requirements.txt
├── soda
├── checks.yaml
└── configuration.yaml
├── streamlit_app.py
├── terraform
├── installations.sh
└── main.tf
└── tests
├── requirements-tests.txt
└── test_streamlit_app.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Files
2 | **.json
3 | **.DS_Store
4 | .streamlit/
5 | .gitignore
6 | .pylintrc
7 | README.md
8 |
9 | # Folders
10 | testing/
11 | .github/
12 | api/
13 | etl/
14 |
15 | # Terraform
16 | terraform/
17 |
18 | # Cache
19 | .mypy_cache/
20 | .ruff_cache/
21 | components/__pycache__/
22 |
23 | # Byte-compiled / optimized / DLL files
24 | **/__pycache__
25 | *.py[cod]
26 | *$py.class
27 |
28 | # Environments
29 | .env
30 | .venv
31 | env/
32 | venv/
33 | ENV/
34 | env.bak/
35 | venv.bak/
--------------------------------------------------------------------------------
/.github/workflows/ci_streamlit.yaml:
--------------------------------------------------------------------------------
1 | name: CI/CD - Streamlit Image
2 |
3 | on:
4 | workflow_dispatch:
5 | inputs:
6 | logLevel:
7 | description: 'Log level'
8 | required: true
9 |         default: 'info'
10 | type: choice
11 | options:
12 | - info
13 |
14 | push:
15 | paths-ignore:
16 | - '.github/**'
17 | - 'api/**'
18 | - 'dbt_prod/**'
19 | - 'etl/**'
20 | - 'monitoring/**'
21 | - 'prefect/**'
22 | - 'soda/**'
23 | - 'terraform/**'
24 | - 'tests/**'
25 | - 'CHANGELOG.md'
26 | - '.dockerignore'
27 | - '.gitignore'
28 | - '.prefectignore'
29 | - '.pre-commit-config.yaml'
30 | - '.streamlit'
31 | - 'pyproject.toml'
32 | - 'README.md'
33 | branches:
34 | - "main"
35 |
36 | env:
37 | GAR_LOCATION: us-central1
38 | VERSION_NUMBER: '2.17.1'
39 | REGISTRY_IMAGE: digitalghostdev/premier-league
40 |
41 | jobs:
42 |
43 | snyk:
44 | runs-on: ubuntu-22.04
45 |
46 | permissions:
47 | actions: read
48 | contents: read
49 | security-events: write
50 |
51 | steps:
52 |
53 | - name: Checkout
54 | uses: actions/checkout@v4
55 |
56 | - name: Run Snyk
57 | uses: snyk/actions/python-3.10@master
58 | continue-on-error: true
59 | env:
60 | SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
61 | with:
62 | args: --sarif-file-output=snyk.sarif --skip-unresolved=true
63 |
64 | - name: Upload Result to GitHub Code Scanning
65 | uses: github/codeql-action/upload-sarif@v2
66 | with:
67 | sarif_file: snyk.sarif
68 |
69 | pytest:
70 | permissions:
71 | contents: 'read'
72 | id-token: 'write'
73 |
74 | runs-on: ubuntu-22.04
75 | needs: [snyk]
76 | if: |
77 | always() &&
78 | (needs.snyk.result == 'success')
79 | steps:
80 | - name: Checkout
81 | uses: actions/checkout@v4
82 |
83 | - name: Set up Python
84 | uses: 'actions/setup-python@v5.0.0'
85 | with:
86 | python-version: 3.12.0
87 |
88 | - name: Install Dependencies
89 | run: |
90 | python -m pip install --upgrade pip
91 | pip install -r requirements.txt
92 | pip install pytest==7.4.3
93 | pip install pytest-cov==4.1.0
94 |
95 | - name: Google Auth
96 | id: auth
97 | uses: 'google-github-actions/auth@v2'
98 | with:
99 | token_format: 'access_token'
100 | workload_identity_provider: '${{ secrets.WIF_PROVIDER }}'
101 | service_account: '${{ secrets.WIF_SERVICE_ACCOUNT_DATABASE }}'
102 | project_id: '${{ secrets.PROJECT_ID }}'
103 |
104 | - name: Run Tests
105 | run: |
106 | pytest --cov=streamlit_app tests/ -v
107 |
108 | build-streamlit-image:
109 | runs-on: ubuntu-22.04
110 | needs: [pytest]
111 | if: |
112 | always() &&
113 | (needs.pytest.result == 'success')
114 |
115 | steps:
116 |
117 | - name: Checkout
118 | uses: actions/checkout@v4
119 |
120 | - name: Set up Docker Buildx
121 | uses: 'docker/setup-buildx-action@v3.0.0'
122 |
123 | - name: Prepare Docker Build Context
124 | run: |
125 | mkdir docker-context
126 | cp ./.dockerignore docker-context
127 | cp ./Dockerfile docker-context
128 | cp -r ./components docker-context/components
129 | cp ./streamlit_app.py docker-context
130 | cp ./requirements.txt docker-context
131 |
132 | - name: Build and Export
133 | uses: 'docker/build-push-action@v5.0.0'
134 | with:
135 | context: ./docker-context
136 | tags: streamlit:${{ env.VERSION_NUMBER }}
137 | outputs: type=docker,dest=/tmp/streamlit.tar
138 |
139 | - name: Upload Artifact
140 | uses: actions/upload-artifact@v4
141 | with:
142 | name: streamlit
143 | path: /tmp/streamlit.tar
144 |
145 | push-artifact-registry:
146 | permissions:
147 | contents: 'read'
148 | id-token: 'write'
149 |
150 | runs-on: ubuntu-22.04
151 | needs: [build-streamlit-image]
152 | if: |
153 | always() &&
154 | (needs.build-streamlit-image.result == 'success')
155 |
156 | steps:
157 |
158 | - name: Checkout
159 | uses: actions/checkout@v4
160 |
161 | - name: Set up Docker Buildx
162 | uses: 'docker/setup-buildx-action@v3.0.0'
163 |
164 | - name: Download Artifact
165 | uses: actions/download-artifact@v4
166 | with:
167 | name: streamlit
168 | path: /tmp
169 |
170 | - name: Google Auth
171 | id: auth
172 | uses: 'google-github-actions/auth@v2'
173 | with:
174 | token_format: 'access_token'
175 | workload_identity_provider: '${{ secrets.WIF_PROVIDER }}'
176 | service_account: '${{ secrets.WIF_SERVICE_ACCOUNT_ARTIFACT_REGISTRY }}'
177 | project_id: '${{ secrets.PROJECT_ID }}'
178 |
179 | - name: Docker Auth
180 | id: docker-auth
181 | uses: 'docker/login-action@v3'
182 | with:
183 | username: 'oauth2accesstoken'
184 | password: '${{ steps.auth.outputs.access_token }}'
185 | registry: '${{ env.GAR_LOCATION }}-docker.pkg.dev'
186 |
187 | - name: Load Image
188 | run: |
189 | docker load --input /tmp/streamlit.tar
190 | docker image ls -a
191 |
192 | - name: Tag Image
193 | run: |
194 | docker tag \
195 | streamlit:${{ env.VERSION_NUMBER }} \
196 | "${{ env.GAR_LOCATION }}-docker.pkg.dev/${{ secrets.PROJECT_ID }}/${{ secrets.REGISTRY_REPO }}/streamlit:${{ env.VERSION_NUMBER }}"
197 |
198 | - name: Push Image
199 | run: |
200 | docker push \
201 | "${{ env.GAR_LOCATION }}-docker.pkg.dev/${{ secrets.PROJECT_ID }}/${{ secrets.REGISTRY_REPO }}/streamlit:${{ env.VERSION_NUMBER }}"
202 |
203 | deploy-streamlit-image:
204 | permissions:
205 | contents: 'read'
206 | id-token: 'write'
207 |
208 | runs-on: ubuntu-22.04
209 | needs: [push-artifact-registry]
210 | if: |
211 | always() &&
212 | (needs.push-artifact-registry.result == 'success')
213 |
214 | steps:
215 |
216 | - name: Checkout
217 | uses: actions/checkout@v4
218 |
219 | - name: Google Auth
220 | id: auth
221 | uses: 'google-github-actions/auth@v2'
222 | with:
223 | token_format: 'access_token'
224 | workload_identity_provider: '${{ secrets.WIF_PROVIDER }}'
225 | service_account: '${{ secrets.WIF_SERVICE_ACCOUNT_CLOUD_RUN }}'
226 | project_id: '${{ secrets.PROJECT_ID }}'
227 |
228 | - name: Deploy Image
229 | id: 'deploy'
230 | uses: 'google-github-actions/deploy-cloudrun@v2.0.0'
231 | with:
232 | service: streamlit
233 | image: "${{ env.GAR_LOCATION }}-docker.pkg.dev/${{ secrets.PROJECT_ID }}/${{ secrets.REGISTRY_REPO }}/streamlit:${{ env.VERSION_NUMBER }}"
234 | flags: "--service-account=${{ secrets.WIF_SERVICE_ACCOUNT_CLOUD_RUN }} --max-instances=3"
235 | no_traffic: false
236 |
237 | set-latest-revision:
238 | permissions:
239 | contents: 'read'
240 | id-token: 'write'
241 |
242 | runs-on: ubuntu-22.04
243 | needs: [deploy-streamlit-image]
244 | if: |
245 | always() &&
246 | (needs.deploy-streamlit-image.result == 'success')
247 |
248 | steps:
249 |
250 | - name: Checkout
251 | uses: actions/checkout@v4
252 |
253 | - name: Google Auth
254 | id: auth
255 | uses: 'google-github-actions/auth@v2'
256 | with:
257 | token_format: 'access_token'
258 | workload_identity_provider: '${{ secrets.WIF_PROVIDER }}'
259 | service_account: '${{ secrets.WIF_SERVICE_ACCOUNT_CLOUD_RUN }}'
260 | project_id: '${{ secrets.PROJECT_ID }}'
261 |
262 | - name: 'Set up Cloud SDK'
263 | uses: 'google-github-actions/setup-gcloud@v2'
264 |
265 | - name: 'Set Latest Revision'
266 | run: |
267 | gcloud run services update-traffic streamlit \
268 | --to-latest \
269 |           --region=${{ env.GAR_LOCATION }}
270 |
271 | architecture-build:
272 | runs-on: ubuntu-22.04
273 | needs: [pytest]
274 | if: |
275 | always() &&
276 | (needs.pytest.result == 'success')
277 |
278 | strategy:
279 | fail-fast: false
280 | matrix:
281 | platform: [linux/amd64, linux/arm64]
282 |
283 | steps:
284 |
285 | - name: Checkout
286 | uses: actions/checkout@v4
287 |
288 | - name: Docker Meta
289 | id: meta
290 | uses: 'docker/metadata-action@v5.0.0'
291 | with:
292 | images: ${{ env.REGISTRY_IMAGE }}
293 |
294 | - name: Set up QEMU
295 | uses: 'docker/setup-qemu-action@v3'
296 |
297 | - name: Set up Docker Buildx
298 | uses: 'docker/setup-buildx-action@v3.0.0'
299 |
300 | - name: Login to Docker Hub
301 | uses: 'docker/login-action@v3'
302 | with:
303 | username: ${{ secrets.DOCKERHUB_USERNAME }}
304 | password: ${{ secrets.DOCKERHUB_TOKEN }}
305 |
306 | - name: Build and Push by Digest
307 | id: build
308 | uses: 'docker/build-push-action@v5.0.0'
309 | with:
310 | context: .
311 | platforms: ${{ matrix.platform }}
312 | labels: ${{ steps.meta.outputs.labels }}
313 | outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
314 |
315 | - name: Export Digest
316 | run: |
317 | mkdir -p /tmp/digests
318 | digest="${{ steps.build.outputs.digest }}"
319 | touch "/tmp/digests/${digest#sha256:}"
320 |
321 | - name: Upload Digest for AMD64
322 | if: matrix.platform == 'linux/amd64'
323 | uses: actions/upload-artifact@v4
324 | with:
325 | name: digests-amd64
326 | path: /tmp/digests/*
327 | if-no-files-found: error
328 | retention-days: 1
329 |
330 | - name: Upload Digest for ARM64
331 | if: matrix.platform == 'linux/arm64'
332 | uses: actions/upload-artifact@v4
333 | with:
334 | name: digests-arm64
335 | path: /tmp/digests/*
336 | if-no-files-found: error
337 | retention-days: 1
338 |
339 |
340 | create-manifest-and-push:
341 | runs-on: ubuntu-22.04
342 | needs:
343 | - architecture-build
344 |
345 | steps:
346 |
347 | - name: Download Digests
348 | uses: actions/download-artifact@v4
349 | with:
350 | pattern: digests-*
351 | path: /tmp/digests
352 | merge-multiple: true
353 |
354 | - name: Set up Docker Buildx
355 | uses: 'docker/setup-buildx-action@v3.0.0'
356 |
357 | - name: Docker meta
358 | id: meta
359 | uses: 'docker/metadata-action@v5.0.0'
360 | with:
361 | images: ${{ env.REGISTRY_IMAGE }}
362 | tags: ${{ env.VERSION_NUMBER }}
363 |
364 | - name: Login to Docker Hub
365 | uses: 'docker/login-action@v3'
366 | with:
367 | username: ${{ secrets.DOCKERHUB_USERNAME }}
368 | password: ${{ secrets.DOCKERHUB_TOKEN }}
369 |
370 | - name: Create Manifest List and Push
371 | working-directory: /tmp/digests
372 | run: |
373 | docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
374 | $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
375 |
376 | - name: Inspect image
377 | run: |
378 | docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
379 |
380 | syft:
381 | permissions:
382 | contents: 'read'
383 | id-token: 'write'
384 |
385 | runs-on: ubuntu-22.04
386 | needs: [build-streamlit-image]
387 | if: |
388 | always() &&
389 | (needs.build-streamlit-image.result == 'success')
390 |
391 | steps:
392 |
393 | - name: Checkout
394 | uses: actions/checkout@v4
395 |
396 | - name: Set up Docker Buildx
397 | uses: 'docker/setup-buildx-action@v3.0.0'
398 |
399 | - name: Download Artifact
400 | uses: actions/download-artifact@v4
401 | with:
402 | name: streamlit
403 | path: /tmp
404 |
405 | - name: Load Image
406 | run: |
407 | docker load --input /tmp/streamlit.tar
408 | docker image ls -a
409 |
410 | - name: Create and Upload SBOM
411 | uses: anchore/sbom-action@v0
412 | with:
413 | image: streamlit:${{ env.VERSION_NUMBER }}
414 | artifact-name: streamlit-sbom-${{ env.VERSION_NUMBER }}.spdx.json
415 | upload-artifact: true
416 |
417 | grype:
418 | permissions:
419 | actions: read
420 | contents: read
421 | security-events: write
422 |
423 | runs-on: ubuntu-22.04
424 | needs: [syft]
425 |
426 | if: |
427 | always() &&
428 | (needs.syft.result == 'success')
429 |
430 | steps:
431 |
432 | - name: Download SBOM
433 |         uses: actions/download-artifact@v4
434 | with:
435 | name: streamlit-sbom-${{ env.VERSION_NUMBER }}.spdx.json
436 |
437 | - name: Scan SBOM
438 | uses: anchore/scan-action@v3
439 | id: scan
440 | with:
441 | sbom: streamlit-sbom-${{ env.VERSION_NUMBER }}.spdx.json
442 | fail-build: false
443 | output-format: sarif
444 | severity-cutoff: critical
445 |
446 | - name: Upload SARIF Report
447 | uses: github/codeql-action/upload-sarif@v2
448 | with:
449 | sarif_file: ${{ steps.scan.outputs.sarif }}
450 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Files
2 | **.json
3 | **.DS_Store
4 | .streamlit/secrets.toml
5 |
6 | # Testing files and directories
7 | testing/
8 | test.py
9 |
10 | # Cache files and directories
11 | **.*cache
12 |
13 | # Terraform secrets and state files
14 | terraform/.terraform*
15 | terraform/terraform*
16 | terraform/cloud_sql/.terraform*
17 | terraform/cloud_sql/terraform*
18 |
19 | # Monitoring
20 | monitoring/values*
21 | monitoring/grafana_storage/
22 |
23 | # Soda configuration files
24 | soda/configuration.yaml
25 |
26 | # dbt configuration files
27 | dbt_prod/target/
28 | dbt_prod/dbt_packages/
29 | dbt_prod/logs/
30 |
31 | # Byte-compiled / optimized / DLL files
32 | **/__pycache__
33 | *.py[cod]
34 | *$py.class
35 |
36 | # C extensions
37 | *.so
38 |
39 | # Distribution / packaging
40 | .Python
41 | build/
42 | develop-eggs/
43 | dist/
44 | downloads/
45 | eggs/
46 | .eggs/
47 | lib/
48 | lib64/
49 | parts/
50 | sdist/
51 | var/
52 | wheels/
53 | share/python-wheels/
54 | *.egg-info/
55 | .installed.cfg
56 | *.egg
57 | MANIFEST
58 |
59 | # PyInstaller
60 | # Usually these files are written by a python script from a template
61 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
62 | *.manifest
63 | *.spec
64 |
65 | # Installer logs
66 | pip-log.txt
67 | pip-delete-this-directory.txt
68 |
69 | # Unit test / coverage reports
70 | htmlcov/
71 | .tox/
72 | .nox/
73 | .coverage
74 | .coverage.*
75 | .cache
76 | nosetests.xml
77 | coverage.xml
78 | *.cover
79 | *.py,cover
80 | .hypothesis/
81 | .pytest_cache/
82 | cover/
83 |
84 | # Translations
85 | *.mo
86 | *.pot
87 |
88 | # Django stuff:
89 | *.log
90 | local_settings.py
91 | db.sqlite3
92 | db.sqlite3-journal
93 |
94 | # Flask stuff:
95 | instance/
96 | .webassets-cache
97 |
98 | # Scrapy stuff:
99 | .scrapy
100 |
101 | # Sphinx documentation
102 | docs/_build/
103 |
104 | # PyBuilder
105 | .pybuilder/
106 | target/
107 |
108 | # Jupyter Notebook
109 | .ipynb_checkpoints
110 |
111 | # IPython
112 | profile_default/
113 | ipython_config.py
114 | .pdm.toml
115 |
116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117 | __pypackages__/
118 |
119 | # Celery stuff
120 | celerybeat-schedule
121 | celerybeat.pid
122 |
123 | # SageMath parsed files
124 | *.sage.py
125 |
126 | # Environments
127 | .env
128 | .venv
129 | env/
130 | venv*/
131 | ENV/
132 | env.bak/
133 | venv.bak/
134 |
135 | # Spyder project settings
136 | .spyderproject
137 | .spyproject
138 |
139 | # Rope project settings
140 | .ropeproject
141 |
142 | # mkdocs documentation
143 | /site
144 |
145 | # mypy
146 | .mypy_cache/
147 | .dmypy.json
148 | dmypy.json
149 |
150 | # Pyre type checker
151 | .pyre/
152 |
153 | # pytype static type analyzer
154 | .pytype/
155 |
156 | # Cython debug symbols
157 | cython_debug/
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/PyCQA/bandit
3 | rev: '1.7.5'
4 | hooks:
5 | - id: bandit
6 | args: ["-c", "pyproject.toml"]
7 | additional_dependencies: [".[toml]"]
8 |
9 | - repo: https://github.com/pre-commit/mirrors-mypy
10 | rev: 'v1.4.1'
11 | hooks:
12 | - id: mypy
13 |
14 | - repo: https://github.com/astral-sh/ruff-pre-commit
15 | # Ruff version.
16 | rev: v0.1.6
17 | hooks:
18 | - id: ruff-format
19 |
20 | - repo: https://github.com/astral-sh/ruff-pre-commit
21 | rev: v0.1.6
22 | hooks:
23 | - id: ruff
24 |
25 | exclude: prefect/flows.py
--------------------------------------------------------------------------------
/.prefectignore:
--------------------------------------------------------------------------------
1 | # prefect artifacts
2 | .prefectignore
3 |
4 | # python artifacts
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 | *.egg-info/
9 | *.egg
10 |
11 | # Type checking artifacts
12 | .mypy_cache/
13 | .dmypy.json
14 | dmypy.json
15 | .pyre/
16 |
17 | # IPython
18 | profile_default/
19 | ipython_config.py
20 | *.ipynb_checkpoints/*
21 |
22 | # Environments
23 | .python-version
24 | .env
25 | .venv
26 | env/
27 | venv/
28 |
29 | # MacOS
30 | .DS_Store
31 |
32 | # Dask
33 | dask-worker-space/
34 |
35 | # Editors
36 | .idea/
37 | .vscode/
38 |
39 | # VCS
40 | .git/
41 | .hg/
42 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 | This change log provides version history for the Streamlit Dashboard.
3 |
4 | View the Streamlit dashboard: https://streamlit.digitalghost.dev/
5 |
6 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
7 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
8 |
9 | * **MAJOR:** Any changes to the backend infrastructure that requires new methods of moving data that won't work with the previous architecture, mainly with the addition of new databases or data sources.
10 | * **MINOR:** Any changes to the Streamlit dashboard that adds a new interaction/feature or removal of one.
11 | * **PATCH:** Any changes that fix bugs, typos or small edits.
12 |
13 | # Update History
14 |
15 | ## 2.17.1 | 2024-04-27
16 |
17 | ### Changed
18 | * [#184](https://github.com/digitalghost-dev/premier-league/issues/184) - Changed the calling of the dashboard's different components to using the new `@st.experimental_fragment` decorator in Streamlit's `1.33.0` version.
19 | * [#185](https://github.com/digitalghost-dev/premier-league/issues/185) - Changed the News section into an importable `class`.
20 |
21 | ---
22 |
23 | ## [2.17.0] | 2024-03-17
24 |
25 | ### Added
26 | * [#183](https://github.com/digitalghost-dev/premier-league/issues/183) - Added a new *Recent Injuries* section under *Players & Injuries* tab.
27 |
28 | ### Changed
29 | * [#182](https://github.com/digitalghost-dev/premier-league/issues/182) - Changed the tab name for *Players Statistics* to *Players & Injuries*.
30 |
31 | ---
32 |
33 | ## [2.16.1] | 2024-03-01
34 |
35 | ### Changed
36 | * [#181](https://github.com/digitalghost-dev/premier-league/issues/181) - Changed `components/connections.py` to use new dataset in BigQuery for team squads.
37 |
38 | ---
39 |
40 | ## [2.16.0] | 2024-02-11
41 |
42 | ### Added
43 | * [#179](https://github.com/digitalghost-dev/premier-league/issues/179) - Added a new tab that shows a stock chart for **MANU**, Manchester United's stock ticker.
44 | * [#180](https://github.com/digitalghost-dev/premier-league/issues/180) - Added a `st.info` and `st.warning` message to explain the tab with its functions and to explain that no data was found, respectively.
45 |
46 | ---
47 |
48 | ## [2.15.0] | 2024-01-28
49 |
50 | ### Added
51 | * [#165](https://github.com/digitalghost-dev/premier-league/issues/165) - Added each team's club icon to the **Squads** tab when a team is selected from the dropdown menu.
52 | * [#172](https://github.com/digitalghost-dev/premier-league/issues/172) - Added a new **Players Statistics** tab.
53 |
54 | ### Changed
55 | * [#164](https://github.com/digitalghost-dev/premier-league/issues/164) - Changed the default value `st.selectbox` to `None` in the **Squads** tab.
56 | * [#168](https://github.com/digitalghost-dev/premier-league/issues/168) - Changed the `max_value` for each `st.dataframe` to programmatically calculate based on current max value in the DataFrame under the **League Statistics** section.
57 | * [#171](https://github.com/digitalghost-dev/premier-league/issues/171) - Changed line chart under **Point Progression throughout the Season** section to use plotly instead of Streamlit's built in `st.line_chart` method.
58 |
59 | ### Removed
60 | * [#170](https://github.com/digitalghost-dev/premier-league/issues/170) - Removed `for` loop that previously generated the sections for **Goalkeepers**, **Defenders**, **Midfielders**, and **Attackers** under the **Squads** tab.
61 | * [#173](https://github.com/digitalghost-dev/premier-league/issues/173) - Removed `st.container` border from **Top 5 Teams** and **Top 5 Scorers** sections.
62 |
63 | ---
64 |
65 | ## [2.14.1] | 2024-01-25
66 |
67 | ### Changed
68 | * [#169](https://github.com/digitalghost-dev/premier-league/issues/169) - Changed the query for `components/connections.py` to reflect table schema changes for the standings `st.dataframe`.
69 |
70 | ---
71 |
72 | ## [2.14.0] | 2024-01-08
73 |
74 | ### Added
75 | * [#154](https://github.com/digitalghost-dev/premier-league/issues/154) - Added a new tab called **Squads** that displays the current squad for each team in the league.
76 |
77 | ### Changed
78 | * [#153](https://github.com/digitalghost-dev/premier-league/issues/153) - Changed the Fixtures `st.header()` to `st.subheader()`.
79 | * [#155](https://github.com/digitalghost-dev/premier-league/issues/155) - Changed the About `st.header()` to `st.subheader()`.
80 |
81 | ---
82 |
83 | ## [2.13.0] | 2023-12-19
84 |
85 | ### Added
86 | * [#148](https://github.com/digitalghost-dev/premier-league/issues/148) - Added a `st.header` titled **Fixtures** to the fixtures tab.
87 | * [#146](https://github.com/digitalghost-dev/premier-league/issues/146) - Added a new section that shows highlights using the YouTube API under the **News & Highlights** tab.
88 |
89 | ### Changed
90 | * [#149](https://github.com/digitalghost-dev/premier-league/issues/149) - Changed the current `st.subheader` to `st.header` on the **About** tab.
91 | * [#147](https://github.com/digitalghost-dev/premier-league/issues/147) - Changed the **News** tab to **News & Highlights** to reflect the new section that was added.
92 |
93 | ---
94 |
95 | ## [2.12.1] | 2023-12-12
96 |
97 | ### Fixed
98 | * [#144](https://github.com/digitalghost-dev/premier-league/issues/144) - Fixed the `st.line_chart` **Point Progression** section to display the correct column for the legend.
99 |
100 | ---
101 |
102 | ## [2.12.0] | 2023-12-11
103 |
104 | ### Added
105 | * [#138](https://github.com/digitalghost-dev/premier-league/issues/138) - Added borders around the **Top 5 Teams** and **Top 5 Scorers** `st.container` sections.
106 | * [#125](https://github.com/digitalghost-dev/premier-league/issues/125) - Added a **Games Played** column to the `st.dataframe` **Standings** table.
107 |
108 | ### Changed
109 | * [#143](https://github.com/digitalghost-dev/premier-league/issues/143) - Changed the position of the **Points** column in the `st.dataframe` **Standings** table to be the second column.
110 |
111 | ---
112 |
113 | ## [2.11.5] | 2023-12-02
114 |
115 | ### Changed
116 | * [#137](https://github.com/digitalghost-dev/premier-league/issues/137) - Changed the Points Progression section into an importable `class`.
117 | * [#136](https://github.com/digitalghost-dev/premier-league/issues/136) - Changed the Top Teams section into an importable `class`.
118 | * [#135](https://github.com/digitalghost-dev/premier-league/issues/135) - Changed the League Forms section into an importable `class`.
119 | * [#134](https://github.com/digitalghost-dev/premier-league/issues/134) - Changed the Top Scorers section into an importable `class`.
120 |
121 | ### Fixed
122 | * [#139](https://github.com/digitalghost-dev/premier-league/issues/139) - Fixed the `st.subheader` typo in "Points Progression" section.
123 |
124 | ---
125 |
126 | ## [2.11.4] | 2023-12-01
127 |
128 | ### Fixed
129 | * [#128](https://github.com/digitalghost-dev/premier-league/issues/128) - Fixed the method of retrieving an item from a pandas DataFrame since the previous method will be deprecated.
130 |
131 | ### Removed
132 | * [#133](https://github.com/digitalghost-dev/premier-league/issues/133) - Removed dependency on a `.streamlit/secrets.toml` file for authentication.
133 |
134 | ---
135 |
136 | ## [2.11.3] | 2023-11-27
137 |
138 | ### Changed
139 | * [#127](https://github.com/digitalghost-dev/premier-league/issues/127) - Changed the maximum value for the `average_goals_df` `st.dataframe` and for the `win_streak_df` `st.dataframe`.
140 | * [#126](https://github.com/digitalghost-dev/premier-league/issues/126) - Changed the text for the win streak `st.dataframe()` to display *Biggest Win Streak* instead of *Current Win Streak*.
141 | * [#124](https://github.com/digitalghost-dev/premier-league/issues/124) - Changed the `social_media_section.display()` function to be called only once at the end of the `streamlit_app()` function instead of in each tab.
142 | * [#123](https://github.com/digitalghost-dev/premier-league/issues/123) - Changed the data connection functions into importable functions where all queries are now cached.
143 |
144 | ---
145 |
146 | ## [2.11.2] | 2023-11-17
147 |
148 | ### Changed
149 | * [#122](https://github.com/digitalghost-dev/premier-league/issues/122) - Changed the `Dockerfile` to handle the theme configuration instead of using a `.streamlit/config.toml` file.
150 | * [#121](https://github.com/digitalghost-dev/premier-league/issues/121) - Changed the icon for the dashboard from an image hosted on GCP's Cloud Storage to using [SimpleIcon's Premier League icon](https://simpleicons.org/?q=premier+league).
151 |
152 | ---
153 |
154 | ## [2.11.1] | 2023-11-15
155 |
156 | ### Changed
157 | * [#119](https://github.com/digitalghost-dev/premier-league/issues/119) - Changed import names in `streamlit_app.py` to match new naming standard.
158 | * [#118](https://github.com/digitalghost-dev/premier-league/issues/118) - Changed file names under `components/` to end with `_section.py` for better clarity.
159 | * [#117](https://github.com/digitalghost-dev/premier-league/issues/117) - Changed the `firestore_pull()` function into an importable `class`.
160 |
161 | ### Removed
162 | * [#120](https://github.com/digitalghost-dev/premier-league/issues/120) - Removed the `toast()` function.
163 |
164 | ---
165 |
166 | ## [2.11.0] | 2023-11-03
167 |
168 | ### Added
169 | * [#112](https://github.com/digitalghost-dev/premier-league/issues/112) - Added an **About** tab to display information about the project and the author.
170 |
171 | ### Changed
172 | * [#114](https://github.com/digitalghost-dev/premier-league/issues/114) - Changed the `stadiums_map()` function into an importable `class`.
173 |
174 | ### Fixed
175 | * [#115](https://github.com/digitalghost-dev/premier-league/issues/115) - Fixed the SQL responsible for populating the `st.dataframe` for **Standings** to order rows by `rank`.
176 |
177 | ### Removed
178 | * [#113](https://github.com/digitalghost-dev/premier-league/issues/113) - Removed **Top Teams Movement** section from **Standings & Overview** tab.
179 |
180 | ---
181 |
182 | ## [2.10.3] | 2023-10-26
183 |
184 | ### Changed
185 | * [#104](https://github.com/digitalghost-dev/premier-league/issues/104) - Changed the `social_media()` function into an importable `class` from the newly created `components/` directory.
186 |
187 | ---
188 |
189 | ## [2.10.2] | 2023-10-20
190 |
191 | ### Changed
192 | * [#103](https://github.com/digitalghost-dev/premier-league/issues/103) - Changed social media icons into static `.svg` files instead of using Font Awesome icons.
193 |
194 | ---
195 |
196 | ## [2.10.1] | 2023-09-10
197 |
198 | ### Fixed
199 | * [#91](https://github.com/digitalghost-dev/premier-league/issues/91) - Fixed the **News** tab to not error out when the table does not have at least 4 rows of data by implementing a `try/except` block.
200 |
201 | ---
202 |
203 | ## [2.10.0] | 2023-09-04
204 |
205 | ### Added
206 | * [#90](https://github.com/digitalghost-dev/premier-league/issues/90) - Added `st.subheader` under main header to display current round.
207 | * [#89](https://github.com/digitalghost-dev/premier-league/issues/89) - Added **News** tab to display the latest news from the Premier League using the [News API](https://newsapi.org/).
208 | * [#88](https://github.com/digitalghost-dev/premier-league/issues/88) - Added club logo to the Standings `st.dataframe`.
209 |
210 | ---
211 |
212 | ## [2.9.1] | 2023-08-27
213 |
214 | ### Fixed
215 | * [#87](https://github.com/digitalghost-dev/premier-league/issues/87) - Fixed the Standings column headers in the `st.dataframe` element to display proper column names instead of the SQL column names.
216 |
217 | ---
218 |
219 | ## [2.9.0] | 2023-08-20
220 |
221 | ### Added
222 | * [#75](https://github.com/digitalghost-dev/premier-league/issues/75) - Added Docker logo to social media section with link to Docker Hub repository.
223 | * [#72](https://github.com/digitalghost-dev/premier-league/issues/72) - Added `st.toast` to display a more subtle message to the user that the page is loading and when the data has loaded.
224 | * [#78](https://github.com/digitalghost-dev/premier-league/issues/78) - Added a new `st.dataframe` table to display current total metrics for the league (Goals Scored, Penalties Scored, and Clean Sheets).
225 |
226 | ### Changed
227 | * [#74](https://github.com/digitalghost-dev/premier-league/issues/74) - Changed page title to **"Streamlit: Premier League"**.
228 | * [#73](https://github.com/digitalghost-dev/premier-league/issues/73) - Changed tab names from **Standings** to **Standings & Overview** and **Statistics** to **Top Teams & Scorers**.
229 | * [#76](https://github.com/digitalghost-dev/premier-league/issues/76) - Changed `st.data_editor` to `st.dataframe` for displaying the statistic tables.
230 |
231 | ### Fixed
232 | * [#79](https://github.com/digitalghost-dev/premier-league/issues/79) - Fixed the `st.dataframe` tables under the **Top Teams Movement** section to display the correct data by sorting columns in descending order.
233 |
234 | ### Removed
235 | * Removed `st.spinner`.
236 |
237 | ---
238 |
239 | ## [2.8.0] | 2023-08-12
240 |
241 | ### Added
242 | * Added `st.spinner` to run when page loads to allow all tabs and data to load before a user can start navigating.
243 | * Added type annotations to `standings_table()` function to return `DeltaGenerator`.
244 | * Added type annotations to `stadiums_map()` function to return `DeltaGenerator`.
245 |
246 | ### Changed
247 | * Changed `st.subheader` from "Standings" to "Current Standings".
248 | * Changed `st.table` to `st.dataframe` for showing current standings.
249 | * Changed `st.map` location from *Playground* to *Standings* tab.
250 | * Changed the Social Media section to exist inside a function: `social_media()` and be called later in each tab.
251 | * Changed the standings table code to exist inside a function: `standings_table()`.
252 | * Changed the map code to exist inside a function: `stadiums_map()`.
253 | * Changed the format of writing out the Top 5 Teams, Top 5 Scorers, and Forms for the Rest of the League sections to use a `for` loop instead of writing out each section individually.
254 |
255 | ### Fixed
256 | * Fixed the date to correctly display the suffix of the number *(i.e. 1st, 2nd, 3rd, etc.)* and to remove leading zeroes for single digit dates.
257 |
258 | ### Removed
259 | * Removed *Playground Tab*.
260 | * Removed `pages/` directory as this app will continue development as a single page.
261 | * Removed `style.css`, standings table is no longer stylized with CSS.
262 | * Removed `st.slider` as interactive Streamlit elements in dashboards with tabs seemed to currently be bugged.
263 | * Related issues: [#4996](https://github.com/streamlit/streamlit/issues/4996), [#6257](https://github.com/streamlit/streamlit/issues/6257), and [#7017](https://github.com/streamlit/streamlit/issues/7017).
264 | * `st.bar_chart` has also been removed due to this bug.
265 | * Removed `import os`, `import psycopg2`, `import plotly.graph_objects as go` as they are no longer needed.
266 |
267 | ---
268 |
269 | ## [2.7.1] | 2023-07-13
270 |
271 | ### Fixed
272 | * **Main Page**, *Standings Tab*: Fixed `iloc[X][X]` values to match the correct column to pull in correct data for the Top 5 Teams section.
273 |
274 | ---
275 |
276 | ## [2.7.0] | 2023-07-12
277 |
278 | ### **Added**
279 | * **Main Page**, *Standings Tab*: Added 3 `st.column_config.ProgressColumn` cards to display rankings of teams with the highest `penalties_scored`, `average_goals`, and `win_streak` during the season.
280 |
281 | ### **Changed**
282 | * **Main Pages**, *Standings Tab*: Changed the data values for `label` and `value` for the `st.metric` card.
283 |
284 | ---
285 |
286 | ## [2.6.0] | 2023-06-28
287 |
288 | ### **Added**
289 | * **Playground Page**: Added social media icons to bottom of page.
290 | * **Main Page**, *Statistics Tab*: Added `assists` metric to the *Top 5 Scorers Section*.
291 | * **Main Page**, *Standings Tab*: Added a metric card to display the top 5 teams' position movement throughout the season.
292 |
293 | ### **Changed**
294 | * **Main Page**: Changed title to "2023-24" to reflect the new season.
295 | * **Main Page**, *Fixtures Tab*: Changed ordering of `fixtures` to appear in chronological order.
296 |
297 | ### **Removed**
298 | * **Main Page**, *Fixtures Tab*: Removed extra comma from `fixtures` date.
299 |
300 | ---
301 |
302 | ## [2.5.0] | 2023-06-19
303 |
304 | ### **Added**
305 | * Added a new page: **Playground**, that holds graphs with slicers, filters, and other sortable features that allows the end user to view statistics in a custom way.
306 | * Added `Recent_Form` to `standings` table as a new column.
307 | * Added string to display current date on **Standings** tab.
308 |
309 | ### **Changed**
310 | * Changed page title from **Overview** to **Premier League - Statistics, Scores & More**.
311 | * Changed **Overview** tab name to **Standings**.
312 |
313 | ### **Removed**
314 | * Removed map of stadium locations from **Main** page; moved it to the new **Playground** page.
315 |
316 | ---
317 |
318 | ## [2.4.0] | 2023-05-26
319 |
320 | ### **Added**
321 | * Added number to *Top 5 Teams* section to indicate current rank.
322 | * Added suffix to rank number in *Forms for the Rest of the League section*.
323 |
324 | ### **Changed**
325 | * Changed hyperlink for GitHub icon to point to GitHub profile instead of repository for project. A link to GitHub repository already exists by default.
326 |
327 | ### **Fixed**
328 | * Added `target="_blank" rel="noopener noreferrer"` to anchor elements to allow linked icons to open properly.
329 |
330 | ---
331 |
332 | ## [2.3.1] | 2023-05-25
333 |
334 | ### **Fixed**
335 | * Fixed broken link for GitHub Icon on all tabs.
336 |
337 | ---
338 |
339 | ## [2.3.0] | 2023-05-24
340 |
341 | ### **Added**
342 | * Added text that displays the final gameday of the season.
343 | * Added linked icons to social media pages.
344 |
345 | ### **Changed**
346 | * Changed tab title from **Top Teams & Top Scorers** to **Statistics**.
347 |
348 | ---
349 |
350 | ## [2.2.1] | 2023-05-19
351 |
352 | ### **Fixed**
353 | * Fixed promotion/demotion legend by displaying items as a column instead of in a row.
354 |
355 | ---
356 |
357 | ## [2.2.0] | 2023-05-17
358 |
359 | ### **Changed**
360 | * Changed the hex colors used for promotion/demotion status.
361 | * Changed the color of `locations` map markers to `indigo` to match the rest of the theme.
362 |
363 | ### **Added**
364 | * Added an extra color to denote europa conference league qualification promotion.
365 | * Added solid border element to `standings` table to better denote promotion/demotion status.
366 | * Added text under table to explain which color denotes which promotion/demotion status.
367 |
368 | ---
369 |
370 | ## [2.1.0] | 2023-05-10
371 |
372 | ### **Changed**
373 | * Changed stadium `locations` map to use [plotly express](https://plotly.com/python/mapbox-layers/) `scatter_mapbox` instead of Streamlit's built in `st.map()` function.
374 | * This allows the stadium points to be hoverable which enables a tooltip that provides more information about the venue.
375 | * Changed title to display ***Premier League Statistics / 2022-23*** instead of ***Premier League Statistics / '22-'23***.
376 |
377 | ---
378 |
379 | ## [2.0.2] | 2023-05-08
380 |
381 | ### **Fixed**
382 | * Fixed the sorting of `rounds` to appear in descending order on the `fixtures` tab.
383 |
384 | ---
385 |
386 | ## [2.0.1] | 2023-05-05
387 |
388 | ### **Fixed**
389 | * Adding '`<=`' to `while` loop to get the current round. Previously, the Streamlit app would only select rounds that were *less* than the `MAX` round which would omit the final round.
390 |
391 | ---
392 |
393 | ## [2.0.0] | 2023-05-02
394 | Now using [Firestore](https://firebase.google.com/docs/firestore/) to store fixture data in a document format.
395 |
396 | ### **Added**
397 | * Added `Fixtures` tab for all rounds in the current season. Updates 3 times a day and will add new rounds as they start.
398 |
399 | ---
400 |
401 | ## [1.3.0] | 2023-04-17
402 |
403 | ### **Added**
404 |
405 | * Added page title.
406 | * Added position number to teams in **Forms for the Rest of the League** section.
407 |
408 | ### **Fixed**
409 |
410 | * Fixing capitalization for **Forms for the Rest of the League** subheader.
411 |
412 | ### **Removed**
413 |
414 | * Removed Emojis from tab titles.
415 |
416 | ---
417 |
418 | ## [1.2.0] | 2023-04-16
419 |
420 | ### **Changed**
421 |
422 | Top Teams Tab
423 | * Renamed tab to: "⚽️ Top Teams & 🏃🏻♂️ Top Scorers".
424 | * Changed `st.plotly_chart` to `st.line_chart`.
425 | * Moved top scorers to this tab.
426 |
427 | ### **Removed**
428 |
429 | Top Players Tab
430 | * Removed this tab, combined with top teams tab.
431 |
432 | ---
433 |
434 | ## [1.1.0] | 2023-04-07
435 |
436 | ### **Added**
437 |
438 | Top Teams Tab
439 | * Added `logo` and `form` for the rest of the league.
440 |
441 | ### **Changed**
442 |
443 | Top Teams Tab
444 | * Center aligning `logo`, `form (last 5)`, `clean sheets`, `penalties scored`, and `penalties missed` in their containers.
445 | * Setting `logo` width for top 5 teams to `150px`.
446 |
447 | Top Players Tab
448 | * Center aligning `photo`, `name`, `goals`, `team`, and `nationality` in their containers.
449 | * Setting `photo` width for top 5 players to `150px`.
450 |
451 | ### **Removed**
452 | * Removed `LIMIT 5` from SQL query to pull all teams.
453 |
454 | ---
455 |
456 | ## [1.0.0] | 2023-04-05
457 |
458 | ### **Added**
459 |
460 | Overview Tab
461 | * View the current standings for the league for the current season.
462 | * An adjustable slider gives control to focus in on teams that fit within a certain number of points.
463 | * A bar chart with teams (x-axis) and points (y-axis) adjusts accordingly to the slider.
464 | * A map with plots for the stadium locations for each team in the current season.
465 |
466 | Top Teams Tab
467 | * Shows the `logo`, `form (last 5)`, `clean sheets`, `penalties scored`, and `penalties missed` for the current top five teams in the league.
468 | * A line graph depicts the rise in points over each matchday.
469 |
470 | Top Players Tab
471 | * Shows the `portrait`, `goals`, `team`, and `nationality` of the current top five goal scorers in the league.
472 |
473 | [2.17.0]: https://github.com/digitalghost-dev/premier-league/commit/f097df039469c361d992c4e52eaa6211354aefb5
474 |
475 | [2.16.1]: https://github.com/digitalghost-dev/premier-league/commit/950590251f6559beb2376acf491a3cf1edec8a8e
476 |
477 | [2.16.0]: https://github.com/digitalghost-dev/premier-league/commit/aae9d9c814eafc905104a765c475b5763d0881f8
478 |
479 | [2.15.0]: https://github.com/digitalghost-dev/premier-league/commit/95aac28fbf4ab29f7965e8bc326f631198cf7272
480 |
481 | [2.14.1]: https://github.com/digitalghost-dev/premier-league/commit/e4a0ba46fd3dee96544b34b2022140c73a4d2ccd
482 |
483 | [2.14.0]: https://github.com/digitalghost-dev/premier-league/commit/62a27e488c3fbc91c585e55e73c91adbe9edf0b8#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
484 |
485 | [2.13.0]: https://github.com/digitalghost-dev/premier-league/commit/dec0426ca5d3de50e8093874635f5bf01718aaa6
486 |
487 | [2.12.1]: https://github.com/digitalghost-dev/premier-league/commit/11e04f7aa42e607d65300600aef7b6743c520542
488 |
489 | [2.12.0]: https://github.com/digitalghost-dev/premier-league/commit/3df7c162a9d1deb587fe6f9681e3c8e028d2e094#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
490 |
491 | [2.11.5]: https://github.com/digitalghost-dev/premier-league/commit/d3f4e7416e6b667364235a070cf4715413091f8b#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
492 |
493 | [2.11.4]: https://github.com/digitalghost-dev/premier-league/commit/71f0424ff0c1b14571390ee6fe0775dd8da6d7ae#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
494 |
495 | [2.11.3]: https://github.com/digitalghost-dev/premier-league/commit/b13541d5a64ea67e42c1b10e87dd2a7e32798463#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
496 |
497 | [2.11.2]: https://github.com/digitalghost-dev/premier-league/commit/25bfb7f76f46a0f8badce8a896937ddf12690332#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
498 |
499 | [2.11.1]: https://github.com/digitalghost-dev/premier-league/commit/fad6ab3060540f7034435971e9d38c125af1ff06#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
500 |
501 | [2.11.0]: https://github.com/digitalghost-dev/premier-league/commit/4436a5387a3c9969236af2ec83fb0f7bef03ef7e#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
502 |
503 | [2.10.3]: https://github.com/digitalghost-dev/premier-league/commit/c18d9bfaf762ba7c4c2714150c1f6cd0f722b9e8#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
504 |
505 | [2.10.2]: https://github.com/digitalghost-dev/premier-league/commit/53218cf868e3bc8128327932512f5ac1d28e6740#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
506 |
507 | [2.10.1]: https://github.com/digitalghost-dev/premier-league/commit/c2a0d39eb7cab1b7ed3013bb5811490f70bd256e#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
508 |
509 | [2.10.0]: https://github.com/digitalghost-dev/premier-league/commit/483e68208487c1632d2aa93ac098683a6c3515cc#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
510 |
511 | [2.9.1]: https://github.com/digitalghost-dev/premier-league/commit/a726d8fbf9f99bddc03a7fbf465ddba14ed97aee#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
512 |
513 | [2.9.0]: https://github.com/digitalghost-dev/premier-league/commit/d905a2a26b38200a519c78fa4e3847b598dc3d8f#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
514 |
515 | [2.8.0]: https://github.com/digitalghost-dev/premier-league/commit/ffc31af3ca6bc58294ab6c8c6daba105d9e7c1a5#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
516 |
517 | [2.7.1]: https://github.com/digitalghost-dev/premier-league/commit/a18341f802c46043fa8122c517e479103c067870#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
518 |
519 | [2.7.0]: https://github.com/digitalghost-dev/premier-league/commit/522600c0da5c6c20dd51528794bc959c1adcd9e3#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
520 |
521 | [2.6.0]: https://github.com/digitalghost-dev/premier-league/commit/de5b6c14e370ec08f0a79a2cc1dafd84a144411a#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
522 |
523 | [2.5.0]: https://github.com/digitalghost-dev/premier-league/commit/247029c3a94e607d5ffd2adabc41178647d1796e#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
524 |
525 | [2.4.0]: https://github.com/digitalghost-dev/premier-league/commit/19ff4063496a646aad3b8750a7c434cdeb1004e9#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
526 |
527 | [2.3.1]: https://github.com/digitalghost-dev/premier-league/commit/c11bfaa2f2aa0317783be65f935387e25cf180de#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
528 |
529 | [2.3.0]: https://github.com/digitalghost-dev/premier-league/commit/5e3cadd68cefef3abf7dbe1809257a9fae39af4a#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
530 |
531 | [2.2.1]: https://github.com/digitalghost-dev/premier-league/commit/903d457765df9de9d3a0ea879082dc0096bdbb38#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
532 |
533 | [2.2.0]: https://github.com/digitalghost-dev/premier-league/commit/11606ed57e6a4460d5059fc0141fbeccd268b716#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
534 |
535 | [2.1.0]: https://github.com/digitalghost-dev/premier-league/commit/f4e580d998e8e1042b9b824aa846bf3e738b3fd4#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
536 |
537 | [2.0.2]: https://github.com/digitalghost-dev/premier-league/commit/72337e2ac3ee365612a6a02eda25f390ab2690b9#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
538 |
539 | [2.0.1]: https://github.com/digitalghost-dev/premier-league/commit/dc92180f52a325f79e14d89097940162711ac35f#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
540 |
541 | [2.0.0]: https://github.com/digitalghost-dev/premier-league/commit/a8b11f02c8b517453c1d7d2e34b0986ea73588ba#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
542 |
543 | [1.3.0]: https://github.com/digitalghost-dev/premier-league/commit/4b2063a3663f48e166f7b13cbe06e51b24fd2056#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
544 |
545 | [1.2.0]: https://github.com/digitalghost-dev/premier-league/commit/8d5fbb7cdf91263eb55f2bc7ecd09236d975a704#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
546 |
547 | [1.1.0]: https://github.com/digitalghost-dev/premier-league/commit/e99f1f4a6eab3ef967c30b6c21b4fffa109de8e9#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
548 |
549 | [1.0.0]: https://github.com/digitalghost-dev/premier-league/commit/429a6f3ca12bcdbb5bee4505d390838b25edb8bb#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Dockerfile to build the Streamlit app.
2 |
3 | FROM python:3.12-slim-bookworm
4 |
5 | # Dedicated non-root user/group so the app does not run as root.
6 | RUN groupadd -r streamlit_group
7 |
8 | RUN useradd -r -g streamlit_group streamlit_user
9 |
10 | WORKDIR /app
11 |
12 | # --no-install-recommends keeps the image slim; the apt cache is removed
13 | # in the same layer so it never ends up in the final image.
14 | RUN apt-get update && apt-get install -y --no-install-recommends \
15 |     build-essential \
16 |     curl \
17 |     software-properties-common \
18 |     git \
19 |     && rm -rf /var/lib/apt/lists/*
20 |
21 | # Install Python dependencies BEFORE copying the app source so that
22 | # source-only changes do not invalidate the (slow) pip install layer.
23 | COPY requirements.txt .
24 | RUN pip3 install --no-cache-dir -r requirements.txt
25 |
26 | COPY components components
27 | COPY streamlit_app.py .
28 |
29 | RUN chown -R streamlit_user:streamlit_group /app
30 |
31 | EXPOSE 8501
32 |
33 | # Streamlit exposes a built-in health endpoint; curl is installed above.
34 | HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
35 |
36 | USER streamlit_user
37 |
38 | ENTRYPOINT ["streamlit", "run", "streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0", "--theme.primaryColor=indigo", "--theme.textColor=black", "--theme.backgroundColor=#FFF", "--theme.secondaryBackgroundColor=#FFF"]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Premier League Data Pipeline
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | > [!WARNING]
19 | > After a year and some change of building this project, it's time for me to archive it. I've started to use these tools in my current position so learning these on my own and spending my own money on paying for the Football API and Google Cloud services no longer makes sense. I'm switching my focus on learning Golang!
20 |
21 | ## Overview
22 | This repository contains a personal project designed to enhance my skills in Data Engineering. It focuses on developing data pipelines that extract, transform, and load data from various sources into diverse databases. Additionally, it involves creating a dashboard with visualizations using Streamlit.
23 |
24 | > [!IMPORTANT]
25 | > Many architectural choices and decisions in this project may not make the most efficient sense on purpose for the sake of practicing and learning.
26 |
27 | ## Infrastructure
28 | ### Tools & Services
29 |      
30 |
31 | ### Databases
32 |   
33 |
34 | ### Code Quality
35 | 
36 |
37 | | Security Linter | Code Formatting | Type Checking | Code Linting |
38 | | --- | --- | --- | --- |
39 | | [`bandit`](https://github.com/PyCQA/bandit) | [`ruff-format`](https://github.com/astral-sh/ruff) | [`mypy`](https://github.com/python/mypy) | [`ruff`](https://github.com/astral-sh/ruff) |
40 |
41 | ---
42 |
43 | ## Data and CI/CD Pipelines
44 | ### Data Pipelines
45 |
46 | Data Pipeline 1
47 |
48 | Orchestrated with [Prefect](https://www.prefect.io), a Python file is ran to extract stock data for Manchester United.
49 |
50 | 1. Data from the [Financial Modeling Prep API](https://site.financialmodelingprep.com) is extracted with Python using the `/quote` endpoint.
51 | 2. The data is loaded directly into a PostgreSQL database hosted on [Cloud SQL](https://cloud.google.com/sql?hl=en) with no transformations.
52 | 3. Once the data is loaded into PostgreSQL, Datastream replicates the data into BigQuery. Datastream checks for staleness every 15 minutes.
53 | 4. [dbt](https://getdbt.com) is used to transform the data in BigQuery and create a view with transformed data.
54 |
55 | Data Pipeline 2
56 |
57 | Orchestrated with [Prefect](https://www.prefect.io), Python files are ran that perform a full ETL process.
58 |
59 | 1. Data is extracted from multiple API sources:
60 | * Data from the [Football Data API](https://www.football-data.org/) is extracted to retrieve information on the current standings, team statistics, top scorers, squads, fixtures, and the current round. The following endpoints are used:
61 | * `/standings`
62 | * `/teams`
63 | * `/top_scorers`
64 | * `/squads`
65 | * `/fixtures/current_round`
66 | * `/fixtures`
67 | * Data from the [NewsAPI](https://newsapi.org) is extracted to retrieve news article links with filters set to the Premier League from Sky Sports, The Guardian, and 90min. The following endpoints are used:
68 | * `/everything`
69 | * Data from a self-built API written in Golang is extracted to retrieve information on teams' stadiums. The following endpoints are used:
70 | * `/stadiums`
71 | * Data from the [YouTube API](https://developers.google.com/youtube/v3) is extracted to retrieve the latest highlights from NBC Sports YouTube channel.
72 | 2. Python performs any necessary transformations such as converting data types or checking for `NULL` values.
73 | 3. The majority of the data is then loaded into **BigQuery** in their respective tables. Fixture data is loaded into **Firestore** as documents categorized by the round number.
74 |
75 | Data Pipeline 3
76 | 1. Daily exports of the standings and top scorers data in BigQuery are exported to a Cloud Storage bucket using Cloud Scheduler to be used in another project.
77 | * The other project is a [CLI](https://github.com/digitalghost-dev/pl-cli/) tool written in Golang.
78 |
79 | Pipeline Diagram
80 |
81 | 
82 |
83 | ### CI/CD Pipeline
84 | The CI/CD pipeline is focused on building the Streamlit app into a Docker container that is then pushed to Artifact Registry and deployed to Cloud Run as a Service. Different architectures are built for different machine types and pushed to Docker Hub.
85 |
86 | 1. The repository code is checked out and a Docker image containing the updated `streamlit_app.py` file will build.
87 | 2. The newly built Docker image will be pushed to [Artifact Registry](https://cloud.google.com/artifact-registry).
88 | 3. The Docker image is then deployed to [Cloud Run](https://cloud.google.com/run/docs/overview/what-is-cloud-run) as a Service.
89 |
90 | #### Pipeline Diagram
91 | 
92 |
93 | ---
94 |
95 | ## Security
96 | * [Syft](https://github.com/anchore/syft) and [Grype](https://github.com/anchore/grype) work together to scan the Streamlit Docker image. Syft creates an [`SBOM`](https://www.linuxfoundation.org/blog/blog/what-is-an-sbom) and Grype scans the `SBOM` for vulnerabilities. The results are sent to the repository's Security tab.
97 | * [Snyk](https://github.com/snyk/actions/tree/master/python-3.10) is also used to scan the repository for vulnerabilities in the Python packages.
98 |
--------------------------------------------------------------------------------
/api/Dockerfile:
--------------------------------------------------------------------------------
1 | # syntax=docker/dockerfile:1
2 | # go.mod declares `go 1.21`, so the builder image must provide at least
3 | # Go 1.21 — the previous golang:1.19-alpine base failed `go build` with
4 | # a "go.mod requires go >= 1.21" error.
5 | FROM golang:1.21-alpine
6 |
7 | WORKDIR /app
8 |
9 | # Copy module files first so `go mod download` is cached across
10 | # source-only changes.
11 | COPY go.mod go.sum ./
12 |
13 | RUN go mod download
14 |
15 | COPY api.go .
16 |
17 | RUN go build -o /docker-gs-ping
18 |
19 | EXPOSE 8080
20 |
21 | CMD [ "/docker-gs-ping" ]
--------------------------------------------------------------------------------
/api/api.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "os"
5 | "fmt"
6 | "log"
7 | "net/http"
8 | "github.com/gin-gonic/gin"
9 | )
10 |
11 | // data structure
12 | type stadium struct {
13 | Team string `json:"team"`
14 | Stadium string `json:"stadium"`
15 | City string `json:"city"`
16 | Latitude float64 `json:"latitude"`
17 | Longitude float64 `json:"longitude"`
18 | Capacity string `json:"capacity"`
19 | Year_Opened string `json:"year_opened"`
20 | }
21 |
22 | // stadiums is the static dataset returned by /stadiums.
23 | // Fixed data typos: "Etihad Sadium" -> "Etihad Stadium" and
24 | // "Tottemham" -> "Tottenham".
25 | var stadiums = []stadium{
26 | 	{Team: "Arsenal", Stadium: "Emirates Stadium", City: "London", Latitude: 51.554867, Longitude: -0.109112, Capacity: "60,704", Year_Opened: "2006"},
27 | 	{Team: "Aston Villa", Stadium: "Villa Park", City: "Birmingham", Latitude: 52.509090, Longitude: -1.885249, Capacity: "42,657", Year_Opened: "1897"},
28 | 	{Team: "Bournemouth", Stadium: "Vitality Stadium", City: "Bournemouth", Latitude: 50.7348, Longitude: -1.8391, Capacity: "11,307", Year_Opened: "1910"},
29 | 	{Team: "Brentford", Stadium: "Gtech Community Stadium", City: "London", Latitude: 51.490715, Longitude: -0.289048, Capacity: "17,250", Year_Opened: "2020"},
30 | 	{Team: "Brighton", Stadium: "Falmer Stadium", City: "Falmer", Latitude: 50.861782, Longitude: -0.084357, Capacity: "31,800", Year_Opened: "2011"},
31 | 	{Team: "Burnley", Stadium: "Turf Moor", City: "Burnley", Latitude: 53.789108, Longitude: -2.230575, Capacity: "21,944", Year_Opened: "1883"},
32 | 	{Team: "Chelsea", Stadium: "Stamford Bridge", City: "London", Latitude: 51.481834, Longitude: -0.191390, Capacity: "40,343", Year_Opened: "1877"},
33 | 	{Team: "Crystal Palace", Stadium: "Selhurst Park", City: "London", Latitude: 51.398338, Longitude: -0.086084, Capacity: "25,486", Year_Opened: "1924"},
34 | 	{Team: "Everton", Stadium: "Goodison Park", City: "Liverpool", Latitude: 53.438751, Longitude: -2.966681, Capacity: "39,414", Year_Opened: "1892"},
35 | 	{Team: "Fulham", Stadium: "Craven Cottage", City: "London", Latitude: 51.281799, Longitude: -0.131080, Capacity: "29,600", Year_Opened: "1896"},
36 | 	{Team: "Liverpool", Stadium: "Anfield", City: "Liverpool", Latitude: 53.430759, Longitude: -2.961425, Capacity: "53,394", Year_Opened: "1884"},
37 | 	{Team: "Luton Town", Stadium: "Kenilworth Road", City: "Luton", Latitude: 51.883829798, Longitude: -0.425664964, Capacity: "10,356", Year_Opened: "1905"},
38 | 	{Team: "Manchester City", Stadium: "Etihad Stadium", City: "Manchester", Latitude: 53.483135, Longitude: -2.200941, Capacity: "53,400", Year_Opened: "2003"},
39 | 	{Team: "Manchester United", Stadium: "Old Trafford", City: "Manchester", Latitude: 53.463493, Longitude: -2.292279, Capacity: "74,310", Year_Opened: "1910"},
40 | 	{Team: "Newcastle", Stadium: "St James' Park", City: "Newcastle upon Tyne", Latitude: 54.975170, Longitude: -1.622539, Capacity: "52,305", Year_Opened: "1892"},
41 | 	{Team: "Nottingham Forest", Stadium: "City Ground", City: "West Bridgford", Latitude: 52.939938, Longitude: -1.13287, Capacity: "30,332", Year_Opened: "1898"},
42 | 	{Team: "Sheffield United", Stadium: "Bramall Lane", City: "Sheffield", Latitude: 53.368831858, Longitude: -1.46916479, Capacity: "32,050", Year_Opened: "1855"},
43 | 	{Team: "Tottenham", Stadium: "Tottenham Hotspur Stadium", City: "London", Latitude: 51.604252, Longitude: -0.067007, Capacity: "62,850", Year_Opened: "2019"},
44 | 	{Team: "West Ham", Stadium: "London Stadium", City: "London", Latitude: 51.538811, Longitude: -0.017136, Capacity: "62,500", Year_Opened: "2012"},
45 | 	{Team: "Wolves", Stadium: "Molineux Stadium", City: "Wolverhampton", Latitude: 52.590382, Longitude: -2.130924, Capacity: "31,750", Year_Opened: "1889"},
46 | }
44 |
45 | // getStadium is the handler for GET /stadiums: it writes the full
46 | // stadiums slice as indented JSON with a 200 status.
47 | func getStadium(c *gin.Context) {
48 | 	c.IndentedJSON(http.StatusOK, stadiums)
49 | }
49 |
50 | // main wires up the Gin router and serves the API on the port given by
51 | // the PORT environment variable (default "8080").
52 | //
53 | // Bug fixed: router.Run() was called before the PORT lookup and the
54 | // net/http server below it; Run() blocks forever, so the status
55 | // endpoint and the custom port were unreachable dead code. Everything
56 | // is now served by the single Gin router.
57 | func main() {
58 | 	router := gin.Default()
59 | 	router.GET("/stadiums", getStadium)
60 |
61 | 	// Status endpoint, previously registered on the (never started)
62 | 	// net/http default mux; same response body as before.
63 | 	router.GET("/v1/", func(c *gin.Context) {
64 | 		fmt.Fprintf(c.Writer, "{status: 'running'}")
65 | 	})
66 |
67 | 	port := os.Getenv("PORT")
68 | 	if port == "" {
69 | 		port = "8080"
70 | 	}
71 |
72 | 	log.Println("listening on port", port)
73 | 	if err := router.Run(":" + port); err != nil {
74 | 		log.Fatalf("Error launching REST API server: %v", err)
75 | 	}
76 | }
--------------------------------------------------------------------------------
/api/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/digitalghost-dev/api
2 |
3 | go 1.21
4 |
5 | require github.com/gin-gonic/gin v1.8.1
6 |
7 | // Indirect dependencies pulled in by gin v1.8.1.
8 | // NOTE(review): golang.org/x/crypto, x/net, x/sys, and x/text are pinned
9 | // to 2021-era pseudo-versions with known CVEs — consider `go get -u`
10 | // and `go mod tidy` to refresh them.
11 | require (
12 | 	github.com/gin-contrib/sse v0.1.0 // indirect
13 | 	github.com/go-playground/locales v0.14.0 // indirect
14 | 	github.com/go-playground/universal-translator v0.18.0 // indirect
15 | 	github.com/go-playground/validator/v10 v10.10.0 // indirect
16 | 	github.com/goccy/go-json v0.9.7 // indirect
17 | 	github.com/json-iterator/go v1.1.12 // indirect
18 | 	github.com/leodido/go-urn v1.2.1 // indirect
19 | 	github.com/mattn/go-isatty v0.0.14 // indirect
20 | 	github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect
21 | 	github.com/modern-go/reflect2 v1.0.2 // indirect
22 | 	github.com/pelletier/go-toml/v2 v2.0.1 // indirect
23 | 	github.com/ugorji/go/codec v1.2.7 // indirect
24 | 	golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97 // indirect
25 | 	golang.org/x/net v0.0.0-20210226172049-e18ecbb05110 // indirect
26 | 	golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069 // indirect
27 | 	golang.org/x/text v0.3.6 // indirect
28 | 	google.golang.org/protobuf v1.28.0 // indirect
29 | 	gopkg.in/yaml.v2 v2.4.0 // indirect
30 | )
31 |
--------------------------------------------------------------------------------
/api/go.sum:
--------------------------------------------------------------------------------
1 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
2 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
4 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
5 | github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
6 | github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
7 | github.com/gin-gonic/gin v1.8.1 h1:4+fr/el88TOO3ewCmQr8cx/CtZ/umlIRIs5M4NTNjf8=
8 | github.com/gin-gonic/gin v1.8.1/go.mod h1:ji8BvRH1azfM+SYow9zQ6SZMvR8qOMZHmsCuWR9tTTk=
9 | github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A=
10 | github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
11 | github.com/go-playground/locales v0.14.0 h1:u50s323jtVGugKlcYeyzC0etD1HifMjqmJqb8WugfUU=
12 | github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs=
13 | github.com/go-playground/universal-translator v0.18.0 h1:82dyy6p4OuJq4/CByFNOn/jYrnRPArHwAcmLoJZxyho=
14 | github.com/go-playground/universal-translator v0.18.0/go.mod h1:UvRDBj+xPUEGrFYl+lu/H90nyDXpg0fqeB/AQUGNTVA=
15 | github.com/go-playground/validator/v10 v10.10.0 h1:I7mrTYv78z8k8VXa/qJlOlEXn/nBh+BF8dHX5nt/dr0=
16 | github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos=
17 | github.com/goccy/go-json v0.9.7 h1:IcB+Aqpx/iMHu5Yooh7jEzJk1JZ7Pjtmys2ukPr7EeM=
18 | github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
19 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
20 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
21 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
22 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
23 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
24 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
25 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
26 | github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
27 | github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
28 | github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
29 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
30 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
31 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
32 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
33 | github.com/leodido/go-urn v1.2.1 h1:BqpAaACuzVSgi/VLzGZIobT2z4v53pjosyNd9Yv6n/w=
34 | github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY=
35 | github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
36 | github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
37 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
38 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
39 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
40 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
41 | github.com/pelletier/go-toml/v2 v2.0.1 h1:8e3L2cCQzLFi2CR4g7vGFuFxX7Jl1kKX8gW+iV0GUKU=
42 | github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo=
43 | github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
44 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
45 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
46 | github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
47 | github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8=
48 | github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
49 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
50 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
51 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
52 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
53 | github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
54 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
55 | github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M=
56 | github.com/ugorji/go/codec v1.2.7 h1:YPXUKf7fYbp/y8xloBqZOw2qaVggbfwMlI8WM3wZUJ0=
57 | github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY=
58 | golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97 h1:/UOmuWzQfxxo9UtlXMwuQU8CMgg1eZXqTRwkSQJWKOI=
59 | golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
60 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110 h1:qWPm9rbaAMKs8Bq/9LRpbMqxWRVUAQwMI9fVrssnTfw=
61 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
62 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
63 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
64 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
65 | golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069 h1:siQdpVirKtzPhKl3lZWozZraCFObP8S1v6PRp0bLrtU=
66 | golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
67 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
68 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
69 | golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
70 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
71 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
72 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
73 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
74 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
75 | google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
76 | google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
77 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
78 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
79 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
80 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
81 | gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
82 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
83 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
84 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
85 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
86 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--------------------------------------------------------------------------------
/components/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/digitalghost-dev/premier-league/964156b1e7876fe6ced410c8a902ca30d3dd7cdf/components/__init__.py
--------------------------------------------------------------------------------
/components/about_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 |
3 |
class AboutSection:
    """Renders the "About" section of the Streamlit dashboard."""

    def __init__(self):
        pass

    def display(self):
        """Write the About header and the project description to the page."""
        st.subheader("About")
        # Fixed grammar in the user-facing text: "created by maintained by"
        # -> "created and maintained by".
        st.write(
            """
            This project is created and maintained by [myself](https://github.com/digitalghost-dev) to practice my skills in Data Engineering to one day break into the field.

            I chose using data for Premier League because I am a huge fan of the sport and I am always interested in learning more about the game.

            This is the only project that I'm currently working on and plan to continue to add more features and tools to it as I learn more about Data Engineering.
            """
        )
19 |
--------------------------------------------------------------------------------
/components/connections.py:
--------------------------------------------------------------------------------
1 | import firebase_admin # type: ignore
2 | import pandas as pd
3 | import streamlit as st
4 | from firebase_admin import firestore # type: ignore
5 | from google.cloud import bigquery
6 | import google.auth
7 |
8 |
# Firestore Connection
@st.cache_resource
def firestore_connection() -> firestore.Client:
    """Return a cached Firestore client built from application-default credentials.

    Initializes the firebase_admin default app once (first call only); Streamlit
    caches the resulting client across reruns via st.cache_resource.
    NOTE(review): the client is constructed from google-auth credentials while
    the app is initialized through firebase_admin - confirm both resolve to the
    same GCP project.
    """
    credentials, project = google.auth.default()
    if not firebase_admin._apps:
        firebase_admin.initialize_app()

    return firestore.Client(credentials=credentials)
17 |
18 |
# BigQuery Connection
@st.cache_data(ttl=600)
def run_query(query):
    """Execute a BigQuery SQL query and return the rows as a list of dicts.

    Streamlit caches the result for 600 seconds per unique query string.
    """
    credentials, _ = google.auth.default()
    client = bigquery.Client(credentials=credentials)
    rows = client.query(query).result()
    return [dict(row) for row in rows]
27 |
28 |
@st.cache_resource
def get_standings() -> pd.DataFrame:
    """Return the league table joined with team metadata, ordered by rank."""
    sql = """
        SELECT rank, points, t.logo, t.team, games_played, wins, draws, loses, goals_for, goals_against, goal_difference
        FROM `premier_league_dataset.standings` AS s
        INNER JOIN `premier_league_dataset.teams` AS t
        ON s.team_id = t.team_id
        ORDER BY rank ASC;
        """
    return pd.DataFrame(data=run_query(sql))
41 |
42 |
@st.cache_resource
def get_stadiums() -> pd.DataFrame:
    """Return each team's stadium name and its coordinates."""
    sql = """
        SELECT team, stadium, latitude, longitude
        FROM `premier_league_dataset.stadiums`;
        """
    return pd.DataFrame(data=run_query(sql))
52 |
53 |
@st.cache_resource
def get_teams() -> pd.DataFrame:
    """Return team statistics joined with standings, ordered by league rank."""
    sql = """
        SELECT t.logo, form, t.team, clean_sheets, penalties_scored, penalties_missed, average_goals, win_streak
        FROM `premier_league_dataset.teams` AS t
        LEFT JOIN `premier_league_dataset.standings` AS s
        ON t.team = s.Team
        ORDER BY s.rank;
        """
    return pd.DataFrame(data=run_query(sql))
66 |
67 |
@st.cache_resource
def get_top_scorers() -> pd.DataFrame:
    """Return all top-scorer rows, most goals first."""
    sql = """
        SELECT *
        FROM `premier_league_dataset.top_scorers`
        ORDER BY Goals DESC;
        """
    return pd.DataFrame(data=run_query(sql))
78 |
79 |
@st.cache_resource
def get_news() -> pd.DataFrame:
    """Return all news rows, newest first."""
    sql = """
        SELECT *
        FROM `premier_league_dataset.news`
        ORDER BY published_at DESC;
        """
    return pd.DataFrame(data=run_query(sql))
90 |
91 |
@st.cache_resource
def get_highlights() -> pd.DataFrame:
    """Return all highlight rows, most recently published first."""
    sql = """
        SELECT *
        FROM `premier_league_dataset.highlights`
        ORDER BY publish_time DESC;
        """
    return pd.DataFrame(data=run_query(sql))
102 |
103 |
@st.cache_resource
def get_league_statistics() -> pd.DataFrame:
    """Return league-wide totals: goals scored, penalties scored, clean sheets."""
    sql = """
            SELECT
            SUM(goals_for) AS league_goals_scored,
            SUM(penalties_scored) AS league_penalties_scored,
            SUM(clean_sheets) AS league_clean_sheets
        FROM premier_league_dataset.teams AS t
        JOIN premier_league_dataset.standings AS s
        ON t.team_id = s.team_id;
        """
    return pd.DataFrame(data=run_query(sql))
118 |
119 |
@st.cache_resource
def get_min_round() -> int:
    """Return the smallest round number in the current_round table.

    The value is cast to a plain ``int`` so the function actually honors its
    annotated return type (the raw BigQuery/pandas value is a NumPy integer).
    """
    min_round_row = run_query(
        """
        SELECT MIN(round) AS round
        FROM `premier_league_dataset.current_round`;
        """
    )
    min_round_df = pd.DataFrame(data=min_round_row)
    # Cast to int so callers get a native Python integer, per the annotation.
    return int(min_round_df["round"][0])
131 |
132 |
@st.cache_resource
def get_max_round() -> int:
    """Return the largest round number in the current_round table.

    Mirrors ``get_min_round``: the raw value is cast to a plain ``int`` so the
    annotated return type holds, and the DataFrame gets its own name instead of
    rebinding ``max_round_row`` (naming now consistent with get_min_round).
    """
    max_round_row = run_query(
        """
        SELECT MAX(round) AS round
        FROM `premier_league_dataset.current_round`;
        """
    )
    max_round_df = pd.DataFrame(data=max_round_row)
    return int(max_round_df["round"][0])
144 |
145 |
@st.cache_resource
def get_squads() -> pd.DataFrame:
    """Return every team's squad from the consolidated squads view."""
    sql = """
        SELECT *
        FROM `premier_league_squads.all_teams_squads_view`

        """
    return pd.DataFrame(data=run_query(sql))
156 |
157 |
@st.cache_resource
def get_injuries() -> pd.DataFrame:
    """Return every team's injuries from the consolidated injuries view."""
    sql = """
        SELECT *
        FROM `premier_league_injuries.all_teams_injuries_view`
        """
    return pd.DataFrame(data=run_query(sql))
167 |
168 |
@st.cache_resource
def get_stocks() -> pd.DataFrame:
    """Return the MANU stock price series (timestamp, price) from dbt."""
    sql = """
        SELECT new_york_time, price
        FROM `dbt_production.stocks`
        """
    return pd.DataFrame(data=run_query(sql))
178 |
--------------------------------------------------------------------------------
/components/fixtures_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from firebase_admin import firestore # type: ignore
3 | from datetime import datetime
4 |
5 | from typing import List
6 | from typing import Tuple
7 |
8 |
class FixturesSection:
    """Renders per-round fixture expanders from Firestore match documents.

    NOTE(review): the st.markdown string literals below appear to have had
    their HTML markup stripped in this copy of the file (several f-strings are
    even broken across lines) - the code is preserved byte-for-byte here;
    confirm against the original source before editing.
    """

    def __init__(self, firestore_database, max_round: int, min_round: int):
        # Firestore client plus the inclusive round range to display.
        self.firestore_database = firestore_database
        self.max_round = int(max_round)
        self.min_round = int(min_round)

    def firestore_pull(
        self, round_count
    ) -> Tuple[List[str], List[int], List[int], List[str], List[str], List[str], List[str]]:
        """Fetch one round's fixtures and return parallel per-match lists.

        Assumes exactly 10 matches per round (hard-coded range(10)) and that
        each document has date/goals/teams fields - TODO confirm schema.
        """
        # Calling each document in the collection in ascending order by date.
        collection_ref = self.firestore_database.collection(f"Regular Season - {round_count}")
        query = collection_ref.order_by("date", direction=firestore.Query.ASCENDING)
        results = query.stream()

        # Setting an empty list. This list will contain each fixture's details that can later be called by referencing its index.
        documents = []

        # Iterating through the query results to get the document ID (e.g., 'Manchester City vs Burnley') and its data.
        for doc in results:
            document_dict = {"id": doc.id, "data": doc.to_dict()}
            documents.append(document_dict)

        # Retrieving and formatting match date.
        # The ordinal suffix is chosen per English rules: 4th-20th always "th",
        # otherwise st/nd/rd by the day's last digit.
        match_date = [
            datetime.strptime(documents[count]["data"]["date"], "%Y-%m-%dT%H:%M:%S+00:00")
            .strftime("%B %d{}, %Y - %H:%M")
            .format(
                "th"
                if 4
                <= int(datetime.strptime(documents[count]["data"]["date"], "%Y-%m-%dT%H:%M:%S+00:00").strftime("%d"))
                <= 20
                else {1: "st", 2: "nd", 3: "rd"}.get(
                    int(datetime.strptime(documents[count]["data"]["date"], "%Y-%m-%dT%H:%M:%S+00:00").strftime("%d"))
                    % 10,
                    "th",
                )
            )
            for count in range(10)
        ]

        # Retrieving away and home goals for each match.
        away_goals = [documents[count]["data"]["goals"]["away"] for count in range(10)]
        home_goals = [documents[count]["data"]["goals"]["home"] for count in range(10)]

        # Retrieving away and home team for each match.
        away_team = [documents[count]["data"]["teams"]["away"]["name"] for count in range(10)]
        home_team = [documents[count]["data"]["teams"]["home"]["name"] for count in range(10)]

        # Retrieving away and home logo for each team.
        away_logo = [documents[count]["data"]["teams"]["away"]["logo"] for count in range(10)]
        home_logo = [documents[count]["data"]["teams"]["home"]["logo"] for count in range(10)]

        return (
            match_date,
            away_goals,
            home_goals,
            away_team,
            home_team,
            away_logo,
            home_logo,
        )

    def display(self):
        """Render one expander per round, newest round first, 10 matches each."""
        round_count = self.max_round
        st.subheader("Fixtures")

        while round_count >= self.min_round:
            with st.expander(f"Round {round_count}"):
                (
                    match_date,
                    away_goals,
                    home_goals,
                    away_team,
                    home_team,
                    away_logo,
                    home_logo,
                ) = self.firestore_pull(round_count)

                count = 0

                while count < 10:
                    # Creating a container for each match.
                    with st.container():
                        col1, col2, col3, col4, col5 = st.columns(5)

                        with col1:
                            st.write("")

                        # Home teams
                        with col2:
                            st.markdown(
                                f"{home_goals[count]} ",
                                unsafe_allow_html=True,
                            )
                            st.markdown(
                                f" ",
                                unsafe_allow_html=True,
                            )
                            st.write("")
                            st.write("")

                        # Match date
                        with col3:
                            st.write("")
                            st.markdown(
                                "Match Date & Time
",
                                unsafe_allow_html=True,
                            )
                            st.markdown(
                                f"{match_date[count]}
",
                                unsafe_allow_html=True,
                            )
                            st.markdown(
                                f"{home_team[count]} vs. {away_team[count]}
",
                                unsafe_allow_html=True,
                            )

                        # Away teams
                        with col4:
                            st.markdown(
                                f"{away_goals[count]} ",
                                unsafe_allow_html=True,
                            )
                            st.markdown(
                                f" ",
                                unsafe_allow_html=True,
                            )
                            st.write("")
                            st.write("")

                        with col5:
                            st.write("")

                        count += 1

                round_count -= 1
145 |
--------------------------------------------------------------------------------
/components/highlights_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 |
3 |
class HighlightsSection:
    """Renders two rows of three recent YouTube highlight cards each.

    NOTE(review): the "Watch on YouTube" markdown string appears to have lost
    its HTML anchor markup in this copy of the file; the literal is preserved
    byte-for-byte - confirm against the original source.
    """

    def __init__(self, highlights_df):
        # DataFrame of highlights; per the iloc usage below: column 2 = title,
        # column 3 = thumbnail URL, last column = publish time.
        self.highlights_df = highlights_df

    def _render_row(self, offset):
        """Render three highlight cards from DataFrame rows offset..offset+2.

        Shared by both public row methods; previously the loop was duplicated.
        """
        columns = st.columns(3)

        for i, col in enumerate(columns):
            with col:
                try:
                    st.image(self.highlights_df.iloc[i + offset, 3], use_column_width="auto")
                    st.subheader(self.highlights_df.iloc[i + offset, 2])
                    st.write(f"Publish time: {self.highlights_df.iloc[i + offset, -1]}")
                    st.markdown(
                        f"Watch on YouTube ",
                        unsafe_allow_html=True,
                    )
                except IndexError:
                    # Fewer highlights than cells; leave the cell empty.
                    pass

    def display_first_row(self):
        """Render the section header and highlights 0-2."""
        st.header("Recent Highlights")
        self._render_row(0)

    def display_second_row(self):
        """Render highlights 3-5."""
        self._render_row(3)
40 |
--------------------------------------------------------------------------------
/components/injuries_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 |
3 |
class InjuriesSection:
    """Renders a filterable per-club table of recent injuries."""

    def __init__(self, injuries_df):
        self.injuries_df = injuries_df
        # Clubs offered in the filter popover (2023-24 season).
        self.teams = (
            "Arsenal",
            "Aston Villa",
            "Bournemouth",
            "Brentford",
            "Brighton",
            "Burnley",
            "Chelsea",
            "Crystal Palace",
            "Everton",
            "Fulham",
            "Liverpool",
            "Luton",
            "Manchester City",
            "Manchester United",
            "Newcastle",
            "Nottingham Forest",
            "Sheffield Utd",
            "Tottenham",
            "West Ham",
            "Wolves",
        )

    def display(self):
        """Show a team-filter popover and an injuries table per checked team."""
        st.divider()
        st.subheader("Recent Injuries")
        st.write("Select the teams you want to see recent injuries for.")
        popover = st.popover("Filter Teams")
        filtered_df = self.injuries_df.drop(columns=["team_id", "player_id"])

        # One checkbox per club inside the popover, all unchecked by default.
        team_checkboxes = {team: popover.checkbox(f"{team}", value=False) for team in self.teams}

        for team, is_checked in team_checkboxes.items():
            if not is_checked:
                continue
            team_df = filtered_df[filtered_df["team_name"] == team].drop(columns=["team_name"])
            st.write(f"**{team}**")
            if team_df.empty:
                st.write("No recent injuries reported.")
                st.empty()
            else:
                st.dataframe(
                    team_df,
                    column_config={
                        "player_name": "Player",
                        "injury_type": "Injury Type",
                        "injury_reason": "Reason",
                        "injury_date": "Date",
                    },
                    hide_index=True,
                    use_container_width=True,
                )
61 |
--------------------------------------------------------------------------------
/components/league_form_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 |
3 |
class LeagueFormsSection:
    """Renders recent-form summaries for the league's 6th-20th placed teams.

    NOTE(review): the markdown string literals below appear to have had their
    HTML stripped in this copy (one f-string is broken across lines); the code
    is preserved byte-for-byte - confirm against the original source.
    """

    def __init__(self, teams_df):
        # DataFrame of teams ordered by rank; column 1 holds the form string.
        self.teams_df = teams_df

    def generate_team_html(self, team_indices):
        """Build the markdown snippets for the given DataFrame row indices.

        The hard-coded "th" suffix is correct only because this section covers
        ranks 6-20, all of which take "th".
        """
        markdown_list = []
        for index in team_indices:
            team_info = self.teams_df.iloc[index]
            markdown_list.append(
                f" "
            )
            markdown_list.append(
                f"{index + 1}th / {team_info.iloc[1][-5:]}
"
            )
        return markdown_list

    def display(self):
        """Render five columns, each holding ranks i+6, i+11, i+16."""
        st.subheader("Forms for the Rest of the League")
        columns = st.columns(5)

        for i, col in enumerate(columns):
            with col:
                team_indices = [i + 5, i + 10, i + 15]
                markdown_list = self.generate_team_html(team_indices)
                for item in markdown_list:
                    st.markdown(item, unsafe_allow_html=True)
30 |
--------------------------------------------------------------------------------
/components/news_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 |
class NewsSection:
    """Renders a four-column row of recent news article cards.

    NOTE(review): the "Read More" markdown string appears to have lost its
    HTML anchor markup in this copy of the file; the literal is preserved
    byte-for-byte - confirm against the original source.
    """

    def __init__(self, news_df):
        # DataFrame of articles; per the iloc usage below: column 0 = title,
        # column 2 = image URL, column 3 = publish time.
        self.news_df = news_df

    def _render_article(self, index):
        """Render one article card from DataFrame row `index`.

        Shared helper replacing four copy-pasted per-column blocks.
        """
        with st.container():
            try:
                st.image(self.news_df.iloc[index, 2], use_column_width=True)
                st.subheader(self.news_df.iloc[index, 0])
                st.write(f"Publish time: {self.news_df.iloc[index, 3]}")
                st.markdown(
                    f"Read More ",
                    unsafe_allow_html=True,
                )
            except IndexError:
                # Fewer than four articles available; leave the cell empty.
                pass

    def display(self):
        """Show the Recent News header and up to four article cards."""
        st.header("Recent News")
        columns = st.columns(4)

        for i, col in enumerate(columns):
            with col:
                self._render_article(i)

        st.divider()
65 |
--------------------------------------------------------------------------------
/components/point_progression_section.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import plotly.graph_objects as go
3 | import streamlit as st
4 |
5 |
class PointProgressionSection:
    """Plots each top-5 team's cumulative points across its recorded form string."""

    def __init__(self, teams_df, standings_df):
        # teams_df: column 1 holds each team's form string (e.g. "WWDLW").
        # standings_df: column 1 holds points, column 3 holds the team name
        # (per the iloc usage below).
        self.teams_df = teams_df
        self.standings_df = standings_df

    def calculate_points(self):
        """Compute the running points total per result for the top five teams.

        Returns a list of five lists; entry i holds team i's cumulative points
        after each result in its form string (W=3, D=1, anything else=0).

        Bug fix: the running total is now appended after EVERY result. The
        previous version appended only once per team (after the inner loop),
        so the "progression" chart collapsed to a single data point per team.
        """
        team_forms = [[], [], [], [], []]
        forms = [self.teams_df.iloc[i, 1] for i in range(5)]

        for count, form in enumerate(forms):
            points = 0
            for char in form:
                if char == "W":
                    points += 3
                elif char == "D":
                    points += 1
                # Record the running total after every match so the chart
                # shows a progression rather than one final value.
                team_forms[count].append(points)

        return team_forms

    def create_dataframe(self, team_forms):
        """Transpose the per-team point lists into a gameweek-indexed DataFrame."""
        headers = [str(self.standings_df.iloc[i, 3]) for i in range(5)]
        zipped = list(zip(*team_forms))  # one row per gameweek
        return pd.DataFrame(zipped, columns=headers)

    def display(self):
        """Render the point-progression line chart for the top five teams."""
        team_forms = self.calculate_points()
        df = self.create_dataframe(team_forms)

        st.subheader("Point Progression throughout the Season")

        labels = [str(f"{self.standings_df.iloc[i, 3]} - {self.standings_df.iloc[i, 1]} points") for i in range(5)]
        colors = ["#1e90ff", "#ff4500", "#ffd700", "#228b22", "#000000"]

        fig = go.Figure()

        for i in range(5):
            fig.add_trace(go.Scatter(x=df.index, y=df.iloc[:, i], name=labels[i], line=dict(color=colors[i], width=2)))

        # add markers
        fig.update_traces(mode="markers+lines", marker=dict(size=8, line=dict(width=2)))

        fig.update_layout(
            xaxis_title="Gameweek",
            yaxis_title="Points",
            legend_title="Team",
            legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
            height=600,
        )

        st.plotly_chart(fig, use_container_width=True)
60 |
--------------------------------------------------------------------------------
/components/point_slider_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import plotly.graph_objects as go
3 |
4 |
class PointSliderSection:
    """Bar chart of points per team, filterable with a range slider."""

    def __init__(self, standings_df):
        self.standings_df = standings_df

    def display(self):
        """Render the slider, a summary line, and the filtered bar chart."""
        st.subheader("Points per Team:")

        # Slider bounded by the current minimum and maximum points totals.
        points = self.standings_df["points"].tolist()
        low, high = min(points), max(points)
        points_selection = st.slider(
            "Select a Range of Points:", min_value=low, max_value=high, value=(low, high)
        )

        # Restrict the table to teams whose points fall inside the selection.
        mask = self.standings_df["points"].between(*points_selection)
        amount_of_teams = self.standings_df[mask].shape[0]
        df_grouped = self.standings_df[mask].reset_index()
        lowest_number = df_grouped["points"].min()
        st.markdown(f"Number of teams with {lowest_number} or more points: {amount_of_teams}")

        # One indigo bar per team, labeled with its points total.
        bars = go.Bar(
            x=df_grouped["team"],
            y=df_grouped["points"],
            marker_color=["indigo"] * 20,
            text=df_grouped["points"],
            textposition="auto",
        )
        points_chart = go.Figure(data=[bars])

        # Tilt the x-axis labels and trim the outer margins.
        points_chart.update_layout(
            xaxis_tickangle=-35,
            autosize=False,
            margin=dict(l=0, r=0, b=0, t=0),
        )

        st.plotly_chart(points_chart, use_container_width=True)
53 |
--------------------------------------------------------------------------------
/components/social_media_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import streamlit.components.v1 as components
3 |
4 |
class SocialMediaSection:
    """Renders social/profile icon links at the bottom of the dashboard.

    NOTE(review): generate_html's template literal appears to have had its
    HTML markup stripped in this copy of the file (the f-string body is empty
    lines and the return literal is broken across lines); the code is
    preserved byte-for-byte - confirm against the original source.
    """

    def __init__(self):
        # Each entry: target URL, icon image URL, and alt text for the icon.
        self.social_links = [
            {
                "url": "https://hub.docker.com/r/digitalghostdev/premier-league/tags",
                "icon_url": "https://storage.googleapis.com/premier_league_bucket/icons/companies/docker.svg",
                "alt_text": "Docker",
            },
            {
                "url": "https://github.com/digitalghost-dev/",
                "icon_url": "https://storage.googleapis.com/premier_league_bucket/icons/companies/github.svg",
                "alt_text": "GitHub",
            },
        ]

    def generate_html(self):
        """Concatenate one markup snippet per social link and return the result."""
        html = ""
        for link in self.social_links:
            html += f"""
            
            
            
            """
        return f"{html}
"

    def display(self):
        """Render the divider, the Social header, and the generated markup."""
        st.divider()
        st.subheader("Social")
        social_html = self.generate_html()
        components.html(social_html)
35 |
--------------------------------------------------------------------------------
/components/squads_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 |
3 |
class SquadSection:
    """Renders a team's squad as position-grouped tables in two columns."""

    def __init__(self, squads_df):
        self.squads_df = squads_df
        # Clubs available for selection (2023-24 Premier League season).
        self.teams = (
            "Arsenal",
            "Aston Villa",
            "Bournemouth",
            "Brentford",
            "Brighton",
            "Burnley",
            "Chelsea",
            "Crystal Palace",
            "Everton",
            "Fulham",
            "Liverpool",
            "Luton",
            "Manchester City",
            "Manchester United",
            "Newcastle",
            "Nottingham Forest",
            "Sheffield Utd",
            "Tottenham",
            "West Ham",
            "Wolves",
        )

    def _render_positions(self, team_name, positions):
        """Render one editable table per position for the given team.

        Shared helper replacing two copy-pasted per-column loops.
        """
        for position in positions:
            filtered_df = self.squads_df[
                (self.squads_df["team_name"] == team_name) & (self.squads_df["player_position"] == position)
            ]
            filtered_df = filtered_df.drop(columns=["team_id", "team_name", "player_id", "player_position"])

            st.write(f"**{position}s**")
            st.data_editor(
                filtered_df,
                column_config={
                    "player_name": st.column_config.TextColumn("Player Name"),
                    "player_photo": st.column_config.ImageColumn("Photo", width="small"),
                },
                hide_index=True,
                # Unique widget key per team/position pair.
                key=f"{team_name}-{position}",
            )

    def display(self, team_name):
        """Show the squad for `team_name`: GK/MF on the left, DF/FW on the right."""
        (
            col1,
            col2,
        ) = st.columns(2)
        with col1:
            self._render_positions(team_name, ["Goalkeeper", "Midfielder"])

        with col2:
            self._render_positions(team_name, ["Defender", "Attacker"])
72 |
--------------------------------------------------------------------------------
/components/stadiums_map_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import plotly.express as px # type: ignore
3 | from google.cloud import secretmanager
4 |
5 |
def gcp_secret_rapid_api() -> str:
    """This function retrieves the Mapbox API key from GCP Secret Manager

    NOTE(review): the function name says "rapid_api" but the secret fetched is
    the Mapbox key (see the hard-coded secret path below) - likely a
    copy-paste misnomer. Left unrenamed to avoid breaking callers.
    """

    client = secretmanager.SecretManagerServiceClient()
    # Pinned to version 1 of the "mapbox-api" secret in this project.
    name = "projects/463690670206/secrets/mapbox-api/versions/1"
    response = client.access_secret_version(request={"name": name})
    mapbox_api_key = response.payload.data.decode("UTF-8")

    return mapbox_api_key
15 |
16 |
class StadiumMapSection:
    """Scatter map of Premier League stadium locations via Plotly/Mapbox."""

    def __init__(self):
        # Fetch the Mapbox token once and register it globally with Plotly Express.
        self.mapbox_access_token = gcp_secret_rapid_api()
        px.set_mapbox_access_token(self.mapbox_access_token)

    def create_stadium_map(self, stadiums_df):
        """Build the stadium scatter map and render it; returns the chart."""
        fig = px.scatter_mapbox(
            stadiums_df,
            lat="latitude",
            lon="longitude",
            hover_name="stadium",
            hover_data="team",
        )

        # Light basemap, no outer margins, bounds roughly framing the UK.
        fig.update_layout(
            mapbox_style="light",
            margin={"r": 0, "t": 0, "l": 0, "b": 0},
            mapbox_bounds={"west": -17, "east": 17, "south": 45, "north": 60},
        )
        fig.update_traces(marker=dict(size=8), marker_color="indigo")
        fig.update_mapboxes(zoom=4)

        return st.plotly_chart(fig, height=1000, use_container_width=True)

    def display(self, stadiums_df):
        """Render the section header followed by the stadium map."""
        st.subheader("Location of Stadiums")
        return self.create_stadium_map(stadiums_df)
51 |
--------------------------------------------------------------------------------
/components/stock_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import altair as alt
3 |
4 |
class StockSection:
    """Altair line chart of the previous trading day's MANU stock price."""

    def __init__(self, stock_df):
        self.stock_df = stock_df
        # Populated by display(); kept on the instance for later inspection.
        self.line_chart = None

    def display(self):
        """Render the header, info box, and (when data exists) the price chart."""
        st.subheader("MANU - Stock Price")
        st.info(
            """
            **INFO**\n
            This tab shows a stock price chart for the ***previous*** trading day for **MANU** ticker.\n
            Currently, the chart price is shown with 30 minutes intervals. Still testing this tab and hope to move it to every 10 minutes.\n
            Since this shows the previous trading day's data, there will be no data displayed on Sunday and Monday, New York time.
            """
        )
        if self.stock_df.empty:
            st.warning("No data for today. Check back **after** the next trading day.")
            return

        # Normalize timestamps to US/Eastern: convert when already tz-aware,
        # otherwise localize the naive values.
        times = self.stock_df["new_york_time"]
        if times.dt.tz is not None:
            self.stock_df["new_york_time"] = times.dt.tz_convert("US/Eastern")
        else:
            self.stock_df["new_york_time"] = times.dt.tz_localize("US/Eastern")

        self.line_chart = (
            alt.Chart(self.stock_df)
            .mark_line()
            .encode(
                x=alt.X("new_york_time:T", title="Time"),
                y=alt.Y("price:Q", title="Price").scale(zero=False),
            )
        )

        st.altair_chart(self.line_chart, use_container_width=True)
39 |
--------------------------------------------------------------------------------
/components/top_scorers_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 |
3 |
4 | class TopScorersSection:
5 | def __init__(self, top_scorers_df):
6 | self.top_scorers_df = top_scorers_df
7 |
8 | def generate_scorer_html(self, index):
9 | scorer = self.top_scorers_df.iloc[index]
10 | return [
11 | f" ",
12 | f"{scorer.iloc[0]}
",
13 | f"Goals: {scorer.iloc[1]}
",
14 | f"Assists: {scorer.iloc[3]}
",
15 | f"Team: {scorer.iloc[2]}
",
16 | f"Nationality: {scorer.iloc[4]}
",
17 | ]
18 |
19 | def display(self):
20 | with st.container():
21 | st.subheader("Top 5 Scorers")
22 | columns = st.columns(5)
23 |
24 | for i, col in enumerate(columns):
25 | with col:
26 | markdown_list = self.generate_scorer_html(i)
27 | for item in markdown_list:
28 | st.markdown(item, unsafe_allow_html=True)
29 |
--------------------------------------------------------------------------------
/components/top_teams_section.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 |
3 |
4 | class TopTeamsSection:
5 | def __init__(self, teams_df):
6 | self.teams_df = teams_df
7 |
8 | def generate_team_html(self, index):
9 | team = self.teams_df.iloc[index]
10 | return [
11 | f" ",
12 | f"{index + 1}st / Form (Last 5): {team.iloc[1][-5:]}
",
13 | f"Clean Sheets: {team.iloc[3]}
",
14 | f"Penalties Scored: {team.iloc[4]}
",
15 | f"Penalties Missed: {team.iloc[5]}
",
16 | ]
17 |
18 | def display(self):
19 | with st.container():
20 | st.subheader("Top 5 Teams")
21 | columns = st.columns(5)
22 |
23 | for i, col in enumerate(columns):
24 | with col:
25 | markdown_list = self.generate_team_html(i)
26 | for item in markdown_list:
27 | st.markdown(item, unsafe_allow_html=True)
28 |
--------------------------------------------------------------------------------
/dbt_prod/models/schema.yml:
--------------------------------------------------------------------------------
# dbt tests for the "stocks" model (dbt_prod/models/stocks.sql): both
# timestamp columns must be unique and non-null; every row needs a price.
version: 2

models:
  - name: stocks
    columns:
      # Tick time formatted HH:MM:SS in America/New_York (see stocks.sql).
      - name: formatted_time
        tests:
          - not_null
          - unique
      # Full DATETIME of the tick in America/New_York.
      - name: new_york_time
        tests:
          - not_null
          - unique
      # Price rounded to 2 decimal places in the model.
      - name: price
        tests:
          - not_null
--------------------------------------------------------------------------------
/dbt_prod/models/stocks.sql:
--------------------------------------------------------------------------------
-- Stocks model: MANU price ticks for the previous New York trading day.
WITH date_cte AS (
    SELECT
        -- Epoch-seconds timestamp rendered as a UTC date key (not selected downstream).
        FORMAT_TIMESTAMP('%Y%m%d', TIMESTAMP_SECONDS(timestamp)) AS formatted_date,
        -- Wall-clock time of the tick in America/New_York.
        FORMAT_TIMESTAMP('%H:%M:%S', TIMESTAMP_SECONDS(timestamp), 'America/New_York') AS formatted_time,
        DATETIME(TIMESTAMP_SECONDS(timestamp), 'America/New_York') AS new_york_time,
        ROUND(price, 2) AS price
    FROM
        `premier_league_dataset.public_stocks`
    WHERE
        -- Keep only yesterday's ticks, judged in New York local time.
        DATE(TIMESTAMP_SECONDS(timestamp), 'America/New_York') IN (DATE_SUB(DATE(CURRENT_DATETIME('America/New_York')), INTERVAL 1 DAY))
)

-- Drop ticks at/after 16:00:00 (presumably market close — confirm) and
-- return newest first.
SELECT formatted_time, new_york_time, price
FROM date_cte
WHERE formatted_time < '16:00:00'
ORDER BY date_cte.new_york_time DESC
--------------------------------------------------------------------------------
/etl/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | # Data Pipelines
6 |
7 | This directory contains the ETL (Extract, Transform, Load) scripts and related files for the Premier League project.
8 |
9 | ## Overview
10 |
11 | The `etl` directory is responsible for extracting data from various sources, transforming it into a consistent format, and loading it into BigQuery, Firestore, and PostgreSQL.
12 |
13 | ## Data Pipelines Diagram
14 |
15 |
Diagram of the data pipelines in this project
21 |
22 |
23 | ## Data Sources
24 | * [Football API](https://rapidapi.com/api-sports/api/api-football)
25 | * [News API](https://newsapi.org)
26 | * [Financial Modeling Prep](https://site.financialmodelingprep.com/developer)
27 | * [MapBox](https://www.mapbox.com)
28 | * [YouTube API](https://developers.google.com/youtube/v3)
--------------------------------------------------------------------------------
/etl/bigquery/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/digitalghost-dev/premier-league/964156b1e7876fe6ced410c8a902ca30d3dd7cdf/etl/bigquery/__init__.py
--------------------------------------------------------------------------------
/etl/bigquery/current_round.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pandas as pd
4 | import requests # type: ignore
5 |
6 | import google.auth
7 | from google.cloud import secretmanager, bigquery
8 | from pandas import DataFrame
9 |
10 | PROJECT_ID = "cloud-data-infrastructure"
11 | os.environ["GCLOUD_PROJECT"] = PROJECT_ID
12 | credentials, project_id = google.auth.default()
13 |
14 |
class DataRetrieval:
    """Fetches the current Premier League round from the Rapid API and the
    most recently loaded round from BigQuery, for comparison by the loader."""

    def __init__(self, project_id):
        self.project_id = project_id

    def _get_rapid_api_key(self) -> str:
        """Read the Rapid API key from GCP Secret Manager."""
        client = secretmanager.SecretManagerServiceClient()
        name = f"projects/{self.project_id}/secrets/rapid-api/versions/1"
        response = client.access_secret_version(request={"name": name})
        return response.payload.data.decode("UTF-8")

    def _call_api(self) -> str:
        """Return the API's current-round string, e.g. "Regular Season - 12"."""
        payload = self._get_rapid_api_key()
        headers = {
            "X-RapidAPI-Key": payload,
            "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com",
        }
        url = "https://api-football-v1.p.rapidapi.com/v3/fixtures/rounds"
        querystring = {"league": "39", "season": "2023", "current": "true"}
        response = requests.get(url, headers=headers, params=querystring, timeout=10)
        return response.json()["response"][0]

    def _call_bigquery(self) -> str:
        """Return the latest stored round as "<season> - <round>".

        BUG FIX: the result variable was previously assigned only inside the
        row loop, so an empty result set raised an opaque NameError — it now
        raises a clear ValueError. The annotation also said ``int``, but
        CONCAT produces a string.
        """
        client = bigquery.Client()
        query = f"""
        SELECT CONCAT(season, " - ", MAX(round)) AS max_round
        FROM `{self.project_id}.premier_league_dataset.current_round`
        GROUP BY season
        LIMIT 1
        """
        results = client.query(query).result()
        for row in results:
            # LIMIT 1 guarantees at most one row; return it directly.
            return row.max_round
        raise ValueError("current_round table returned no rows")

    def retrieve_data(self) -> tuple[str, str]:
        """Retrieve (api_round, bigquery_round) for the current round."""
        rapid_api_current_round = self._call_api()
        bigquery_current_round = self._call_bigquery()
        return rapid_api_current_round, bigquery_current_round
55 |
56 |
# NOTE(review): this executes at import time — importing the module calls the
# Rapid API and BigQuery as a side effect.
rapid_api_current_round, bigquery_current_round = DataRetrieval(PROJECT_ID).retrieve_data()
58 |
59 |
def load_current_round() -> None:
    """Compare the API's current round with BigQuery and load it only if new.

    BUG FIX: the dataframe was previously built and appended unconditionally —
    even after printing "already loaded" — creating duplicate rows. The load
    now short-circuits when the rounds match. Also fixed: the nested
    create_dataframe was annotated ``-> DataFrame`` but returns a tuple, and
    send_dataframe_to_bigquery ignored its argument and rebuilt the dataframe.
    """
    if rapid_api_current_round == bigquery_current_round:
        print("Current round is already loaded!")
        return

    print("Current round is not loaded!")

    def create_dataframe() -> tuple[DataFrame, int]:
        # Splitting a string that looks like: "Regular Season - 12".
        regular_season = [rapid_api_current_round[:14]]
        round_number_int = int(rapid_api_current_round[17:])

        data = {"season": regular_season, "round": round_number_int}
        df = pd.DataFrame(data, columns=["season", "round"])

        return df, round_number_int

    def define_table_schema() -> list[dict[str, str]]:
        return [
            {"name": "season", "type": "STRING"},
            {"name": "round", "type": "INTEGER"},
        ]

    def send_dataframe_to_bigquery(
        current_round_dataframe: DataFrame,
        round_number_int: int,
        schema_definition: list[dict[str, str]],
    ) -> None:
        """Append the new round to BigQuery."""
        current_round_dataframe.to_gbq(
            destination_table="premier_league_dataset.current_round",
            if_exists="append",
            table_schema=schema_definition,
        )

        print(f"Current round: {round_number_int} loaded!")

    current_round_dataframe, round_number_int = create_dataframe()
    send_dataframe_to_bigquery(current_round_dataframe, round_number_int, define_table_schema())
105 |
# Runs when the module is *imported* rather than executed directly — note the
# inverted `!=` guard, used consistently across the ETL scripts in this repo.
if __name__ != "__main__":
    load_current_round()
108 |
--------------------------------------------------------------------------------
/etl/bigquery/highlights.py:
--------------------------------------------------------------------------------
1 | """
2 | This file pulls data from the YouTube API relating to the English Premier League
3 | highlights and loads it into a BigQuery table.
4 | """
5 |
6 | import googleapiclient.discovery
7 | from google.cloud import secretmanager
8 | from datetime import datetime, timedelta, timezone
9 |
10 | import pandas as pd
11 | from pandas import DataFrame
12 |
13 |
def gcp_secret_rapid_api() -> str:
    """Fetch the YouTube Data API key from GCP Secret Manager.

    NOTE: despite the function name, the secret fetched here is the
    `youtube-api` key, not a RapidAPI key.
    """
    secret_name = "projects/463690670206/secrets/youtube-api/versions/1"
    secret_client = secretmanager.SecretManagerServiceClient()
    secret = secret_client.access_secret_version(request={"name": secret_name})
    return secret.payload.data.decode("UTF-8")
23 |
24 |
def call_api(part, channel_id, max_results, query, publishedAfter) -> list:
    """Search the YouTube Data API and return the raw result items."""
    youtube_api_key = gcp_secret_rapid_api()

    # Build the YouTube Data API v3 service client.
    service = googleapiclient.discovery.build(
        "youtube", "v3", developerKey=youtube_api_key
    )

    request = service.search().list(
        part=part,
        channelId=channel_id,
        maxResults=max_results,
        q=query,
        publishedAfter=publishedAfter,
    )
    search_response = request.execute()

    return search_response.get("items", [])
50 |
51 |
def create_dataframe():
    """Fetch highlight videos from the last ten days and shape them into a
    DataFrame with one row per video."""
    cutoff = datetime.now(timezone.utc) - timedelta(days=10)
    published_date = cutoff.strftime("%Y-%m-%dT00:00:00Z")

    videos = call_api(
        "snippet",
        "UCqZQlzSHbVJrwrn5XvzrzcA",
        10,
        "PREMIER LEAGUE HIGHLIGHTS",
        published_date,
    )

    rows = []
    for video in videos:
        video_id = str(video["id"]["videoId"])
        snippet = video["snippet"]
        rows.append(
            [
                video_id,
                "https://www.youtube.com/watch?v=" + video_id,
                str(snippet["title"]),
                str(snippet["thumbnails"]["high"]["url"]),
                str(snippet["description"]),
                # Publish time parsed into a pandas Timestamp.
                pd.to_datetime(snippet["publishTime"]),
            ]
        )

    headers = [
        "video_id",
        "video_url",
        "title",
        "thumbnail",
        "description",
        "publish_time",
    ]
    return pd.DataFrame(rows, columns=headers)
96 |
97 |
def define_table_schema() -> list[dict[str, str]]:
    """Return the BigQuery schema for the highlights table."""
    columns = [
        ("video_id", "STRING"),
        ("video_url", "STRING"),
        ("title", "STRING"),
        ("thumbnail", "STRING"),
        ("description", "STRING"),
        ("publish_time", "DATETIME"),
    ]
    return [{"name": name, "type": col_type} for name, col_type in columns]
111 |
112 |
def send_dataframe_to_bigquery(
    highlights_dataframe: DataFrame, schema_definition: list[dict[str, str]]
) -> None:
    """Replace premier_league_dataset.highlights with the given dataframe.

    BUG FIX: the function previously ignored its first parameter (misnamed
    ``standings_dataframe`` from a copy-paste) and read the module-level
    ``highlights_dataframe`` global instead; it now uses the argument it is
    given. The only call site in this file passes the argument positionally.
    """
    highlights_dataframe.to_gbq(
        destination_table="premier_league_dataset.highlights",
        if_exists="replace",
        table_schema=schema_definition,
    )

    print("Highlights table loaded!")
125 |
126 |
# Runs on import (inverted __main__ guard, consistent with the other ETL
# scripts): build the highlights dataframe, then load it into BigQuery.
if __name__ != "__main__":
    highlights_dataframe = create_dataframe()
    schema_definition = define_table_schema()
    send_dataframe_to_bigquery(highlights_dataframe, schema_definition)
131 |
--------------------------------------------------------------------------------
/etl/bigquery/injuries.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from datetime import datetime
4 | import pandas as pd
5 | import requests # type: ignore
6 | from google.cloud import bigquery, secretmanager
7 | from pandas import DataFrame
8 |
9 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure"
10 |
11 | STANDINGS_TABLE = "premier_league_dataset.standings"
12 |
13 |
def gcp_secret_rapid_api() -> str:
    """Fetch the Rapid API key from GCP Secret Manager."""
    secret_name = "projects/463690670206/secrets/rapid-api/versions/1"
    secret_client = secretmanager.SecretManagerServiceClient()
    secret = secret_client.access_secret_version(request={"name": secret_name})
    return secret.payload.data.decode("UTF-8")
21 |
22 |
# Reads the Standings table from BigQuery; the team ids in it drive the
# per-team injuries requests below.
def bigquery_call() -> DataFrame:
    """Fetch the full standings table from BigQuery, ordered by rank."""
    client = bigquery.Client()

    sql = f"""
    SELECT *
    FROM {STANDINGS_TABLE}
    ORDER BY Rank
    """

    job = client.query(sql)
    return job.result().to_dataframe(create_bqstorage_client=True)
42 |
43 |
def get_teams_with_injuries() -> list:
    """Return the ids of teams with at least one injury reported today.

    Queries the injuries endpoint once per team from the standings table and
    keeps the ids whose response is non-empty.

    Improvements: iterates over however many rows the standings query returns
    instead of a hard-coded 20; computes today's date once instead of per
    request; replaces the ``pass``/``else`` anti-pattern with a truthiness
    check; avoids shadowing the ``id`` builtin.
    """
    rapid_api_key = gcp_secret_rapid_api()
    bigquery_dataframe = bigquery_call()

    # First column of the SELECT * query is assumed to be the team id —
    # TODO confirm the standings table's column order.
    id_list = [bigquery_dataframe.iloc[i, 0] for i in range(len(bigquery_dataframe))]

    headers = {
        "X-RapidAPI-Key": rapid_api_key,
        "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com",
    }
    url = "https://api-football-v1.p.rapidapi.com/v3/injuries"

    # The API expects today's date; loop-invariant, so compute it once.
    formatted_date = datetime.now().strftime("%Y-%m-%d")

    injured_teams_list = []
    for team_id in id_list:
        query = {"league": "39", "season": "2023", "team": team_id, "date": formatted_date}

        response = requests.get(url, headers=headers, params=query, timeout=10)
        if response.json()["response"]:
            injured_teams_list.append(team_id)

    return injured_teams_list
73 |
74 |
def call_api():
    """Build and load one BigQuery injuries table per team with injuries.

    For each team id returned by get_teams_with_injuries(), fetches today's
    injuries and replaces premier_league_injuries.<team_name>.

    Improvements: ``while``/counter loops replaced with ``for`` loops; the
    loop-invariant headers/schema/date are hoisted out of the per-team loop;
    a team whose injuries disappear between the two API calls is skipped
    instead of raising IndexError; the ``id`` builtin is no longer shadowed;
    the team id is read from each record instead of always record 0.
    """
    rapid_api_key = gcp_secret_rapid_api()
    injured_teams_list = get_teams_with_injuries()

    headers = {
        "X-RapidAPI-Key": rapid_api_key,
        "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com",
    }
    url = "https://api-football-v1.p.rapidapi.com/v3/injuries"

    table_headers = [
        "team_id",
        "team_name",
        "player_id",
        "player_name",
        "injury_type",
        "injury_reason",
        "injury_date",
    ]
    schema_definition = [
        {"name": "team_id", "type": "INTEGER"},
        {"name": "team_name", "type": "STRING"},
        {"name": "player_id", "type": "INTEGER"},
        {"name": "player_name", "type": "STRING"},
        {"name": "injury_type", "type": "STRING"},
        {"name": "injury_reason", "type": "STRING"},
        {"name": "injury_date", "type": "DATE"},
    ]

    formatted_date = datetime.now().strftime("%Y-%m-%d")

    for team_id in injured_teams_list:
        query = {"league": "39", "season": "2023", "team": team_id, "date": formatted_date}

        response = requests.get(url, headers=headers, params=query, timeout=10)
        records = response.json()["response"]

        # Robustness: get_teams_with_injuries() saw injuries for this team,
        # but the second call could return none — skip instead of crashing
        # on the team-name lookup below.
        if not records:
            continue

        rows = []
        for record in records:
            fixture_date = datetime.strptime(
                record["fixture"]["date"], "%Y-%m-%dT%H:%M:%S%z"
            )
            rows.append(
                (
                    int(record["team"]["id"]),
                    str(record["team"]["name"]),
                    int(record["player"]["id"]),
                    str(record["player"]["name"]),
                    str(record["player"]["type"]),
                    str(record["player"]["reason"]),
                    fixture_date.strftime("%Y-%m-%d"),
                )
            )

        df = pd.DataFrame(rows, columns=table_headers)

        team_name = rows[0][1]
        formatted_team_name = team_name.replace(" ", "_").lower()

        df.to_gbq(
            f"premier_league_injuries.{formatted_team_name}",
            project_id="cloud-data-infrastructure",
            if_exists="replace",
            table_schema=schema_definition,
        )

        print(f"{team_name}'s injuries table loaded!")
164 |
165 |
# Runs on import (inverted __main__ guard, consistent with the other ETL
# scripts): kicks off the full injuries load.
if __name__ != "__main__":
    call_api()
168 |
--------------------------------------------------------------------------------
/etl/bigquery/news.py:
--------------------------------------------------------------------------------
1 | import os
2 | from datetime import datetime
3 | from datetime import timedelta as td
4 |
5 | import requests # type: ignore
6 | from google.cloud import secretmanager
7 | from pandas import DataFrame
8 |
9 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure"
10 |
11 |
def gcp_secret_news_api() -> str:
    """Fetch the News API key from GCP Secret Manager."""
    secret_name = "projects/463690670206/secrets/news-api/versions/1"
    secret_client = secretmanager.SecretManagerServiceClient()
    secret = secret_client.access_secret_version(request={"name": secret_name})
    return secret.payload.data.decode("UTF-8")
19 |
20 |
21 | def call_api() -> tuple[list[str], list[str], list[str], list[str]]:
22 | news_api_key = gcp_secret_news_api()
23 |
24 | # Getting yesterday's date.
25 | yesteryday = datetime.now() - td(days=1)
26 | yesteryday_str = yesteryday.strftime("%Y-%m-%d")
27 |
28 | url = (
29 | "https://newsapi.org/v2/everything?"
30 | "q=Premier League&"
31 | f"from={yesteryday_str}&"
32 | "language=en&"
33 | "domains=skysports.com,theguardian.com,90min.com&"
34 | "sortBy=popularity&"
35 | f"apiKey={news_api_key}"
36 | )
37 |
38 | response = requests.request("GET", url, timeout=20)
39 | json_res = response.json()
40 |
41 | title_list = []
42 | url_list = []
43 | url_to_image_list = []
44 | published_at_list = []
45 |
46 | for article in json_res["articles"]:
47 | title_list.append(str(article["title"]))
48 | url_list.append(str(article["url"]))
49 | url_to_image_list.append(str(article["urlToImage"]))
50 |
51 | published_at = datetime.strptime(article["publishedAt"], "%Y-%m-%dT%H:%M:%SZ")
52 | published_at_list.append(published_at.strftime("%H:%M:%S"))
53 |
54 | return title_list, url_list, url_to_image_list, published_at_list
55 |
56 |
def create_dataframe() -> DataFrame:
    """Assemble the fetched news articles into a DataFrame, newest first."""
    titles, urls, images, published = call_api()

    frame = DataFrame(
        {
            "title": titles,
            "url": urls,
            "url_to_image": images,
            "published_at": published,
        }
    )

    return frame.sort_values(by="published_at", ascending=False)
70 |
71 |
def define_table_schema() -> list[dict[str, str]]:
    """Return the BigQuery schema for the news table (all STRING columns)."""
    column_names = ["title", "url", "url_to_image", "published_at"]
    return [{"name": name, "type": "STRING"} for name in column_names]
81 |
82 |
def send_dataframe_to_bigquery(
    news_dataframe: DataFrame, schema_definition: list[dict[str, str]]
) -> None:
    """Replace premier_league_dataset.news with the given dataframe.

    Consistency fix: the parameter was named ``standings_dataframe`` (copied
    from the standings script) even though it holds news data; the only call
    site in this file passes it positionally, so the rename is safe.
    """
    news_dataframe.to_gbq(
        destination_table="premier_league_dataset.news",
        if_exists="replace",
        table_schema=schema_definition,
    )

    print("News table loaded!")
93 |
94 |
# Runs on import (inverted __main__ guard): build the news dataframe and
# load it into BigQuery.
if __name__ != "__main__":
    news_dataframe = create_dataframe()
    schema_definition = define_table_schema()
    send_dataframe_to_bigquery(news_dataframe, schema_definition)
99 |
--------------------------------------------------------------------------------
/etl/bigquery/squads.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pandas as pd
4 | import requests # type: ignore
5 | from google.cloud import bigquery, secretmanager
6 | from pandas import DataFrame
7 |
8 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure"
9 |
10 | STANDINGS_TABLE = "premier_league_dataset.standings"
11 |
12 |
def gcp_secret_rapid_api() -> str:
    """Fetch the Rapid API key from GCP Secret Manager."""
    secret_name = "projects/463690670206/secrets/rapid-api/versions/1"
    secret_client = secretmanager.SecretManagerServiceClient()
    secret = secret_client.access_secret_version(request={"name": secret_name})
    return secret.payload.data.decode("UTF-8")
20 |
21 |
# Reads the Standings table from BigQuery; each team's id feeds the squads
# API requests below.
def bigquery_call() -> DataFrame:
    """Fetch every team_id from the standings table, ordered by rank."""
    client = bigquery.Client()

    sql = f"""
    SELECT team_id
    FROM {STANDINGS_TABLE}
    ORDER BY Rank
    """

    job = client.query(sql)
    return job.result().to_dataframe(create_bqstorage_client=True)
41 |
42 |
def _to_optional_int(value):
    """Coerce to int, returning None for missing/unparseable values.

    The API omits some players' ages and shirt numbers, so bad values
    become NULLs in BigQuery rather than crashing the load.
    """
    try:
        return int(value) if value is not None else None
    except (ValueError, TypeError):
        return None


def call_api() -> None:
    """Load each Premier League team's squad into its own BigQuery table.

    Improvements: ``while``/counter loops replaced with ``for`` loops; the
    team count follows the standings query instead of a hard-coded 20; the
    duplicated age/number try/except blocks are factored into
    ``_to_optional_int``; the response JSON is parsed once per request; the
    ``formmated_team_name`` typo is fixed.
    """
    rapid_api_key = gcp_secret_rapid_api()
    bigquery_dataframe = bigquery_call()

    # One team_id per row of the standings query.
    id_list = [bigquery_dataframe.iloc[i, 0] for i in range(len(bigquery_dataframe))]

    headers = {
        "X-RapidAPI-Key": rapid_api_key,
        "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com",
    }
    url = "https://api-football-v1.p.rapidapi.com/v3/players/squads"

    table_headers = [
        "team_id",
        "team_name",
        "player_id",
        "player_photo",
        "player_name",
        "player_age",
        "player_number",
        "player_position",
    ]
    schema_definition = [
        {"name": "team_id", "type": "INTEGER"},
        {"name": "team_name", "type": "STRING"},
        {"name": "player_id", "type": "INTEGER"},
        {"name": "player_photo", "type": "STRING"},
        {"name": "player_name", "type": "STRING"},
        {"name": "player_age", "type": "INTEGER"},
        {"name": "player_number", "type": "INTEGER"},
        {"name": "player_position", "type": "STRING"},
    ]

    for team_id in id_list:
        query = {"team": team_id}

        response = requests.get(url, headers=headers, params=query, timeout=10)
        payload = response.json()["response"][0]

        team = payload["team"]
        team_name = str(team["name"])

        rows = []
        for player in payload["players"]:
            rows.append(
                (
                    int(team["id"]),
                    team_name,
                    int(player["id"]),
                    str(player["photo"]),
                    str(player["name"]),
                    _to_optional_int(player.get("age")),
                    _to_optional_int(player.get("number")),
                    str(player["position"]),
                )
            )

        df = pd.DataFrame(rows, columns=table_headers)

        formatted_team_name = team_name.replace(" ", "_").lower()

        df.to_gbq(
            destination_table=f"premier_league_squads.{formatted_team_name}",
            if_exists="replace",
            table_schema=schema_definition,
        )

        print(f"{team_name}'s squad table loaded!")
154 |
# Runs on import (inverted __main__ guard, consistent with the other ETL
# scripts): kicks off the full squads load.
if __name__ != "__main__":
    call_api()
157 |
--------------------------------------------------------------------------------
/etl/bigquery/stadiums.py:
--------------------------------------------------------------------------------
1 | """
2 | This file pulls data from an API relating to the English Premier League
3 | stadium location data and loads it into a PostgreSQL database.
4 | """
5 |
6 | import os
7 |
8 | # Standard libraries
9 | from typing import Dict, Optional
10 |
11 | import pandas as pd
12 | import requests # type: ignore
13 |
14 | # Importing needed libraries.
15 | from google.cloud import secretmanager
16 | from pandas import DataFrame
17 | from sqlalchemy import create_engine # type: ignore
18 | from sqlalchemy.types import DECIMAL, String # type: ignore
19 |
20 | # Settings the project environment.
21 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure"
22 |
23 |
def gcp_secret_rapid_api():
    """Fetch the Go stadiums-API secret from Secret Manager.

    NOTE: despite the name, the secret fetched here is `go-api`, and
    call_api() uses the returned value directly as the request URL rather
    than as a RapidAPI key.
    """
    secret_name = "projects/463690670206/secrets/go-api/versions/1"
    secret_client = secretmanager.SecretManagerServiceClient()
    secret = secret_client.access_secret_version(request={"name": secret_name})
    return secret.payload.data.decode("UTF-8")
33 |
34 |
def gcp_secret_database_uri():
    """Fetch the PostgreSQL connection URI from GCP Secret Manager."""
    secret_name = "projects/463690670206/secrets/premier-league-database-connection-uri/versions/3"
    secret_client = secretmanager.SecretManagerServiceClient()
    secret = secret_client.access_secret_version(request={"name": secret_name})
    return secret.payload.data.decode("UTF-8")
42 |
43 |
def call_api():
    """Call the stadiums endpoint and return six parallel lists.

    Returns (teams, stadiums, latitudes, longitudes, capacities, years_opened)
    for the first 20 entries of the response.
    """
    go_api_key = gcp_secret_rapid_api()

    # The secret itself is the full request URL for the stadiums API.
    response = requests.request("GET", go_api_key, timeout=20)
    json_res = response.json()

    team_list = []
    stadium_list = []
    lat_list = []
    lon_list = []
    capacity_list = []
    year_opened = []

    for index in range(20):
        entry = json_res[index]
        team_list.append(str(entry["team"]))
        stadium_list.append(str(entry["stadium"]))
        lat_list.append(float(entry["latitude"]))
        lon_list.append(float(entry["longitude"]))
        capacity_list.append(str(entry["capacity"]))
        year_opened.append(str(entry["year_opened"]))

    return team_list, stadium_list, lat_list, lon_list, capacity_list, year_opened
84 |
85 |
def create_dataframe():
    """Build a stadiums DataFrame from the six lists returned by call_api()."""
    headers = ["team", "stadium", "latitude", "longitude", "capacity", "year_opened"]

    # Transpose the six parallel lists into per-stadium rows.
    columns = call_api()
    rows = list(zip(*columns))

    return pd.DataFrame(rows, columns=headers)
100 |
101 |
def define_table_schema() -> Dict[str, type]:
    """Column-name -> SQLAlchemy type mapping for the stadiums table."""
    return {
        "team": String(64),
        "stadium": String(64),
        # 8,6 precision keeps six decimal places of latitude/longitude.
        "latitude": DECIMAL(8, 6),
        "longitude": DECIMAL(8, 6),
        "capacity": String(10),
        "year_opened": String(4),
    }
113 |
114 |
def send_dataframe_to_postgresql(
    database_uri: str,
    schema_name: str,
    table_name: str,
    df: DataFrame,
    schema_definition: Optional[Dict[str, type]] = None,
):
    """Sending dataframe to PostgreSQL.

    Args:
        database_uri (str): The URI to connect to the PostgreSQL database.
        schema_name (str): The schema name in which the table should be created.
        table_name (str): The name of the table to be created.
        df (DataFrame): The DataFrame containing the data to be inserted.
        schema_definition (Dict[str, type], optional): A dictionary defining the table schema with column names
                                                       as keys and their corresponding SQLAlchemy data types.
                                                       Defaults to None. If None, the function will use the schema
                                                       from the define_table_schema() function.

    Raises:
        ValueError: If the DataFrame is empty or schema_definition is not a valid dictionary.
    """

    if df.empty:
        raise ValueError("DataFrame is empty.")

    if schema_definition is None:
        schema_definition = define_table_schema()

    if not isinstance(schema_definition, dict):
        raise ValueError("schema_definition must be a dictionary.")

    # RESOURCE FIX: dispose of the engine when done so pooled connections are
    # released even if the insert fails (previously the engine leaked).
    engine = create_engine(database_uri)
    try:
        df.to_sql(
            table_name,
            con=engine,
            schema=schema_name,
            if_exists="replace",
            index=False,
            dtype=schema_definition,
        )
    finally:
        engine.dispose()
156 |
157 |
# Runs on import (inverted __main__ guard): fetch, build, and load stadiums.
# NOTE(review): schema_definition is computed here but never passed to
# send_dataframe_to_postgresql, which falls back to define_table_schema()
# internally — same behavior, but one of the two computations is redundant.
if __name__ != "__main__":
    database_uri = gcp_secret_database_uri()
    schema_name = "premier-league-schema"
    table_name = "stadiums"
    df = create_dataframe()
    schema_definition = define_table_schema()

    send_dataframe_to_postgresql(database_uri, schema_name, table_name, df)
    print(f"Data loaded into {table_name}!")
167 |
--------------------------------------------------------------------------------
/etl/bigquery/standings.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | import pandas as pd
5 | import requests # type: ignore
6 | from google.cloud import secretmanager
7 | from pandas import DataFrame
8 |
9 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure"
10 |
11 |
def gcp_secret_rapid_api() -> str:
    """Fetch the Rapid API key from GCP Secret Manager."""
    secret_name = "projects/463690670206/secrets/rapid-api/versions/1"
    secret_client = secretmanager.SecretManagerServiceClient()
    secret = secret_client.access_secret_version(request={"name": secret_name})
    return secret.payload.data.decode("UTF-8")
19 |
20 |
def call_api() -> (
    tuple[
        list[int],
        list[int],
        list[str],
        list[int],
        list[int],
        list[int],
        list[int],
        list[str],
        list[int],
        list[int],
        list[int],
        list[int],
    ]
):
    """Fetch the 2023 Premier League standings from the Football API.

    Returns:
        Twelve parallel lists (one element per team, up to 20 teams), in the
        order: team id, rank, team name, games played, wins, draws, loses,
        recent form, points, goals for, goals against, goal difference.
    """
    payload = gcp_secret_rapid_api()

    headers = {
        "X-RapidAPI-Key": payload,
        "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com",
    }

    url = "https://api-football-v1.p.rapidapi.com/v3/standings"

    query = {"season": "2023", "league": "39"}
    response = requests.get(url, headers=headers, params=query, timeout=10)
    json_res = response.json()

    # Hoist the deeply nested standings list once instead of re-indexing the
    # whole JSON document for every field of every iteration.
    standings = json_res["response"][0]["league"]["standings"][0]

    team_id_list = []
    rank_list = []
    team_list = []
    games_played = []
    wins_list = []
    draws_list = []
    loses_list = []
    form_list = []
    points_list = []
    goals_for = []
    goals_against = []
    goals_diff = []

    # Slicing (instead of a fixed count of 20) tolerates a short response.
    for entry in standings[:20]:
        team_id_list.append(int(entry["team"]["id"]))
        rank_list.append(int(entry["rank"]))
        # Bug fix: json.dumps(...).strip('"') escaped non-ASCII characters
        # (accented team names) into \uXXXX sequences; plain str() keeps the
        # value intact.
        team_list.append(str(entry["team"]["name"]))
        games_played.append(int(entry["all"]["played"]))
        wins_list.append(int(entry["all"]["win"]))
        draws_list.append(int(entry["all"]["draw"]))
        loses_list.append(int(entry["all"]["lose"]))
        form_list.append(str(entry["form"]))
        points_list.append(int(entry["points"]))
        goals_for.append(int(entry["all"]["goals"]["for"]))
        goals_against.append(int(entry["all"]["goals"]["against"]))
        goals_diff.append(int(entry["goalsDiff"]))

    return (
        team_id_list,
        rank_list,
        team_list,
        games_played,
        wins_list,
        draws_list,
        loses_list,
        form_list,
        points_list,
        goals_for,
        goals_against,
        goals_diff,
    )
107 |
108 |
def create_dataframe() -> DataFrame:
    """Assemble the standings API lists into a pandas DataFrame."""
    column_names = [
        "team_id",
        "rank",
        "team",
        "games_played",
        "wins",
        "draws",
        "loses",
        "recent_form",
        "points",
        "goals_for",
        "goals_against",
        "goal_difference",
    ]

    # call_api() returns twelve parallel lists in the same order as the
    # column names above; pair them up into a column mapping.
    columns = call_api()
    return pd.DataFrame(dict(zip(column_names, columns)))
159 |
160 |
def define_table_schema() -> list[dict[str, str]]:
    """Return the BigQuery column schema for the standings table."""
    column_types = [
        ("team_id", "INTEGER"),
        ("rank", "INTEGER"),
        ("team", "STRING"),
        ("games_played", "INTEGER"),
        ("wins", "INTEGER"),
        ("draws", "INTEGER"),
        ("loses", "INTEGER"),
        ("recent_form", "STRING"),
        ("points", "INTEGER"),
        ("goals_for", "INTEGER"),
        ("goals_against", "INTEGER"),
        ("goal_difference", "INTEGER"),
    ]

    return [{"name": name, "type": bq_type} for name, bq_type in column_types]
178 |
179 |
def send_dataframe_to_bigquery(standings_dataframe: DataFrame, schema_definition: list[dict[str, str]]) -> None:
    """Replace the BigQuery standings table with the given DataFrame."""
    standings_dataframe.to_gbq(
        destination_table="premier_league_dataset.standings",
        table_schema=schema_definition,
        if_exists="replace",
    )
    print("Standings table loaded!")
188 |
189 |
# NOTE(review): inverted guard — this runs whenever the module is *imported*
# (e.g. by a Prefect flow), not when executed directly. Presumably intentional;
# confirm against the orchestration code.
if __name__ != "__main__":
    standings_dataframe = create_dataframe()
    schema_definition = define_table_schema()
    send_dataframe_to_bigquery(standings_dataframe, schema_definition)
194 |
--------------------------------------------------------------------------------
/etl/bigquery/teams.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pandas as pd
4 | import requests # type: ignore
5 | from google.cloud import bigquery, secretmanager
6 | from pandas import DataFrame
7 |
8 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure"
9 |
10 | STANDINGS_TABLE = "premier_league_dataset.standings"
11 | TEAMS_TABLE = "premier_league_dataset.teams"
12 |
13 |
def gcp_secret_rapid_api() -> str:
    """Read the RapidAPI key out of GCP Secret Manager."""
    secret_client = secretmanager.SecretManagerServiceClient()
    access_response = secret_client.access_secret_version(
        request={"name": "projects/463690670206/secrets/rapid-api/versions/1"}
    )
    return access_response.payload.data.decode("UTF-8")
21 |
22 |
# Calling the Standings table from BigQuery to get each team's id.
def bigquery_call() -> DataFrame:
    """Fetch the full standings table from BigQuery, ordered by rank."""
    query_string = f"""
    SELECT *
    FROM {STANDINGS_TABLE}
    ORDER BY Rank
    """

    query_result = bigquery.Client().query(query_string).result()
    return query_result.to_dataframe(create_bqstorage_client=True)
42 |
43 |
def call_api() -> (
    tuple[
        list[int],
        list[str],
        list[str],
        list[str],
        list[int],
        list[int],
        list[int],
        list[float],
        list[int],
    ]
):
    """Query team statistics from the Football API for all 20 PL teams.

    Returns nine parallel lists: team id, name, logo URL, form string,
    clean sheets, penalties scored, penalties missed, average goals,
    win streak.
    """
    rapid_api_key = gcp_secret_rapid_api()
    bigquery_dataframe = bigquery_call()

    # The first column of the standings frame holds each team's id.
    id_list = [bigquery_dataframe.iloc[row, 0] for row in range(20)]

    headers = {
        "X-RapidAPI-Key": rapid_api_key,
        "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com",
    }

    url = "https://api-football-v1.p.rapidapi.com/v3/teams/statistics"

    team_id_list = []
    team_list = []
    logo_list = []
    form_list = []
    clean_sheets_list = []
    penalty_scored_list = []
    penalty_missed_list = []
    average_goals_list = []
    win_streak_list = []

    # One statistics request per team.
    for team_id in id_list:
        query = {"league": "39", "season": "2023", "team": team_id}
        json_res = requests.get(url, headers=headers, params=query, timeout=10).json()

        stats = json_res["response"]
        team_block = stats["team"]

        team_id_list.append(int(team_block["id"]))
        team_list.append(str(team_block["name"]))
        logo_list.append(str(team_block["logo"]))
        form_list.append(str(stats["form"]))
        clean_sheets_list.append(int(stats["clean_sheet"]["total"]))
        penalty_scored_list.append(int(stats["penalty"]["scored"]["total"]))
        penalty_missed_list.append(int(stats["penalty"]["missed"]["total"]))
        average_goals_list.append(float(stats["goals"]["for"]["average"]["total"]))
        win_streak_list.append(int(stats["biggest"]["streak"]["wins"]))

    return (
        team_id_list,
        team_list,
        logo_list,
        form_list,
        clean_sheets_list,
        penalty_scored_list,
        penalty_missed_list,
        average_goals_list,
        win_streak_list,
    )
117 |
118 |
def create_dataframe() -> DataFrame:
    """Assemble the team-statistics lists into a pandas DataFrame."""
    column_names = [
        "team_id",
        "team",
        "logo",
        "form",
        "clean_sheets",
        "penalties_scored",
        "penalties_missed",
        "average_goals",
        "win_streak",
    ]

    # call_api() yields nine parallel lists in the same order as the names above.
    return pd.DataFrame(dict(zip(column_names, call_api())))
160 |
161 |
def define_table_schema() -> list[dict[str, str]]:
    """Return the BigQuery column schema for the teams table."""
    column_types = {
        "team_id": "INTEGER",
        "team": "STRING",
        "logo": "STRING",
        "form": "STRING",
        "clean_sheets": "INTEGER",
        "penalties_scored": "INTEGER",
        "penalties_missed": "INTEGER",
        "average_goals": "FLOAT",
        "win_streak": "INTEGER",
    }

    return [{"name": name, "type": bq_type} for name, bq_type in column_types.items()]
176 |
177 |
def send_dataframe_to_bigquery(standings_dataframe: DataFrame, schema_definition: list[dict[str, str]]) -> None:
    """Replace the BigQuery teams table with the given DataFrame.

    NOTE(review): the first parameter is (mis)named ``standings_dataframe`` —
    apparently copied from standings.py — but it receives the *teams*
    DataFrame. The name is kept unchanged for caller compatibility.

    Bug fix: the body previously referenced the module-level global
    ``teams_dataframe`` instead of its own parameter, so the function raised
    NameError whenever called before the import-time bootstrap ran.
    """
    standings_dataframe.to_gbq(
        destination_table="premier_league_dataset.teams",
        if_exists="replace",
        table_schema=schema_definition,
    )

    print("Teams table loaded!")
186 |
187 |
# NOTE(review): inverted guard — this runs on *import* (orchestrator-driven),
# not on direct execution. Presumably intentional; confirm.
if __name__ != "__main__":
    teams_dataframe = create_dataframe()
    schema_definition = define_table_schema()
    send_dataframe_to_bigquery(teams_dataframe, schema_definition)
192 |
--------------------------------------------------------------------------------
/etl/bigquery/top_scorers.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | import pandas as pd
5 | import requests # type: ignore
6 | from google.cloud import secretmanager
7 | from pandas import DataFrame
8 |
9 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure"
10 |
11 |
def gcp_secret_rapid_api() -> str:
    """Return the RapidAPI key stored in GCP Secret Manager."""
    version_name = "projects/463690670206/secrets/rapid-api/versions/1"
    manager = secretmanager.SecretManagerServiceClient()
    version = manager.access_secret_version(request={"name": version_name})
    return version.payload.data.decode("UTF-8")
19 |
20 |
def call_api() -> tuple[list[str], list[int], list[str], list[int], list[str], list[str]]:
    """Fetch the top five Premier League scorers from the Football API.

    Returns six parallel lists: full name, goals, team, assists,
    nationality, photo URL.
    """

    def _json_text(value):
        # Serialize then strip the surrounding quotes; ensure_ascii=False
        # keeps accented characters in player names intact.
        return str(json.dumps(value, ensure_ascii=False)).strip('"')

    rapid_api_key = gcp_secret_rapid_api()
    headers = {
        "X-RapidAPI-Key": rapid_api_key,
        "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com",
    }

    url = "https://api-football-v1.p.rapidapi.com/v3/players/topscorers"

    query = {"league": "39", "season": "2023"}
    response = requests.get(url, headers=headers, params=query, timeout=10)
    json_res = response.json()

    full_name_list = []
    goals_list = []
    assists_list = []
    team_list = []
    nationality_list = []
    photo_list = []

    for count in range(5):
        player = json_res["response"][count]["player"]
        stats = json_res["response"][count]["statistics"][0]

        # Combine first and last name into a single display name.
        full_name_list.append(_json_text(player["firstname"]) + " " + _json_text(player["lastname"]))

        goals_list.append(int(stats["goals"]["total"]))

        # Assists may be absent or null in the API payload.
        try:
            assists = stats["goals"]["assists"]
            if assists is not None:
                assists_list.append(int(assists))
            else:
                assists_list.append(None)  # type: ignore
        except (ValueError, TypeError):
            assists_list.append(0)

        team_list.append(str(stats["team"]["name"]).strip('"'))

        nationality_list.append(str(player["nationality"]).strip('"'))

        photo_list.append(str(player["photo"]).strip('"'))

    return (
        full_name_list,
        goals_list,
        team_list,
        assists_list,
        nationality_list,
        photo_list,
    )
92 |
93 |
def create_dataframe() -> DataFrame:
    """Assemble the top-scorers lists into a pandas DataFrame."""
    column_names = ["name", "goals", "team", "assists", "nationality", "photo"]

    # call_api() returns six parallel lists in the same order as the names above.
    return pd.DataFrame(dict(zip(column_names, call_api())))
119 |
120 |
def define_table_schema() -> list[dict[str, str]]:
    """Return the BigQuery column schema for the top_scorers table."""
    column_types = {
        "name": "STRING",
        "goals": "INTEGER",
        "team": "STRING",
        "assists": "INTEGER",
        "nationality": "STRING",
        "photo": "STRING",
    }

    return [{"name": name, "type": bq_type} for name, bq_type in column_types.items()]
132 |
133 |
def send_dataframe_to_bigquery(standings_dataframe: DataFrame, schema_definition: list[dict[str, str]]) -> None:
    """Replace the BigQuery top_scorers table with the given DataFrame.

    NOTE(review): the first parameter is (mis)named ``standings_dataframe`` —
    copied from standings.py — but it receives the *top scorers* DataFrame.
    The name is kept unchanged for caller compatibility.

    Bug fix: the body previously referenced the module-level global
    ``top_scorers_dataframe`` instead of its own parameter, so the function
    raised NameError whenever called before the import-time bootstrap ran.
    """
    standings_dataframe.to_gbq(
        destination_table="premier_league_dataset.top_scorers",
        if_exists="replace",
        table_schema=schema_definition,
    )

    print("Top Scorers table loaded!")
142 |
143 |
# NOTE(review): inverted guard — this runs on *import* (orchestrator-driven),
# not on direct execution. Presumably intentional; confirm.
if __name__ != "__main__":
    top_scorers_dataframe = create_dataframe()
    schema_definition = define_table_schema()
    send_dataframe_to_bigquery(top_scorers_dataframe, schema_definition)
148 |
--------------------------------------------------------------------------------
/etl/cloud_functions/standings_transfer.py:
--------------------------------------------------------------------------------
1 | from google.cloud import bigquery
2 | import pandas as pd
3 | import time
4 |
# Export coordinates shared by transfer(); the BigQuery client is created at
# import time (once per Cloud Function instance).
client = bigquery.Client()
bucket_name = "premier_league_bucket"
project = "cloud-data-infrastructure"
dataset_id = "premier_league_dataset"
table_id = "standings"
10 |
11 |
def transfer(request) -> str:
    """Cloud Function entry point: export the BigQuery standings table to GCS,
    then rewrite the CSV sorted by rank with the team_id column dropped.

    Args:
        request: HTTP request object (unused; required by the Cloud
            Functions signature).

    Returns:
        "OK" on completion.
    """
    destination_uri = f"gs://{bucket_name}/standings.csv"
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table(table_id)

    # Export the table as CSV directly into the bucket and block until done.
    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        location="US",
    )
    extract_job.result()

    print(f"Exported {project}.{dataset_id}.{table_id} to {destination_uri}")

    # NOTE(review): the sleep presumably gives the public HTTP endpoint below
    # time to serve the freshly written object — confirm; extract_job.result()
    # already blocks until the export job itself finishes.
    time.sleep(5)

    # Read back via the public URL, then overwrite the gs:// object
    # (the gs:// write presumably relies on gcsfs being installed — verify).
    df = pd.read_csv("https://storage.googleapis.com/premier_league_bucket/standings.csv")
    sorted_df = df.sort_values(by=["rank"], ascending=True)
    removed_columns = sorted_df.drop(columns=["team_id"])
    removed_columns.to_csv(destination_uri, index=False)

    return "OK"
34 |
--------------------------------------------------------------------------------
/etl/cloud_functions/top_scorers_transfer.py:
--------------------------------------------------------------------------------
1 | from google.cloud import bigquery
2 | import pandas as pd
3 | import time
4 |
# Export coordinates shared by transfer(); the BigQuery client is created at
# import time (once per Cloud Function instance).
client = bigquery.Client()
bucket_name = "premier_league_bucket"
project = "cloud-data-infrastructure"
dataset_id = "premier_league_dataset"
table_id = "top_scorers"
10 |
11 |
def transfer(request) -> str:
    """Cloud Function entry point: export the BigQuery top_scorers table to
    GCS, then rewrite the CSV sorted by goals (descending) with the photo
    column dropped.

    Args:
        request: HTTP request object (unused; required by the Cloud
            Functions signature).

    Returns:
        "OK" on completion.
    """
    destination_uri = f"gs://{bucket_name}/top_scorers.csv"
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table(table_id)

    # Export the table as CSV directly into the bucket and block until done.
    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        location="US",
    )
    extract_job.result()

    print(f"Exported {project}.{dataset_id}.{table_id} to {destination_uri}")

    # NOTE(review): the sleep presumably gives the public HTTP endpoint below
    # time to serve the freshly written object — confirm; extract_job.result()
    # already blocks until the export job itself finishes.
    time.sleep(5)

    # Read back via the public URL, then overwrite the gs:// object
    # (the gs:// write presumably relies on gcsfs being installed — verify).
    df = pd.read_csv("https://storage.googleapis.com/premier_league_bucket/top_scorers.csv")
    sorted_df = df.sort_values(by=["goals"], ascending=False)
    removed_columns = sorted_df.drop(columns=["photo"])
    removed_columns.to_csv(destination_uri, index=False)

    return "OK"
34 |
--------------------------------------------------------------------------------
/etl/firestore/fixtures.py:
--------------------------------------------------------------------------------
1 | """
2 | This file calls the Football API to extract match fixture data
3 | and load the collection and documents into Firestore.
4 | """
5 |
6 | # System libraries
7 | import os
8 |
9 | # Google Cloud library imports.
10 | from google.cloud import secretmanager
11 | from firebase_admin import firestore
12 | import firebase_admin
13 | import requests
14 |
15 | # Settings the project environment.
16 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure"
17 |
18 |
def call_api(secret_name):
    """Fetch the RapidAPI key stored under ``secret_name`` in Secret Manager
    and return the request headers for the Football API.
    """

    secret_client = secretmanager.SecretManagerServiceClient()
    secret_response = secret_client.access_secret_version(request={"name": secret_name})
    rapid_api_key = secret_response.payload.data.decode("UTF-8")

    # Headers used for RapidAPI.
    return {
        "content-type": "application/octet-stream",
        "X-RapidAPI-Key": rapid_api_key,
        "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com",
    }
37 |
38 |
class Fixture:
    """JSON-serializable structure for one fixture document.

    Attributes:
        date: Kick-off date/time string as returned by the Football API.
        teams: Mapping with the home/away team details.
        goals: Mapping with the goal counts, or None when not supplied.
    """

    def __init__(self, date, teams, goals=None):
        self.date = date
        self.teams = teams
        self.goals = goals

    def __repr__(self):
        # Bug fix: the labels previously read "name=" and "country=" (a
        # copy-paste from another class) although the values shown were the
        # fixture's date and teams.
        return f"Fixture(date={self.date}, teams={self.teams}, goals={self.goals})"

    def to_dict(self):
        """Return the document payload written to Firestore."""
        return {"date": self.date, "teams": self.teams, "goals": self.goals}
56 |
57 |
def get_current_round():
    """Return the current round label from the Football API.

    The value (e.g. "Regular Season - 12") is needed as a parameter when
    requesting the fixtures for the current round.
    """

    headers = call_api("projects/463690670206/secrets/rapid-api/versions/1")

    url = "https://api-football-v1.p.rapidapi.com/v3/fixtures/rounds"
    querystring = {"league": "39", "season": "2023", "current": "true"}
    response = requests.get(url, headers=headers, params=querystring, timeout=20)

    # example response: "Regular Season - 12"
    return response.json()["response"][0]
75 |
76 |
def retrieve_data_for_current_round():
    """Fetch the fixtures for the round reported by get_current_round()
    and return the raw requests.Response.
    """

    headers = call_api("projects/463690670206/secrets/rapid-api/versions/1")

    url = "https://api-football-v1.p.rapidapi.com/v3/fixtures"
    querystring = {
        "league": "39",
        "season": "2023",
        "round": get_current_round(),
    }

    return requests.get(url, headers=headers, params=querystring, timeout=20)
90 |
91 |
def load_firestore():
    """Write the current round's first ten fixtures into Firestore.

    Creates/overwrites one document per fixture, named "<away> vs <home>",
    inside a collection named after the current round label.
    """

    current_round_response = get_current_round()
    build_current_response = retrieve_data_for_current_round()

    # Initialize the Firebase app exactly once per process.
    if not firebase_admin._apps:
        firebase_admin.initialize_app()
    db = firestore.client()

    # Perf fix: the response body was previously re-parsed with .json() five
    # times per loop iteration; parse it once up front. Slicing also tolerates
    # a round with fewer than ten fixtures (the fixed count raised IndexError).
    fixtures = build_current_response.json()["response"]

    for fixture_data in fixtures[:10]:
        fixture_date = fixture_data["fixture"]["date"]
        teams_dict = fixture_data["teams"]
        goal_dict = fixture_data["goals"]

        # Away and home team names build the document name.
        away_team = teams_dict["away"]["name"]
        home_team = teams_dict["home"]["name"]

        fixture = Fixture(date=fixture_date, teams=teams_dict, goals=goal_dict)

        db.collection(f"{current_round_response}").document(
            f"{away_team} vs {home_team}"
        ).set(fixture.to_dict())

    print(f"Document {current_round_response} has been loaded!")
129 |
130 |
# NOTE(review): inverted guard — load_firestore() runs on *import*, not when
# the file is executed directly. Presumably intentional (orchestrator-driven);
# confirm.
if __name__ != "__main__":
    load_firestore()
133 |
--------------------------------------------------------------------------------
/etl/postgres/stock.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import polars as pl
4 | import requests # type: ignore
5 | from google.cloud import secretmanager
6 |
7 | # Settings the project environment.
8 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure"
9 |
10 |
def gcp_secret_stock_api() -> str:
    """Retrieve the stock API key from GCP Secret Manager.

    (Doc fix: the old docstring said "Rapid API key", but the secret read
    here is ``stock-api``.)
    """

    secret_path = "projects/463690670206/secrets/stock-api/versions/1"
    client = secretmanager.SecretManagerServiceClient()
    secret_version = client.access_secret_version(request={"name": secret_path})
    return secret_version.payload.data.decode("UTF-8")
20 |
21 |
def gcp_secret_postgresql_uri() -> str:
    """Retrieve the PostgreSQL connection URI from GCP Secret Manager.

    (Doc fix: the old docstring said "Rapid API key", but the secret read
    here is ``postgresql-uri``.)
    """

    secret_path = "projects/463690670206/secrets/postgresql-uri/versions/1"
    client = secretmanager.SecretManagerServiceClient()
    secret_version = client.access_secret_version(request={"name": secret_path})
    return secret_version.payload.data.decode("UTF-8")
31 |
32 |
def send_dataframe_to_postgres() -> None:
    """Fetch the MANU stock quote and append it to the Postgres ``stocks`` table.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
    """
    stock_api_key = gcp_secret_stock_api()
    postgresql_uri = gcp_secret_postgresql_uri()

    url = f"https://financialmodelingprep.com/api/v3/quote/MANU?apikey={stock_api_key}"

    # Robustness fix: requests.request("GET", url) had no timeout, so a hung
    # connection could stall the pipeline indefinitely. Use requests.get with
    # an explicit timeout, consistent with the other ETL modules.
    response = requests.get(url, timeout=10)

    df = pl.DataFrame(response.json())

    df.write_database(
        table_name="stocks", connection=postgresql_uri, if_table_exists="append"
    )
47 |
48 |
# NOTE(review): inverted guard — the load runs on *import*, not on direct
# execution. Presumably intentional (orchestrator-driven); confirm.
if __name__ != "__main__":
    send_dataframe_to_postgres()
51 |
--------------------------------------------------------------------------------
/etl/requirements-data.txt:
--------------------------------------------------------------------------------
1 | # These libraries are used for the ETL data pipelines.
2 |
3 | requests==2.31.0
4 |
5 | # Data
6 | pandas==2.1.4
7 | pandas-gbq==0.20.0
8 | polars==0.20.2
9 | psycopg2-binary==2.9.9
10 | sqlalchemy==2.0.25
11 | prefect-soda-core==0.1.8
12 | soda-core-bigquery==3.1.3
13 | dbt-bigquery==1.7.4
14 |
15 | # Google Cloud
16 | firebase-admin==6.3.0
17 | google-cloud-bigquery==3.14.1
18 | google-cloud-secret-manager==2.17.0
19 | google-cloud-run==0.10.1
20 | google-api-python-client==2.111.0
--------------------------------------------------------------------------------
/monitoring/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | prometheus:
3 | image: prom/prometheus
4 | volumes:
5 | - "./prometheus.yml:/etc/prometheus/prometheus.yml"
6 | ports:
      - "9090:9090"
8 |
9 | grafana:
10 | image: grafana/grafana
11 | ports:
12 | - "3000:3000"
13 | user: "501"
14 | volumes:
15 | - ./grafana_storage:/var/lib/grafana
16 |
17 | postgres-exporter:
18 | image: prometheuscommunity/postgres-exporter
19 | volumes:
20 | - "./postgres_exporter.yml:/postgres_exporter.yml:ro"
21 | ports:
      - "9187:9187"
23 | environment:
24 | DATA_SOURCE_NAME: "{{ db.string }}"
25 |
--------------------------------------------------------------------------------
/monitoring/postgres_exporter.yml:
--------------------------------------------------------------------------------
1 | auth_modules:
2 | postgres_conn:
3 | type: userpass
4 | userpass:
5 | username: "{{ db.username }}"
6 | password: "{{ db.password }}"
7 | options:
8 | port: 5432
9 | sslmode: disable
--------------------------------------------------------------------------------
/monitoring/prometheus.yml:
--------------------------------------------------------------------------------
1 | global:
2 | scrape_interval: 45s
3 |
4 | scrape_configs:
5 | - job_name: 'prometheus'
6 | static_configs:
7 | - targets: ['localhost:9090']
8 |
9 | - job_name: 'postgresql_exporter'
10 | static_configs:
11 | - targets: ['host.docker.internal:9187']
--------------------------------------------------------------------------------
/prefect/data_quality_scan.py:
--------------------------------------------------------------------------------
1 | from prefect import flow # type: ignore
2 | from prefect.context import get_run_context # type: ignore
3 | from prefect_soda_core.soda_configuration import SodaConfiguration # type: ignore
4 | from prefect_soda_core.sodacl_check import SodaCLCheck # type: ignore
5 | from prefect_soda_core.tasks import soda_scan_execute # type: ignore
6 |
7 |
@flow
def run_soda_scan():
    """Execute a Soda data-quality scan against the BigQuery connection."""
    configuration = SodaConfiguration(
        configuration_yaml_path="./soda/configuration.yaml"
    )
    checks = SodaCLCheck(sodacl_yaml_path="./soda/checks.yaml")

    # The flow-run name doubles as the scan-result file name.
    results_path = f"{get_run_context().flow_run.name}.json"

    return soda_scan_execute(
        data_source_name="bigquery_connection",
        configuration=configuration,
        checks=checks,
        variables={"var": "value"},
        scan_results_file=results_path,
        verbose=True,
        return_scan_result_file_content=False,
    )
28 |
29 |
30 | run_soda_scan()
31 |
--------------------------------------------------------------------------------
/prefect/deployments/current_round-deployment.yaml:
--------------------------------------------------------------------------------
1 | ###
2 | ### A complete description of a Prefect Deployment for flow 'current-round'
3 | ###
4 | name: current_round
5 | description: null
6 | version: d0781db7af4df7adb6a04236cfa5bb1c
7 | # The work queue that will handle this deployment's runs
8 | work_queue_name: premier-league-work-queue
9 | work_pool_name: premier-league-work-pool
10 | tags: []
11 | parameters: {}
12 | schedule: null
13 | schedules: []
14 | is_schedule_active: null
15 | infra_overrides: {}
16 |
17 | ###
18 | ### DO NOT EDIT BELOW THIS LINE
19 | ###
20 | flow_name: current-round
21 | manifest_path: null
22 | infrastructure:
23 | type: process
24 | env: {}
25 | labels: {}
26 | name: null
27 | command: null
28 | stream_output: true
29 | working_dir: null
30 | _block_document_id: b7fedd41-43ec-4767-a37d-71a761b2a50a
31 | _block_document_name: anonymous-148860db-e1fc-4935-9b52-df498459df91
32 | _is_anonymous: true
33 | block_type_slug: process
34 | _block_type_slug: process
35 | storage: null
36 | path: /home/pythonsanchez/premier-league
37 | entrypoint: prefect/flows.py:current_round
38 | parameter_openapi_schema:
39 | title: Parameters
40 | type: object
41 | properties: {}
42 | required: null
43 | definitions: null
44 | timestamp: '2024-03-19T00:55:34.176016+00:00'
45 | triggers: []
46 | enforce_parameter_schema: null
--------------------------------------------------------------------------------
/prefect/deployments/highlights-deployment.yaml:
--------------------------------------------------------------------------------
1 | ###
2 | ### A complete description of a Prefect Deployment for flow 'highlights'
3 | ###
name: highlights
5 | description: null
6 | version: d0781db7af4df7adb6a04236cfa5bb1c
7 | # The work queue that will handle this deployment's runs
8 | work_queue_name: premier-league-work-queue
9 | work_pool_name: premier-league-work-pool
10 | tags: []
11 | parameters: {}
12 | schedule: null
13 | schedules: []
14 | is_schedule_active: null
15 | infra_overrides: {}
16 |
17 | ###
18 | ### DO NOT EDIT BELOW THIS LINE
19 | ###
20 | flow_name: highlights
21 | manifest_path: null
22 | infrastructure:
23 | type: process
24 | env: {}
25 | labels: {}
26 | name: null
27 | command: null
28 | stream_output: true
29 | working_dir: null
30 | _block_document_id: cc1a4def-5006-481c-8a99-fd7b50d69d87
31 | _block_document_name: anonymous-94073dd6-53db-44a1-9dc2-09c93c7d9117
32 | _is_anonymous: true
33 | block_type_slug: process
34 | _block_type_slug: process
35 | storage: null
36 | path: /home/pythonsanchez/premier-league
37 | entrypoint: prefect/flows.py:highlights
38 | parameter_openapi_schema:
39 | title: Parameters
40 | type: object
41 | properties: {}
42 | required: null
43 | definitions: null
44 | timestamp: '2024-03-19T00:54:49.874866+00:00'
45 | triggers: []
46 | enforce_parameter_schema: null
--------------------------------------------------------------------------------
/prefect/deployments/injuries-deployment.yaml:
--------------------------------------------------------------------------------
1 | ###
2 | ### A complete description of a Prefect Deployment for flow 'injuries'
3 | ###
4 | name: injuries
5 | description: null
6 | version: d0781db7af4df7adb6a04236cfa5bb1c
7 | # The work queue that will handle this deployment's runs
8 | work_queue_name: premier-league-work-queue
9 | work_pool_name: premier-league-work-pool
10 | tags: []
11 | parameters: {}
12 | schedule: null
13 | schedules: []
14 | is_schedule_active: null
15 | infra_overrides: {}
16 |
17 | ###
18 | ### DO NOT EDIT BELOW THIS LINE
19 | ###
20 | flow_name: injuries
21 | manifest_path: null
22 | infrastructure:
23 | type: process
24 | env: {}
25 | labels: {}
26 | name: null
27 | command: null
28 | stream_output: true
29 | working_dir: null
30 | _block_document_id: 5a7ad7a1-341f-4b70-8ac8-4a18768aa962
31 | _block_document_name: anonymous-2bd75ab0-bb08-4153-b8e4-de4247a132d0
32 | _is_anonymous: true
33 | block_type_slug: process
34 | _block_type_slug: process
35 | storage: null
36 | path: /home/pythonsanchez/premier-league
37 | entrypoint: prefect/flows.py:injuries
38 | parameter_openapi_schema:
39 | title: Parameters
40 | type: object
41 | properties: {}
42 | required: null
43 | definitions: null
44 | timestamp: '2024-03-19T00:55:26.497708+00:00'
45 | triggers: []
46 | enforce_parameter_schema: null
--------------------------------------------------------------------------------
/prefect/deployments/news-deployment.yaml:
--------------------------------------------------------------------------------
1 | ###
2 | ### A complete description of a Prefect Deployment for flow 'news'
3 | ###
4 | name: news
5 | description: null
6 | version: d0781db7af4df7adb6a04236cfa5bb1c
7 | # The work queue that will handle this deployment's runs
8 | work_queue_name: premier-league-work-queue
9 | work_pool_name: premier-league-work-pool
10 | tags: []
11 | parameters: {}
12 | schedule: null
13 | schedules: []
14 | is_schedule_active: null
15 | infra_overrides: {}
16 |
17 | ###
18 | ### DO NOT EDIT BELOW THIS LINE
19 | ###
20 | flow_name: news
21 | manifest_path: null
22 | infrastructure:
23 | type: process
24 | env: {}
25 | labels: {}
26 | name: null
27 | command: null
28 | stream_output: true
29 | working_dir: null
30 | _block_document_id: be253629-d0c6-4a28-853f-cf861cca30d7
31 | _block_document_name: anonymous-f5036643-492b-49fd-a992-eb64855b0732
32 | _is_anonymous: true
33 | block_type_slug: process
34 | _block_type_slug: process
35 | storage: null
36 | path: /home/pythonsanchez/premier-league
37 | entrypoint: prefect/flows.py:news
38 | parameter_openapi_schema:
39 | title: Parameters
40 | type: object
41 | properties: {}
42 | required: null
43 | definitions: null
44 | timestamp: '2024-03-19T00:55:04.964169+00:00'
45 | triggers: []
46 | enforce_parameter_schema: null
--------------------------------------------------------------------------------
/prefect/deployments/run_soda_scan-deployment.yaml:
--------------------------------------------------------------------------------
1 | ###
2 | ### A complete description of a Prefect Deployment for flow 'run-soda-scan'
3 | ###
4 | name: data_quality
5 | description: null
6 | version: b7519422832beb73bb96d4191849271b
7 | # The work queue that will handle this deployment's runs
8 | work_queue_name: premier-league-work-queue
9 | work_pool_name: premier-league-work-pool
10 | tags: []
11 | parameters: {}
12 | schedule: null
13 | schedules: []
14 | is_schedule_active: null
15 | infra_overrides: {}
16 |
17 | ###
18 | ### DO NOT EDIT BELOW THIS LINE
19 | ###
20 | flow_name: run-soda-scan
21 | manifest_path: null
22 | infrastructure:
23 | type: process
24 | env: {}
25 | labels: {}
26 | name: null
27 | command: null
28 | stream_output: true
29 | working_dir: null
30 | _block_document_id: 231c21a0-bea6-4800-86f4-f9924fe459a2
31 | _block_document_name: anonymous-54354ea5-5dfd-48a1-a920-7f8795b80fd5
32 | _is_anonymous: true
33 | block_type_slug: process
34 | _block_type_slug: process
35 | storage: null
36 | path: /home/pythonsanchez/premier-league
37 | entrypoint: prefect/data_quality_scan.py:run_soda_scan
38 | parameter_openapi_schema:
39 | title: Parameters
40 | type: object
41 | properties: {}
42 | required: null
43 | definitions: null
44 | timestamp: '2024-03-19T00:55:42.096102+00:00'
45 | triggers: []
46 | enforce_parameter_schema: null
--------------------------------------------------------------------------------
/prefect/deployments/squads-deployment.yaml:
--------------------------------------------------------------------------------
1 | ###
2 | ### A complete description of a Prefect Deployment for flow 'squads'
3 | ###
4 | name: squads
5 | description: null
6 | version: d0781db7af4df7adb6a04236cfa5bb1c
7 | # The work queue that will handle this deployment's runs
8 | work_queue_name: premier-league-work-queue
9 | work_pool_name: premier-league-work-pool
10 | tags: []
11 | parameters: {}
12 | schedule: null
13 | schedules: []
14 | is_schedule_active: null
15 | infra_overrides: {}
16 |
17 | ###
18 | ### DO NOT EDIT BELOW THIS LINE
19 | ###
20 | flow_name: squads
21 | manifest_path: null
22 | infrastructure:
23 | type: process
24 | env: {}
25 | labels: {}
26 | name: null
27 | command: null
28 | stream_output: true
29 | working_dir: null
30 | _block_document_id: 9eb2d8f7-7ef9-4f30-bf97-a8217320d4bc
31 | _block_document_name: anonymous-67d88fb7-ee19-4c67-83b0-a0c4de72b28e
32 | _is_anonymous: true
33 | block_type_slug: process
34 | _block_type_slug: process
35 | storage: null
36 | path: /home/pythonsanchez/premier-league
37 | entrypoint: prefect/flows.py:squads
38 | parameter_openapi_schema:
39 | title: Parameters
40 | type: object
41 | properties: {}
42 | required: null
43 | definitions: null
44 | timestamp: '2024-03-19T00:55:19.207112+00:00'
45 | triggers: []
46 | enforce_parameter_schema: null
--------------------------------------------------------------------------------
/prefect/deployments/statistics-deployment.yaml:
--------------------------------------------------------------------------------
1 | ###
2 | ### A complete description of a Prefect Deployment for flow 'statistics'
3 | ###
4 | name: statistics
5 | description: null
6 | version: d0781db7af4df7adb6a04236cfa5bb1c
7 | # The work queue that will handle this deployment's runs
8 | work_queue_name: premier-league-work-queue
9 | work_pool_name: premier-league-work-pool
10 | tags: []
11 | parameters: {}
12 | schedule: null
13 | schedules: []
14 | is_schedule_active: null
15 | infra_overrides: {}
16 |
17 | ###
18 | ### DO NOT EDIT BELOW THIS LINE
19 | ###
20 | flow_name: statistics
21 | manifest_path: null
22 | infrastructure:
23 | type: process
24 | env: {}
25 | labels: {}
26 | name: null
27 | command: null
28 | stream_output: true
29 | working_dir: null
30 | _block_document_id: d07769e0-7478-47f9-a89f-fdba7df219df
31 | _block_document_name: anonymous-085bb46e-2985-452e-840a-222a128de535
32 | _is_anonymous: true
33 | block_type_slug: process
34 | _block_type_slug: process
35 | storage: null
36 | path: /home/pythonsanchez/premier-league
37 | entrypoint: prefect/flows.py:statistics
38 | parameter_openapi_schema:
39 | title: Parameters
40 | type: object
41 | properties: {}
42 | required: null
43 | definitions: null
44 | timestamp: '2024-03-19T00:54:36.287059+00:00'
45 | triggers: []
46 | enforce_parameter_schema: null
47 |
--------------------------------------------------------------------------------
/prefect/deployments/stocks-deployment.yaml:
--------------------------------------------------------------------------------
1 | ###
2 | ### A complete description of a Prefect Deployment for flow 'stocks'
3 | ###
4 | name: stocks
5 | description: null
6 | version: d0781db7af4df7adb6a04236cfa5bb1c
7 | # The work queue that will handle this deployment's runs
8 | work_queue_name: premier-league-work-queue
9 | work_pool_name: premier-league-work-pool
10 | tags: []
11 | parameters: {}
12 | schedule: null
13 | schedules: []
14 | is_schedule_active: null
15 | infra_overrides: {}
16 |
17 | ###
18 | ### DO NOT EDIT BELOW THIS LINE
19 | ###
20 | flow_name: stocks
21 | manifest_path: null
22 | infrastructure:
23 | type: process
24 | env: {}
25 | labels: {}
26 | name: null
27 | command: null
28 | stream_output: true
29 | working_dir: null
30 | _block_document_id: d00fda5e-d9ad-4e29-9186-3a82f6c7387b
31 | _block_document_name: anonymous-a7b04bd9-9549-458a-9263-3adc7756e42e
32 | _is_anonymous: true
33 | block_type_slug: process
34 | _block_type_slug: process
35 | storage: null
36 | path: /home/pythonsanchez/premier-league
37 | entrypoint: prefect/flows.py:stocks
38 | parameter_openapi_schema:
39 | title: Parameters
40 | type: object
41 | properties: {}
42 | required: null
43 | definitions: null
44 | timestamp: '2024-03-19T00:55:12.070787+00:00'
45 | triggers: []
46 | enforce_parameter_schema: null
--------------------------------------------------------------------------------
/prefect/flows.py:
--------------------------------------------------------------------------------
1 | from prefect import task, flow
2 |
3 | # --- Statistics ---
# NOTE(review): each task body below is a bare import — the ETL presumably
# executes at import time as a module side effect (the imported name is never
# called here). TODO: confirm against the corresponding etl/ modules.
@task
def task_standings():
	"""Standings -> BigQuery load (body is a module import; see NOTE above)."""
	from etl.bigquery.standings import send_dataframe_to_bigquery

@task
def task_teams():
	"""Teams -> BigQuery load (body is a module import; see NOTE above)."""
	from etl.bigquery.teams import send_dataframe_to_bigquery

@task
def task_top_scorers():
	"""Top scorers -> BigQuery load (body is a module import; see NOTE above)."""
	from etl.bigquery.top_scorers import send_dataframe_to_bigquery

@task
def task_fixtures():  # renamed from `task_fixtues` (typo); only referenced below
	"""Fixtures -> Firestore load (body is a module import; see NOTE above)."""
	from etl.firestore.fixtures import load_firestore

@flow
def statistics():
	"""Run the statistics ETL tasks strictly in sequence.

	Deployed via prefect/deployments/statistics-deployment.yaml
	(entrypoint prefect/flows.py:statistics).
	"""
	a = task_standings()
	b = task_teams(wait_for=[a])
	c = task_top_scorers(wait_for=[a, b])
	d = task_fixtures(wait_for=[a, b, c])
26 |
27 | # --- News ---
@task
def task_news():
	# Bare import: the news -> BigQuery ETL presumably runs at import time as a
	# module side effect; the imported function is never called here — TODO
	# confirm against etl/bigquery/news.py.
	from etl.bigquery.news import send_dataframe_to_bigquery

@flow
def news():
	# Single-task flow; deployed via prefect/deployments/news-deployment.yaml.
	# Assignment result is unused — calling the task is what runs it.
	a = task_news()
35 |
36 | # --- Highlights ---
@task
def task_highlights():
	# Bare import: the highlights -> BigQuery ETL presumably runs at import
	# time as a module side effect — TODO confirm against etl/bigquery/highlights.py.
	from etl.bigquery.highlights import send_dataframe_to_bigquery

@flow
def highlights():
	# Single-task flow; deployed via prefect/deployments/highlights-deployment.yaml.
	a = task_highlights()
44 |
45 | # --- Stocks ---
@task
def task_stocks():
	# Bare import: the stock -> Postgres ETL presumably runs at import time as
	# a module side effect — TODO confirm against etl/postgres/stock.py.
	from etl.postgres.stock import send_dataframe_to_postgres

@flow
def stocks():
	# Single-task flow; deployed via prefect/deployments/stocks-deployment.yaml.
	a = task_stocks()
53 |
54 | # --- Squads ---
@task
def task_squads():
	# Bare import: the squads ETL presumably runs at import time as a module
	# side effect — TODO confirm against etl/bigquery/squads.py.
	from etl.bigquery.squads import call_api

@flow
def squads():
	# Single-task flow; deployed via prefect/deployments/squads-deployment.yaml.
	a = task_squads()
62 |
63 | # --- Injuries ---
@task
def task_injuries():
	# Bare import: the injuries ETL presumably runs at import time as a module
	# side effect — TODO confirm against etl/bigquery/injuries.py.
	from etl.bigquery.injuries import call_api

@flow
def injuries():
	# Single-task flow; deployed via prefect/deployments/injuries-deployment.yaml.
	a = task_injuries()
71 |
72 | # --- Current Round ---
@task
def task_current_round():
	# Bare import: the current-round ETL presumably runs at import time as a
	# module side effect — TODO confirm against etl/bigquery/current_round.py.
	from etl.bigquery.current_round import load_current_round

@flow
def current_round():
	# Single-task flow; deployed via prefect/deployments/current_round-deployment.yaml.
	a = task_current_round()
80 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.bandit]
2 | exclude_dirs = ["tests"]
skips = ["B608"] # Never enforce `B608` (hardcoded SQL expressions / possible SQL injection).
4 |
5 | [tool.coverage.report]
6 | show_missing = true
7 |
8 | [tool.ruff]
9 | ignore = ["E501"] # Never enforce `E501` (line length violations).
10 | line-length = 120
11 |
12 | [tool.ruff.format]
13 | quote-style = "double"
14 | indent-style = "tab"
15 | line-ending = "auto"
16 |
17 | [tool.pytest.ini_options]
18 | pythonpath = [
19 | ".", "components", "tests"
20 | ]
21 | testpaths = "tests"
22 | filterwarnings = [
23 | "ignore:Deprecated call to `pkg_resources\\.declare_namespace\\('.*'\\):DeprecationWarning",
24 | "ignore::DeprecationWarning:google.rpc",
25 | ]
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # These libraries are used ONLY for the Streamlit app.
2 | # This is to make the Docker image as small as possible and reduce security issues.
3 |
4 | # Streamlit
5 | pandas==2.1.4
6 | plotly==5.18.0
7 | streamlit==1.33.0
8 |
9 | # Google Cloud
10 | firebase-admin==6.3.0
11 | google-cloud-core==2.4.0
12 | google-cloud-bigquery==3.14.1
13 | google-cloud-secret-manager==2.17.0
--------------------------------------------------------------------------------
/soda/checks.yaml:
--------------------------------------------------------------------------------
1 | checks for news:
2 | - row_count > 1
3 | - invalid_count(url) = 0:
4 | valid regex: ^https://
5 |
6 | checks for stadiums:
7 | - row_count = 20
8 |
9 | checks for standings:
10 | - row_count = 20
11 | - duplicate_count(team) = 0
12 | - max(points) < 114
13 | - min(points) > 0
14 |
15 | checks for teams:
16 | - row_count = 20
17 | - duplicate_count(team) = 0
18 |
19 | checks for top_scorers:
20 | - row_count = 5
--------------------------------------------------------------------------------
/soda/configuration.yaml:
--------------------------------------------------------------------------------
1 | data_source bigquery_connection:
2 | type: bigquery
3 | use_context_auth: true
4 | auth_scopes:
5 | - https://www.googleapis.com/auth/bigquery
6 | - https://www.googleapis.com/auth/cloud-platform
7 | - https://www.googleapis.com/auth/drive
8 | project_id: "cloud-data-infrastructure"
9 | dataset: premier_league_dataset
--------------------------------------------------------------------------------
/streamlit_app.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 |
4 | from datetime import datetime
5 |
6 | import pandas as pd
7 | import streamlit as st
8 | from streamlit.delta_generator import DeltaGenerator
9 |
10 | # Importing classes from components/ directory.
11 | from components.about_section import AboutSection
12 | from components.fixtures_section import FixturesSection
13 | from components.highlights_section import HighlightsSection
14 | from components.injuries_section import InjuriesSection
15 | from components.league_form_section import LeagueFormsSection
16 | from components.news_section import NewsSection
17 | from components.point_progression_section import PointProgressionSection
18 | from components.point_slider_section import PointSliderSection
19 | from components.social_media_section import SocialMediaSection
20 | from components.squads_section import SquadSection
21 | from components.stadiums_map_section import StadiumMapSection
22 | from components.stock_section import StockSection
23 | from components.top_scorers_section import TopScorersSection
24 | from components.top_teams_section import TopTeamsSection
25 | from components.connections import (
26 | firestore_connection,
27 | get_highlights,
28 | get_injuries,
29 | get_league_statistics,
30 | get_max_round,
31 | get_min_round,
32 | get_news,
33 | get_squads,
34 | get_stadiums,
35 | get_standings,
36 | get_stocks,
37 | get_teams,
38 | get_top_scorers,
39 | )
40 |
41 | import google.auth
42 |
# Module-level GCP setup: pin the project for client libraries, then resolve
# Application Default Credentials.
project_id = "cloud-data-infrastructure"
os.environ["GCLOUD_PROJECT"] = project_id
# NOTE(review): this rebinds `project_id` to whatever project the default
# credentials resolve to, which may differ from the literal above — confirm
# that the shadowing is intended.
credentials, project_id = google.auth.default()

# Page config must be the first Streamlit call in the script.
st.set_page_config(page_title="Streamlit: Premier League", layout="wide")
48 |
49 |
def streamlit_app():
	"""Render the full dashboard: fetch every dataset once, then lay the UI
	out across eight tabs (standings, teams, players, fixtures, squads,
	news, stock, about)."""
	# Get the dataframes.
	firestore_database = firestore_connection()
	highlights_df = get_highlights()
	injuries_df = get_injuries()
	league_statistics_df = get_league_statistics()
	max_round = get_max_round()
	min_round = get_min_round()
	news_df = get_news()
	squads_df = get_squads()
	standings_df = get_standings()
	stadiums_df = get_stadiums()
	stocks_df = get_stocks()
	teams_df = get_teams()
	top_scorers_df = get_top_scorers()

	fixtures_section = FixturesSection(firestore_database, max_round, min_round)

	# Image, title, and subheader.
	with st.container():
		st.markdown(
			' ',
			unsafe_allow_html=True,
		)
		st.title("Premier League Statistics / 2023-24")
		st.subheader(f"Current Round: {max_round}")

		# Ordinal suffix for a day of the month (1st, 2nd, 3rd; 11th-13th -> "th").
		def get_suffix(day):
			if 10 < day % 100 < 20:
				suffix = "th"
			else:
				suffix = {1: "st", 2: "nd", 3: "rd"}.get(day % 10, "th")
			return suffix

		current_date = datetime.now()
		day = current_date.day
		suffix = get_suffix(day)
		formatted_day = str(day).lstrip("0")
		formatted_date = current_date.strftime("%B ") + formatted_day + suffix + current_date.strftime(", %Y")

		st.write(f"{formatted_date}")

	# Tab menu.
	tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8 = st.tabs(
		[
			"Standings & Overview",
			"Teams Statistics",
			"Players & Injuries",
			"Fixtures",
			"Squads",
			"News & Highlights",  # fixed typo: was "News & Hightlights"
			"Manchester United Stock (Beta)",
			"About",
		]
	)

	# --------- Overview Tab ---------
	# Tab 1 holds the following sections: [League Statistics, Current Standings, Location of Stadiums].
	with tab1:
		st.subheader("League Statistics")
		col1, col2, col3, col4 = st.columns(4)

		# Average goals scored column (top five teams by average goals).
		with col1:
			teams_df_average_goals = teams_df.sort_values(by=["average_goals"], ascending=False)
			max_average_goals = teams_df_average_goals.iloc[0, 6]

			average_goals_df = pd.DataFrame(
				{
					"Average Goals": [
						max_average_goals,
						teams_df_average_goals.iloc[1, 6],
						teams_df_average_goals.iloc[2, 6],
						teams_df_average_goals.iloc[3, 6],
						teams_df_average_goals.iloc[4, 6],
					],
					"Team": [
						teams_df_average_goals.iloc[0, 2],
						teams_df_average_goals.iloc[1, 2],
						teams_df_average_goals.iloc[2, 2],
						teams_df_average_goals.iloc[3, 2],
						teams_df_average_goals.iloc[4, 2],
					],
				}
			)

			st.dataframe(
				average_goals_df,
				column_config={
					"Average Goals": st.column_config.ProgressColumn(
						"Average Goals",
						help="The Average Goals Scored by Each Team.",
						format="%f",
						min_value=0,
						# NOTE(review): int(round(x, 2)) truncates the decimals
						# before doubling — confirm the intended progress ceiling.
						max_value=int(round(max_average_goals, 2)) * 2,
					),
				},
				hide_index=True,
			)

		# Penalties scored column (top five teams by penalties scored).
		with col2:
			teams_df_penalties_scored = teams_df.sort_values(by=["penalties_scored"], ascending=False)
			max_penalties_scored = teams_df_penalties_scored.iloc[0, 4]

			penalties_scored_df = pd.DataFrame(
				{
					"Penalties Scored": [
						max_penalties_scored,
						teams_df_penalties_scored.iloc[1, 4],
						teams_df_penalties_scored.iloc[2, 4],
						teams_df_penalties_scored.iloc[3, 4],
						teams_df_penalties_scored.iloc[4, 4],
					],
					"Team": [
						teams_df_penalties_scored.iloc[0, 2],
						teams_df_penalties_scored.iloc[1, 2],
						teams_df_penalties_scored.iloc[2, 2],
						teams_df_penalties_scored.iloc[3, 2],
						teams_df_penalties_scored.iloc[4, 2],
					],
				}
			)

			st.dataframe(
				penalties_scored_df,
				column_config={
					"Penalties Scored": st.column_config.ProgressColumn(
						"Penalties Scored",
						help="The Amount of Penalties Scored by Each Team.",
						format="%d",
						min_value=0,
						max_value=int(max_penalties_scored) * 2,
					),
				},
				hide_index=True,
			)

		# Win streak column (top five teams by biggest win streak).
		with col3:
			teams_df_win_streak = teams_df.sort_values(by=["win_streak"], ascending=False)
			max_win_streak = teams_df_win_streak.iloc[0, 7]

			win_streak_df = pd.DataFrame(
				{
					"Biggest Win Streak": [
						max_win_streak,
						teams_df_win_streak.iloc[1, 7],
						teams_df_win_streak.iloc[2, 7],
						teams_df_win_streak.iloc[3, 7],
						teams_df_win_streak.iloc[4, 7],
					],
					"Team": [
						teams_df_win_streak.iloc[0, 2],
						teams_df_win_streak.iloc[1, 2],
						teams_df_win_streak.iloc[2, 2],
						teams_df_win_streak.iloc[3, 2],
						teams_df_win_streak.iloc[4, 2],
					],
				}
			)

			st.dataframe(
				win_streak_df,
				column_config={
					"Biggest Win Streak": st.column_config.ProgressColumn(
						"Biggest Win Streak",
						help="The Biggest Win Streak by Each Team.",
						format="%d",
						min_value=0,
						max_value=int(max_win_streak) * 2,
					),
				},
				hide_index=True,
			)

		# League-wide totals column.
		with col4:
			st.markdown("**League Statistics**")

			with st.container():
				league_statistics_df = pd.DataFrame(
					{
						"labels": ["Goals Scored", "Penalties Scored", "Clean Sheets"],
						"metrics": [
							league_statistics_df.iloc[0, 0],
							league_statistics_df.iloc[0, 1],
							league_statistics_df.iloc[0, 2],
						],
					}
				)

				st.dataframe(
					league_statistics_df,
					column_config={
						"metrics": st.column_config.NumberColumn(
							"Amount",
							help="The Amount of Goals, Penalties Scored, and Clean Sheets in the League.",
							min_value=0,
							max_value=1000,
							step=1,
						),
						"labels": st.column_config.TextColumn(
							"Metric",
						),
					},
					hide_index=True,
				)

		# Function to create the standings table (dataframe).
		def standings_table() -> DeltaGenerator:
			st.subheader("Current Standings")

			standings_table = st.dataframe(
				standings_df.style.set_table_styles([{"selector": "th", "props": [("background-color", "yellow")]}]),
				column_config={
					"logo": st.column_config.ImageColumn("Icon", width="small"),
					"rank": "Rank",
					"points": "Points",
					"team": "Club",
					"games_played": "Games Played",
					"wins": "Wins",
					"draws": "Draws",
					"loses": "Loses",
					"goals_for": "Goals For",
					"goals_against": "Goals Against",
					"goal_difference": "Goal Difference",
				},
				hide_index=True,
				use_container_width=True,
			)

			return standings_table

		standings_table()

		# Stadiums
		stadium_map_section = StadiumMapSection()
		stadium_map_section.display(stadiums_df)

	# --------- Team Statistics Tab ---------
	# Tab 2 holds the following sections: [Top Teams, Point Progression, Top Scorers, League Forms].
	with tab2:
		def top_teams_func():
			top_teams_section = TopTeamsSection(teams_df)
			with st.container():
				top_teams_section.display()

		def point_progression_func():
			point_progression_section = PointProgressionSection(teams_df, standings_df)
			with st.container():
				point_progression_section.display()

		# Fragment: the slider reruns in isolation instead of rerunning the app.
		@st.experimental_fragment
		def point_slider_func():
			point_slider_section = PointSliderSection(standings_df)
			with st.container():
				point_slider_section.display()

		def league_forms_func():
			league_forms_section = LeagueFormsSection(teams_df)
			with st.container():
				league_forms_section.display()

		top_teams_func()
		point_progression_func()
		point_slider_func()
		league_forms_func()

	# --------- Player Statistics Tab ---------
	# Tab 3 holds the following sections: [Player Statistics].
	with tab3:

		def top_scorers_func():
			top_scorers_section = TopScorersSection(top_scorers_df)
			with st.container():
				top_scorers_section.display()

		@st.experimental_fragment
		def injuries_func():
			injuries_section = InjuriesSection(injuries_df)
			with st.container():
				injuries_section.display()

		top_scorers_func()
		injuries_func()

	# --------- Fixtures Tab ---------
	# Tab 4 holds the following sections: [Fixtures].
	with tab4:
		# Fixtures section.
		fixtures_section.display()

	# --------- Squads Tab ---------
	# Tab 5 holds the following sections: [Squads].
	with tab5:
		st.subheader("Team Squads")
		st.markdown("**Note:** Double click on the player's photo to expand it.")

		@st.experimental_fragment
		def squads_func():
			squads = SquadSection(squads_df)

			col1, _, _ = st.columns(3)
			with col1:
				option = st.selectbox(
					index=None,
					label="Use the dropdown menu to select a team:",
					options=squads.teams,
					placeholder="Please make a selection",
				)
				if option:
					selected_team_logo = teams_df[teams_df["team"] == option]["logo"].iloc[0]
					st.image(selected_team_logo, width=75)
					squads.display(option)

		squads_func()

	# --------- News Tab ---------
	# Tab 6 holds the following sections: [News, Highlights].
	with tab6:
		with st.container():
			NewsSection(news_df).display()

		with st.container():
			# Build the section once and render both rows from the same instance
			# (previously two separate HighlightsSection objects were created).
			highlights_section = HighlightsSection(highlights_df)
			highlights_section.display_first_row()
			highlights_section.display_second_row()

	# --------- Stock Tab ---------
	# Tab 7 holds the following sections: [Stock Price].
	with tab7:
		stock_section = StockSection(stocks_df)
		stock_section.display()

	# --------- About Tab ---------
	# Tab 8 holds the following sections: [About].
	with tab8:
		# About
		about_section = AboutSection()
		about_section.display()

		# Social Media
		social_media_section = SocialMediaSection()
		social_media_section.display()
392 |
393 |
# Entry point when executed directly (e.g. `streamlit run streamlit_app.py`).
if __name__ == "__main__":
	streamlit_app()
396 |
--------------------------------------------------------------------------------
/terraform/installations.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# VM bootstrap script: installs Python tooling, clones the repo, and registers
# the Prefect deployment. Executed by the remote-exec provisioner in
# terraform/main.tf.

# Purging man-db first — presumably to speed up the apt installs below
# (no manpage re-indexing); confirm this is still desired.
sudo apt-get remove -y --purge man-db
sudo apt-get install -y python3-pip
sudo apt-get install -y python3-venv
sudo apt install -y git
git clone https://github.com/digitalghost-dev/premier-league.git
cd premier-league
python3 -m venv env
source env/bin/activate
# NOTE(review): terraform/req.txt does not appear in the repo tree — confirm
# the requirements file path before relying on this step.
pip install -r terraform/req.txt
prefect --version
sleep 10
# NOTE(review): prefect/flows.py defines no `premier_league_flow`; this
# entrypoint looks stale (flows.py exposes statistics, news, highlights,
# stocks, squads, injuries, current_round) — confirm and update.
prefect deployment build -n premier-league-etl -p premier-league-work-pool -q premier-league-work-queue prefect/flows.py:premier_league_flow
prefect work-pool create 'premier-league-work-pool' --type prefect-agent
prefect deployment apply premier_league_flow-deployment.yaml
--------------------------------------------------------------------------------
/terraform/main.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_providers {
3 | google = {
4 | source = "hashicorp/google"
5 | version = "4.51.0"
6 | }
7 | }
8 | }
9 |
10 | # ------ Variables ------
11 | variable "email_address" {
12 | type = string
13 | description = "email address"
14 | }
15 |
16 | variable "user" {
17 | type = string
18 | description = "user name"
19 | }
20 |
21 | variable "public_key_path" {
22 | type = string
23 | description = "path to public key"
24 | }
25 |
26 | variable "private_key_path" {
27 | type = string
28 | description = "path to private key"
29 | }
30 |
31 | variable "project_id" {
32 | type = string
33 | description = "project id"
34 | }
35 |
# ------ Compute Engine Resource ------
# Small Debian VM that hosts the Prefect agent. After boot, the remote-exec
# provisioner below runs terraform/installations.sh over SSH.
resource "google_compute_instance" "premier-league-vm" {
  name         = "premier-league-vm"
  machine_type = "e2-small"
  zone         = "us-central1-a"
  project      = var.project_id
  tags         = ["premier-league", "virtual-machine", "http", "https"]

  # SSH access for the provisioner user via the supplied public key.
  metadata = {
    ssh-keys = "${var.user}:${file(var.public_key_path)}"
  }

  boot_disk {
    initialize_params {
      image = "debian-cloud/debian-11"
    }
  }

  network_interface {
    network            = "https://www.googleapis.com/compute/v1/projects/${var.project_id}/global/networks/default"
    subnetwork         = "https://www.googleapis.com/compute/v1/projects/${var.project_id}/regions/us-central1/subnetworks/default"
    subnetwork_project = var.project_id
    access_config {
      // Ephemeral public IP
    }
  }

  # Broad cloud-platform scope; effective permissions come from the service
  # account's IAM roles.
  service_account {
    email  = var.email_address
    scopes = ["cloud-platform"]
  }

  provisioner "remote-exec" {
    connection {
      type        = "ssh"
      user        = var.user
      host        = google_compute_instance.premier-league-vm.network_interface[0].access_config[0].nat_ip
      private_key = file(var.private_key_path)
    }
    # NOTE(review): "./installations.sh" resolves relative to the directory
    # Terraform is run from — presumably terraform/; confirm.
    script = "./installations.sh"
    # inline = [

    # ]
  }
}
--------------------------------------------------------------------------------
/tests/requirements-tests.txt:
--------------------------------------------------------------------------------
1 | mypy==1.8.0
2 | pre-commit==3.6.0
3 | pytest==7.4.4
4 | pytest-cov==4.1.0
5 | ruff==0.1.13
--------------------------------------------------------------------------------
/tests/test_streamlit_app.py:
--------------------------------------------------------------------------------
1 | from streamlit.testing.v1 import AppTest
2 |
# Build and run the Streamlit app once at import time; every test below
# inspects this single shared run.
at = AppTest.from_file("streamlit_app.py", default_timeout=1000)
at.run()
5 |
6 |
def test_main_page():
	# Title, dynamic round subheader, and the last subheader rendered by the
	# Social Media section on the About tab.
	assert at.title[0].value == "Premier League Statistics / 2023-24"
	assert "Current Round: " in at.subheader[0].value
	assert at.subheader[-1].value == "Social"


# Standings & Overview
def test_tab_one():
	assert at.tabs[0].subheader[0].value == "League Statistics"
	assert at.tabs[0].subheader[1].value == "Current Standings"
	assert at.tabs[0].subheader[2].value == "Location of Stadiums"


# Teams Statistics
def test_tab_two():
	assert at.tabs[1].subheader[0].value == "Top 5 Teams"
	assert at.tabs[1].subheader[1].value == "Point Progression throughout the Season"
	assert at.tabs[1].subheader[2].value == "Points per Team:"
	assert at.tabs[1].subheader[3].value == "Forms for the Rest of the League"


# Players & Injuries Statistics
def test_tab_three():
	assert at.tabs[2].subheader[0].value == "Top 5 Scorers"

	# NOTE(review): the markdown indices below step by 6 per scorer column —
	# presumably each column renders two extra markdown elements (photo/name)
	# before the four stat lines; brittle if the section layout changes.
	# Column 1
	assert "Goals:" in at.tabs[2].markdown[2].value
	assert "Assists:" in at.tabs[2].markdown[3].value
	assert "Team:" in at.tabs[2].markdown[4].value
	assert "Nationality:" in at.tabs[2].markdown[5].value

	# Column 2
	assert "Goals:" in at.tabs[2].markdown[8].value
	assert "Assists:" in at.tabs[2].markdown[9].value
	assert "Team:" in at.tabs[2].markdown[10].value
	assert "Nationality:" in at.tabs[2].markdown[11].value

	# Column 3
	assert "Goals:" in at.tabs[2].markdown[14].value
	assert "Assists:" in at.tabs[2].markdown[15].value
	assert "Team:" in at.tabs[2].markdown[16].value
	assert "Nationality:" in at.tabs[2].markdown[17].value

	# Column 4
	assert "Goals:" in at.tabs[2].markdown[20].value
	assert "Assists:" in at.tabs[2].markdown[21].value
	assert "Team:" in at.tabs[2].markdown[22].value
	assert "Nationality:" in at.tabs[2].markdown[23].value

	# Column 5
	assert "Goals:" in at.tabs[2].markdown[26].value
	assert "Assists:" in at.tabs[2].markdown[27].value
	assert "Team:" in at.tabs[2].markdown[28].value
	assert "Nationality:" in at.tabs[2].markdown[29].value

	assert at.tabs[2].subheader[1].value == "Recent Injuries"


# Fixtures
def test_tab_four():
	assert at.tabs[3].subheader[0].value == "Fixtures"


# Squads
def test_tab_five():
	assert at.tabs[4].subheader[0].value == "Team Squads"
	assert at.tabs[4].markdown[0].value == "**Note:** Double click on the player's photo to expand it."
	assert at.tabs[4].selectbox[0].label == "Use the dropdown menu to select a team:"
	assert at.tabs[4].selectbox[0].placeholder == "Please make a selection"
	# The 20 clubs of the 2023-24 season, as exposed by SquadSection.teams.
	assert at.tabs[4].selectbox[0].options == [
		"Arsenal",
		"Aston Villa",
		"Bournemouth",
		"Brentford",
		"Brighton",
		"Burnley",
		"Chelsea",
		"Crystal Palace",
		"Everton",
		"Fulham",
		"Liverpool",
		"Luton",
		"Manchester City",
		"Manchester United",
		"Newcastle",
		"Nottingham Forest",
		"Sheffield Utd",
		"Tottenham",
		"West Ham",
		"Wolves",
	]


# News & Highlights
def test_tab_six():
	assert at.tabs[5].header[0].value == "Recent News"
	assert at.tabs[5].header[1].value == "Recent Highlights"


# MANU Stock Price
def test_tab_seven():
	assert at.tabs[6].subheader[0].value == "MANU - Stock Price"


# About
def test_tab_eight():
	assert at.tabs[7].subheader[0].value == "About"
114 |
--------------------------------------------------------------------------------