├── .dockerignore ├── .github └── workflows │ └── ci_streamlit.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .prefectignore ├── CHANGELOG.md ├── Dockerfile ├── README.md ├── api ├── Dockerfile ├── api.go ├── go.mod └── go.sum ├── components ├── __init__.py ├── about_section.py ├── connections.py ├── fixtures_section.py ├── highlights_section.py ├── injuries_section.py ├── league_form_section.py ├── news_section.py ├── point_progression_section.py ├── point_slider_section.py ├── social_media_section.py ├── squads_section.py ├── stadiums_map_section.py ├── stock_section.py ├── top_scorers_section.py └── top_teams_section.py ├── dbt_prod └── models │ ├── schema.yml │ └── stocks.sql ├── etl ├── README.md ├── bigquery │ ├── __init__.py │ ├── current_round.py │ ├── highlights.py │ ├── injuries.py │ ├── news.py │ ├── squads.py │ ├── stadiums.py │ ├── standings.py │ ├── teams.py │ └── top_scorers.py ├── cloud_functions │ ├── standings_transfer.py │ └── top_scorers_transfer.py ├── firestore │ └── fixtures.py ├── postgres │ └── stock.py └── requirements-data.txt ├── monitoring ├── docker-compose.yml ├── postgres_exporter.yml └── prometheus.yml ├── prefect ├── data_quality_scan.py ├── deployments │ ├── current_round-deployment.yaml │ ├── highlights-deployment.yaml │ ├── injuries-deployment.yaml │ ├── news-deployment.yaml │ ├── run_soda_scan-deployment.yaml │ ├── squads-deployment.yaml │ ├── statistics-deployment.yaml │ └── stocks-deployment.yaml └── flows.py ├── pyproject.toml ├── requirements.txt ├── soda ├── checks.yaml └── configuration.yaml ├── streamlit_app.py ├── terraform ├── installations.sh └── main.tf └── tests ├── requirements-tests.txt └── test_streamlit_app.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Files 2 | **.json 3 | **.DS_Store 4 | .streamlit/ 5 | .gitignore 6 | .pylintrc 7 | README.md 8 | 9 | # Folders 10 | testing/ 11 | .github/ 12 | api/ 13 | etl/ 14 | 15 | # Terraform 16 | terraform/ 17 
| 18 | # Cache 19 | .mypy_cache/ 20 | .ruff_cache/ 21 | components/__pycache__/ 22 | 23 | # Byte-compiled / optimized / DLL files 24 | **/__pycache__ 25 | *.py[cod] 26 | *$py.class 27 | 28 | # Environments 29 | .env 30 | .venv 31 | env/ 32 | venv/ 33 | ENV/ 34 | env.bak/ 35 | venv.bak/ -------------------------------------------------------------------------------- /.github/workflows/ci_streamlit.yaml: -------------------------------------------------------------------------------- 1 | name: CI/CD - Streamlit Image 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | logLevel: 7 | description: 'Log level' 8 | required: true 9 | default: 'warning' 10 | type: choice 11 | options: 12 | - info 13 | 14 | push: 15 | paths-ignore: 16 | - '.github/**' 17 | - 'api/**' 18 | - 'dbt_prod/**' 19 | - 'etl/**' 20 | - 'monitoring/**' 21 | - 'prefect/**' 22 | - 'soda/**' 23 | - 'terraform/**' 24 | - 'tests/**' 25 | - 'CHANGELOG.md' 26 | - '.dockerignore' 27 | - '.gitignore' 28 | - '.prefectignore' 29 | - '.pre-commit-config.yaml' 30 | - '.streamlit' 31 | - 'pyproject.toml' 32 | - 'README.md' 33 | branches: 34 | - "main" 35 | 36 | env: 37 | GAR_LOCATION: us-central1 38 | VERSION_NUMBER: '2.17.1' 39 | REGISTRY_IMAGE: digitalghostdev/premier-league 40 | 41 | jobs: 42 | 43 | snyk: 44 | runs-on: ubuntu-22.04 45 | 46 | permissions: 47 | actions: read 48 | contents: read 49 | security-events: write 50 | 51 | steps: 52 | 53 | - name: Checkout 54 | uses: actions/checkout@v4 55 | 56 | - name: Run Snyk 57 | uses: snyk/actions/python-3.10@master 58 | continue-on-error: true 59 | env: 60 | SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} 61 | with: 62 | args: --sarif-file-output=snyk.sarif --skip-unresolved=true 63 | 64 | - name: Upload Result to GitHub Code Scanning 65 | uses: github/codeql-action/upload-sarif@v2 66 | with: 67 | sarif_file: snyk.sarif 68 | 69 | pytest: 70 | permissions: 71 | contents: 'read' 72 | id-token: 'write' 73 | 74 | runs-on: ubuntu-22.04 75 | needs: [snyk] 76 | if: | 77 | always() 
&& 78 | (needs.snyk.result == 'success') 79 | steps: 80 | - name: Checkout 81 | uses: actions/checkout@v4 82 | 83 | - name: Set up Python 84 | uses: 'actions/setup-python@v5.0.0' 85 | with: 86 | python-version: 3.12.0 87 | 88 | - name: Install Dependencies 89 | run: | 90 | python -m pip install --upgrade pip 91 | pip install -r requirements.txt 92 | pip install pytest==7.4.3 93 | pip install pytest-cov==4.1.0 94 | 95 | - name: Google Auth 96 | id: auth 97 | uses: 'google-github-actions/auth@v2' 98 | with: 99 | token_format: 'access_token' 100 | workload_identity_provider: '${{ secrets.WIF_PROVIDER }}' 101 | service_account: '${{ secrets.WIF_SERVICE_ACCOUNT_DATABASE }}' 102 | project_id: '${{ secrets.PROJECT_ID }}' 103 | 104 | - name: Run Tests 105 | run: | 106 | pytest --cov=streamlit_app tests/ -v 107 | 108 | build-streamlit-image: 109 | runs-on: ubuntu-22.04 110 | needs: [pytest] 111 | if: | 112 | always() && 113 | (needs.pytest.result == 'success') 114 | 115 | steps: 116 | 117 | - name: Checkout 118 | uses: actions/checkout@v4 119 | 120 | - name: Set up Docker Buildx 121 | uses: 'docker/setup-buildx-action@v3.0.0' 122 | 123 | - name: Prepare Docker Build Context 124 | run: | 125 | mkdir docker-context 126 | cp ./.dockerignore docker-context 127 | cp ./Dockerfile docker-context 128 | cp -r ./components docker-context/components 129 | cp ./streamlit_app.py docker-context 130 | cp ./requirements.txt docker-context 131 | 132 | - name: Build and Export 133 | uses: 'docker/build-push-action@v5.0.0' 134 | with: 135 | context: ./docker-context 136 | tags: streamlit:${{ env.VERSION_NUMBER }} 137 | outputs: type=docker,dest=/tmp/streamlit.tar 138 | 139 | - name: Upload Artifact 140 | uses: actions/upload-artifact@v4 141 | with: 142 | name: streamlit 143 | path: /tmp/streamlit.tar 144 | 145 | push-artifact-registry: 146 | permissions: 147 | contents: 'read' 148 | id-token: 'write' 149 | 150 | runs-on: ubuntu-22.04 151 | needs: [build-streamlit-image] 152 | if: | 153 | 
always() && 154 | (needs.build-streamlit-image.result == 'success') 155 | 156 | steps: 157 | 158 | - name: Checkout 159 | uses: actions/checkout@v4 160 | 161 | - name: Set up Docker Buildx 162 | uses: 'docker/setup-buildx-action@v3.0.0' 163 | 164 | - name: Download Artifact 165 | uses: actions/download-artifact@v4 166 | with: 167 | name: streamlit 168 | path: /tmp 169 | 170 | - name: Google Auth 171 | id: auth 172 | uses: 'google-github-actions/auth@v2' 173 | with: 174 | token_format: 'access_token' 175 | workload_identity_provider: '${{ secrets.WIF_PROVIDER }}' 176 | service_account: '${{ secrets.WIF_SERVICE_ACCOUNT_ARTIFACT_REGISTRY }}' 177 | project_id: '${{ secrets.PROJECT_ID }}' 178 | 179 | - name: Docker Auth 180 | id: docker-auth 181 | uses: 'docker/login-action@v3' 182 | with: 183 | username: 'oauth2accesstoken' 184 | password: '${{ steps.auth.outputs.access_token }}' 185 | registry: '${{ env.GAR_LOCATION }}-docker.pkg.dev' 186 | 187 | - name: Load Image 188 | run: | 189 | docker load --input /tmp/streamlit.tar 190 | docker image ls -a 191 | 192 | - name: Tag Image 193 | run: | 194 | docker tag \ 195 | streamlit:${{ env.VERSION_NUMBER }} \ 196 | "${{ env.GAR_LOCATION }}-docker.pkg.dev/${{ secrets.PROJECT_ID }}/${{ secrets.REGISTRY_REPO }}/streamlit:${{ env.VERSION_NUMBER }}" 197 | 198 | - name: Push Image 199 | run: | 200 | docker push \ 201 | "${{ env.GAR_LOCATION }}-docker.pkg.dev/${{ secrets.PROJECT_ID }}/${{ secrets.REGISTRY_REPO }}/streamlit:${{ env.VERSION_NUMBER }}" 202 | 203 | deploy-streamlit-image: 204 | permissions: 205 | contents: 'read' 206 | id-token: 'write' 207 | 208 | runs-on: ubuntu-22.04 209 | needs: [push-artifact-registry] 210 | if: | 211 | always() && 212 | (needs.push-artifact-registry.result == 'success') 213 | 214 | steps: 215 | 216 | - name: Checkout 217 | uses: actions/checkout@v4 218 | 219 | - name: Google Auth 220 | id: auth 221 | uses: 'google-github-actions/auth@v2' 222 | with: 223 | token_format: 'access_token' 224 | 
workload_identity_provider: '${{ secrets.WIF_PROVIDER }}' 225 |           service_account: '${{ secrets.WIF_SERVICE_ACCOUNT_CLOUD_RUN }}' 226 |           project_id: '${{ secrets.PROJECT_ID }}' 227 | 228 |       - name: Deploy Image 229 |         id: 'deploy' 230 |         uses: 'google-github-actions/deploy-cloudrun@v2.0.0' 231 |         with: 232 |           service: streamlit 233 |           image: "${{ env.GAR_LOCATION }}-docker.pkg.dev/${{ secrets.PROJECT_ID }}/${{ secrets.REGISTRY_REPO }}/streamlit:${{ env.VERSION_NUMBER }}" 234 |           flags: "--service-account=${{ secrets.WIF_SERVICE_ACCOUNT_CLOUD_RUN }} --max-instances=3" 235 |           no_traffic: false 236 | 237 |   set-latest-revision: 238 |     permissions: 239 |       contents: 'read' 240 |       id-token: 'write' 241 | 242 |     runs-on: ubuntu-22.04 243 |     needs: [deploy-streamlit-image] 244 |     if: | 245 |       always() && 246 |       (needs.deploy-streamlit-image.result == 'success') 247 | 248 |     steps: 249 | 250 |       - name: Checkout 251 |         uses: actions/checkout@v4 252 | 253 |       - name: Google Auth 254 |         id: auth 255 |         uses: 'google-github-actions/auth@v2' 256 |         with: 257 |           token_format: 'access_token' 258 |           workload_identity_provider: '${{ secrets.WIF_PROVIDER }}' 259 |           service_account: '${{ secrets.WIF_SERVICE_ACCOUNT_CLOUD_RUN }}' 260 |           project_id: '${{ secrets.PROJECT_ID }}' 261 | 262 |       - name: 'Set up Cloud SDK' 263 |         uses: 'google-github-actions/setup-gcloud@v2' 264 | 265 |       - name: 'Set Latest Revision' 266 |         run: | 267 |           gcloud run services update-traffic streamlit \ 268 |             --to-latest \ 269 |             --region=${{ env.GAR_LOCATION }} 270 | 271 |   architecture-build: 272 |     runs-on: ubuntu-22.04 273 |     needs: [pytest] 274 |     if: | 275 |       always() && 276 |       (needs.pytest.result == 'success') 277 | 278 |     strategy: 279 |       fail-fast: false 280 |       matrix: 281 |         platform: [linux/amd64, linux/arm64] 282 | 283 |     steps: 284 | 285 |       - name: Checkout 286 |         uses: actions/checkout@v4 287 | 288 |       - name: Docker Meta 289 |         id: meta 290 |         uses: 'docker/metadata-action@v5.0.0' 291 |         with: 292 |           images: ${{ env.REGISTRY_IMAGE }} 293 | 294 |       - 
name: Set up QEMU 295 | uses: 'docker/setup-qemu-action@v3' 296 | 297 | - name: Set up Docker Buildx 298 | uses: 'docker/setup-buildx-action@v3.0.0' 299 | 300 | - name: Login to Docker Hub 301 | uses: 'docker/login-action@v3' 302 | with: 303 | username: ${{ secrets.DOCKERHUB_USERNAME }} 304 | password: ${{ secrets.DOCKERHUB_TOKEN }} 305 | 306 | - name: Build and Push by Digest 307 | id: build 308 | uses: 'docker/build-push-action@v5.0.0' 309 | with: 310 | context: . 311 | platforms: ${{ matrix.platform }} 312 | labels: ${{ steps.meta.outputs.labels }} 313 | outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true 314 | 315 | - name: Export Digest 316 | run: | 317 | mkdir -p /tmp/digests 318 | digest="${{ steps.build.outputs.digest }}" 319 | touch "/tmp/digests/${digest#sha256:}" 320 | 321 | - name: Upload Digest for AMD64 322 | if: matrix.platform == 'linux/amd64' 323 | uses: actions/upload-artifact@v4 324 | with: 325 | name: digests-amd64 326 | path: /tmp/digests/* 327 | if-no-files-found: error 328 | retention-days: 1 329 | 330 | - name: Upload Digest for ARM64 331 | if: matrix.platform == 'linux/arm64' 332 | uses: actions/upload-artifact@v4 333 | with: 334 | name: digests-arm64 335 | path: /tmp/digests/* 336 | if-no-files-found: error 337 | retention-days: 1 338 | 339 | 340 | create-manifest-and-push: 341 | runs-on: ubuntu-22.04 342 | needs: 343 | - architecture-build 344 | 345 | steps: 346 | 347 | - name: Download Digests 348 | uses: actions/download-artifact@v4 349 | with: 350 | pattern: digests-* 351 | path: /tmp/digests 352 | merge-multiple: true 353 | 354 | - name: Set up Docker Buildx 355 | uses: 'docker/setup-buildx-action@v3.0.0' 356 | 357 | - name: Docker meta 358 | id: meta 359 | uses: 'docker/metadata-action@v5.0.0' 360 | with: 361 | images: ${{ env.REGISTRY_IMAGE }} 362 | tags: ${{ env.VERSION_NUMBER }} 363 | 364 | - name: Login to Docker Hub 365 | uses: 'docker/login-action@v3' 366 | with: 367 | username: 
${{ secrets.DOCKERHUB_USERNAME }} 368 | password: ${{ secrets.DOCKERHUB_TOKEN }} 369 | 370 | - name: Create Manifest List and Push 371 | working-directory: /tmp/digests 372 | run: | 373 | docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ 374 | $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *) 375 | 376 | - name: Inspect image 377 | run: | 378 | docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }} 379 | 380 | syft: 381 | permissions: 382 | contents: 'read' 383 | id-token: 'write' 384 | 385 | runs-on: ubuntu-22.04 386 | needs: [build-streamlit-image] 387 | if: | 388 | always() && 389 | (needs.build-streamlit-image.result == 'success') 390 | 391 | steps: 392 | 393 | - name: Checkout 394 | uses: actions/checkout@v4 395 | 396 | - name: Set up Docker Buildx 397 | uses: 'docker/setup-buildx-action@v3.0.0' 398 | 399 | - name: Download Artifact 400 | uses: actions/download-artifact@v4 401 | with: 402 | name: streamlit 403 | path: /tmp 404 | 405 | - name: Load Image 406 | run: | 407 | docker load --input /tmp/streamlit.tar 408 | docker image ls -a 409 | 410 | - name: Create and Upload SBOM 411 | uses: anchore/sbom-action@v0 412 | with: 413 | image: streamlit:${{ env.VERSION_NUMBER }} 414 | artifact-name: streamlit-sbom-${{ env.VERSION_NUMBER }}.spdx.json 415 | upload-artifact: true 416 | 417 | grype: 418 | permissions: 419 | actions: read 420 | contents: read 421 | security-events: write 422 | 423 | runs-on: ubuntu-22.04 424 | needs: [syft] 425 | 426 | if: | 427 | always() && 428 | (needs.syft.result == 'success') 429 | 430 | steps: 431 | 432 | - name: Download SBOM 433 | uses: actions/download-artifact@v3 434 | with: 435 | name: streamlit-sbom-${{ env.VERSION_NUMBER }}.spdx.json 436 | 437 | - name: Scan SBOM 438 | uses: anchore/scan-action@v3 439 | id: scan 440 | with: 441 | sbom: streamlit-sbom-${{ env.VERSION_NUMBER }}.spdx.json 442 | fail-build: false 443 | 
output-format: sarif 444 | severity-cutoff: critical 445 | 446 | - name: Upload SARIF Report 447 | uses: github/codeql-action/upload-sarif@v2 448 | with: 449 | sarif_file: ${{ steps.scan.outputs.sarif }} 450 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Files 2 | **.json 3 | **.DS_store 4 | .streamlit/secrets.toml 5 | 6 | # Testing files and directories 7 | testing/ 8 | test.py 9 | 10 | # Cache files and directories 11 | **.*cache 12 | 13 | # Terraform secrets and state files 14 | terraform/.terraform* 15 | terraform/terraform* 16 | terraform/cloud_sql/.terraform* 17 | terraform/cloud_sql/terraform* 18 | 19 | # Monitoring 20 | monitoring/values* 21 | monitoring/grafana_storage/ 22 | 23 | # Soda configuration files 24 | soda/configuration.yaml 25 | 26 | # dbt configuration files 27 | dbt_prod/target/ 28 | dbt_prod/dbt_packages/ 29 | dbt_prod/logs/ 30 | 31 | # Byte-compiled / optimized / DLL files 32 | **/__pycache__ 33 | *.py[cod] 34 | *$py.class 35 | 36 | # C extensions 37 | *.so 38 | 39 | # Distribution / packaging 40 | .Python 41 | build/ 42 | develop-eggs/ 43 | dist/ 44 | downloads/ 45 | eggs/ 46 | .eggs/ 47 | lib/ 48 | lib64/ 49 | parts/ 50 | sdist/ 51 | var/ 52 | wheels/ 53 | share/python-wheels/ 54 | *.egg-info/ 55 | .installed.cfg 56 | *.egg 57 | MANIFEST 58 | 59 | # PyInstaller 60 | # Usually these files are written by a python script from a template 61 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
62 | *.manifest 63 | *.spec 64 | 65 | # Installer logs 66 | pip-log.txt 67 | pip-delete-this-directory.txt 68 | 69 | # Unit test / coverage reports 70 | htmlcov/ 71 | .tox/ 72 | .nox/ 73 | .coverage 74 | .coverage.* 75 | .cache 76 | nosetests.xml 77 | coverage.xml 78 | *.cover 79 | *.py,cover 80 | .hypothesis/ 81 | .pytest_cache/ 82 | cover/ 83 | 84 | # Translations 85 | *.mo 86 | *.pot 87 | 88 | # Django stuff: 89 | *.log 90 | local_settings.py 91 | db.sqlite3 92 | db.sqlite3-journal 93 | 94 | # Flask stuff: 95 | instance/ 96 | .webassets-cache 97 | 98 | # Scrapy stuff: 99 | .scrapy 100 | 101 | # Sphinx documentation 102 | docs/_build/ 103 | 104 | # PyBuilder 105 | .pybuilder/ 106 | target/ 107 | 108 | # Jupyter Notebook 109 | .ipynb_checkpoints 110 | 111 | # IPython 112 | profile_default/ 113 | ipython_config.py 114 | .pdm.toml 115 | 116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 117 | __pypackages__/ 118 | 119 | # Celery stuff 120 | celerybeat-schedule 121 | celerybeat.pid 122 | 123 | # SageMath parsed files 124 | *.sage.py 125 | 126 | # Environments 127 | .env 128 | .venv 129 | env/ 130 | venv*/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | 153 | # pytype static type analyzer 154 | .pytype/ 155 | 156 | # Cython debug symbols 157 | cython_debug/ -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/PyCQA/bandit 3 | rev: '1.7.5' 4 | hooks: 5 | - id: bandit 6 | args: ["-c", "pyproject.toml"] 7 | additional_dependencies: [".[toml]"] 8 | 9 | - repo: 
https://github.com/pre-commit/mirrors-mypy 10 | rev: 'v1.4.1' 11 | hooks: 12 | - id: mypy 13 | 14 | - repo: https://github.com/astral-sh/ruff-pre-commit 15 | # Ruff version. 16 | rev: v0.1.6 17 | hooks: 18 | - id: ruff-format 19 | 20 | - repo: https://github.com/astral-sh/ruff-pre-commit 21 | rev: v0.1.6 22 | hooks: 23 | - id: ruff 24 | 25 | exclude: prefect/flows.py -------------------------------------------------------------------------------- /.prefectignore: -------------------------------------------------------------------------------- 1 | # prefect artifacts 2 | .prefectignore 3 | 4 | # python artifacts 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | *.egg-info/ 9 | *.egg 10 | 11 | # Type checking artifacts 12 | .mypy_cache/ 13 | .dmypy.json 14 | dmypy.json 15 | .pyre/ 16 | 17 | # IPython 18 | profile_default/ 19 | ipython_config.py 20 | *.ipynb_checkpoints/* 21 | 22 | # Environments 23 | .python-version 24 | .env 25 | .venv 26 | env/ 27 | venv/ 28 | 29 | # MacOS 30 | .DS_Store 31 | 32 | # Dask 33 | dask-worker-space/ 34 | 35 | # Editors 36 | .idea/ 37 | .vscode/ 38 | 39 | # VCS 40 | .git/ 41 | .hg/ 42 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | This change log provides version history for the Streamlit Dashboard. 3 | 4 | View the Streamlit dasboard: https://streamlit.digitalghost.dev/ 5 | 6 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 7 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 8 | 9 | * **MAJOR:** Any changes to the backend infrastructure that requires new methods of moving data that won't work with the previous architecture, mainly with the addition of new databases or data sources. 10 | * **MINOR:** Any changes to the Streamlit dashboard that adds a new interaction/feature or removal of one. 
11 | * **PATCH:** Any changes that fix bugs, typos or small edits. 12 | 13 | # Update History 14 | 15 | ## 2.17.1 | 2024-04-27 16 | 17 | ### Changed 18 | * [#184](https://github.com/digitalghost-dev/premier-league/issues/184) - Changed the calling of the dashboard's different components to using the new `@st.experimental_fragment` decorator in Streamlit's `1.33.0` version. 19 | * [#185](https://github.com/digitalghost-dev/premier-league/issues/185) - Changed the News section into an importable `class`. 20 | 21 | --- 22 | 23 | ## [2.17.0] | 2024-03-17 24 | 25 | ### Added 26 | * [#183](https://github.com/digitalghost-dev/premier-league/issues/183) - Added a new *Recent Injuries* section under *Players & Injuries* tab. 27 | 28 | ### Changed 29 | * [#182](https://github.com/digitalghost-dev/premier-league/issues/182) - Changed the tab name for *Players Statistics* to *Players & Injuries*. 30 | 31 | --- 32 | 33 | ## [2.16.1] | 2024-03-01 34 | 35 | ### Changed 36 | * [#181](https://github.com/digitalghost-dev/premier-league/issues/181) - Changed `components/connections.py` to use new dataset in BigQuery for team squads. 37 | 38 | --- 39 | 40 | ## [2.16.0] | 2024-02-11 41 | 42 | ### Added 43 | * [#179](https://github.com/digitalghost-dev/premier-league/issues/179) - Added a new tab that shows a stock chart for **MANU**, Manchester United's stock ticker. 44 | * [#180](https://github.com/digitalghost-dev/premier-league/issues/180) - Added a `st.info` and `st.warning` message to explain the tab with its functions and to explain that no data was found, respectively. 45 | 46 | --- 47 | 48 | ## [2.15.0] | 2024-01-28 49 | 50 | ### Added 51 | * [#165](https://github.com/digitalghost-dev/premier-league/issues/165) - Added each team's club icon to the **Squads** tab when a team is selected from the dropdown menu. 52 | * [#172](https://github.com/digitalghost-dev/premier-league/issues/172) - Added a new **Players Statistics** tab. 
53 | 54 | ### Changed 55 | * [#164](https://github.com/digitalghost-dev/premier-league/issues/164) - Changed the default value `st.selectbox` to `None` in the **Squads** tab. 56 | * [#168](https://github.com/digitalghost-dev/premier-league/issues/168) - Changed the `max_value` for each `st.dataframe` to programmatically calculate based on current max value in the DataFrame under the **League Statistics** section. 57 | * [#171](https://github.com/digitalghost-dev/premier-league/issues/171) - Changed line chart under **Point Progression throughout the Season** section to use plotly instead of Streamlit's built-in `st.line_chart` method. 58 | 59 | ### Removed 60 | * [#170](https://github.com/digitalghost-dev/premier-league/issues/168) - Removed `for` loop that previously generated the sections for **Goalkeepers**, **Defenders**, **Midfielders**, and **Attackers** under the **Squads** tab. 61 | * [#173](https://github.com/digitalghost-dev/premier-league/issues/173) - Removed `st.container` border from **Top 5 Teams** and **Top 5 Scorers** sections. 62 | 63 | --- 64 | 65 | ## [2.14.1] | 2024-01-25 66 | 67 | ### Changed 68 | * [#169](https://github.com/digitalghost-dev/premier-league/issues/154) - Changed the query for `components/connections.py` to reflect table schema changes for the standings `st.dataframe`. 69 | 70 | --- 71 | 72 | ## [2.14.0] | 2024-01-08 73 | 74 | ### Added 75 | * [#154](https://github.com/digitalghost-dev/premier-league/issues/154) - Added a new tab called **Squads** that displays the current squad for each team in the league. 76 | 77 | ### Changed 78 | * [#153](https://github.com/digitalghost-dev/premier-league/issues/153) - Changed the Fixtures `st.header()` to `st.subheader()`. 79 | * [#155](https://github.com/digitalghost-dev/premier-league/issues/155) - Changed the About `st.header()` to `st.subheader()`. 
80 | 81 | --- 82 | 83 | ## [2.13.0] | 2023-12-19 84 | 85 | ### Added 86 | * [#148](https://github.com/digitalghost-dev/premier-league/issues/148) - Added a `st.header` titled **Fixtures** to the fixtures tab. 87 | * [#146](https://github.com/digitalghost-dev/premier-league/issues/147) - Added a new section that shows highlights using the YouTube API under the **News & Highlights** tab. 88 | 89 | ### Changed 90 | * [#149](https://github.com/digitalghost-dev/premier-league/issues/149) - Changed the current `st.subheader` to `st.header` on the **About** tab. 91 | * [#147](https://github.com/digitalghost-dev/premier-league/issues/147) - Changed the **News** tab to **News & Highlights** to reflect the new section that was added. 92 | 93 | --- 94 | 95 | ## [2.12.1] | 2023-12-12 96 | 97 | ### Fixed 98 | * [#144](https://github.com/digitalghost-dev/premier-league/issues/144) - Fixed the `st.line_chart` **Point Progression** section to display the correct column for the legend. 99 | 100 | --- 101 | 102 | ## [2.12.0] | 2023-12-11 103 | 104 | ### Added 105 | * [#138](https://github.com/digitalghost-dev/premier-league/issues/138) - Added borders around the **Top 5 Teams** and **Top 5 Scorers** `st.container` sections. 106 | * [#125](https://github.com/digitalghost-dev/premier-league/issues/125) - Added a **Games Played** column to the `st.dataframe` **Standings** table. 107 | 108 | ### Changed 109 | * [#143](https://github.com/digitalghost-dev/premier-league/issues/143) - Changed the position of the **Points** column in the `st.dataframe` **Standings** table to be the second column. 110 | 111 | --- 112 | 113 | ## [2.11.5] | 2023-12-02 114 | 115 | ### Changed 116 | * [#137](https://github.com/digitalghost-dev/premier-league/issues/137) - Changed the Points Progression section into an importable `class`. 117 | * [#136](https://github.com/digitalghost-dev/premier-league/issues/136) - Changed the Top Teams section into an importable `class`. 
118 | * [#135](https://github.com/digitalghost-dev/premier-league/issues/135) - Changed the League Forms section into an importable `class`. 119 | * [#134](https://github.com/digitalghost-dev/premier-league/issues/134) - Changed the Top Scorers section into an importable `class`. 120 | 121 | ### Fixed 122 | * [#139](https://github.com/digitalghost-dev/premier-league/issues/139) - Fixed the `st.subheader` typo in "Points Progression" section. 123 | 124 | --- 125 | 126 | ## [2.11.4] | 2023-12-01 127 | 128 | ### Fixed 129 | * [#128](https://github.com/digitalghost-dev/premier-league/issues/128) - Fixed the method of retrieving an item from a pandas DataFrame since the previous method will be deprecated. 130 | 131 | ### Removed 132 | * [#133](https://github.com/digitalghost-dev/premier-league/issues/133) - Removed dependency on a `.streamlit/secrets.toml` file for authentication. 133 | 134 | --- 135 | 136 | ## [2.11.3] | 2023-11-27 137 | 138 | ### Changed 139 | * [#127](https://github.com/digitalghost-dev/premier-league/issues/126) - Changed the maximum value for the `average_goals_df` `st.dataframe` and for the `win_streak_df` `st.dataframe`. 140 | * [#126](https://github.com/digitalghost-dev/premier-league/issues/126) - Changed the text for the win streak `st.dataframe()` to display *Biggest Win Streak* instead of *Current Win Streak*. 141 | * [#124](https://github.com/digitalghost-dev/premier-league/issues/124) - Changed the `social_media_section.display()` function to be called only once at the end of the `streamlit_app()` function instead of in each tab. 142 | * [#123](https://github.com/digitalghost-dev/premier-league/issues/123) - Changed the data connection functions into importable functions where all queries are now cached. 
143 | 144 | --- 145 | 146 | ## [2.11.2] | 2023-11-17 147 | 148 | ### Changed 149 | * [#122](https://github.com/digitalghost-dev/premier-league/issues/122) - Changed the `Dockerfile` to handle the theme configuration instead of using a `.streamlit/config.toml` file. 150 | * [#121](https://github.com/digitalghost-dev/premier-league/issues/121) - Changed the icon for the dashboard from an image hosted on GCP's Cloud Storage to using [SimpleIcon's Premier League icon](https://simpleicons.org/?q=premier+league). 151 | 152 | --- 153 | 154 | ## [2.11.1] | 2023-11-15 155 | 156 | ### Changed 157 | * [#119](https://github.com/digitalghost-dev/premier-league/issues/119) - Changed import names in `streamlit_app.py` to match new naming standard. 158 | * [#118](https://github.com/digitalghost-dev/premier-league/issues/118) - Changed file names under `components/` to end with `_section.py` for better clarity. 159 | * [#117](https://github.com/digitalghost-dev/premier-league/issues/117) - Changed the `firestore_pull()` function into an importable `class`. 160 | 161 | ### Removed 162 | * [#120](https://github.com/digitalghost-dev/premier-league/issues/120) - Removed the `toast()` function. 163 | 164 | --- 165 | 166 | ## [2.11.0] | 2023-11-03 167 | 168 | ### Added 169 | * [#112](https://github.com/digitalghost-dev/premier-league/issues/112) - Added an **About** tab to display information about the project and the author. 170 | 171 | ### Changed 172 | * [#114](https://github.com/digitalghost-dev/premier-league/issues/114) - Changed the `stadiums_map()` function into an importable `class`. 173 | 174 | ### Fixed 175 | * [#115](https://github.com/digitalghost-dev/premier-league/issues/115) - Fixed the SQL responsible for populating the `st.dataframe` for **Standings** to order rows by `rank`. 176 | 177 | ### Removed 178 | * [#113](https://github.com/digitalghost-dev/premier-league/issues/113) - Removed **Top Teams Movement** section from **Standings & Overview** tab. 
179 | 180 | --- 181 | 182 | ## [2.10.3] | 2023-10-26 183 | 184 | ### Changed 185 | * [#104](https://github.com/digitalghost-dev/premier-league/issues/104) - Changed the `social_media()` function into an importable `class` from the newly created `components/` directory. 186 | 187 | --- 188 | 189 | ## [2.10.2] | 2023-10-20 190 | 191 | ### Changed 192 | * [#103](https://github.com/digitalghost-dev/premier-league/issues/103) - Changed social media icons into static `.svg` files instead of using Font Awesome icons. 193 | 194 | --- 195 | 196 | ## [2.10.1] | 2023-09-10 197 | 198 | ### Fixed 199 | * [#91](https://github.com/digitalghost-dev/premier-league/issues/91) - Fixed the **News** tab to not error out when the table does not have at least 4 rows of data by implementing a `try/except` block. 200 | 201 | --- 202 | 203 | ## [2.10.0] | 2023-09-04 204 | 205 | ### Added 206 | * [#90](https://github.com/digitalghost-dev/premier-league/issues/90) - Added `st.subheader` under main header to display current round. 207 | * [#89](https://github.com/digitalghost-dev/premier-league/issues/89) - Added **News** tab to display the latest news from the Premier League using the [News API](https://newsapi.org/). 208 | * [#88](https://github.com/digitalghost-dev/premier-league/issues/88) - Added club logo to the Standings `st.dataframe`. 209 | 210 | --- 211 | 212 | ## [2.9.1] | 2023-08-27 213 | 214 | ### Fixed 215 | * [#87](https://github.com/digitalghost-dev/premier-league/issues/87) - Fixed the Standings column headers in the `st.dataframe` element to display proper column names instead of the SQL column names. 216 | 217 | --- 218 | 219 | ## [2.9.0] | 2023-08-20 220 | 221 | ### Added 222 | * [#75](https://github.com/digitalghost-dev/premier-league/issues/75) - Added Docker logo to social media section with link to Docker Hub repository. 
223 | * [#72](https://github.com/digitalghost-dev/premier-league/issues/72) - Added `st.toast` to display a more subtle message to the user that the page is loading and when the data has loaded. 224 | * [#78](https://github.com/digitalghost-dev/premier-league/issues/78) Added a new `st.dataframe` table to display current total metrics for the league (Goals Scored, Penalties Scored, and Clean Sheets). 225 | 226 | ### Changed 227 | * [#74](https://github.com/digitalghost-dev/premier-league/issues/74) - Changed page title to **"Streamlit: Premier League"**. 228 | * [#73](https://github.com/digitalghost-dev/premier-league/issues/73) - Changed tab names from **Standings** to **Standings & Overview** and **Statistics** to **Top Teams & Scorers**. 229 | * [#76](https://github.com/digitalghost-dev/premier-league/issues/76) - Changed `st.data_editor` to `st.dataframe` for displaying the statistic tables. 230 | 231 | ### Fixed 232 | * [#79](https://github.com/digitalghost-dev/premier-league/issues/79) - Fixed the `st.dataframe` tables under the **Top Teams Movement** section to display the correct data by sorting columns in descending order. 233 | 234 | ### Removed 235 | * Removed `st.spinner`. 236 | 237 | --- 238 | 239 | ## [2.8.0] | 2023-08-12 240 | 241 | ### Added 242 | * Added `st.spinner` to run when page loads to allow all tabs and data to load before a user can start navigating. 243 | * Added type annotations to `standings_table()` function to return `DeltaGenerator`. 244 | * Added type annotations to `stadiums_map()` function to return `DeltaGenerator`. 245 | 246 | ### Changed 247 | * Changed `st.subheader` from "Standings" to "Current Standings". 248 | * Changed `st.table` to `st.dataframe` for showing current standings. 249 | * Changed `st.map` location from *Playground* to *Standings* tab. 250 | * Changed the Social Media section to exist inside a function: `social_media()` and be called later in each tab. 
251 | * Changed the standings table code to exist inside a function: `standings_table()`. 252 | * Changed the map code to exist inside a function: `stadiums_map()`. 253 | * Changed the format of writing out the Top 5 Teams, Top 5 Scorers, and Forms for the Rest of the League sections to use a `for` loop instead of writing out each section individually. 254 | 255 | ### Fixed 256 | * Fixed the date to correctly display the suffix of the number *(i.e. 1st, 2nd, 3rd, etc.)* and to remove leading zeroes for single digit dates. 257 | 258 | ### Removed 259 | * Removed *Playground Tab*. 260 | * Removed `pages/` directory as this app will continue development as a single page. 261 | * Removed `style.css`, standings table is no longer stylized with CSS. 262 | * Removed `st.slider` as interactive Streamlit elements in dashboards with tabs seemed to currently be bugged. 263 | * Related issues: [#4996](https://github.com/streamlit/streamlit/issues/4996), [#6257](https://github.com/streamlit/streamlit/issues/6257), and [#7017](https://github.com/streamlit/streamlit/issues/7017). 264 | * `st.bar_chart` has also been removed due to this bug. 265 | * Removed `import os`, `import psycopg2`, `import plotly.graph_objects as go` as they are no longer needed. 266 | 267 | --- 268 | 269 | ## [2.7.1] | 2023-07-13 270 | 271 | ### Fixed 272 | * **Main Page**, *Standings Tab*: Fixed `iloc[X][X]` values to match the correct column to pull in correct data for the Top 5 Teams section. 273 | 274 | --- 275 | 276 | ## [2.7.0] | 2023-07-12 277 | 278 | ### **Added** 279 | * **Main Page**, *Standings Tab*: Added 3 `st.column_config.ProgressColumn` cards to display rankings of teams with the highest `penalties_scored`, `average_goals`, and `win_streak` during the season. 280 | 281 | ### **Changed** 282 | * **Main Pages**, *Standings Tab*: Changed the data values for `label` and `value` for the `st.metric` card. 
283 | 284 | --- 285 | 286 | ## [2.6.0] | 2023-06-28 287 | 288 | ### **Added** 289 | * **Playground Page**: Added social media icons to bottom of page. 290 | * **Main Page**, *Statistics Tab*: Added `assists` metric to the *Top 5 Scorers Section*. 291 | * **Main Page**, *Standings Tab*: Added a metric card to display the top 5 teams' position movement throughout the season. 292 | 293 | ### **Changed** 294 | * **Main Page**: Changed title to "2023-24" to reflect the new season. 295 | * **Main Page**, *Fixtures Tab*: Changed ordering of `fixtures` to appear in chronological order. 296 | 297 | ### **Removed** 298 | * **Main Page**, *Fixtures Tab*: Removed extra comma from `fixtures` date. 299 | 300 | --- 301 | 302 | ## [2.5.0] | 2023-06-19 303 | 304 | ### **Added** 305 | * Added a new page: **Playground**, that holds graphs with slicers, filters, and other sortable features that allows the end user to view statistics in a custom way. 306 | * Added `Recent_Form` to `standings` table as a new column. 307 | * Added string to display current date on **Standings** tab. 308 | 309 | ### **Changed** 310 | * Changed page title from **Overview** to **Premier League - Statistics, Scores & More**. 311 | * Changed **Overview** tab name to **Standings**. 312 | 313 | ### **Removed** 314 | * Removed map of stadium locations from **Main** page; moved it to the new **Playground** page. 315 | 316 | --- 317 | 318 | ## [2.4.0] | 2023-05-26 319 | 320 | ### **Added** 321 | * Added number to *Top 5 Teams* section to indicate current rank. 322 | * Added suffix to rank number in *Forms for the Rest of the League section*. 323 | 324 | ### **Changed** 325 | * Changed hyperlink for GitHub icon to point to GitHub profile instead of repository for project. A link to GitHub repository already exists by default. 326 | 327 | ### **Fixed** 328 | * Added `target="_blank" rel="noopener noreferrer"` to anchor elements to allow linked icons to open properly. 
329 | 330 | --- 331 | 332 | ## [2.3.1] | 2023-05-25 333 | 334 | ### **Fixed** 335 | * Fixed broken link for GitHub Icon on all tabs. 336 | 337 | --- 338 | 339 | ## [2.3.0] | 2023-05-24 340 | 341 | ### **Added** 342 | * Added text that displays the final gameday of the season. 343 | * Added linked icons to social media pages. 344 | 345 | ### **Changed** 346 | * Changed tab title from **Top Teams & Top Scorers** to **Statistics**. 347 | 348 | --- 349 | 350 | ## [2.2.1] | 2023-05-19 351 | 352 | ### **Fixed** 353 | * Fixed promotion/demotion legend by displaying items as a column instead of in a row. 354 | 355 | --- 356 | 357 | ## [2.2.0] | 2023-05-17 358 | 359 | ### **Changed** 360 | * Changed the hex colors used for promotion/demotion status. 361 | * Changed the color of `locations` map markers to `indigo` to match the rest of the theme. 362 | 363 | ### **Added** 364 | * Added an extra color to denote Europa Conference League qualification promotion. 365 | * Added solid border element to `standings` table to better denote promotion/demotion status. 366 | * Added text under table to explain which color denotes which promotion/demotion status. 367 | 368 | --- 369 | 370 | ## [2.1.0] | 2023-05-10 371 | 372 | ### **Changed** 373 | * Changed stadium `locations` map to use [plotly express](https://plotly.com/python/mapbox-layers/) `scatter_mapbox` instead of Streamlit's built in `st.map()` function. 374 | * This allows the stadium points to be hoverable which enables a tooltip that provides more information about the venue. 375 | * Changed title to display ***Premier League Statistics / 2022-23*** instead of ***Premier League Statistics / '22-'23***. 376 | 377 | --- 378 | 379 | ## [2.0.2] | 2023-05-08 380 | 381 | ### **Fixed** 382 | * Fixed the sorting of `rounds` to appear in descending order on the `fixtures` tab. 383 | 384 | --- 385 | 386 | ## [2.0.1] | 2023-05-05 387 | 388 | ### **Fixed** 389 | * Adding '`<=`' to `while` loop to get the current round. 
Previously, the Streamlit app would only select rounds that were *less* than the `MAX` round which would omit the final round. 390 | 391 | --- 392 | 393 | ## [2.0.0] | 2023-05-02 394 | Now using [Firestore](https://firebase.google.com/docs/firestore/) to store fixture data in a document format. 395 | 396 | ### **Added** 397 | * Added `Fixtures` tab for all rounds in the current season. Updates 3 times a day and will add new rounds as they start. 398 | 399 | --- 400 | 401 | ## [1.3.0] | 2023-04-17 402 | 403 | ### **Added** 404 | 405 | * Added page title. 406 | * Added position number to teams in **Forms for the Rest of the League** section. 407 | 408 | ### **Fixed** 409 | 410 | * Fixing capitalization for **Forms for the Rest of the League** subheader. 411 | 412 | ### **Removed** 413 | 414 | * Removed Emojis from tab titles. 415 | 416 | --- 417 | 418 | ## [1.2.0] | 2023-04-16 419 | 420 | ### **Changed** 421 | 422 | Top Teams Tab 423 | * Renamed tab to: "⚽️ Top Teams & 🏃🏻‍♂️ Top Scorers". 424 | * Changed `st.plotly_chart` to `st.line_chart`. 425 | * Moved top scorers to this tab. 426 | 427 | ### **Removed** 428 | 429 | Top Players Tab 430 | * Removed this tab, combined with top teams tab. 431 | 432 | --- 433 | 434 | ## [1.1.0] | 2023-04-07 435 | 436 | ### **Added** 437 | 438 | Top Teams Tab 439 | * Added `logo` and `form` for the rest of the league. 440 | 441 | ### **Changed** 442 | 443 | Top Teams Tab 444 | * Center aligning `logo`, `form (last 5)`, `clean sheets`, `penalties scored`, and `penalties missed` in their containers. 445 | * Setting `logo` width for top 5 teams to `150px`. 446 | 447 | Top Players Tab 448 | * Center aligning `photo`, `name`, `goals`, `team`, and `nationality` in their containers. 449 | * Setting `photo` width for top 5 players to `150px`. 450 | 451 | ### **Removed** 452 | * Removed `LIMIT 5` from SQL query to pull all teams. 
453 | 454 | --- 455 | 456 | ## [1.0.0] | 2023-04-05 457 | 458 | ### **Added** 459 | 460 | Overview Tab 461 | * View the current standings for the league for the current season. 462 | * An adjustable slider gives control to focus in on teams that fit within a certain number of points. 463 | * A bar chart with teams (x-axis) and points (y-axis) adjusts accordingly to the slider. 464 | * A map with plots for the stadium locations for each team in the current season. 465 | 466 | Top Teams Tab 467 | * Shows the `logo`, `form (last 5)`, `clean sheets`, `penalties scored`, and `penalties missed` for the current top five teams in the league. 468 | * A line graph depicts the rise in points over each matchday. 469 | 470 | Top Players Tab 471 | * Shows the `portrait`, `goals`, `team`, and `nationality` of the current top five goal scorers in the league. 472 | 473 | [2.17.0]: https://github.com/digitalghost-dev/premier-league/commit/f097df039469c361d992c4e52eaa6211354aefb5 474 | 475 | [2.16.1]: https://github.com/digitalghost-dev/premier-league/commit/950590251f6559beb2376acf491a3cf1edec8a8e 476 | 477 | [2.16.0]: https://github.com/digitalghost-dev/premier-league/commit/aae9d9c814eafc905104a765c475b5763d0881f8 478 | 479 | [2.15.0]: https://github.com/digitalghost-dev/premier-league/commit/95aac28fbf4ab29f7965e8bc326f631198cf7272 480 | 481 | [2.14.1]: https://github.com/digitalghost-dev/premier-league/commit/e4a0ba46fd3dee96544b34b2022140c73a4d2ccd 482 | 483 | [2.14.0]: https://github.com/digitalghost-dev/premier-league/commit/62a27e488c3fbc91c585e55e73c91adbe9edf0b8#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 484 | 485 | [2.13.0]: https://github.com/digitalghost-dev/premier-league/commit/dec0426ca5d3de50e8093874635f5bf01718aaa6 486 | 487 | [2.12.1]: https://github.com/digitalghost-dev/premier-league/commit/11e04f7aa42e607d65300600aef7b6743c520542 488 | 489 | [2.12.0]: 
https://github.com/digitalghost-dev/premier-league/commit/3df7c162a9d1deb587fe6f9681e3c8e028d2e094#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 490 | 491 | [2.11.5]: https://github.com/digitalghost-dev/premier-league/commit/d3f4e7416e6b667364235a070cf4715413091f8b#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 492 | 493 | [2.11.4]: https://github.com/digitalghost-dev/premier-league/commit/71f0424ff0c1b14571390ee6fe0775dd8da6d7ae#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 494 | 495 | [2.11.3]: https://github.com/digitalghost-dev/premier-league/commit/b13541d5a64ea67e42c1b10e87dd2a7e32798463#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 496 | 497 | [2.11.2]: https://github.com/digitalghost-dev/premier-league/commit/25bfb7f76f46a0f8badce8a896937ddf12690332#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 498 | 499 | [2.11.1]: https://github.com/digitalghost-dev/premier-league/commit/fad6ab3060540f7034435971e9d38c125af1ff06#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 500 | 501 | [2.11.0]: https://github.com/digitalghost-dev/premier-league/commit/4436a5387a3c9969236af2ec83fb0f7bef03ef7e#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 502 | 503 | [2.10.3]: https://github.com/digitalghost-dev/premier-league/commit/c18d9bfaf762ba7c4c2714150c1f6cd0f722b9e8#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 504 | 505 | [2.10.2]: https://github.com/digitalghost-dev/premier-league/commit/53218cf868e3bc8128327932512f5ac1d28e6740#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 506 | 507 | [2.10.1]: https://github.com/digitalghost-dev/premier-league/commit/c2a0d39eb7cab1b7ed3013bb5811490f70bd256e#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 508 | 509 | [2.10.0]: 
https://github.com/digitalghost-dev/premier-league/commit/483e68208487c1632d2aa93ac098683a6c3515cc#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 510 | 511 | [2.9.1]: https://github.com/digitalghost-dev/premier-league/commit/a726d8fbf9f99bddc03a7fbf465ddba14ed97aee#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 512 | 513 | [2.9.0]: https://github.com/digitalghost-dev/premier-league/commit/d905a2a26b38200a519c78fa4e3847b598dc3d8f#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 514 | 515 | [2.8.0]: https://github.com/digitalghost-dev/premier-league/commit/ffc31af3ca6bc58294ab6c8c6daba105d9e7c1a5#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 516 | 517 | [2.7.1]: https://github.com/digitalghost-dev/premier-league/commit/a18341f802c46043fa8122c517e479103c067870#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 518 | 519 | [2.7.0]: https://github.com/digitalghost-dev/premier-league/commit/522600c0da5c6c20dd51528794bc959c1adcd9e3#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 520 | 521 | [2.6.0]: https://github.com/digitalghost-dev/premier-league/commit/de5b6c14e370ec08f0a79a2cc1dafd84a144411a#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 522 | 523 | [2.5.0]: https://github.com/digitalghost-dev/premier-league/commit/247029c3a94e607d5ffd2adabc41178647d1796e#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 524 | 525 | [2.4.0]: https://github.com/digitalghost-dev/premier-league/commit/19ff4063496a646aad3b8750a7c434cdeb1004e9#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 526 | 527 | [2.3.1]: https://github.com/digitalghost-dev/premier-league/commit/c11bfaa2f2aa0317783be65f935387e25cf180de#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 528 | 529 | [2.3.0]: 
https://github.com/digitalghost-dev/premier-league/commit/5e3cadd68cefef3abf7dbe1809257a9fae39af4a#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 530 | 531 | [2.2.1]: https://github.com/digitalghost-dev/premier-league/commit/903d457765df9de9d3a0ea879082dc0096bdbb38#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 532 | 533 | [2.2.0]: https://github.com/digitalghost-dev/premier-league/commit/11606ed57e6a4460d5059fc0141fbeccd268b716#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 534 | 535 | [2.1.0]: https://github.com/digitalghost-dev/premier-league/commit/f4e580d998e8e1042b9b824aa846bf3e738b3fd4#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 536 | 537 | [2.0.2]: https://github.com/digitalghost-dev/premier-league/commit/72337e2ac3ee365612a6a02eda25f390ab2690b9#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 538 | 539 | [2.0.1]: https://github.com/digitalghost-dev/premier-league/commit/dc92180f52a325f79e14d89097940162711ac35f#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 540 | 541 | [2.0.0]: https://github.com/digitalghost-dev/premier-league/commit/a8b11f02c8b517453c1d7d2e34b0986ea73588ba#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 542 | 543 | [1.3.0]: https://github.com/digitalghost-dev/premier-league/commit/4b2063a3663f48e166f7b13cbe06e51b24fd2056#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 544 | 545 | [1.2.0]: https://github.com/digitalghost-dev/premier-league/commit/8d5fbb7cdf91263eb55f2bc7ecd09236d975a704#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 546 | 547 | [1.1.0]: https://github.com/digitalghost-dev/premier-league/commit/e99f1f4a6eab3ef967c30b6c21b4fffa109de8e9#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d 548 | 549 | [1.0.0]: 
https://github.com/digitalghost-dev/premier-league/commit/429a6f3ca12bcdbb5bee4505d390838b25edb8bb#diff-4dc66906e3c3b7f7a82967d85af564f2d5a6e0bee5829aa5eda607dd9756c87d -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Dockerfile to build the Streamlit app. 2 | 3 | FROM python:3.12-slim-bookworm 4 | 5 | RUN groupadd -r streamlit_group 6 | 7 | RUN useradd -r -g streamlit_group streamlit_user 8 | 9 | WORKDIR /app 10 | 11 | RUN apt-get update && apt-get install -y \ 12 | build-essential \ 13 | curl \ 14 | software-properties-common \ 15 | git \ 16 | && rm -rf /var/lib/apt/lists/* 17 | 18 | COPY requirements.txt . 19 | COPY components components 20 | COPY streamlit_app.py . 21 | 22 | RUN pip3 install --no-cache-dir -r requirements.txt 23 | 24 | RUN chown -R streamlit_user:streamlit_group /app 25 | 26 | EXPOSE 8501 27 | 28 | HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health 29 | 30 | USER streamlit_user 31 | 32 | ENTRYPOINT ["streamlit", "run", "streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0", "--theme.primaryColor=indigo", "--theme.textColor=black", "--theme.backgroundColor=#FFF", "--theme.secondaryBackgroundColor=#FFF"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 |

Premier League Data Pipeline

6 | 7 |

8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 |

17 | 18 | > [!WARNING] 19 | > After a year and some change of building this project, it's time for me to archive it. I've started to use these tools in my current position so learning these on my own and spending my own money on paying for the Football API and Google Cloud services no longer makes sense. I'm switching my focus on learning Golang! 20 | 21 | ## Overview 22 | This repository contains a personal project designed to enhance my skills in Data Engineering. It focuses on developing data pipelines that extract, transform, and load data from various sources into diverse databases. Additionally, it involves creating a dashboard with visualizations using Streamlit. 23 | 24 | > [!IMPORTANT] 25 | > Many architectural choices and decisions in this project may not make the most efficient sense on purpose for the sake of practicing and learning. 26 | 27 | ## Infrastructure 28 | ### Tools & Services 29 | ![cloud](https://img.shields.io/badge/Google_Cloud-4285F4?style=flat-square&logo=googlecloud&logoColor=white) ![streamlit](https://img.shields.io/badge/Streamlit-FF4B4B?style=flat-square&logo=streamlit&logoColor=white) ![terraform](https://img.shields.io/badge/Terraform-844FBA?style=flat-square&logo=terraform&logoColor=white) ![docker](https://img.shields.io/badge/Docker-2496ED?style=flat-square&logo=docker&logoColor=white) ![prefect](https://img.shields.io/badge/-Prefect-070E10?style=flat-square&logo=prefect) ![dbt](https://img.shields.io/badge/dbt-FF694B?style=flat-square&logo=dbt&logoColor=white) 30 | 31 | ### Databases 32 | ![firestore](https://img.shields.io/badge/Firestore-FFCA28?style=flat-square&logo=firebase&logoColor=white) ![postgres](https://img.shields.io/badge/PostgreSQL-4169E1?style=flat-square&logo=postgresql&logoColor=white) ![bigquery](https://img.shields.io/badge/BigQuery-669DF6?style=flat-square&logo=googlebigquery&logoColor=white) 33 | 34 | ### Code Quality 35 | 
![pre-commit](https://img.shields.io/badge/pre--commit-FAB040?style=flat-square&logo=pre-commit&logoColor=white) 36 | 37 | | Security Linter | Code Formatting | Type Checking | Code Linting | 38 | | --- | --- | --- | --- | 39 | | [`bandit`](https://github.com/PyCQA/bandit) | [`ruff-format`](https://github.com/astral-sh/ruff) | [`mypy`](https://github.com/python/mypy) | [`ruff`](https://github.com/astral-sh/ruff) | 40 | 41 | --- 42 | 43 | ## Data and CI/CD Pipelines 44 | ### Data Pipelines 45 | 46 |

Data Pipeline 1

47 | 48 | Orchestrated with [Prefect](https://www.prefect.io), a Python file is run to extract stock data for Manchester United. 49 | 50 | 1. Data from the [Financial Modeling Prep API](https://site.financialmodelingprep.com) is extracted with Python using the `/quote` endpoint. 51 | 2. The data is loaded directly into a PostgreSQL database hosted on [Cloud SQL](https://cloud.google.com/sql?hl=en) with no transformations. 52 | 3. Once the data is loaded into PostgreSQL, Datastream replicates the data into BigQuery. Datastream checks for staleness every 15 minutes. 53 | 4. [dbt](https://getdbt.com) is used to transform the data in BigQuery and create a view with transformed data. 54 | 

Data Pipeline 2

56 | 57 | Orchestrated with [Prefect](https://www.prefect.io), Python files are run that perform a full ETL process. 58 | 59 | 1. Data is extracted from multiple API sources: 60 | * Data from the [Football Data API](https://www.football-data.org/) is extracted to retrieve information on the current standings, team statistics, top scorers, squads, fixtures, and the current round. The following endpoints are used: 61 | * `/standings` 62 | * `/teams` 63 | * `/top_scorers` 64 | * `/squads` 65 | * `/fixtures/current_round` 66 | * `/fixtures` 67 | * Data from the [NewsAPI](https://newsapi.org) is extracted to retrieve news article links with filters set to the Premier League from Sky Sports, The Guardian, and 90min. The following endpoints are used: 68 | * `/everything` 69 | * Data from a self-built API written in Golang is extracted to retrieve information on teams' stadiums. The following endpoints are used: 70 | * `/stadiums` 71 | * Data from the [YouTube API](https://developers.google.com/youtube/v3) is extracted to retrieve the latest highlights from NBC Sports YouTube channel. 72 | 2. Python performs any necessary transformations such as converting data types or checking for `NULL` values. 73 | 3. Majority of the data is then loaded into **BigQuery** in their respective tables. Fixture data is loaded into **Firestore** as documents categorized by the round number. 74 | 

Data Pipeline 3

76 | 1. Daily exports of the standings and top scorers data in BigQuery are exported to a Cloud Storage bucket using Cloud Scheduler to be used in another project. 77 | * The other project is a [CLI](https://github.com/digitalghost-dev/pl-cli/) tool written in Golang. 78 | 79 |

Pipeline Diagram

80 | 81 | ![data-pipeline-flowchart](https://storage.googleapis.com/premier_league_bucket/flowcharts/data_pipelines_flowchart.png) 82 | 83 | ### CI/CD Pipeline 84 | The CI/CD pipeline is focused on building the Streamlit app into a Docker container that is then pushed to Artifact Registry and deployed to Cloud Run as a Service. Different architectures are built for different machine types and pushed to Docker Hub. 85 | 86 | 1. The repository code is checked out and a Docker image containing the updated `streamlit_app.py` file will build. 87 | 2. The newly built Docker image will be pushed to [Artifact Registry](https://cloud.google.com/artifact-registry). 88 | 3. The Docker image is then deployed to [Cloud Run](https://cloud.google.com/run/docs/overview/what-is-cloud-run) as a Service. 89 | 90 | #### Pipeline Diagram 91 | ![cicd_pipeline](https://storage.googleapis.com/premier_league_bucket/flowcharts/cicd_pipeline_flowchart.png) 92 | 93 | --- 94 | 95 | ## Security 96 | * [Syft](https://github.com/anchore/syft) and [Grype](https://github.com/anchore/grype) work together to scan the Streamlit Docker image. Syft creates an [`SBOM`](https://www.linuxfoundation.org/blog/blog/what-is-an-sbom) and Grype scans the `SBOM` for vulnerabilities. The results are sent to the repository's Security tab. 97 | * [Snyk](https://github.com/snyk/actions/tree/master/python-3.10) is also used to scan the repository for vulnerabilities in the Python packages. 98 | -------------------------------------------------------------------------------- /api/Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | FROM golang:1.19-alpine 3 | 4 | WORKDIR /app 5 | 6 | COPY go.mod . 7 | COPY go.sum . 8 | 9 | RUN go mod download 10 | 11 | COPY api.go . 
12 | 13 | RUN go build -o /docker-gs-ping 14 | 15 | EXPOSE 8080 16 | 17 | CMD [ "/docker-gs-ping" ] -------------------------------------------------------------------------------- /api/api.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "fmt" 6 | "log" 7 | "net/http" 8 | "github.com/gin-gonic/gin" 9 | ) 10 | 11 | // data structure 12 | type stadium struct { 13 | Team string `json:"team"` 14 | Stadium string `json:"stadium"` 15 | City string `json:"city"` 16 | Latitude float64 `json:"latitude"` 17 | Longitude float64 `json:"longitude"` 18 | Capacity string `json:"capacity"` 19 | Year_Opened string `json:"year_opened"` 20 | } 21 | 22 | var stadiums = []stadium{ 23 | {Team: "Arsenal", Stadium: "Emirates Stadium", City: "London", Latitude: 51.554867, Longitude: -0.109112, Capacity: "60,704", Year_Opened: "2006"}, 24 | {Team: "Aston Villa", Stadium: "Villa Park", City: "Birmingham", Latitude: 52.509090, Longitude: -1.885249, Capacity: "42,657", Year_Opened: "1897"}, 25 | {Team: "Bournemouth", Stadium: "Vitality Stadium", City: "Bournemouth", Latitude: 50.7348, Longitude: -1.8391, Capacity: "11,307", Year_Opened: "1910"}, 26 | {Team: "Brentford", Stadium: "Gtech Community Stadium", City: "London", Latitude: 51.490715, Longitude: -0.289048, Capacity: "17,250", Year_Opened: "2020"}, 27 | {Team: "Brighton", Stadium: "Falmer Stadium", City: "Falmer", Latitude: 50.861782, Longitude: -0.084357, Capacity: "31,800", Year_Opened: "2011"}, 28 | {Team: "Burnley", Stadium: "Turf Moor", City: "Burnley", Latitude: 53.789108, Longitude: -2.230575, Capacity: "21,944", Year_Opened: "1883"}, 29 | {Team: "Chelsea", Stadium: "Stamford Bridge", City: "London", Latitude: 51.481834, Longitude: -0.191390, Capacity: "40,343", Year_Opened: "1877"}, 30 | {Team: "Crystal Palace", Stadium: "Selhurst Park", City: "London", Latitude: 51.398338, Longitude: -0.086084, Capacity: "25,486", Year_Opened: "1924"}, 31 | 
{Team: "Everton", Stadium: "Goodison Park", City: "Liverpool", Latitude: 53.438751, Longitude: -2.966681, Capacity: "39,414", Year_Opened: "1892"}, 32 | {Team: "Fulham", Stadium: "Craven Cottage", City: "London", Latitude: 51.281799, Longitude: -0.131080, Capacity: "29,600", Year_Opened: "1896"}, 33 | {Team: "Liverpool", Stadium: "Anfield", City: "Liverpool", Latitude: 53.430759, Longitude: -2.961425, Capacity: "53,394", Year_Opened: "1884"}, 34 | {Team: "Luton Town", Stadium: "Kenilworth Road", City: "Luton", Latitude: 51.883829798, Longitude: -0.425664964, Capacity: "10,356", Year_Opened: "1905"}, 35 | {Team: "Manchester City", Stadium: "Etihad Sadium", City: "Manchester", Latitude: 53.483135, Longitude: -2.200941, Capacity: "53,400", Year_Opened: "2003"}, 36 | {Team: "Manchester United", Stadium: "Old Trafford", City: "Manchester", Latitude: 53.463493, Longitude: -2.292279, Capacity: "74,310", Year_Opened: "1910"}, 37 | {Team: "Newcastle", Stadium: "St James' Park", City: "Newcastle upon Tyne", Latitude: 54.975170, Longitude: -1.622539, Capacity: "52,305", Year_Opened: "1892"}, 38 | {Team: "Nottingham Forest", Stadium: "City Ground", City: "West Bridgford", Latitude: 52.939938, Longitude: -1.13287, Capacity: "30,332", Year_Opened: "1898"}, 39 | {Team: "Sheffield United", Stadium: "Bramall Lane", City: "Sheffield", Latitude: 53.368831858, Longitude: -1.46916479, Capacity: "32,050", Year_Opened: "1855"}, 40 | {Team: "Tottemham", Stadium: "Tottenham Hotspur Stadium", City: "London", Latitude: 51.604252, Longitude: -0.067007, Capacity: "62,850", Year_Opened: "2019"}, 41 | {Team: "West Ham", Stadium: "London Stadium", City: "London", Latitude: 51.538811, Longitude: -0.017136, Capacity: "62,500", Year_Opened: "2012"}, 42 | {Team: "Wolves", Stadium: "Molineux Stadium", City: "Wolverhampton", Latitude: 52.590382, Longitude: -2.130924, Capacity: "31,750", Year_Opened: "1889"}, 43 | } 44 | 45 | // getStadium responds with the list of all stadiums as JSON. 
46 | func getStadium(c *gin.Context) { 47 | c.IndentedJSON(http.StatusOK, stadiums) 48 | } 49 | 50 | // setting up the endpoint. 51 | func main() { 52 | router := gin.Default() 53 | router.GET("/stadiums", getStadium) 54 | 55 | router.Run() 56 | 57 | port := os.Getenv("PORT") 58 | if port == "" { 59 | port = "8080" 60 | } 61 | 62 | http.HandleFunc("/v1/", func(w http.ResponseWriter, r *http.Request) { 63 | fmt.Fprintf(w, "{status: 'running'}") 64 | }) 65 | 66 | log.Println("listening on port", port) 67 | if err := http.ListenAndServe(":"+port, nil); err != nil { 68 | log.Fatalf("Error launching REST API server: %v", err) 69 | } 70 | } -------------------------------------------------------------------------------- /api/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/digitalghost-dev/api 2 | 3 | go 1.21 4 | 5 | require github.com/gin-gonic/gin v1.8.1 6 | 7 | require ( 8 | github.com/gin-contrib/sse v0.1.0 // indirect 9 | github.com/go-playground/locales v0.14.0 // indirect 10 | github.com/go-playground/universal-translator v0.18.0 // indirect 11 | github.com/go-playground/validator/v10 v10.10.0 // indirect 12 | github.com/goccy/go-json v0.9.7 // indirect 13 | github.com/json-iterator/go v1.1.12 // indirect 14 | github.com/leodido/go-urn v1.2.1 // indirect 15 | github.com/mattn/go-isatty v0.0.14 // indirect 16 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect 17 | github.com/modern-go/reflect2 v1.0.2 // indirect 18 | github.com/pelletier/go-toml/v2 v2.0.1 // indirect 19 | github.com/ugorji/go/codec v1.2.7 // indirect 20 | golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97 // indirect 21 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110 // indirect 22 | golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069 // indirect 23 | golang.org/x/text v0.3.6 // indirect 24 | google.golang.org/protobuf v1.28.0 // indirect 25 | gopkg.in/yaml.v2 v2.4.0 // indirect 26 | ) 27 | 
-------------------------------------------------------------------------------- /api/go.sum: -------------------------------------------------------------------------------- 1 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 2 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 4 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= 6 | github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= 7 | github.com/gin-gonic/gin v1.8.1 h1:4+fr/el88TOO3ewCmQr8cx/CtZ/umlIRIs5M4NTNjf8= 8 | github.com/gin-gonic/gin v1.8.1/go.mod h1:ji8BvRH1azfM+SYow9zQ6SZMvR8qOMZHmsCuWR9tTTk= 9 | github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A= 10 | github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= 11 | github.com/go-playground/locales v0.14.0 h1:u50s323jtVGugKlcYeyzC0etD1HifMjqmJqb8WugfUU= 12 | github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs= 13 | github.com/go-playground/universal-translator v0.18.0 h1:82dyy6p4OuJq4/CByFNOn/jYrnRPArHwAcmLoJZxyho= 14 | github.com/go-playground/universal-translator v0.18.0/go.mod h1:UvRDBj+xPUEGrFYl+lu/H90nyDXpg0fqeB/AQUGNTVA= 15 | github.com/go-playground/validator/v10 v10.10.0 h1:I7mrTYv78z8k8VXa/qJlOlEXn/nBh+BF8dHX5nt/dr0= 16 | github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos= 17 | github.com/goccy/go-json v0.9.7 h1:IcB+Aqpx/iMHu5Yooh7jEzJk1JZ7Pjtmys2ukPr7EeM= 18 | github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= 19 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 20 | 
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= 21 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 22 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 23 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 24 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 25 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 26 | github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 27 | github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= 28 | github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= 29 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 30 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 31 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 32 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 33 | github.com/leodido/go-urn v1.2.1 h1:BqpAaACuzVSgi/VLzGZIobT2z4v53pjosyNd9Yv6n/w= 34 | github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY= 35 | github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= 36 | github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= 37 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc= 38 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 39 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 40 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 41 | github.com/pelletier/go-toml/v2 v2.0.1 
h1:8e3L2cCQzLFi2CR4g7vGFuFxX7Jl1kKX8gW+iV0GUKU= 42 | github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= 43 | github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= 44 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 45 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 46 | github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= 47 | github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8= 48 | github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= 49 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 50 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 51 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 52 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 53 | github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= 54 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 55 | github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= 56 | github.com/ugorji/go/codec v1.2.7 h1:YPXUKf7fYbp/y8xloBqZOw2qaVggbfwMlI8WM3wZUJ0= 57 | github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= 58 | golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97 h1:/UOmuWzQfxxo9UtlXMwuQU8CMgg1eZXqTRwkSQJWKOI= 59 | golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 60 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110 h1:qWPm9rbaAMKs8Bq/9LRpbMqxWRVUAQwMI9fVrssnTfw= 61 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod 
h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 62 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 63 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 64 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 65 | golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069 h1:siQdpVirKtzPhKl3lZWozZraCFObP8S1v6PRp0bLrtU= 66 | golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 67 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 68 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 69 | golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= 70 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 71 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 72 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 73 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 74 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 75 | google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw= 76 | google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 77 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 78 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 79 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 80 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod 
import streamlit as st


class AboutSection:
    """Renders the "About" section of the Streamlit dashboard."""

    def __init__(self):
        pass

    def display(self):
        """Write the About header and the project description to the page."""
        st.subheader("About")
        # Typo fix: original read "created by maintained by".
        st.write(
            """
            This project is created and maintained by [myself](https://github.com/digitalghost-dev) to practice my skills in Data Engineering to one day break into the field.

            I chose using data for Premier League because I am a huge fan of the sport and I am always interested in learning more about the game.

            This is the only project that I'm currently working on and plan to continue to add more features and tools to it as I learn more about Data Engineering.
            """
        )
import firebase_admin  # type: ignore
import pandas as pd
import streamlit as st
from firebase_admin import firestore  # type: ignore
from google.cloud import bigquery
import google.auth


# Firestore Connection
@st.cache_resource
def firestore_connection() -> firestore.Client:
    """Return a cached Firestore client, initialising the firebase_admin app once."""
    credentials, _project = google.auth.default()
    if not firebase_admin._apps:
        firebase_admin.initialize_app()

    return firestore.Client(credentials=credentials)


# BigQuery Connection
@st.cache_data(ttl=600)
def run_query(query):
    """Run *query* against BigQuery and return the rows as a list of dicts.

    Rows are materialised as plain dicts so Streamlit's cache can pickle them.
    """
    credentials, _project = google.auth.default()
    query_job = bigquery.Client(credentials=credentials).query(query)
    raw_data = query_job.result()
    # Iterate as `row` — the original comprehension shadowed its own result name.
    return [dict(row) for row in raw_data]


@st.cache_resource
def get_standings() -> pd.DataFrame:
    """League standings joined with team metadata, ordered by rank."""
    standings_data = run_query(
        """
        SELECT rank, points, t.logo, t.team, games_played, wins, draws, loses, goals_for, goals_against, goal_difference
        FROM `premier_league_dataset.standings` AS s
        INNER JOIN `premier_league_dataset.teams` AS t
        ON s.team_id = t.team_id
        ORDER BY rank ASC;
        """
    )
    return pd.DataFrame(data=standings_data)


@st.cache_resource
def get_stadiums() -> pd.DataFrame:
    """Stadium name and coordinates for every team."""
    stadiums_data = run_query(
        """
        SELECT team, stadium, latitude, longitude
        FROM `premier_league_dataset.stadiums`;
        """
    )
    return pd.DataFrame(data=stadiums_data)


@st.cache_resource
def get_teams() -> pd.DataFrame:
    """Per-team statistics (form, clean sheets, penalties, …) ordered by rank."""
    teams_data = run_query(
        """
        SELECT t.logo, form, t.team, clean_sheets, penalties_scored, penalties_missed, average_goals, win_streak
        FROM `premier_league_dataset.teams` AS t
        LEFT JOIN `premier_league_dataset.standings` AS s
        ON t.team = s.Team
        ORDER BY s.rank;
        """
    )
    return pd.DataFrame(data=teams_data)


@st.cache_resource
def get_top_scorers() -> pd.DataFrame:
    """All top-scorer rows ordered by goals, descending."""
    top_scorers_data = run_query(
        """
        SELECT *
        FROM `premier_league_dataset.top_scorers`
        ORDER BY Goals DESC;
        """
    )
    return pd.DataFrame(data=top_scorers_data)


@st.cache_resource
def get_news() -> pd.DataFrame:
    """News articles, most recently published first."""
    news_data = run_query(
        """
        SELECT *
        FROM `premier_league_dataset.news`
        ORDER BY published_at DESC;
        """
    )
    return pd.DataFrame(data=news_data)


@st.cache_resource
def get_highlights() -> pd.DataFrame:
    """YouTube highlights, most recently published first."""
    highlights_data = run_query(
        """
        SELECT *
        FROM `premier_league_dataset.highlights`
        ORDER BY publish_time DESC;
        """
    )
    return pd.DataFrame(data=highlights_data)


@st.cache_resource
def get_league_statistics() -> pd.DataFrame:
    """League-wide aggregates: total goals, penalties scored and clean sheets."""
    league_statistics = run_query(
        """
        SELECT
            SUM(goals_for) AS league_goals_scored,
            SUM(penalties_scored) AS league_penalties_scored,
            SUM(clean_sheets) AS league_clean_sheets
        FROM premier_league_dataset.teams AS t
        JOIN premier_league_dataset.standings AS s
        ON t.team_id = s.team_id;
        """
    )
    return pd.DataFrame(data=league_statistics)


def _round_bound(aggregate: str) -> int:
    """Return the MIN or MAX `round` from current_round as a plain int.

    Shared by get_min_round/get_max_round, which previously duplicated this
    query and returned numpy scalars despite their `-> int` annotations.
    """
    rows = run_query(
        f"""
        SELECT {aggregate}(round) AS round
        FROM `premier_league_dataset.current_round`;
        """
    )
    return int(pd.DataFrame(data=rows)["round"][0])


@st.cache_resource
def get_min_round() -> int:
    """Earliest round number present in the current_round table."""
    return _round_bound("MIN")


@st.cache_resource
def get_max_round() -> int:
    """Latest round number present in the current_round table."""
    return _round_bound("MAX")


@st.cache_resource
def get_squads() -> pd.DataFrame:
    """Squad rosters for all teams."""
    squads_data = run_query(
        """
        SELECT *
        FROM `premier_league_squads.all_teams_squads_view`
        """
    )
    return pd.DataFrame(data=squads_data)


@st.cache_resource
def get_injuries() -> pd.DataFrame:
    """Current injury list for all teams."""
    injuries_data = run_query(
        """
        SELECT *
        FROM `premier_league_injuries.all_teams_injuries_view`
        """
    )
    return pd.DataFrame(data=injuries_data)


@st.cache_resource
def get_stocks() -> pd.DataFrame:
    """MANU stock price samples (New York time, price)."""
    stock_data = run_query(
        """
        SELECT new_york_time, price
        FROM `dbt_production.stocks`
        """
    )
    return pd.DataFrame(data=stock_data)
`premier_league_dataset.current_round`; 139 | """ 140 | ) 141 | max_round_row = pd.DataFrame(data=max_round_row) 142 | max_round = max_round_row["round"][0] 143 | return max_round 144 | 145 | 146 | @st.cache_resource 147 | def get_squads() -> pd.DataFrame: 148 | squads_data = run_query( 149 | """ 150 | SELECT * 151 | FROM `premier_league_squads.all_teams_squads_view` 152 | 153 | """ 154 | ) 155 | return pd.DataFrame(data=squads_data) 156 | 157 | 158 | @st.cache_resource 159 | def get_injuries() -> pd.DataFrame: 160 | injuries_data = run_query( 161 | """ 162 | SELECT * 163 | FROM `premier_league_injuries.all_teams_injuries_view` 164 | """ 165 | ) 166 | return pd.DataFrame(data=injuries_data) 167 | 168 | 169 | @st.cache_resource 170 | def get_stocks() -> pd.DataFrame: 171 | stock_data = run_query( 172 | """ 173 | SELECT new_york_time, price 174 | FROM `dbt_production.stocks` 175 | """ 176 | ) 177 | return pd.DataFrame(data=stock_data) 178 | -------------------------------------------------------------------------------- /components/fixtures_section.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from firebase_admin import firestore # type: ignore 3 | from datetime import datetime 4 | 5 | from typing import List 6 | from typing import Tuple 7 | 8 | 9 | class FixturesSection: 10 | def __init__(self, firestore_database, max_round: int, min_round: int): 11 | self.firestore_database = firestore_database 12 | self.max_round = int(max_round) 13 | self.min_round = int(min_round) 14 | 15 | def firestore_pull( 16 | self, round_count 17 | ) -> Tuple[List[str], List[int], List[int], List[str], List[str], List[str], List[str]]: 18 | # Calling each document in the collection in ascending order by date. 
19 | collection_ref = self.firestore_database.collection(f"Regular Season - {round_count}") 20 | query = collection_ref.order_by("date", direction=firestore.Query.ASCENDING) 21 | results = query.stream() 22 | 23 | # Setting an empty list. This list will contain each fixture's details that can later be called by referencing its index. 24 | documents = [] 25 | 26 | # Iterating through the query results to get the document ID (e.g., 'Manchester City vs Burnley') and its data. 27 | for doc in results: 28 | document_dict = {"id": doc.id, "data": doc.to_dict()} 29 | documents.append(document_dict) 30 | 31 | # Retrieving and formatting match date. 32 | match_date = [ 33 | datetime.strptime(documents[count]["data"]["date"], "%Y-%m-%dT%H:%M:%S+00:00") 34 | .strftime("%B %d{}, %Y - %H:%M") 35 | .format( 36 | "th" 37 | if 4 38 | <= int(datetime.strptime(documents[count]["data"]["date"], "%Y-%m-%dT%H:%M:%S+00:00").strftime("%d")) 39 | <= 20 40 | else {1: "st", 2: "nd", 3: "rd"}.get( 41 | int(datetime.strptime(documents[count]["data"]["date"], "%Y-%m-%dT%H:%M:%S+00:00").strftime("%d")) 42 | % 10, 43 | "th", 44 | ) 45 | ) 46 | for count in range(10) 47 | ] 48 | 49 | # Retrieving away and home goals for each match. 50 | away_goals = [documents[count]["data"]["goals"]["away"] for count in range(10)] 51 | home_goals = [documents[count]["data"]["goals"]["home"] for count in range(10)] 52 | 53 | # Retrieving away and home team for each match. 54 | away_team = [documents[count]["data"]["teams"]["away"]["name"] for count in range(10)] 55 | home_team = [documents[count]["data"]["teams"]["home"]["name"] for count in range(10)] 56 | 57 | # Retrieving away and home logo for each team. 
58 | away_logo = [documents[count]["data"]["teams"]["away"]["logo"] for count in range(10)] 59 | home_logo = [documents[count]["data"]["teams"]["home"]["logo"] for count in range(10)] 60 | 61 | return ( 62 | match_date, 63 | away_goals, 64 | home_goals, 65 | away_team, 66 | home_team, 67 | away_logo, 68 | home_logo, 69 | ) 70 | 71 | def display(self): 72 | round_count = self.max_round 73 | st.subheader("Fixtures") 74 | 75 | while round_count >= self.min_round: 76 | with st.expander(f"Round {round_count}"): 77 | ( 78 | match_date, 79 | away_goals, 80 | home_goals, 81 | away_team, 82 | home_team, 83 | away_logo, 84 | home_logo, 85 | ) = self.firestore_pull(round_count) 86 | 87 | count = 0 88 | 89 | while count < 10: 90 | # Creating a container for each match. 91 | with st.container(): 92 | col1, col2, col3, col4, col5 = st.columns(5) 93 | 94 | with col1: 95 | st.write("") 96 | 97 | # Home teams 98 | with col2: 99 | st.markdown( 100 | f"

{home_goals[count]}

", 101 | unsafe_allow_html=True, 102 | ) 103 | st.markdown( 104 | f"", 105 | unsafe_allow_html=True, 106 | ) 107 | st.write("") 108 | st.write("") 109 | 110 | # Match date 111 | with col3: 112 | st.write("") 113 | st.markdown( 114 | "

Match Date & Time

", 115 | unsafe_allow_html=True, 116 | ) 117 | st.markdown( 118 | f"

{match_date[count]}

", 119 | unsafe_allow_html=True, 120 | ) 121 | st.markdown( 122 | f"

{home_team[count]} vs. {away_team[count]}

", 123 | unsafe_allow_html=True, 124 | ) 125 | 126 | # Away teams 127 | with col4: 128 | st.markdown( 129 | f"

{away_goals[count]}

import streamlit as st


class HighlightsSection:
    """Renders YouTube highlight cards in two rows of three columns."""

    def __init__(self, highlights_df):
        # DataFrame ordered by publish_time DESC; col 2 = title, col 3 =
        # thumbnail URL, last col = publish time.
        self.highlights_df = highlights_df

    def _display_row(self, offset: int) -> None:
        """Render one row of three highlight cards starting at row *offset*.

        Shared by display_first_row/display_second_row, which previously
        duplicated this loop verbatim.
        """
        columns = st.columns(3)

        for i, col in enumerate(columns):
            with col:
                try:
                    st.image(self.highlights_df.iloc[i + offset, 3], use_column_width="auto")
                    st.subheader(self.highlights_df.iloc[i + offset, 2])
                    st.write(f"Publish time: {self.highlights_df.iloc[i + offset, -1]}")
                    st.markdown(
                        f"Watch on YouTube",
                        unsafe_allow_html=True,
                    )
                except IndexError:
                    # Fewer highlights than cells — leave the cell empty.
                    pass

    def display_first_row(self):
        """Render the section header plus the three most recent highlights."""
        st.header("Recent Highlights")
        self._display_row(0)

    def display_second_row(self):
        """Render highlights four to six."""
        self._display_row(3)
import streamlit as st


class InjuriesSection:
    """Renders the "Recent Injuries" section with a per-team filter popover."""

    def __init__(self, injuries_df):
        self.injuries_df = injuries_df
        # Closed set of Premier League teams available in the filter.
        self.teams = (
            "Arsenal",
            "Aston Villa",
            "Bournemouth",
            "Brentford",
            "Brighton",
            "Burnley",
            "Chelsea",
            "Crystal Palace",
            "Everton",
            "Fulham",
            "Liverpool",
            "Luton",
            "Manchester City",
            "Manchester United",
            "Newcastle",
            "Nottingham Forest",
            "Sheffield Utd",
            "Tottenham",
            "West Ham",
            "Wolves",
        )

    def display(self):
        """Show a checkbox popover and an injuries table per selected team."""
        st.divider()
        st.subheader("Recent Injuries")
        st.write("Select the teams you want to see recent injuries for.")
        popover = st.popover("Filter Teams")
        filtered_df = self.injuries_df.drop(columns=["team_id", "player_id"])

        # One checkbox per team, all unchecked by default.
        selections = {team: popover.checkbox(f"{team}", value=False) for team in self.teams}

        for team, selected in selections.items():
            if not selected:
                continue

            team_df = filtered_df[filtered_df["team_name"] == team].drop(columns=["team_name"])
            st.write(f"**{team}**")

            if team_df.empty:
                st.write("No recent injuries reported.")
                st.empty()
            else:
                st.dataframe(
                    team_df,
                    column_config={
                        "player_name": "Player",
                        "injury_type": "Injury Type",
                        "injury_reason": "Reason",
                        "injury_date": "Date",
                    },
                    hide_index=True,
                    use_container_width=True,
                )

{index + 1}th / {team_info.iloc[1][-5:]}

import streamlit as st


class NewsSection:
    """Renders the four most recent news articles in a row of columns."""

    def __init__(self, news_df):
        # DataFrame ordered by published_at DESC; col 0 = title, col 2 =
        # image URL, col 3 = publish time.
        self.news_df = news_df

    def display(self):
        """Render the header and one article card per column.

        The original repeated the same card body four times (once per
        column, including a leftover "# Your code here" placeholder); this
        renders them in a single loop.
        """
        st.header("Recent News")
        columns = st.columns(4)

        for i, col in enumerate(columns):
            with col, st.container():
                try:
                    st.image(self.news_df.iloc[i, 2], use_column_width=True)
                    st.subheader(self.news_df.iloc[i, 0])
                    st.write(f"Publish time: {self.news_df.iloc[i, 3]}")
                    st.markdown(
                        f"Read More",
                        unsafe_allow_html=True,
                    )
                except IndexError:
                    # Fewer than four articles available — leave column blank.
                    pass

        st.divider()
class PointProgressionSection:
    """Plots the points earned from recent form for the league's top five teams.

    Relies on the file's module-level imports: pandas as pd,
    plotly.graph_objects as go and streamlit as st.
    """

    def __init__(self, teams_df, standings_df):
        # teams_df col 1 holds each team's form string, e.g. "WWDLW".
        self.teams_df = teams_df
        # standings_df col 1 = points, col 3 = team name.
        self.standings_df = standings_df

    def calculate_points(self):
        """Return five single-element lists with each team's form points.

        'W' is worth 3 points, 'D' 1, and any other character (a loss) 0.
        The original accumulated these with an if/elif chain including a
        no-op `else: points += 0`.
        """
        scoring = {"W": 3, "D": 1}
        team_forms = [[] for _ in range(5)]

        for count in range(5):
            form = self.teams_df.iloc[count, 1]
            team_forms[count].append(sum(scoring.get(char, 0) for char in form))

        return team_forms

    def create_dataframe(self, team_forms):
        """Build a DataFrame with one column per team from the points lists."""
        headers = [str(self.standings_df.iloc[i, 3]) for i in range(5)]
        zipped = list(zip(*team_forms))  # Transpose the list of lists.
        return pd.DataFrame(zipped, columns=headers)

    def display(self):
        """Render the line chart of point progression for the top five teams."""
        team_forms = self.calculate_points()
        df = self.create_dataframe(team_forms)

        st.subheader("Point Progression throughout the Season")

        labels = [str(f"{self.standings_df.iloc[i, 3]} - {self.standings_df.iloc[i, 1]} points") for i in range(5)]
        colors = ["#1e90ff", "#ff4500", "#ffd700", "#228b22", "#000000"]

        fig = go.Figure()

        for i in range(5):
            fig.add_trace(go.Scatter(x=df.index, y=df.iloc[:, i], name=labels[i], line=dict(color=colors[i], width=2)))

        # Add point markers on top of the lines.
        fig.update_traces(mode="markers+lines", marker=dict(size=8, line=dict(width=2)))

        fig.update_layout(
            xaxis_title="Gameweek",
            yaxis_title="Points",
            legend_title="Team",
            legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
            height=600,
        )

        st.plotly_chart(fig, use_container_width=True)
import streamlit as st
import plotly.graph_objects as go


class PointSliderSection:
    """Interactive bar chart of points per team, filtered by a range slider."""

    def __init__(self, standings_df):
        self.standings_df = standings_df

    def display(self):
        """Render the slider, the summary line and the filtered bar chart."""
        st.subheader("Points per Team:")

        # Build the range slider from the current spread of points.
        points = self.standings_df["points"].tolist()
        points_selection = st.slider(
            "Select a Range of Points:", min_value=min(points), max_value=max(points), value=(min(points), max(points))
        )

        # One colour per possible team bar.
        colors = ["indigo"] * 20

        # Filter the standings so the chart follows the slider.
        mask = self.standings_df["points"].between(*points_selection)
        amount_of_teams = self.standings_df[mask].shape[0]

        df_grouped = self.standings_df[mask].reset_index()
        lowest_number = df_grouped["points"].min()
        st.markdown(f"Number of teams with {lowest_number} or more points: {amount_of_teams}")

        # Build the bar chart from the filtered rows.
        bar = go.Bar(
            x=df_grouped["team"],
            y=df_grouped["points"],
            marker_color=colors,
            text=df_grouped["points"],
            textposition="auto",
        )
        points_chart = go.Figure(data=[bar])

        # Tilt the x-axis labels and trim the chart margins.
        points_chart.update_layout(
            xaxis_tickangle=-35,
            autosize=False,
            margin=dict(
                l=0,  # left
                r=0,  # right
                b=0,  # bottom
                t=0,  # top
            ),
        )

        st.plotly_chart(points_chart, use_container_width=True)
import streamlit as st


class SquadSection:
    """Renders a team's squad as four position tables in two columns."""

    def __init__(self, squads_df):
        self.squads_df = squads_df
        # Closed set of Premier League teams the section can display.
        self.teams = (
            "Arsenal",
            "Aston Villa",
            "Bournemouth",
            "Brentford",
            "Brighton",
            "Burnley",
            "Chelsea",
            "Crystal Palace",
            "Everton",
            "Fulham",
            "Liverpool",
            "Luton",
            "Manchester City",
            "Manchester United",
            "Newcastle",
            "Nottingham Forest",
            "Sheffield Utd",
            "Tottenham",
            "West Ham",
            "Wolves",
        )

    def _render_position(self, team_name, position):
        """Render the roster table for one *position* of *team_name*.

        Extracted: the original duplicated this block for each column.
        """
        filtered_df = self.squads_df[
            (self.squads_df["team_name"] == team_name) & (self.squads_df["player_position"] == position)
        ]
        filtered_df = filtered_df.drop(columns=["team_id", "team_name", "player_id", "player_position"])

        st.write(f"**{position}s**")
        st.data_editor(
            filtered_df,
            column_config={
                "player_name": st.column_config.TextColumn("Player Name"),
                "player_photo": st.column_config.ImageColumn("Photo", width="small"),
            },
            hide_index=True,
            # Unique widget key per team/position table.
            key=f"{team_name}-{position}",
        )

    def display(self, team_name):
        """Goalkeepers/midfielders in the left column, defenders/attackers right."""
        (
            col1,
            col2,
        ) = st.columns(2)

        with col1:
            for position in ["Goalkeeper", "Midfielder"]:
                self._render_position(team_name, position)

        with col2:
            for position in ["Defender", "Attacker"]:
                self._render_position(team_name, position)
import streamlit as st
import plotly.express as px  # type: ignore
from google.cloud import secretmanager


def gcp_secret_rapid_api() -> str:
    """Fetch the Mapbox API key from GCP Secret Manager.

    NOTE(review): despite the name, this reads the `mapbox-api` secret,
    not a RapidAPI key — confirm before renaming.
    """
    client = secretmanager.SecretManagerServiceClient()
    secret_name = "projects/463690670206/secrets/mapbox-api/versions/1"
    response = client.access_secret_version(request={"name": secret_name})
    return response.payload.data.decode("UTF-8")


class StadiumMapSection:
    """Renders a Mapbox scatter map of Premier League stadium locations."""

    def __init__(self):
        # Register the Mapbox token with plotly express once per instance.
        self.mapbox_access_token = gcp_secret_rapid_api()
        px.set_mapbox_access_token(self.mapbox_access_token)

    def create_stadium_map(self, stadiums_df):
        """Build and render the scatter-mapbox chart for *stadiums_df*."""
        stadium_map = px.scatter_mapbox(
            stadiums_df,
            lat="latitude",
            lon="longitude",
            hover_name="stadium",
            hover_data="team",
        )

        # Constrain panning roughly to the British Isles / western Europe.
        stadium_map.update_layout(
            mapbox_style="light",
            margin={"r": 0, "t": 0, "l": 0, "b": 0},
            mapbox_bounds={"west": -17, "east": 17, "south": 45, "north": 60},
        )
        stadium_map.update_traces(marker=dict(size=8), marker_color="indigo")
        stadium_map.update_mapboxes(zoom=4)

        return st.plotly_chart(stadium_map, height=1000, use_container_width=True)

    def display(self, stadiums_df):
        """Section heading plus the stadium map; returns the rendered chart."""
        st.subheader("Location of Stadiums")
        return self.create_stadium_map(stadiums_df)
import streamlit as st
import altair as alt


class StockSection:
    """Renders the MANU stock price line chart for the previous trading day."""

    def __init__(self, stock_df):
        self.stock_df = stock_df
        self.line_chart = None

    def display(self):
        """Show the info banner, then either a warning or the price chart."""
        st.subheader("MANU - Stock Price")
        st.info(
            """
            **INFO**\n
            This tab shows a stock price chart for the ***previous*** trading day for **MANU** ticker.\n
            Currently, the chart price is shown with 30 minutes intervals. Still testing this tab and hope to move it to every 10 minutes.\n
            Since this shows the previous trading day's data, there will be no data displayed on Sunday and Monday, New York time.
            """
        )

        # Nothing to plot after a non-trading day.
        if self.stock_df.empty:
            st.warning("No data for today. Check back **after** the next trading day.")
            return

        # Normalise timestamps to US/Eastern: convert when already tz-aware,
        # otherwise localise the naive values.
        timestamps = self.stock_df["new_york_time"]
        if timestamps.dt.tz is not None:
            self.stock_df["new_york_time"] = timestamps.dt.tz_convert("US/Eastern")
        else:
            self.stock_df["new_york_time"] = timestamps.dt.tz_localize("US/Eastern")

        self.line_chart = (
            alt.Chart(self.stock_df)
            .mark_line()
            .encode(
                x=alt.X("new_york_time:T", title="Time"),
                y=alt.Y("price:Q", title="Price").scale(zero=False),
            )
        )

        st.altair_chart(self.line_chart, use_container_width=True)
generate_scorer_html(self, index): 9 | scorer = self.top_scorers_df.iloc[index] 10 | return [ 11 | f"", 12 | f"

{scorer.iloc[0]}

", 13 | f"

Goals: {scorer.iloc[1]}

", 14 | f"

Assists: {scorer.iloc[3]}

", 15 | f"

Team: {scorer.iloc[2]}

", 16 | f"

Nationality: {scorer.iloc[4]}

class TopTeamsSection:
    """Renders summary cards for the league's current top five teams.

    Relies on the file's module-level `import streamlit as st`.
    NOTE(review): the inline HTML markup of the original card snippets was
    stripped in the text this was recovered from; the wrapping below is
    reconstructed around the visible text — verify against the live app.
    """

    def __init__(self, teams_df):
        # teams_df rows are ordered by league rank; col 0 = logo URL, col 1 =
        # form string, cols 3-5 = clean sheets / penalties scored / missed.
        self.teams_df = teams_df

    @staticmethod
    def _ordinal(n: int) -> str:
        """Return *n* with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th…)."""
        if 11 <= n % 100 <= 13:
            suffix = "th"
        else:
            suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
        return f"{n}{suffix}"

    def generate_team_html(self, index):
        """Build the markdown snippets for the team at 0-based *index*.

        BUG FIX: the original hard-coded the suffix "st", rendering "2st",
        "3st", "4st" and "5st" for every team after the first.
        """
        team = self.teams_df.iloc[index]
        return [
            f"",
            f"{self._ordinal(index + 1)} / Form (Last 5): {team.iloc[1][-5:]}",
            f"Clean Sheets: {team.iloc[3]}",
            f"Penalties Scored: {team.iloc[4]}",
            f"Penalties Missed: {team.iloc[5]}",
        ]

    def display(self):
        """Render a five-column row, one card per top team."""
        with st.container():
            st.subheader("Top 5 Teams")
            columns = st.columns(5)

            for i, col in enumerate(columns):
                with col:
                    for item in self.generate_team_html(i):
                        st.markdown(item, unsafe_allow_html=True)

2 | 3 |

4 | 5 | # Data Pipelines 6 | 7 | This directory contains the ETL (Extract, Transform, Load) scripts and related files for the Premier League project. 8 | 9 | ## Overview 10 | 11 | The `etl` directory is responsible for extracting data from various sources, transforming it into a consistent format, and loading it into BigQuery, Firestore, and PostgreSQL. 12 | 13 | ## Data Pipelines Diagram 14 |
15 | data-pipeline 20 |
Diagram of the data pipelines in this project
21 |
22 | 23 | ## Data Sources 24 | * [Football API](https://rapidapi.com/api-sports/api/api-football) 25 | * [News API](https://newsapi.org) 26 | * [Financial Modeling Prep](https://site.financialmodelingprep.com/developer) 27 | * [MapBox](https://www.mapbox.com) 28 | * [YouTube API](https://developers.google.com/youtube/v3) -------------------------------------------------------------------------------- /etl/bigquery/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digitalghost-dev/premier-league/964156b1e7876fe6ced410c8a902ca30d3dd7cdf/etl/bigquery/__init__.py -------------------------------------------------------------------------------- /etl/bigquery/current_round.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | import requests # type: ignore 5 | 6 | import google.auth 7 | from google.cloud import secretmanager, bigquery 8 | from pandas import DataFrame 9 | 10 | PROJECT_ID = "cloud-data-infrastructure" 11 | os.environ["GCLOUD_PROJECT"] = PROJECT_ID 12 | credentials, project_id = google.auth.default() 13 | 14 | 15 | class DataRetrieval: 16 | def __init__(self, project_id): 17 | self.project_id = project_id 18 | 19 | def _get_rapid_api_key(self) -> str: 20 | client = secretmanager.SecretManagerServiceClient() 21 | name = f"projects/{self.project_id}/secrets/rapid-api/versions/1" 22 | response = client.access_secret_version(request={"name": name}) 23 | return response.payload.data.decode("UTF-8") 24 | 25 | def _call_api(self) -> str: 26 | payload = self._get_rapid_api_key() 27 | headers = { 28 | "X-RapidAPI-Key": payload, 29 | "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com", 30 | } 31 | url = "https://api-football-v1.p.rapidapi.com/v3/fixtures/rounds" 32 | querystring = {"league": "39", "season": "2023", "current": "true"} 33 | response = requests.get(url, headers=headers, params=querystring, timeout=10) 
34 | return response.json()["response"][0] 35 | 36 | def _call_bigquery(self) -> int: 37 | client = bigquery.Client() 38 | query = f""" 39 | SELECT CONCAT(season, " - ", MAX(round)) AS max_round 40 | FROM `{self.project_id}.premier_league_dataset.current_round` 41 | GROUP BY season 42 | LIMIT 1 43 | """ 44 | query_job = client.query(query) 45 | results = query_job.result() 46 | for row in results: 47 | bigquery_current_round = row.max_round 48 | return bigquery_current_round 49 | 50 | def retrieve_data(self) -> tuple[str, int]: 51 | """Retrieve data for the current round""" 52 | rapid_api_current_round = self._call_api() 53 | bigquery_current_round = self._call_bigquery() 54 | return rapid_api_current_round, bigquery_current_round 55 | 56 | 57 | rapid_api_current_round, bigquery_current_round = DataRetrieval(PROJECT_ID).retrieve_data() 58 | 59 | 60 | def load_current_round() -> None: 61 | if rapid_api_current_round == bigquery_current_round: 62 | print("Current round is already loaded!") 63 | else: 64 | print("Current round is not loaded!") 65 | 66 | def create_dataframe() -> DataFrame: 67 | # Spliting a string that looks like: "Regular Season - 12" 68 | regular_season = [rapid_api_current_round[:14]] 69 | round_number = [rapid_api_current_round[17:]] 70 | round_number_int = int(round_number[0]) 71 | 72 | data = {"season": regular_season, "round": round_number_int} 73 | 74 | # create a pandas dataframe from the dictionary 75 | df = pd.DataFrame(data, columns=["season", "round"]) 76 | 77 | return df, round_number_int 78 | 79 | def define_table_schema() -> list[dict[str, str]]: 80 | schema_definition = [ 81 | {"name": "season", "type": "STRING"}, 82 | {"name": "round", "type": "INTEGER"}, 83 | ] 84 | 85 | return schema_definition 86 | 87 | # Tranforming data and loading into the PostgreSQL database. 
88 | def send_dataframe_to_bigquery( 89 | current_round_dataframe: DataFrame, schema_definition: list[dict[str, str]] 90 | ) -> None: 91 | """This function sends the dataframe to BigQuery.""" 92 | current_round_dataframe, round_number_int = create_dataframe() 93 | 94 | current_round_dataframe.to_gbq( 95 | destination_table="premier_league_dataset.current_round", 96 | if_exists="append", 97 | table_schema=schema_definition, 98 | ) 99 | 100 | print(f"Current round: {round_number_int} loaded!") 101 | 102 | current_round_dataframe = create_dataframe() 103 | schema_definition = define_table_schema() 104 | send_dataframe_to_bigquery(current_round_dataframe, schema_definition) 105 | 106 | if __name__ != "__main__": 107 | load_current_round() 108 | -------------------------------------------------------------------------------- /etl/bigquery/highlights.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file pulls data from the YouTube API relating to the English Premier League 3 | highlights and loads it into a BigQuery table. 
4 | """ 5 | 6 | import googleapiclient.discovery 7 | from google.cloud import secretmanager 8 | from datetime import datetime, timedelta, timezone 9 | 10 | import pandas as pd 11 | from pandas import DataFrame 12 | 13 | 14 | def gcp_secret_rapid_api() -> str: 15 | """This function retrieves the Rapid API key from GCP Secret Manager""" 16 | 17 | client = secretmanager.SecretManagerServiceClient() 18 | name = "projects/463690670206/secrets/youtube-api/versions/1" 19 | response = client.access_secret_version(request={"name": name}) 20 | youtube_api_key = response.payload.data.decode("UTF-8") 21 | 22 | return youtube_api_key 23 | 24 | 25 | def call_api(part, channel_id, max_results, query, publishedAfter) -> list: 26 | """This function calls the API then returns a list with the YouTube data""" 27 | 28 | youtube_api_key = gcp_secret_rapid_api() 29 | 30 | # Initialize YouTube Data API v3 service 31 | youtube = googleapiclient.discovery.build( 32 | "youtube", "v3", developerKey=youtube_api_key 33 | ) 34 | 35 | search_response = ( 36 | youtube.search() 37 | .list( 38 | part=part, 39 | channelId=channel_id, 40 | maxResults=max_results, 41 | q=query, 42 | publishedAfter=publishedAfter, 43 | ) 44 | .execute() 45 | ) 46 | 47 | videos = search_response.get("items", []) 48 | 49 | return videos 50 | 51 | 52 | def create_dataframe(): 53 | """This function creates a dataframe from the API call""" 54 | 55 | current_date = datetime.now(timezone.utc) 56 | ten_days_ago = current_date - timedelta(days=10) 57 | published_date = ten_days_ago.strftime("%Y-%m-%dT00:00:00Z") 58 | videos = call_api( 59 | "snippet", 60 | "UCqZQlzSHbVJrwrn5XvzrzcA", 61 | 10, 62 | "PREMIER LEAGUE HIGHLIGHTS", 63 | published_date, 64 | ) 65 | 66 | video_list = [] 67 | for video in videos: 68 | video_sublist = [] 69 | 70 | video_sublist.append(str(video["id"]["videoId"])) 71 | video_sublist.append( 72 | str("https://www.youtube.com/watch?v=") + str(video["id"]["videoId"]) 73 | ) 74 | 
video_sublist.append(str(video["snippet"]["title"])) 75 | video_sublist.append(str(video["snippet"]["thumbnails"]["high"]["url"])) 76 | video_sublist.append(str(video["snippet"]["description"])) 77 | 78 | # Setting the publish time to a datetime object. 79 | publish_time_str = video["snippet"]["publishTime"] 80 | publish_time_datetime = pd.to_datetime(publish_time_str) 81 | video_sublist.append(publish_time_datetime) 82 | 83 | video_list.append(video_sublist) 84 | 85 | headers = [ 86 | "video_id", 87 | "video_url", 88 | "title", 89 | "thumbnail", 90 | "description", 91 | "publish_time", 92 | ] 93 | df = pd.DataFrame(video_list, columns=headers) 94 | 95 | return df 96 | 97 | 98 | def define_table_schema() -> list[dict[str, str]]: 99 | """This function defines the schema for the table in BigQuery""" 100 | 101 | schema_definition = [ 102 | {"name": "video_id", "type": "STRING"}, 103 | {"name": "video_url", "type": "STRING"}, 104 | {"name": "title", "type": "STRING"}, 105 | {"name": "thumbnail", "type": "STRING"}, 106 | {"name": "description", "type": "STRING"}, 107 | {"name": "publish_time", "type": "DATETIME"}, 108 | ] 109 | 110 | return schema_definition 111 | 112 | 113 | def send_dataframe_to_bigquery( 114 | standings_dataframe: DataFrame, schema_definition: list[dict[str, str]] 115 | ) -> None: 116 | """This function sends the dataframe to BigQuery""" 117 | 118 | highlights_dataframe.to_gbq( 119 | destination_table="premier_league_dataset.highlights", 120 | if_exists="replace", 121 | table_schema=schema_definition, 122 | ) 123 | 124 | print("Highlights table loaded!") 125 | 126 | 127 | if __name__ != "__main__": 128 | highlights_dataframe = create_dataframe() 129 | schema_definition = define_table_schema() 130 | send_dataframe_to_bigquery(highlights_dataframe, schema_definition) 131 | -------------------------------------------------------------------------------- /etl/bigquery/injuries.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | 3 | from datetime import datetime 4 | import pandas as pd 5 | import requests # type: ignore 6 | from google.cloud import bigquery, secretmanager 7 | from pandas import DataFrame 8 | 9 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure" 10 | 11 | STANDINGS_TABLE = "premier_league_dataset.standings" 12 | 13 | 14 | def gcp_secret_rapid_api() -> str: 15 | client = secretmanager.SecretManagerServiceClient() 16 | name = "projects/463690670206/secrets/rapid-api/versions/1" 17 | response = client.access_secret_version(request={"name": name}) 18 | rapid_api_key = response.payload.data.decode("UTF-8") 19 | 20 | return rapid_api_key 21 | 22 | 23 | # Calling the Standings table from BigQuery to get each team's id. 24 | def bigquery_call() -> DataFrame: 25 | bqclient = bigquery.Client() 26 | 27 | query_string = f""" 28 | SELECT * 29 | FROM {STANDINGS_TABLE} 30 | ORDER BY Rank 31 | """ 32 | 33 | bigquery_dataframe = ( 34 | bqclient.query(query_string) 35 | .result() 36 | .to_dataframe( 37 | create_bqstorage_client=True, 38 | ) 39 | ) 40 | 41 | return bigquery_dataframe 42 | 43 | 44 | def get_teams_with_injuries() -> list: 45 | rapid_api_key = gcp_secret_rapid_api() 46 | bigquery_dataframe = bigquery_call() 47 | 48 | id_list = [bigquery_dataframe.iloc[i, 0] for i in range(20)] 49 | 50 | headers = { 51 | "X-RapidAPI-Key": rapid_api_key, 52 | "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com", 53 | } 54 | 55 | url = "https://api-football-v1.p.rapidapi.com/v3/injuries" 56 | injuried_teams_list = [] 57 | 58 | for id in id_list: 59 | current_date = datetime.now() 60 | formatted_date = current_date.strftime("%Y-%m-%d") 61 | 62 | query = {"league": "39", "season": "2023", "team": id, "date": formatted_date} 63 | 64 | response = requests.get(url, headers=headers, params=query, timeout=10) 65 | json_res = response.json() 66 | 67 | if json_res["response"] == []: 68 | pass 69 | 
else: 70 | injuried_teams_list.append(id) 71 | 72 | return injuried_teams_list 73 | 74 | 75 | def call_api(): 76 | rapid_api_key = gcp_secret_rapid_api() 77 | injuried_teams_list = get_teams_with_injuries() 78 | 79 | headers = { 80 | "X-RapidAPI-Key": rapid_api_key, 81 | "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com", 82 | } 83 | 84 | url = "https://api-football-v1.p.rapidapi.com/v3/injuries" 85 | 86 | for id in injuried_teams_list: 87 | team_id_list = [] 88 | team_name_list = [] 89 | player_id_list = [] 90 | player_name_list = [] 91 | injury_type_list = [] 92 | injury_reason_list = [] 93 | date_list = [] 94 | 95 | current_date = datetime.now() 96 | formatted_date = current_date.strftime("%Y-%m-%d") 97 | 98 | query = {"league": "39", "season": "2023", "team": id, "date": formatted_date} 99 | 100 | response = requests.get(url, headers=headers, params=query, timeout=10) 101 | json_res = response.json() 102 | 103 | response_length = len(json_res["response"]) 104 | 105 | inner_count = 0 106 | while inner_count < response_length: 107 | team_id_list.append(int(json_res["response"][0]["team"]["id"])) 108 | team_name_list.append(str(json_res["response"][inner_count]["team"]["name"])) 109 | player_id_list.append(int(json_res["response"][inner_count]["player"]["id"])) 110 | player_name_list.append(str(json_res["response"][inner_count]["player"]["name"])) 111 | injury_type_list.append(str(json_res["response"][inner_count]["player"]["type"])) 112 | injury_reason_list.append(str(json_res["response"][inner_count]["player"]["reason"])) 113 | 114 | date_convert = datetime.strptime( 115 | json_res["response"][inner_count]["fixture"]["date"], "%Y-%m-%dT%H:%M:%S%z" 116 | ) 117 | date_list.append(date_convert.strftime("%Y-%m-%d")) 118 | 119 | inner_count += 1 120 | 121 | table_headers = [ 122 | "team_id", 123 | "team_name", 124 | "player_id", 125 | "player_name", 126 | "injury_type", 127 | "injury_reason", 128 | "injury_date", 129 | ] 130 | zipped = list( 131 | zip( 132 | 
team_id_list, 133 | team_name_list, 134 | player_id_list, 135 | player_name_list, 136 | injury_type_list, 137 | injury_reason_list, 138 | date_list, 139 | ) 140 | ) 141 | 142 | df = pd.DataFrame(zipped, columns=table_headers) 143 | 144 | schema_definition = [ 145 | {"name": "team_id", "type": "INTEGER"}, 146 | {"name": "team_name", "type": "STRING"}, 147 | {"name": "player_id", "type": "INTEGER"}, 148 | {"name": "player_name", "type": "STRING"}, 149 | {"name": "injury_type", "type": "STRING"}, 150 | {"name": "injury_reason", "type": "STRING"}, 151 | {"name": "injury_date", "type": "DATE"}, 152 | ] 153 | 154 | formatted_team_name = team_name_list[0].replace(" ", "_").lower() 155 | 156 | df.to_gbq( 157 | f"premier_league_injuries.{formatted_team_name}", 158 | project_id="cloud-data-infrastructure", 159 | if_exists="replace", 160 | table_schema=schema_definition, 161 | ) 162 | 163 | print(f"{team_name_list[0]}'s injuries table loaded!") 164 | 165 | 166 | if __name__ != "__main__": 167 | call_api() 168 | -------------------------------------------------------------------------------- /etl/bigquery/news.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | from datetime import timedelta as td 4 | 5 | import requests # type: ignore 6 | from google.cloud import secretmanager 7 | from pandas import DataFrame 8 | 9 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure" 10 | 11 | 12 | def gcp_secret_news_api() -> str: 13 | client = secretmanager.SecretManagerServiceClient() 14 | name = "projects/463690670206/secrets/news-api/versions/1" 15 | response = client.access_secret_version(request={"name": name}) 16 | news_api_key = response.payload.data.decode("UTF-8") 17 | 18 | return news_api_key 19 | 20 | 21 | def call_api() -> tuple[list[str], list[str], list[str], list[str]]: 22 | news_api_key = gcp_secret_news_api() 23 | 24 | # Getting yesterday's date. 
25 | yesteryday = datetime.now() - td(days=1) 26 | yesteryday_str = yesteryday.strftime("%Y-%m-%d") 27 | 28 | url = ( 29 | "https://newsapi.org/v2/everything?" 30 | "q=Premier League&" 31 | f"from={yesteryday_str}&" 32 | "language=en&" 33 | "domains=skysports.com,theguardian.com,90min.com&" 34 | "sortBy=popularity&" 35 | f"apiKey={news_api_key}" 36 | ) 37 | 38 | response = requests.request("GET", url, timeout=20) 39 | json_res = response.json() 40 | 41 | title_list = [] 42 | url_list = [] 43 | url_to_image_list = [] 44 | published_at_list = [] 45 | 46 | for article in json_res["articles"]: 47 | title_list.append(str(article["title"])) 48 | url_list.append(str(article["url"])) 49 | url_to_image_list.append(str(article["urlToImage"])) 50 | 51 | published_at = datetime.strptime(article["publishedAt"], "%Y-%m-%dT%H:%M:%SZ") 52 | published_at_list.append(published_at.strftime("%H:%M:%S")) 53 | 54 | return title_list, url_list, url_to_image_list, published_at_list 55 | 56 | 57 | def create_dataframe() -> DataFrame: 58 | title_list, url_list, url_to_image_list, published_at_list = call_api() 59 | 60 | df = DataFrame( 61 | { 62 | "title": title_list, 63 | "url": url_list, 64 | "url_to_image": url_to_image_list, 65 | "published_at": published_at_list, 66 | } 67 | ).sort_values(by="published_at", ascending=False) 68 | 69 | return df 70 | 71 | 72 | def define_table_schema() -> list[dict[str, str]]: 73 | schema_definition = [ 74 | {"name": "title", "type": "STRING"}, 75 | {"name": "url", "type": "STRING"}, 76 | {"name": "url_to_image", "type": "STRING"}, 77 | {"name": "published_at", "type": "STRING"}, 78 | ] 79 | 80 | return schema_definition 81 | 82 | 83 | def send_dataframe_to_bigquery( 84 | standings_dataframe: DataFrame, schema_definition: list[dict[str, str]] 85 | ) -> None: 86 | standings_dataframe.to_gbq( 87 | destination_table="premier_league_dataset.news", 88 | if_exists="replace", 89 | table_schema=schema_definition, 90 | ) 91 | 92 | print("News table loaded!") 93 | 
94 | 95 | if __name__ != "__main__": 96 | news_dataframe = create_dataframe() 97 | schema_definition = define_table_schema() 98 | send_dataframe_to_bigquery(news_dataframe, schema_definition) 99 | -------------------------------------------------------------------------------- /etl/bigquery/squads.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | import requests # type: ignore 5 | from google.cloud import bigquery, secretmanager 6 | from pandas import DataFrame 7 | 8 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure" 9 | 10 | STANDINGS_TABLE = "premier_league_dataset.standings" 11 | 12 | 13 | def gcp_secret_rapid_api() -> str: 14 | client = secretmanager.SecretManagerServiceClient() 15 | name = "projects/463690670206/secrets/rapid-api/versions/1" 16 | response = client.access_secret_version(request={"name": name}) 17 | rapid_api_key = response.payload.data.decode("UTF-8") 18 | 19 | return rapid_api_key 20 | 21 | 22 | # Calling the Standings table from BigQuery to get each team's id. 23 | def bigquery_call() -> DataFrame: 24 | bqclient = bigquery.Client() 25 | 26 | query_string = f""" 27 | SELECT team_id 28 | FROM {STANDINGS_TABLE} 29 | ORDER BY Rank 30 | """ 31 | 32 | bigquery_dataframe = ( 33 | bqclient.query(query_string) 34 | .result() 35 | .to_dataframe( 36 | create_bqstorage_client=True, 37 | ) 38 | ) 39 | 40 | return bigquery_dataframe 41 | 42 | 43 | def call_api() -> None: 44 | rapid_api_key = gcp_secret_rapid_api() 45 | bigquery_dataframe = bigquery_call() 46 | 47 | # Iterate through bigquery_dataframe to get the team's id and create a list using list comprehension. 
48 | id_list = [bigquery_dataframe.iloc[i, 0] for i in range(20)] 49 | 50 | headers = { 51 | "X-RapidAPI-Key": rapid_api_key, 52 | "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com", 53 | } 54 | 55 | url = "https://api-football-v1.p.rapidapi.com/v3/players/squads" 56 | 57 | outer_count = 0 58 | while outer_count < 20: 59 | team_id_list = [] 60 | team_name_list = [] 61 | player_id_list = [] 62 | player_photo_list = [] 63 | player_name_list = [] 64 | player_age_list = [] 65 | player_number_list = [] 66 | player_position_list = [] 67 | 68 | query = {"team": id_list[outer_count]} 69 | 70 | response = requests.get(url, headers=headers, params=query, timeout=10) 71 | json_res = response.json() 72 | 73 | players_length = len(response.json()["response"][0]["players"]) 74 | 75 | inner_count = 0 76 | while inner_count < players_length: 77 | team_id_list.append(int(json_res["response"][0]["team"]["id"])) 78 | team_name_list.append(str(json_res["response"][0]["team"]["name"])) 79 | player_id_list.append(int(json_res["response"][0]["players"][inner_count]["id"])) 80 | player_photo_list.append(str(json_res["response"][0]["players"][inner_count]["photo"])) 81 | player_name_list.append(str(json_res["response"][0]["players"][inner_count]["name"])) 82 | 83 | # The API is missing some player's age and number. Adding try/except blocks. 
84 | try: 85 | player_age = json_res["response"][0]["players"][inner_count]["age"] 86 | if player_age is not None: 87 | player_age_list.append(int(player_age)) 88 | else: 89 | player_age_list.append(None) # type: ignore 90 | except (ValueError, TypeError): 91 | player_age_list.append(None) # type: ignore 92 | 93 | try: 94 | player_number = json_res["response"][0]["players"][inner_count]["number"] 95 | if player_number is not None: 96 | player_number_list.append(int(player_number)) 97 | else: 98 | player_number_list.append(None) # type: ignore 99 | except (ValueError, TypeError): 100 | player_number_list.append(None) # type: ignore 101 | 102 | player_position_list.append(str(json_res["response"][0]["players"][inner_count]["position"])) 103 | 104 | inner_count += 1 105 | 106 | table_headers = [ 107 | "team_id", 108 | "team_name", 109 | "player_id", 110 | "player_photo", 111 | "player_name", 112 | "player_age", 113 | "player_number", 114 | "player_position", 115 | ] 116 | zipped = list( 117 | zip( 118 | team_id_list, 119 | team_name_list, 120 | player_id_list, 121 | player_photo_list, 122 | player_name_list, 123 | player_age_list, 124 | player_number_list, 125 | player_position_list, 126 | ) 127 | ) 128 | 129 | df = pd.DataFrame(zipped, columns=table_headers) 130 | 131 | schema_definition = [ 132 | {"name": "team_id", "type": "INTEGER"}, 133 | {"name": "team_name", "type": "STRING"}, 134 | {"name": "player_id", "type": "INTEGER"}, 135 | {"name": "player_photo", "type": "STRING"}, 136 | {"name": "player_name", "type": "STRING"}, 137 | {"name": "player_age", "type": "INTEGER"}, 138 | {"name": "player_number", "type": "INTEGER"}, 139 | {"name": "player_position", "type": "STRING"}, 140 | ] 141 | 142 | formmated_team_name = team_name_list[0].replace(" ", "_").lower() 143 | 144 | df.to_gbq( 145 | destination_table=f"premier_league_squads.{formmated_team_name}", 146 | if_exists="replace", 147 | table_schema=schema_definition, 148 | ) 149 | 150 | 
print(f"{team_name_list[0]}'s squad table loaded!") 151 | 152 | outer_count += 1 153 | 154 | 155 | if __name__ != "__main__": 156 | call_api() 157 | -------------------------------------------------------------------------------- /etl/bigquery/stadiums.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file pulls data from an API relating to the English Premier League 3 | stadium location data and loads it into a PostgreSQL database. 4 | """ 5 | 6 | import os 7 | 8 | # Standard libraries 9 | from typing import Dict, Optional 10 | 11 | import pandas as pd 12 | import requests # type: ignore 13 | 14 | # Importing needed libraries. 15 | from google.cloud import secretmanager 16 | from pandas import DataFrame 17 | from sqlalchemy import create_engine # type: ignore 18 | from sqlalchemy.types import DECIMAL, String # type: ignore 19 | 20 | # Settings the project environment. 21 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure" 22 | 23 | 24 | def gcp_secret_rapid_api(): 25 | """Fetching RapidAPI key from Secret Manager""" 26 | 27 | client = secretmanager.SecretManagerServiceClient() 28 | name = "projects/463690670206/secrets/go-api/versions/1" 29 | response = client.access_secret_version(request={"name": name}) 30 | go_api_key = response.payload.data.decode("UTF-8") 31 | 32 | return go_api_key 33 | 34 | 35 | def gcp_secret_database_uri(): 36 | client = secretmanager.SecretManagerServiceClient() 37 | name = "projects/463690670206/secrets/premier-league-database-connection-uri/versions/3" 38 | response = client.access_secret_version(request={"name": name}) 39 | database_uri = response.payload.data.decode("UTF-8") 40 | 41 | return database_uri 42 | 43 | 44 | def call_api(): 45 | """Calling the API then filling in the empty lists""" 46 | 47 | go_api_key = gcp_secret_rapid_api() 48 | 49 | # Building GET request to retrieve data. 
50 | response = requests.request("GET", go_api_key, timeout=20) 51 | json_res = response.json() 52 | 53 | # Empty lists that will be filled and then used to create a dataframe. 54 | team_list = [] 55 | stadium_list = [] 56 | lat_list = [] 57 | lon_list = [] 58 | capacity_list = [] 59 | year_opened = [] 60 | 61 | count = 0 62 | while count < 20: 63 | # Retrieving team name. 64 | team_list.append(str(json_res[count]["team"])) 65 | 66 | # Retrieving stadium name. 67 | stadium_list.append(str(json_res[count]["stadium"])) 68 | 69 | # Retrieving stadium's latitude. 70 | lat_list.append(float(json_res[count]["latitude"])) 71 | 72 | # Retrieving stadium's longitude. 73 | lon_list.append(float(json_res[count]["longitude"])) 74 | 75 | # Retrieving stadium's capacity. 76 | capacity_list.append(str(json_res[count]["capacity"])) 77 | 78 | # Retrieving stadium's year opened. 79 | year_opened.append(str(json_res[count]["year_opened"])) 80 | 81 | count += 1 82 | 83 | return team_list, stadium_list, lat_list, lon_list, capacity_list, year_opened 84 | 85 | 86 | def create_dataframe(): 87 | """This function creates a datafreame from lists created in the last function: call_api()""" 88 | 89 | team_list, stadium_list, lat_list, lon_list, capacity_list, year_opened = call_api() 90 | 91 | # Setting the headers then zipping the lists to create a dataframe. 
92 | headers = ["team", "stadium", "latitude", "longitude", "capacity", "year_opened"] 93 | zipped = list( 94 | zip(team_list, stadium_list, lat_list, lon_list, capacity_list, year_opened) 95 | ) 96 | 97 | df = pd.DataFrame(zipped, columns=headers) 98 | 99 | return df 100 | 101 | 102 | def define_table_schema() -> Dict[str, type]: 103 | schema_definition = { 104 | "team": String(64), 105 | "stadium": String(64), 106 | "latitude": DECIMAL(8, 6), 107 | "longitude": DECIMAL(8, 6), 108 | "capacity": String(10), 109 | "year_opened": String(4), 110 | } 111 | 112 | return schema_definition 113 | 114 | 115 | def send_dataframe_to_postgresql( 116 | database_uri: str, 117 | schema_name: str, 118 | table_name: str, 119 | df: DataFrame, 120 | schema_definition: Optional[Dict[str, type]] = None, 121 | ): 122 | """Sending dataframe to PostgreSQL. 123 | 124 | Args: 125 | database_uri (str): The URI to connect to the PostgreSQL database. 126 | schema (str): The schema name in which the table should be created. 127 | table_name (str): The name of the table to be created. 128 | df (DataFrame): The DataFrame containing the data to be inserted. 129 | schema_definition (Dict[str, type], optional): A dictionary defining the table schema with column names 130 | as keys and their corresponding SQLAlchemy data types. 131 | Defaults to None. If None, the function will use the schema 132 | from the define_table_schema() function. 133 | 134 | Raises: 135 | ValueError: If the DataFrame is empty or schema_definition is not a valid dictionary. 
136 | """ 137 | 138 | if df.empty: 139 | raise ValueError("DataFrame is empty.") 140 | 141 | if schema_definition is None: 142 | schema_definition = define_table_schema() 143 | 144 | if not isinstance(schema_definition, dict): 145 | raise ValueError("schema_definition must be a dictionary.") 146 | 147 | engine = create_engine(database_uri) 148 | df.to_sql( 149 | table_name, 150 | con=engine, 151 | schema=schema_name, 152 | if_exists="replace", 153 | index=False, 154 | dtype=schema_definition, 155 | ) 156 | 157 | 158 | if __name__ != "__main__": 159 | database_uri = gcp_secret_database_uri() 160 | schema_name = "premier-league-schema" 161 | table_name = "stadiums" 162 | df = create_dataframe() 163 | schema_definition = define_table_schema() 164 | 165 | send_dataframe_to_postgresql(database_uri, schema_name, table_name, df) 166 | print(f"Data loaded into {table_name}!") 167 | -------------------------------------------------------------------------------- /etl/bigquery/standings.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import pandas as pd 5 | import requests # type: ignore 6 | from google.cloud import secretmanager 7 | from pandas import DataFrame 8 | 9 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure" 10 | 11 | 12 | def gcp_secret_rapid_api() -> str: 13 | client = secretmanager.SecretManagerServiceClient() 14 | name = "projects/463690670206/secrets/rapid-api/versions/1" 15 | response = client.access_secret_version(request={"name": name}) 16 | rapid_api_key = response.payload.data.decode("UTF-8") 17 | 18 | return rapid_api_key 19 | 20 | 21 | def call_api() -> ( 22 | tuple[ 23 | list[int], 24 | list[int], 25 | list[str], 26 | list[int], 27 | list[int], 28 | list[int], 29 | list[int], 30 | list[str], 31 | list[int], 32 | list[int], 33 | list[int], 34 | list[int], 35 | ] 36 | ): 37 | payload = gcp_secret_rapid_api() 38 | 39 | headers = { 40 | "X-RapidAPI-Key": payload, 41 | 
"X-RapidAPI-Host": "api-football-v1.p.rapidapi.com", 42 | } 43 | 44 | url = "https://api-football-v1.p.rapidapi.com/v3/standings" 45 | 46 | query = {"season": "2023", "league": "39"} 47 | response = requests.get(url, headers=headers, params=query, timeout=10) 48 | json_res = response.json() 49 | 50 | team_id_list = [] 51 | rank_list = [] 52 | team_list = [] 53 | games_played = [] 54 | wins_list = [] 55 | draws_list = [] 56 | loses_list = [] 57 | form_list = [] 58 | points_list = [] 59 | goals_for = [] 60 | goals_against = [] 61 | goals_diff = [] 62 | 63 | count = 0 64 | while count < 20: 65 | team_id_list.append(int(json_res["response"][0]["league"]["standings"][0][count]["team"]["id"])) 66 | 67 | rank_list.append(int(json_res["response"][0]["league"]["standings"][0][count]["rank"])) 68 | 69 | team_list.append( 70 | str(json.dumps(json_res["response"][0]["league"]["standings"][0][count]["team"]["name"])).strip('"') 71 | ) 72 | 73 | games_played.append(int(json_res["response"][0]["league"]["standings"][0][count]["all"]["played"])) 74 | 75 | wins_list.append(int(json_res["response"][0]["league"]["standings"][0][count]["all"]["win"])) 76 | 77 | draws_list.append(int(json_res["response"][0]["league"]["standings"][0][count]["all"]["draw"])) 78 | 79 | loses_list.append(int(json_res["response"][0]["league"]["standings"][0][count]["all"]["lose"])) 80 | 81 | form_list.append(str(json.dumps(json_res["response"][0]["league"]["standings"][0][count]["form"])).strip('"')) 82 | 83 | points_list.append(int(json_res["response"][0]["league"]["standings"][0][count]["points"])) 84 | 85 | goals_for.append(int(json_res["response"][0]["league"]["standings"][0][count]["all"]["goals"]["for"])) 86 | 87 | goals_against.append(int(json_res["response"][0]["league"]["standings"][0][count]["all"]["goals"]["against"])) 88 | 89 | goals_diff.append(int(json_res["response"][0]["league"]["standings"][0][count]["goalsDiff"])) 90 | 91 | count += 1 92 | 93 | return ( 94 | team_id_list, 95 | rank_list, 
96 | team_list, 97 | games_played, 98 | wins_list, 99 | draws_list, 100 | loses_list, 101 | form_list, 102 | points_list, 103 | goals_for, 104 | goals_against, 105 | goals_diff, 106 | ) 107 | 108 | 109 | def create_dataframe() -> DataFrame: 110 | ( 111 | team_id_list, 112 | rank_list, 113 | team_list, 114 | games_played, 115 | wins_list, 116 | draws_list, 117 | loses_list, 118 | form_list, 119 | points_list, 120 | goals_for, 121 | goals_against, 122 | goals_diff, 123 | ) = call_api() 124 | 125 | headers = [ 126 | "team_id", 127 | "rank", 128 | "team", 129 | "games_played", 130 | "wins", 131 | "draws", 132 | "loses", 133 | "recent_form", 134 | "points", 135 | "goals_for", 136 | "goals_against", 137 | "goal_difference", 138 | ] 139 | zipped = list( 140 | zip( 141 | team_id_list, 142 | rank_list, 143 | team_list, 144 | games_played, 145 | wins_list, 146 | draws_list, 147 | loses_list, 148 | form_list, 149 | points_list, 150 | goals_for, 151 | goals_against, 152 | goals_diff, 153 | ) 154 | ) 155 | 156 | df = pd.DataFrame(zipped, columns=headers) 157 | 158 | return df 159 | 160 | 161 | def define_table_schema() -> list[dict[str, str]]: 162 | schema_definition = [ 163 | {"name": "team_id", "type": "INTEGER"}, 164 | {"name": "rank", "type": "INTEGER"}, 165 | {"name": "team", "type": "STRING"}, 166 | {"name": "games_played", "type": "INTEGER"}, 167 | {"name": "wins", "type": "INTEGER"}, 168 | {"name": "draws", "type": "INTEGER"}, 169 | {"name": "loses", "type": "INTEGER"}, 170 | {"name": "recent_form", "type": "STRING"}, 171 | {"name": "points", "type": "INTEGER"}, 172 | {"name": "goals_for", "type": "INTEGER"}, 173 | {"name": "goals_against", "type": "INTEGER"}, 174 | {"name": "goal_difference", "type": "INTEGER"}, 175 | ] 176 | 177 | return schema_definition 178 | 179 | 180 | def send_dataframe_to_bigquery(standings_dataframe: DataFrame, schema_definition: list[dict[str, str]]) -> None: 181 | standings_dataframe.to_gbq( 182 | 
destination_table="premier_league_dataset.standings", 183 | if_exists="replace", 184 | table_schema=schema_definition, 185 | ) 186 | 187 | print("Standings table loaded!") 188 | 189 | 190 | if __name__ != "__main__": 191 | standings_dataframe = create_dataframe() 192 | schema_definition = define_table_schema() 193 | send_dataframe_to_bigquery(standings_dataframe, schema_definition) 194 | -------------------------------------------------------------------------------- /etl/bigquery/teams.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | import requests # type: ignore 5 | from google.cloud import bigquery, secretmanager 6 | from pandas import DataFrame 7 | 8 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure" 9 | 10 | STANDINGS_TABLE = "premier_league_dataset.standings" 11 | TEAMS_TABLE = "premier_league_dataset.teams" 12 | 13 | 14 | def gcp_secret_rapid_api() -> str: 15 | client = secretmanager.SecretManagerServiceClient() 16 | name = "projects/463690670206/secrets/rapid-api/versions/1" 17 | response = client.access_secret_version(request={"name": name}) 18 | rapid_api_key = response.payload.data.decode("UTF-8") 19 | 20 | return rapid_api_key 21 | 22 | 23 | # Calling the Standings table from BigQuery to get each team's id. 
24 | def bigquery_call() -> DataFrame: 25 | bqclient = bigquery.Client() 26 | 27 | query_string = f""" 28 | SELECT * 29 | FROM {STANDINGS_TABLE} 30 | ORDER BY Rank 31 | """ 32 | 33 | bigquery_dataframe = ( 34 | bqclient.query(query_string) 35 | .result() 36 | .to_dataframe( 37 | create_bqstorage_client=True, 38 | ) 39 | ) 40 | 41 | return bigquery_dataframe 42 | 43 | 44 | def call_api() -> ( 45 | tuple[ 46 | list[int], 47 | list[str], 48 | list[str], 49 | list[str], 50 | list[int], 51 | list[int], 52 | list[int], 53 | list[float], 54 | list[int], 55 | ] 56 | ): 57 | rapid_api_key = gcp_secret_rapid_api() 58 | bigquery_dataframe = bigquery_call() 59 | 60 | # Iterate through bigquery_dataframe to get the team's id and create a list using list comprehension. 61 | id_list = [bigquery_dataframe.iloc[i, 0] for i in range(20)] 62 | 63 | headers = { 64 | "X-RapidAPI-Key": rapid_api_key, 65 | "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com", 66 | } 67 | 68 | url = "https://api-football-v1.p.rapidapi.com/v3/teams/statistics" 69 | 70 | team_id_list = [] 71 | team_list = [] 72 | logo_list = [] 73 | form_list = [] 74 | clean_sheets_list = [] 75 | penalty_scored_list = [] 76 | penalty_missed_list = [] 77 | average_goals_list = [] 78 | win_streak_list = [] 79 | 80 | count = 0 81 | while count < 20: 82 | query = {"league": "39", "season": "2023", "team": id_list[count]} 83 | response = requests.get(url, headers=headers, params=query, timeout=10) 84 | json_res = response.json() 85 | 86 | team_id_list.append(int(json_res["response"]["team"]["id"])) 87 | 88 | team_list.append(str(json_res["response"]["team"]["name"])) 89 | 90 | logo_list.append(str(json_res["response"]["team"]["logo"])) 91 | 92 | form_list.append(str(json_res["response"]["form"])) 93 | 94 | clean_sheets_list.append(int(json_res["response"]["clean_sheet"]["total"])) 95 | 96 | penalty_scored_list.append(int(json_res["response"]["penalty"]["scored"]["total"])) 97 | 98 | 
penalty_missed_list.append(int(json_res["response"]["penalty"]["missed"]["total"])) 99 | 100 | average_goals_list.append(float(json_res["response"]["goals"]["for"]["average"]["total"])) 101 | 102 | win_streak_list.append(int(json_res["response"]["biggest"]["streak"]["wins"])) 103 | 104 | count += 1 105 | 106 | return ( 107 | team_id_list, 108 | team_list, 109 | logo_list, 110 | form_list, 111 | clean_sheets_list, 112 | penalty_scored_list, 113 | penalty_missed_list, 114 | average_goals_list, 115 | win_streak_list, 116 | ) 117 | 118 | 119 | def create_dataframe() -> DataFrame: 120 | ( 121 | team_id_list, 122 | team_list, 123 | logo_list, 124 | form_list, 125 | clean_sheets_list, 126 | penalty_scored_list, 127 | penalty_missed_list, 128 | average_goals_list, 129 | win_streak_list, 130 | ) = call_api() 131 | 132 | headers = [ 133 | "team_id", 134 | "team", 135 | "logo", 136 | "form", 137 | "clean_sheets", 138 | "penalties_scored", 139 | "penalties_missed", 140 | "average_goals", 141 | "win_streak", 142 | ] 143 | zipped = list( 144 | zip( 145 | team_id_list, 146 | team_list, 147 | logo_list, 148 | form_list, 149 | clean_sheets_list, 150 | penalty_scored_list, 151 | penalty_missed_list, 152 | average_goals_list, 153 | win_streak_list, 154 | ) 155 | ) 156 | 157 | df = pd.DataFrame(zipped, columns=headers) 158 | 159 | return df 160 | 161 | 162 | def define_table_schema() -> list[dict[str, str]]: 163 | schema_definition = [ 164 | {"name": "team_id", "type": "INTEGER"}, 165 | {"name": "team", "type": "STRING"}, 166 | {"name": "logo", "type": "STRING"}, 167 | {"name": "form", "type": "STRING"}, 168 | {"name": "clean_sheets", "type": "INTEGER"}, 169 | {"name": "penalties_scored", "type": "INTEGER"}, 170 | {"name": "penalties_missed", "type": "INTEGER"}, 171 | {"name": "average_goals", "type": "FLOAT"}, 172 | {"name": "win_streak", "type": "INTEGER"}, 173 | ] 174 | 175 | return schema_definition 176 | 177 | 178 | def send_dataframe_to_bigquery(standings_dataframe: DataFrame, 
schema_definition: list[dict[str, str]]) -> None: 179 | teams_dataframe.to_gbq( 180 | destination_table="premier_league_dataset.teams", 181 | if_exists="replace", 182 | table_schema=schema_definition, 183 | ) 184 | 185 | print("Teams table loaded!") 186 | 187 | 188 | if __name__ != "__main__": 189 | teams_dataframe = create_dataframe() 190 | schema_definition = define_table_schema() 191 | send_dataframe_to_bigquery(teams_dataframe, schema_definition) 192 | -------------------------------------------------------------------------------- /etl/bigquery/top_scorers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import pandas as pd 5 | import requests # type: ignore 6 | from google.cloud import secretmanager 7 | from pandas import DataFrame 8 | 9 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure" 10 | 11 | 12 | def gcp_secret_rapid_api() -> str: 13 | client = secretmanager.SecretManagerServiceClient() 14 | name = "projects/463690670206/secrets/rapid-api/versions/1" 15 | response = client.access_secret_version(request={"name": name}) 16 | rapid_api_key = response.payload.data.decode("UTF-8") 17 | 18 | return rapid_api_key 19 | 20 | 21 | def call_api() -> tuple[list[str], list[int], list[str], list[int], list[str], list[str]]: 22 | rapid_api_key = gcp_secret_rapid_api() 23 | headers = { 24 | "X-RapidAPI-Key": rapid_api_key, 25 | "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com", 26 | } 27 | 28 | url = "https://api-football-v1.p.rapidapi.com/v3/players/topscorers" 29 | 30 | query = {"league": "39", "season": "2023"} 31 | response = requests.get(url, headers=headers, params=query, timeout=10) 32 | json_res = response.json() 33 | 34 | full_name_list = [] 35 | goals_list = [] 36 | assists_list = [] 37 | team_list = [] 38 | nationality_list = [] 39 | photo_list = [] 40 | 41 | count = 0 42 | while count < 5: 43 | # Retrieving player's first and last name then combining for full name. 
44 | first_name = ( 45 | str( 46 | json.dumps( 47 | json_res["response"][count]["player"]["firstname"], 48 | ensure_ascii=False, 49 | ) 50 | ) 51 | ).strip('"') 52 | last_name = ( 53 | str( 54 | json.dumps( 55 | json_res["response"][count]["player"]["lastname"], 56 | ensure_ascii=False, 57 | ) 58 | ) 59 | ).strip('"') 60 | 61 | full_name = first_name + " " + last_name 62 | 63 | full_name_list.append(full_name) 64 | 65 | goals_list.append(int(json_res["response"][count]["statistics"][0]["goals"]["total"])) 66 | 67 | try: 68 | assists = json_res["response"][count]["statistics"][0]["goals"]["assists"] 69 | if assists is not None: 70 | assists_list.append(int(assists)) 71 | else: 72 | assists_list.append(None) # type: ignore 73 | except (ValueError, TypeError): 74 | assists_list.append(0) 75 | 76 | team_list.append(str(json_res["response"][count]["statistics"][0]["team"]["name"]).strip('"')) 77 | 78 | nationality_list.append(str(json_res["response"][count]["player"]["nationality"]).strip('"')) 79 | 80 | photo_list.append(str(json_res["response"][count]["player"]["photo"]).strip('"')) 81 | 82 | count += 1 83 | 84 | return ( 85 | full_name_list, 86 | goals_list, 87 | team_list, 88 | assists_list, 89 | nationality_list, 90 | photo_list, 91 | ) 92 | 93 | 94 | def create_dataframe() -> DataFrame: 95 | ( 96 | full_name_list, 97 | goals_list, 98 | team_list, 99 | assists_list, 100 | nationality_list, 101 | photo_list, 102 | ) = call_api() 103 | 104 | headers = ["name", "goals", "team", "assists", "nationality", "photo"] 105 | zipped = list( 106 | zip( 107 | full_name_list, 108 | goals_list, 109 | team_list, 110 | assists_list, 111 | nationality_list, 112 | photo_list, 113 | ) 114 | ) 115 | 116 | df = pd.DataFrame(zipped, columns=headers) 117 | 118 | return df 119 | 120 | 121 | def define_table_schema() -> list[dict[str, str]]: 122 | schema_definition = [ 123 | {"name": "name", "type": "STRING"}, 124 | {"name": "goals", "type": "INTEGER"}, 125 | {"name": "team", "type": 
"STRING"}, 126 | {"name": "assists", "type": "INTEGER"}, 127 | {"name": "nationality", "type": "STRING"}, 128 | {"name": "photo", "type": "STRING"}, 129 | ] 130 | 131 | return schema_definition 132 | 133 | 134 | def send_dataframe_to_bigquery(standings_dataframe: DataFrame, schema_definition: list[dict[str, str]]) -> None: 135 | top_scorers_dataframe.to_gbq( 136 | destination_table="premier_league_dataset.top_scorers", 137 | if_exists="replace", 138 | table_schema=schema_definition, 139 | ) 140 | 141 | print("Top Scorers table loaded!") 142 | 143 | 144 | if __name__ != "__main__": 145 | top_scorers_dataframe = create_dataframe() 146 | schema_definition = define_table_schema() 147 | send_dataframe_to_bigquery(top_scorers_dataframe, schema_definition) 148 | -------------------------------------------------------------------------------- /etl/cloud_functions/standings_transfer.py: -------------------------------------------------------------------------------- 1 | from google.cloud import bigquery 2 | import pandas as pd 3 | import time 4 | 5 | client = bigquery.Client() 6 | bucket_name = "premier_league_bucket" 7 | project = "cloud-data-infrastructure" 8 | dataset_id = "premier_league_dataset" 9 | table_id = "standings" 10 | 11 | 12 | def transfer(request) -> str: 13 | destination_uri = f"gs://{bucket_name}/standings.csv" 14 | dataset_ref = bigquery.DatasetReference(project, dataset_id) 15 | table_ref = dataset_ref.table(table_id) 16 | 17 | extract_job = client.extract_table( 18 | table_ref, 19 | destination_uri, 20 | location="US", 21 | ) 22 | extract_job.result() 23 | 24 | print(f"Exported {project}.{dataset_id}.{table_id} to {destination_uri}") 25 | 26 | time.sleep(5) 27 | 28 | df = pd.read_csv("https://storage.googleapis.com/premier_league_bucket/standings.csv") 29 | sorted_df = df.sort_values(by=["rank"], ascending=True) 30 | removed_columns = sorted_df.drop(columns=["team_id"]) 31 | removed_columns.to_csv(destination_uri, index=False) 32 | 33 | return "OK" 34 | 
-------------------------------------------------------------------------------- /etl/cloud_functions/top_scorers_transfer.py: -------------------------------------------------------------------------------- 1 | from google.cloud import bigquery 2 | import pandas as pd 3 | import time 4 | 5 | client = bigquery.Client() 6 | bucket_name = "premier_league_bucket" 7 | project = "cloud-data-infrastructure" 8 | dataset_id = "premier_league_dataset" 9 | table_id = "top_scorers" 10 | 11 | 12 | def transfer(request) -> str: 13 | destination_uri = f"gs://{bucket_name}/top_scorers.csv" 14 | dataset_ref = bigquery.DatasetReference(project, dataset_id) 15 | table_ref = dataset_ref.table(table_id) 16 | 17 | extract_job = client.extract_table( 18 | table_ref, 19 | destination_uri, 20 | location="US", 21 | ) 22 | extract_job.result() 23 | 24 | print(f"Exported {project}.{dataset_id}.{table_id} to {destination_uri}") 25 | 26 | time.sleep(5) 27 | 28 | df = pd.read_csv("https://storage.googleapis.com/premier_league_bucket/top_scorers.csv") 29 | sorted_df = df.sort_values(by=["goals"], ascending=False) 30 | removed_columns = sorted_df.drop(columns=["photo"]) 31 | removed_columns.to_csv(destination_uri, index=False) 32 | 33 | return "OK" 34 | -------------------------------------------------------------------------------- /etl/firestore/fixtures.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file calls the Football API to extract match fixture data 3 | and load the collection and documents into Firestore. 4 | """ 5 | 6 | # System libraries 7 | import os 8 | 9 | # Google Cloud library imports. 10 | from google.cloud import secretmanager 11 | from firebase_admin import firestore 12 | import firebase_admin 13 | import requests 14 | 15 | # Settings the project environment. 
16 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure" 17 | 18 | 19 | def call_api(secret_name): 20 | """ 21 | This function fetches the RapidAPI key from Secret Manager and 22 | and sets up the headers for an API call. 23 | """ 24 | 25 | client = secretmanager.SecretManagerServiceClient() 26 | response = client.access_secret_version(request={"name": secret_name}) 27 | payload = response.payload.data.decode("UTF-8") 28 | 29 | # Headers used for RapidAPI. 30 | headers = { 31 | "content-type": "application/octet-stream", 32 | "X-RapidAPI-Key": payload, 33 | "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com", 34 | } 35 | 36 | return headers 37 | 38 | 39 | class Fixture: 40 | """Building JSON structure for documents.""" 41 | 42 | def __init__(self, date, teams, goals=None): 43 | self.date = date 44 | self.teams = teams 45 | self.goals = goals 46 | 47 | def __repr__(self): 48 | return f"Fixture(\ 49 | name={self.date}, \ 50 | country={self.teams}, \ 51 | goals={self.goals}\ 52 | )" 53 | 54 | def to_dict(self): 55 | return {"date": self.date, "teams": self.teams, "goals": self.goals} 56 | 57 | 58 | def get_current_round(): 59 | """ 60 | This function calls the Football API to get the current round of the regular season. 61 | This will get the string of "Regular Season - 1" which is needed as a parameter 62 | in the next function to pull correct round. 
63 | """ 64 | 65 | headers = call_api("projects/463690670206/secrets/rapid-api/versions/1") 66 | 67 | url = "https://api-football-v1.p.rapidapi.com/v3/fixtures/rounds" 68 | querystring = {"league": "39", "season": "2023", "current": "true"} 69 | response = requests.get(url, headers=headers, params=querystring, timeout=20) 70 | 71 | current_round_response = response.json()["response"][0] 72 | # example response: "Regular Season - 12" 73 | 74 | return current_round_response 75 | 76 | 77 | def retrieve_data_for_current_round(): 78 | """Retrieving the data for the current round based on get_current_round() function's response""" 79 | 80 | headers = call_api("projects/463690670206/secrets/rapid-api/versions/1") 81 | current_round_response = get_current_round() 82 | 83 | url = "https://api-football-v1.p.rapidapi.com/v3/fixtures" 84 | querystring = {"league": "39", "season": "2023", "round": current_round_response} 85 | build_current_response = requests.get( 86 | url, headers=headers, params=querystring, timeout=20 87 | ) 88 | 89 | return build_current_response 90 | 91 | 92 | def load_firestore(): 93 | """This function loads the data into Firestore""" 94 | 95 | current_round_response = get_current_round() 96 | build_current_response = retrieve_data_for_current_round() 97 | 98 | # Check to see if firebase app has been initialized. 99 | if not firebase_admin._apps: 100 | firebase_admin.initialize_app() 101 | db = firestore.client() 102 | 103 | count = 0 104 | while count < 10: 105 | # Dictionaries to be written to each document. 106 | fixture_date = build_current_response.json()["response"][count]["fixture"][ 107 | "date" 108 | ] 109 | teams_dict = build_current_response.json()["response"][count]["teams"] 110 | goal_dict = build_current_response.json()["response"][count]["goals"] 111 | 112 | # Calling the away and home team names to build document name. 
113 | away_team = build_current_response.json()["response"][count]["teams"]["away"][ 114 | "name" 115 | ] 116 | home_team = build_current_response.json()["response"][count]["teams"]["home"][ 117 | "name" 118 | ] 119 | 120 | fixture = Fixture(date=(fixture_date), teams=teams_dict, goals=goal_dict) 121 | 122 | db.collection(f"{current_round_response}").document( 123 | f"{away_team} vs {home_team}" 124 | ).set(fixture.to_dict()) 125 | 126 | count += 1 127 | 128 | print(f"Document {current_round_response} has been loaded!") 129 | 130 | 131 | if __name__ != "__main__": 132 | load_firestore() 133 | -------------------------------------------------------------------------------- /etl/postgres/stock.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import polars as pl 4 | import requests # type: ignore 5 | from google.cloud import secretmanager 6 | 7 | # Setting the project environment. 8 | os.environ["GCLOUD_PROJECT"] = "cloud-data-infrastructure" 9 | 10 | 11 | def gcp_secret_stock_api() -> str: 12 | """This function retrieves the stock API key from GCP Secret Manager""" 13 | 14 | client = secretmanager.SecretManagerServiceClient() 15 | name = "projects/463690670206/secrets/stock-api/versions/1" 16 | response = client.access_secret_version(request={"name": name}) 17 | stock_api_key = response.payload.data.decode("UTF-8") 18 | 19 | return stock_api_key 20 | 21 | 22 | def gcp_secret_postgresql_uri() -> str: 23 | """This function retrieves the PostgreSQL connection URI from GCP Secret Manager""" 24 | 25 | client = secretmanager.SecretManagerServiceClient() 26 | name = "projects/463690670206/secrets/postgresql-uri/versions/1" 27 | response = client.access_secret_version(request={"name": name}) 28 | postgresql_uri = response.payload.data.decode("UTF-8") 29 | 30 | return postgresql_uri 31 | 32 | 33 | def send_dataframe_to_postgres() -> None: 34 | stock_api_key = gcp_secret_stock_api() 35 | postgresql_uri =
gcp_secret_postgresql_uri() 36 | 37 | url = f"https://financialmodelingprep.com/api/v3/quote/MANU?apikey={stock_api_key}" 38 | 39 | response = requests.request("GET", url) 40 | 41 | json_res = response.json() 42 | df = pl.DataFrame(json_res) 43 | 44 | df.write_database( 45 | table_name="stocks", connection=postgresql_uri, if_table_exists="append" 46 | ) 47 | 48 | 49 | if __name__ != "__main__": 50 | send_dataframe_to_postgres() 51 | -------------------------------------------------------------------------------- /etl/requirements-data.txt: -------------------------------------------------------------------------------- 1 | # These libraries are used for the ETL data pipelines. 2 | 3 | requests==2.31.0 4 | 5 | # Data 6 | pandas==2.1.4 7 | pandas-gbq==0.20.0 8 | polars==0.20.2 9 | psycopg2-binary==2.9.9 10 | sqlalchemy==2.0.25 11 | prefect-soda-core==0.1.8 12 | soda-core-bigquery==3.1.3 13 | dbt-bigquery==1.7.4 14 | 15 | # Google Cloud 16 | firebase-admin==6.3.0 17 | google-cloud-bigquery==3.14.1 18 | google-cloud-secret-manager==2.17.0 19 | google-cloud-run==0.10.1 20 | google-api-python-client==2.111.0 -------------------------------------------------------------------------------- /monitoring/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | prometheus: 3 | image: prom/prometheus 4 | volumes: 5 | - "./prometheus.yml:/etc/prometheus/prometheus.yml" 6 | ports: 7 | - 9090:9090 8 | 9 | grafana: 10 | image: grafana/grafana 11 | ports: 12 | - "3000:3000" 13 | user: "501" 14 | volumes: 15 | - ./grafana_storage:/var/lib/grafana 16 | 17 | postgres-exporter: 18 | image: prometheuscommunity/postgres-exporter 19 | volumes: 20 | - "./postgres_exporter.yml:/postgres_exporter.yml:ro" 21 | ports: 22 | - 9187:9187 23 | environment: 24 | DATA_SOURCE_NAME: "{{ db.string }}" 25 | -------------------------------------------------------------------------------- /monitoring/postgres_exporter.yml: 
-------------------------------------------------------------------------------- 1 | auth_modules: 2 | postgres_conn: 3 | type: userpass 4 | userpass: 5 | username: "{{ db.username }}" 6 | password: "{{ db.password }}" 7 | options: 8 | port: 5432 9 | sslmode: disable -------------------------------------------------------------------------------- /monitoring/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 45s 3 | 4 | scrape_configs: 5 | - job_name: 'prometheus' 6 | static_configs: 7 | - targets: ['localhost:9090'] 8 | 9 | - job_name: 'postgresql_exporter' 10 | static_configs: 11 | - targets: ['host.docker.internal:9187'] -------------------------------------------------------------------------------- /prefect/data_quality_scan.py: -------------------------------------------------------------------------------- 1 | from prefect import flow # type: ignore 2 | from prefect.context import get_run_context # type: ignore 3 | from prefect_soda_core.soda_configuration import SodaConfiguration # type: ignore 4 | from prefect_soda_core.sodacl_check import SodaCLCheck # type: ignore 5 | from prefect_soda_core.tasks import soda_scan_execute # type: ignore 6 | 7 | 8 | @flow 9 | def run_soda_scan(): 10 | soda_configuration_block = SodaConfiguration( 11 | configuration_yaml_path="./soda/configuration.yaml" 12 | ) 13 | soda_check_block = SodaCLCheck(sodacl_yaml_path="./soda/checks.yaml") 14 | 15 | # Using the flow_run_name as the name of the file to store the scan results 16 | flow_run_name = get_run_context().flow_run.name 17 | scan_results_file_path = f"{flow_run_name}.json" 18 | 19 | return soda_scan_execute( 20 | data_source_name="bigquery_connection", 21 | configuration=soda_configuration_block, 22 | checks=soda_check_block, 23 | variables={"var": "value"}, 24 | scan_results_file=scan_results_file_path, 25 | verbose=True, 26 | return_scan_result_file_content=False, 27 | ) 28 | 29 | 30 | 
run_soda_scan() 31 | -------------------------------------------------------------------------------- /prefect/deployments/current_round-deployment.yaml: -------------------------------------------------------------------------------- 1 | ### 2 | ### A complete description of a Prefect Deployment for flow 'current-round' 3 | ### 4 | name: current_round 5 | description: null 6 | version: d0781db7af4df7adb6a04236cfa5bb1c 7 | # The work queue that will handle this deployment's runs 8 | work_queue_name: premier-league-work-queue 9 | work_pool_name: premier-league-work-pool 10 | tags: [] 11 | parameters: {} 12 | schedule: null 13 | schedules: [] 14 | is_schedule_active: null 15 | infra_overrides: {} 16 | 17 | ### 18 | ### DO NOT EDIT BELOW THIS LINE 19 | ### 20 | flow_name: current-round 21 | manifest_path: null 22 | infrastructure: 23 | type: process 24 | env: {} 25 | labels: {} 26 | name: null 27 | command: null 28 | stream_output: true 29 | working_dir: null 30 | _block_document_id: b7fedd41-43ec-4767-a37d-71a761b2a50a 31 | _block_document_name: anonymous-148860db-e1fc-4935-9b52-df498459df91 32 | _is_anonymous: true 33 | block_type_slug: process 34 | _block_type_slug: process 35 | storage: null 36 | path: /home/pythonsanchez/premier-league 37 | entrypoint: prefect/flows.py:current_round 38 | parameter_openapi_schema: 39 | title: Parameters 40 | type: object 41 | properties: {} 42 | required: null 43 | definitions: null 44 | timestamp: '2024-03-19T00:55:34.176016+00:00' 45 | triggers: [] 46 | enforce_parameter_schema: null -------------------------------------------------------------------------------- /prefect/deployments/highlights-deployment.yaml: -------------------------------------------------------------------------------- 1 | ### 2 | ### A complete description of a Prefect Deployment for flow 'highlights' 3 | ### 4 | name: hightlights 5 | description: null 6 | version: d0781db7af4df7adb6a04236cfa5bb1c 7 | # The work queue that will handle this deployment's 
runs 8 | work_queue_name: premier-league-work-queue 9 | work_pool_name: premier-league-work-pool 10 | tags: [] 11 | parameters: {} 12 | schedule: null 13 | schedules: [] 14 | is_schedule_active: null 15 | infra_overrides: {} 16 | 17 | ### 18 | ### DO NOT EDIT BELOW THIS LINE 19 | ### 20 | flow_name: highlights 21 | manifest_path: null 22 | infrastructure: 23 | type: process 24 | env: {} 25 | labels: {} 26 | name: null 27 | command: null 28 | stream_output: true 29 | working_dir: null 30 | _block_document_id: cc1a4def-5006-481c-8a99-fd7b50d69d87 31 | _block_document_name: anonymous-94073dd6-53db-44a1-9dc2-09c93c7d9117 32 | _is_anonymous: true 33 | block_type_slug: process 34 | _block_type_slug: process 35 | storage: null 36 | path: /home/pythonsanchez/premier-league 37 | entrypoint: prefect/flows.py:highlights 38 | parameter_openapi_schema: 39 | title: Parameters 40 | type: object 41 | properties: {} 42 | required: null 43 | definitions: null 44 | timestamp: '2024-03-19T00:54:49.874866+00:00' 45 | triggers: [] 46 | enforce_parameter_schema: null -------------------------------------------------------------------------------- /prefect/deployments/injuries-deployment.yaml: -------------------------------------------------------------------------------- 1 | ### 2 | ### A complete description of a Prefect Deployment for flow 'injuries' 3 | ### 4 | name: injuries 5 | description: null 6 | version: d0781db7af4df7adb6a04236cfa5bb1c 7 | # The work queue that will handle this deployment's runs 8 | work_queue_name: premier-league-work-queue 9 | work_pool_name: premier-league-work-pool 10 | tags: [] 11 | parameters: {} 12 | schedule: null 13 | schedules: [] 14 | is_schedule_active: null 15 | infra_overrides: {} 16 | 17 | ### 18 | ### DO NOT EDIT BELOW THIS LINE 19 | ### 20 | flow_name: injuries 21 | manifest_path: null 22 | infrastructure: 23 | type: process 24 | env: {} 25 | labels: {} 26 | name: null 27 | command: null 28 | stream_output: true 29 | working_dir: null 30 | 
_block_document_id: 5a7ad7a1-341f-4b70-8ac8-4a18768aa962 31 | _block_document_name: anonymous-2bd75ab0-bb08-4153-b8e4-de4247a132d0 32 | _is_anonymous: true 33 | block_type_slug: process 34 | _block_type_slug: process 35 | storage: null 36 | path: /home/pythonsanchez/premier-league 37 | entrypoint: prefect/flows.py:injuries 38 | parameter_openapi_schema: 39 | title: Parameters 40 | type: object 41 | properties: {} 42 | required: null 43 | definitions: null 44 | timestamp: '2024-03-19T00:55:26.497708+00:00' 45 | triggers: [] 46 | enforce_parameter_schema: null -------------------------------------------------------------------------------- /prefect/deployments/news-deployment.yaml: -------------------------------------------------------------------------------- 1 | ### 2 | ### A complete description of a Prefect Deployment for flow 'news' 3 | ### 4 | name: news 5 | description: null 6 | version: d0781db7af4df7adb6a04236cfa5bb1c 7 | # The work queue that will handle this deployment's runs 8 | work_queue_name: premier-league-work-queue 9 | work_pool_name: premier-league-work-pool 10 | tags: [] 11 | parameters: {} 12 | schedule: null 13 | schedules: [] 14 | is_schedule_active: null 15 | infra_overrides: {} 16 | 17 | ### 18 | ### DO NOT EDIT BELOW THIS LINE 19 | ### 20 | flow_name: news 21 | manifest_path: null 22 | infrastructure: 23 | type: process 24 | env: {} 25 | labels: {} 26 | name: null 27 | command: null 28 | stream_output: true 29 | working_dir: null 30 | _block_document_id: be253629-d0c6-4a28-853f-cf861cca30d7 31 | _block_document_name: anonymous-f5036643-492b-49fd-a992-eb64855b0732 32 | _is_anonymous: true 33 | block_type_slug: process 34 | _block_type_slug: process 35 | storage: null 36 | path: /home/pythonsanchez/premier-league 37 | entrypoint: prefect/flows.py:news 38 | parameter_openapi_schema: 39 | title: Parameters 40 | type: object 41 | properties: {} 42 | required: null 43 | definitions: null 44 | timestamp: '2024-03-19T00:55:04.964169+00:00' 45 | 
triggers: [] 46 | enforce_parameter_schema: null -------------------------------------------------------------------------------- /prefect/deployments/run_soda_scan-deployment.yaml: -------------------------------------------------------------------------------- 1 | ### 2 | ### A complete description of a Prefect Deployment for flow 'run-soda-scan' 3 | ### 4 | name: data_quality 5 | description: null 6 | version: b7519422832beb73bb96d4191849271b 7 | # The work queue that will handle this deployment's runs 8 | work_queue_name: premier-league-work-queue 9 | work_pool_name: premier-league-work-pool 10 | tags: [] 11 | parameters: {} 12 | schedule: null 13 | schedules: [] 14 | is_schedule_active: null 15 | infra_overrides: {} 16 | 17 | ### 18 | ### DO NOT EDIT BELOW THIS LINE 19 | ### 20 | flow_name: run-soda-scan 21 | manifest_path: null 22 | infrastructure: 23 | type: process 24 | env: {} 25 | labels: {} 26 | name: null 27 | command: null 28 | stream_output: true 29 | working_dir: null 30 | _block_document_id: 231c21a0-bea6-4800-86f4-f9924fe459a2 31 | _block_document_name: anonymous-54354ea5-5dfd-48a1-a920-7f8795b80fd5 32 | _is_anonymous: true 33 | block_type_slug: process 34 | _block_type_slug: process 35 | storage: null 36 | path: /home/pythonsanchez/premier-league 37 | entrypoint: prefect/data_quality_scan.py:run_soda_scan 38 | parameter_openapi_schema: 39 | title: Parameters 40 | type: object 41 | properties: {} 42 | required: null 43 | definitions: null 44 | timestamp: '2024-03-19T00:55:42.096102+00:00' 45 | triggers: [] 46 | enforce_parameter_schema: null -------------------------------------------------------------------------------- /prefect/deployments/squads-deployment.yaml: -------------------------------------------------------------------------------- 1 | ### 2 | ### A complete description of a Prefect Deployment for flow 'squads' 3 | ### 4 | name: squads 5 | description: null 6 | version: d0781db7af4df7adb6a04236cfa5bb1c 7 | # The work queue that will 
handle this deployment's runs 8 | work_queue_name: premier-league-work-queue 9 | work_pool_name: premier-league-work-pool 10 | tags: [] 11 | parameters: {} 12 | schedule: null 13 | schedules: [] 14 | is_schedule_active: null 15 | infra_overrides: {} 16 | 17 | ### 18 | ### DO NOT EDIT BELOW THIS LINE 19 | ### 20 | flow_name: squads 21 | manifest_path: null 22 | infrastructure: 23 | type: process 24 | env: {} 25 | labels: {} 26 | name: null 27 | command: null 28 | stream_output: true 29 | working_dir: null 30 | _block_document_id: 9eb2d8f7-7ef9-4f30-bf97-a8217320d4bc 31 | _block_document_name: anonymous-67d88fb7-ee19-4c67-83b0-a0c4de72b28e 32 | _is_anonymous: true 33 | block_type_slug: process 34 | _block_type_slug: process 35 | storage: null 36 | path: /home/pythonsanchez/premier-league 37 | entrypoint: prefect/flows.py:squads 38 | parameter_openapi_schema: 39 | title: Parameters 40 | type: object 41 | properties: {} 42 | required: null 43 | definitions: null 44 | timestamp: '2024-03-19T00:55:19.207112+00:00' 45 | triggers: [] 46 | enforce_parameter_schema: null -------------------------------------------------------------------------------- /prefect/deployments/statistics-deployment.yaml: -------------------------------------------------------------------------------- 1 | ### 2 | ### A complete description of a Prefect Deployment for flow 'statistics' 3 | ### 4 | name: statistics 5 | description: null 6 | version: d0781db7af4df7adb6a04236cfa5bb1c 7 | # The work queue that will handle this deployment's runs 8 | work_queue_name: premier-league-work-queue 9 | work_pool_name: premier-league-work-pool 10 | tags: [] 11 | parameters: {} 12 | schedule: null 13 | schedules: [] 14 | is_schedule_active: null 15 | infra_overrides: {} 16 | 17 | ### 18 | ### DO NOT EDIT BELOW THIS LINE 19 | ### 20 | flow_name: statistics 21 | manifest_path: null 22 | infrastructure: 23 | type: process 24 | env: {} 25 | labels: {} 26 | name: null 27 | command: null 28 | stream_output: true 29 | 
working_dir: null 30 | _block_document_id: d07769e0-7478-47f9-a89f-fdba7df219df 31 | _block_document_name: anonymous-085bb46e-2985-452e-840a-222a128de535 32 | _is_anonymous: true 33 | block_type_slug: process 34 | _block_type_slug: process 35 | storage: null 36 | path: /home/pythonsanchez/premier-league 37 | entrypoint: prefect/flows.py:statistics 38 | parameter_openapi_schema: 39 | title: Parameters 40 | type: object 41 | properties: {} 42 | required: null 43 | definitions: null 44 | timestamp: '2024-03-19T00:54:36.287059+00:00' 45 | triggers: [] 46 | enforce_parameter_schema: null 47 | -------------------------------------------------------------------------------- /prefect/deployments/stocks-deployment.yaml: -------------------------------------------------------------------------------- 1 | ### 2 | ### A complete description of a Prefect Deployment for flow 'stocks' 3 | ### 4 | name: stocks 5 | description: null 6 | version: d0781db7af4df7adb6a04236cfa5bb1c 7 | # The work queue that will handle this deployment's runs 8 | work_queue_name: premier-league-work-queue 9 | work_pool_name: premier-league-work-pool 10 | tags: [] 11 | parameters: {} 12 | schedule: null 13 | schedules: [] 14 | is_schedule_active: null 15 | infra_overrides: {} 16 | 17 | ### 18 | ### DO NOT EDIT BELOW THIS LINE 19 | ### 20 | flow_name: stocks 21 | manifest_path: null 22 | infrastructure: 23 | type: process 24 | env: {} 25 | labels: {} 26 | name: null 27 | command: null 28 | stream_output: true 29 | working_dir: null 30 | _block_document_id: d00fda5e-d9ad-4e29-9186-3a82f6c7387b 31 | _block_document_name: anonymous-a7b04bd9-9549-458a-9263-3adc7756e42e 32 | _is_anonymous: true 33 | block_type_slug: process 34 | _block_type_slug: process 35 | storage: null 36 | path: /home/pythonsanchez/premier-league 37 | entrypoint: prefect/flows.py:stocks 38 | parameter_openapi_schema: 39 | title: Parameters 40 | type: object 41 | properties: {} 42 | required: null 43 | definitions: null 44 | timestamp: 
"""Prefect task/flow definitions for the premier-league ETL pipelines.

Each flow here is the entrypoint referenced by a deployment YAML under
prefect/deployments/ (e.g. ``prefect/flows.py:statistics``), so flow names
must not change.

NOTE(review): every task below *imports* its ETL module but never calls the
imported function. This only does real work if the ETL modules execute their
pipeline at import time (module-level code) — TODO confirm; otherwise these
tasks are no-ops and each body should call the function it imports.
"""

from prefect import task, flow


# --- Statistics ---
@task
def task_standings():
	"""Import the standings ETL module (etl.bigquery.standings)."""
	from etl.bigquery.standings import send_dataframe_to_bigquery  # noqa: F401


@task
def task_teams():
	"""Import the teams ETL module (etl.bigquery.teams)."""
	from etl.bigquery.teams import send_dataframe_to_bigquery  # noqa: F401


@task
def task_top_scorers():
	"""Import the top-scorers ETL module (etl.bigquery.top_scorers)."""
	from etl.bigquery.top_scorers import send_dataframe_to_bigquery  # noqa: F401


@task
def task_fixtures():
	# Renamed from the misspelled "task_fixtues"; only referenced inside this module.
	"""Import the fixtures ETL module (etl.firestore.fixtures)."""
	from etl.firestore.fixtures import load_firestore  # noqa: F401


@flow
def statistics():
	"""Run the statistics tasks strictly in order via Prefect's wait_for chaining."""
	a = task_standings()
	b = task_teams(wait_for=[a])
	c = task_top_scorers(wait_for=[a, b])
	task_fixtures(wait_for=[a, b, c])


# --- News ---
@task
def task_news():
	"""Import the news ETL module (etl.bigquery.news)."""
	from etl.bigquery.news import send_dataframe_to_bigquery  # noqa: F401


@flow
def news():
	"""Run the news task."""
	task_news()


# --- Highlights ---
@task
def task_highlights():
	"""Import the highlights ETL module (etl.bigquery.highlights)."""
	from etl.bigquery.highlights import send_dataframe_to_bigquery  # noqa: F401


@flow
def highlights():
	"""Run the highlights task."""
	task_highlights()


# --- Stocks ---
@task
def task_stocks():
	"""Import the stock ETL module (etl.postgres.stock)."""
	from etl.postgres.stock import send_dataframe_to_postgres  # noqa: F401


@flow
def stocks():
	"""Run the stocks task."""
	task_stocks()


# --- Squads ---
@task
def task_squads():
	"""Import the squads ETL module (etl.bigquery.squads)."""
	from etl.bigquery.squads import call_api  # noqa: F401


@flow
def squads():
	"""Run the squads task."""
	task_squads()


# --- Injuries ---
@task
def task_injuries():
	"""Import the injuries ETL module (etl.bigquery.injuries)."""
	from etl.bigquery.injuries import call_api  # noqa: F401


@flow
def injuries():
	"""Run the injuries task."""
	task_injuries()


# --- Current Round ---
@task
def task_current_round():
	"""Import the current-round ETL module (etl.bigquery.current_round)."""
	from etl.bigquery.current_round import load_current_round  # noqa: F401


@flow
def current_round():
	"""Run the current-round task."""
	task_current_round()
3 | 4 | # Streamlit 5 | pandas==2.1.4 6 | plotly==5.18.0 7 | streamlit==1.33.0 8 | 9 | # Google Cloud 10 | firebase-admin==6.3.0 11 | google-cloud-core==2.4.0 12 | google-cloud-bigquery==3.14.1 13 | google-cloud-secret-manager==2.17.0 -------------------------------------------------------------------------------- /soda/checks.yaml: -------------------------------------------------------------------------------- 1 | checks for news: 2 | - row_count > 1 3 | - invalid_count(url) = 0: 4 | valid regex: ^https:// 5 | 6 | checks for stadiums: 7 | - row_count = 20 8 | 9 | checks for standings: 10 | - row_count = 20 11 | - duplicate_count(team) = 0 12 | - max(points) < 114 13 | - min(points) > 0 14 | 15 | checks for teams: 16 | - row_count = 20 17 | - duplicate_count(team) = 0 18 | 19 | checks for top_scorers: 20 | - row_count = 5 -------------------------------------------------------------------------------- /soda/configuration.yaml: -------------------------------------------------------------------------------- 1 | data_source bigquery_connection: 2 | type: bigquery 3 | use_context_auth: true 4 | auth_scopes: 5 | - https://www.googleapis.com/auth/bigquery 6 | - https://www.googleapis.com/auth/cloud-platform 7 | - https://www.googleapis.com/auth/drive 8 | project_id: "cloud-data-infrastructure" 9 | dataset: premier_league_dataset -------------------------------------------------------------------------------- /streamlit_app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | from datetime import datetime 5 | 6 | import pandas as pd 7 | import streamlit as st 8 | from streamlit.delta_generator import DeltaGenerator 9 | 10 | # Importing classes from components/ directory. 
from components.about_section import AboutSection
from components.fixtures_section import FixturesSection
from components.highlights_section import HighlightsSection
from components.injuries_section import InjuriesSection
from components.league_form_section import LeagueFormsSection
from components.news_section import NewsSection
from components.point_progression_section import PointProgressionSection
from components.point_slider_section import PointSliderSection
from components.social_media_section import SocialMediaSection
from components.squads_section import SquadSection
from components.stadiums_map_section import StadiumMapSection
from components.stock_section import StockSection
from components.top_scorers_section import TopScorersSection
from components.top_teams_section import TopTeamsSection
from components.connections import (
	firestore_connection,
	get_highlights,
	get_injuries,
	get_league_statistics,
	get_max_round,
	get_min_round,
	get_news,
	get_squads,
	get_stadiums,
	get_standings,
	get_stocks,
	get_teams,
	get_top_scorers,
)

import google.auth

# Resolve Google Cloud credentials once at import time; GCLOUD_PROJECT is
# pinned so the BigQuery/Firestore helpers in components/connections.py use
# the right project.
project_id = "cloud-data-infrastructure"
os.environ["GCLOUD_PROJECT"] = project_id
credentials, project_id = google.auth.default()

st.set_page_config(page_title="Streamlit: Premier League", layout="wide")


def streamlit_app():
	"""Render the full Premier League dashboard.

	Fetches every dataframe up front via components/connections.py, then lays
	out eight tabs (standings/overview, team stats, players & injuries,
	fixtures, squads, news & highlights, stock, about).
	"""
	# Get the dataframes.
	firestore_database = firestore_connection()
	highlights_df = get_highlights()
	injuries_df = get_injuries()
	league_statistics_df = get_league_statistics()
	max_round = get_max_round()
	min_round = get_min_round()
	news_df = get_news()
	squads_df = get_squads()
	standings_df = get_standings()
	stadiums_df = get_stadiums()
	stocks_df = get_stocks()
	teams_df = get_teams()
	top_scorers_df = get_top_scorers()

	fixtures_section = FixturesSection(firestore_database, max_round, min_round)

	# Image, title, and subheader.
	with st.container():
		# NOTE(review): the markdown body is an empty string here — presumably a
		# logo/image <img> tag was intended; verify against the deployed app.
		st.markdown(
			'',
			unsafe_allow_html=True,
		)
		st.title("Premier League Statistics / 2023-24")
		st.subheader(f"Current Round: {max_round}")

	# Get the current date
	def get_suffix(day):
		"""Return the English ordinal suffix ("st"/"nd"/"rd"/"th") for a day number."""
		# 11th–13th are special-cased: they take "th" despite ending in 1/2/3.
		if 10 < day % 100 < 20:
			suffix = "th"
		else:
			suffix = {1: "st", 2: "nd", 3: "rd"}.get(day % 10, "th")
		return suffix

	current_date = datetime.now()
	day = current_date.day
	suffix = get_suffix(day)
	formatted_day = str(day).lstrip("0")
	formatted_date = current_date.strftime("%B ") + formatted_day + suffix + current_date.strftime(", %Y")

	st.write(f"{formatted_date}")

	# Tab menu.
	# NOTE(review): "Hightlights" is a typo in a user-facing label; fixing it is
	# a behavior (string) change — do it in a separate, deliberate edit.
	tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8 = st.tabs(
		[
			"Standings & Overview",
			"Teams Statistics",
			"Players & Injuries",
			"Fixtures",
			"Squads",
			"News & Hightlights",
			"Manchester United Stock (Beta)",
			"About",
		]
	)

	# --------- Overview Tab ---------
	# Tab 1 holds the following sections: [League Statistics, Current Standings, Location of Stadiums].
	with tab1:
		st.subheader("League Statistics")
		col1, col2, col3, col4 = st.columns(4)

		# Average goals scored column.
		# NOTE(review): the iloc indices below (column 6 = average_goals,
		# column 2 = team name) assume a fixed column order in teams_df —
		# TODO confirm against etl/bigquery/teams.py.
		with col1:
			teams_df_average_goals = teams_df.sort_values(by=["average_goals"], ascending=False)
			max_average_goals = teams_df_average_goals.iloc[0, 6]

			# Top-5 table: best average-goals values paired with their team names.
			average_goals_df = pd.DataFrame(
				{
					"Average Goals": [
						max_average_goals,
						teams_df_average_goals.iloc[1, 6],
						teams_df_average_goals.iloc[2, 6],
						teams_df_average_goals.iloc[3, 6],
						teams_df_average_goals.iloc[4, 6],
					],
					"Team": [
						teams_df_average_goals.iloc[0, 2],
						teams_df_average_goals.iloc[1, 2],
						teams_df_average_goals.iloc[2, 2],
						teams_df_average_goals.iloc[3, 2],
						teams_df_average_goals.iloc[4, 2],
					],
				}
			)

			st.dataframe(
				average_goals_df,
				column_config={
					"Average Goals": st.column_config.ProgressColumn(
						"Average Goals",
						help="The Average Goals Scored by Each Team.",
						format="%f",
						min_value=0,
						# Progress bar scaled to 2x the leader so bars stay < 100%.
						max_value=int(round(max_average_goals, 2)) * 2,
					),
				},
				hide_index=True,
			)

		# Penalties scored column (column 4 = penalties_scored; see note above).
		with col2:
			teams_df_penalties_scored = teams_df.sort_values(by=["penalties_scored"], ascending=False)
			max_penalties_scored = teams_df_penalties_scored.iloc[0, 4]

			penalties_scored_df = pd.DataFrame(
				{
					"Penalties Scored": [
						max_penalties_scored,
						teams_df_penalties_scored.iloc[1, 4],
						teams_df_penalties_scored.iloc[2, 4],
						teams_df_penalties_scored.iloc[3, 4],
						teams_df_penalties_scored.iloc[4, 4],
					],
					"Team": [
						teams_df_penalties_scored.iloc[0, 2],
						teams_df_penalties_scored.iloc[1, 2],
						teams_df_penalties_scored.iloc[2, 2],
						teams_df_penalties_scored.iloc[3, 2],
						teams_df_penalties_scored.iloc[4, 2],
					],
				}
			)

			st.dataframe(
				penalties_scored_df,
				column_config={
					"Penalties Scored": st.column_config.ProgressColumn(
						"Penalties Scored",
						help="The Amount of Penalties Scored by Each Team.",
						format="%d",
						min_value=0,
						max_value=int(max_penalties_scored) * 2,
					),
				},
				hide_index=True,
			)

		# Win streak column (column 7 = win_streak; see note above).
		with col3:
			teams_df_win_streak = teams_df.sort_values(by=["win_streak"], ascending=False)
			max_win_streak = teams_df_win_streak.iloc[0, 7]

			win_streak_df = pd.DataFrame(
				{
					"Biggest Win Streak": [
						max_win_streak,
						teams_df_win_streak.iloc[1, 7],
						teams_df_win_streak.iloc[2, 7],
						teams_df_win_streak.iloc[3, 7],
						teams_df_win_streak.iloc[4, 7],
					],
					"Team": [
						teams_df_win_streak.iloc[0, 2],
						teams_df_win_streak.iloc[1, 2],
						teams_df_win_streak.iloc[2, 2],
						teams_df_win_streak.iloc[3, 2],
						teams_df_win_streak.iloc[4, 2],
					],
				}
			)

			st.dataframe(
				win_streak_df,
				column_config={
					"Biggest Win Streak": st.column_config.ProgressColumn(
						"Biggest Win Streak",
						help="The Biggest Win Streak by Each Team.",
						format="%d",
						min_value=0,
						max_value=int(max_win_streak) * 2,
					),
				},
				hide_index=True,
			)

		# League-wide totals (goals, penalties, clean sheets) from the first row
		# of league_statistics_df.
		with col4:
			st.markdown("**League Statistics**")

			with st.container():
				league_statistics_df = pd.DataFrame(
					{
						"labels": ["Goals Scored", "Penalties Scored", "Clean Sheets"],
						"metrics": [
							league_statistics_df.iloc[0, 0],
							league_statistics_df.iloc[0, 1],
							league_statistics_df.iloc[0, 2],
						],
					}
				)

				st.dataframe(
					league_statistics_df,
					column_config={
						"metrics": st.column_config.NumberColumn(
							"Amount",
							help="The Amount of Goals, Penalties Scored, and Clean Sheets in the League.",
							min_value=0,
							max_value=1000,
							step=1,
						),
						"labels": st.column_config.TextColumn(
							"Metric",
						),
					},
					hide_index=True,
				)

		# Function to create the standings table (dataframe).
		def standings_table() -> DeltaGenerator:
			"""Render the current league standings as a styled dataframe."""
			st.subheader("Current Standings")

			standings_table = st.dataframe(
				standings_df.style.set_table_styles([{"selector": "th", "props": [("background-color", "yellow")]}]),
				column_config={
					"logo": st.column_config.ImageColumn("Icon", width="small"),
					"rank": "Rank",
					"points": "Points",
					"team": "Club",
					"games_played": "Games Played",
					"wins": "Wins",
					"draws": "Draws",
					"loses": "Loses",
					"goals_for": "Goals For",
					"goals_against": "Goals Against",
					"goal_difference": "Goal Difference",
				},
				hide_index=True,
				use_container_width=True,
			)

			return standings_table

		standings_table()

		# Stadiums
		stadium_map_section = StadiumMapSection()
		stadium_map_section.display(stadiums_df)

	# --------- Team Statistics Tab ---------
	# Tab 2 holds the following sections: [Top Teams, Point Progression, Top Scorers, League Forms].
	with tab2:
		def top_teams_func():
			"""Render the top-5 teams section."""
			top_teams_section = TopTeamsSection(teams_df)
			with st.container():
				top_teams_section.display()

		def point_progression_func():
			"""Render the points-over-the-season chart."""
			point_progression_section = PointProgressionSection(teams_df, standings_df)
			with st.container():
				point_progression_section.display()

		# experimental_fragment: re-runs only this section when its slider changes.
		@st.experimental_fragment
		def point_slider_func():
			"""Render the interactive points slider."""
			point_slider_section = PointSliderSection(standings_df)
			with st.container():
				point_slider_section.display()

		def league_forms_func():
			"""Render the form table for the rest of the league."""
			league_forms_section = LeagueFormsSection(teams_df)
			with st.container():
				league_forms_section.display()

		top_teams_func()
		point_progression_func()
		point_slider_func()
		league_forms_func()

	# --------- Player Statistics Tab ---------
	# Tab 3 holds the following sections: [Player Statistics].
	with tab3:

		def top_scorers_func():
			"""Render the top-5 scorers section."""
			top_scorers_section = TopScorersSection(top_scorers_df)
			with st.container():
				top_scorers_section.display()

		@st.experimental_fragment
		def injuries_func():
			"""Render the recent-injuries section (fragment: isolated reruns)."""
			injuries_section = InjuriesSection(injuries_df)
			with st.container():
				injuries_section.display()

		top_scorers_func()
		injuries_func()

	# --------- Fixtures Tab ---------
	# Tab 4 holds the following sections: [Fixtures].
	with tab4:
		# Fixtures section.
		fixtures_section.display()

	# --------- Squads Tab ---------
	# Tab 5 holds the following sections: [Squads].
	with tab5:
		st.subheader("Team Squads")
		st.markdown("**Note:** Double click on the player's photo to expand it.")

		@st.experimental_fragment
		def squads_func():
			"""Render a team selector plus that team's squad (fragment: isolated reruns)."""
			squads = SquadSection(squads_df)

			col1, _, _ = st.columns(3)
			with col1:
				option = st.selectbox(
					index=None,
					label="Use the dropdown menu to select a team:",
					options=squads.teams,
					placeholder="Please make a selection",
				)
				if option:
					# Show the selected team's logo above its squad listing.
					selected_team_logo = teams_df[teams_df["team"] == option]["logo"].iloc[0]
					st.image(selected_team_logo, width=75)
					squads.display(option)

		squads_func()

	# --------- News Tab ---------
	# Tab 6 holds the following sections: [News, Highlights].
	with tab6:
		with st.container():
			NewsSection(news_df).display()

		with st.container():
			HighlightsSection(highlights_df).display_first_row()
			HighlightsSection(highlights_df).display_second_row()

	# --------- Stock Tab ---------
	# Tab 7 holds the following sections: [Stock Price].
	with tab7:
		stock_section = StockSection(stocks_df)
		stock_section.display()

	# --------- About Tab ---------
	# Tab 8 holds the following sections: [About].
	with tab8:
		# About
		about_section = AboutSection()
		about_section.display()

		# Social Media
		social_media_section = SocialMediaSection()
		social_media_section.display()


if __name__ == "__main__":
	streamlit_app()
------ Compute Engine Resource ------ 37 | resource "google_compute_instance" "premier-league-vm" { 38 | name = "premier-league-vm" 39 | machine_type = "e2-small" 40 | zone = "us-central1-a" 41 | project = var.project_id 42 | tags = ["premier-league", "virtual-machine", "http", "https"] 43 | 44 | metadata = { 45 | ssh-keys = "${var.user}:${file(var.public_key_path)}" 46 | } 47 | 48 | boot_disk { 49 | initialize_params { 50 | image = "debian-cloud/debian-11" 51 | } 52 | } 53 | 54 | network_interface { 55 | network = "https://www.googleapis.com/compute/v1/projects/${var.project_id}/global/networks/default" 56 | subnetwork = "https://www.googleapis.com/compute/v1/projects/${var.project_id}/regions/us-central1/subnetworks/default" 57 | subnetwork_project = var.project_id 58 | access_config { 59 | // Ephemeral public IP 60 | } 61 | } 62 | 63 | service_account { 64 | email = var.email_address 65 | scopes = ["cloud-platform"] 66 | } 67 | 68 | provisioner "remote-exec" { 69 | connection { 70 | type = "ssh" 71 | user = var.user 72 | host = google_compute_instance.premier-league-vm.network_interface[0].access_config[0].nat_ip 73 | private_key = file(var.private_key_path) 74 | } 75 | script = "./installations.sh" 76 | # inline = [ 77 | 78 | # ] 79 | } 80 | } -------------------------------------------------------------------------------- /tests/requirements-tests.txt: -------------------------------------------------------------------------------- 1 | mypy==1.8.0 2 | pre-commit==3.6.0 3 | pytest==7.4.4 4 | pytest-cov==4.1.0 5 | ruff==0.1.13 -------------------------------------------------------------------------------- /tests/test_streamlit_app.py: -------------------------------------------------------------------------------- 1 | from streamlit.testing.v1 import AppTest 2 | 3 | at = AppTest.from_file("streamlit_app.py", default_timeout=1000) 4 | at.run() 5 | 6 | 7 | def test_main_page(): 8 | assert at.title[0].value == "Premier League Statistics / 2023-24" 9 | assert 
	assert "Current Round: " in at.subheader[0].value
	assert at.subheader[-1].value == "Social"


# Standings & Overview
def test_tab_one():
	"""Tab 1 renders the League Statistics, Current Standings, and stadium-map subheaders."""
	assert at.tabs[0].subheader[0].value == "League Statistics"
	assert at.tabs[0].subheader[1].value == "Current Standings"
	assert at.tabs[0].subheader[2].value == "Location of Stadiums"


# Teams Statistics
def test_tab_two():
	"""Tab 2 renders the four team-statistics section subheaders in order."""
	assert at.tabs[1].subheader[0].value == "Top 5 Teams"
	assert at.tabs[1].subheader[1].value == "Point Progression throughout the Season"
	assert at.tabs[1].subheader[2].value == "Points per Team:"
	assert at.tabs[1].subheader[3].value == "Forms for the Rest of the League"


# Players & Injuries Statistics
def test_tab_three():
	"""Tab 3 renders five top-scorer columns plus the injuries subheader.

	The markdown indices step by 6 per scorer column; these offsets depend on
	TopScorersSection's render order.
	"""
	assert at.tabs[2].subheader[0].value == "Top 5 Scorers"

	# Column 1
	assert "Goals:" in at.tabs[2].markdown[2].value
	assert "Assists:" in at.tabs[2].markdown[3].value
	assert "Team:" in at.tabs[2].markdown[4].value
	assert "Nationality:" in at.tabs[2].markdown[5].value

	# Column 2
	assert "Goals:" in at.tabs[2].markdown[8].value
	assert "Assists:" in at.tabs[2].markdown[9].value
	assert "Team:" in at.tabs[2].markdown[10].value
	assert "Nationality:" in at.tabs[2].markdown[11].value

	# Column 3
	assert "Goals:" in at.tabs[2].markdown[14].value
	assert "Assists:" in at.tabs[2].markdown[15].value
	assert "Team:" in at.tabs[2].markdown[16].value
	assert "Nationality:" in at.tabs[2].markdown[17].value

	# Column 4
	assert "Goals:" in at.tabs[2].markdown[20].value
	assert "Assists:" in at.tabs[2].markdown[21].value
	assert "Team:" in at.tabs[2].markdown[22].value
	assert "Nationality:" in at.tabs[2].markdown[23].value

	# Column 5
	assert "Goals:" in at.tabs[2].markdown[26].value
	assert "Assists:" in at.tabs[2].markdown[27].value
	assert "Team:" in at.tabs[2].markdown[28].value
	assert "Nationality:" in at.tabs[2].markdown[29].value

	assert at.tabs[2].subheader[1].value == "Recent Injuries"


# Fixtures
def test_tab_four():
	"""Tab 4 renders the Fixtures subheader."""
	assert at.tabs[3].subheader[0].value == "Fixtures"


# Squads
def test_tab_five():
	"""Tab 5 renders the squads selector with all 20 Premier League teams."""
	assert at.tabs[4].subheader[0].value == "Team Squads"
	assert at.tabs[4].markdown[0].value == "**Note:** Double click on the player's photo to expand it."
	assert at.tabs[4].selectbox[0].label == "Use the dropdown menu to select a team:"
	assert at.tabs[4].selectbox[0].placeholder == "Please make a selection"
	# Expected alphabetical team list for the 2023-24 season.
	assert at.tabs[4].selectbox[0].options == [
		"Arsenal",
		"Aston Villa",
		"Bournemouth",
		"Brentford",
		"Brighton",
		"Burnley",
		"Chelsea",
		"Crystal Palace",
		"Everton",
		"Fulham",
		"Liverpool",
		"Luton",
		"Manchester City",
		"Manchester United",
		"Newcastle",
		"Nottingham Forest",
		"Sheffield Utd",
		"Tottenham",
		"West Ham",
		"Wolves",
	]


# News & Highlights
def test_tab_six():
	"""Tab 6 renders the news and highlights headers."""
	assert at.tabs[5].header[0].value == "Recent News"
	assert at.tabs[5].header[1].value == "Recent Highlights"


# MANU Stock Price
def test_tab_seven():
	"""Tab 7 renders the stock-price subheader."""
	assert at.tabs[6].subheader[0].value == "MANU - Stock Price"


# About
def test_tab_eight():
	"""Tab 8 renders the About subheader."""
	assert at.tabs[7].subheader[0].value == "About"