├── .devcontainer └── devcontainer.json ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ └── new-feature.yml ├── labeler.yml └── workflows │ ├── build_publish_api.yml │ ├── build_publish_cli.yml │ ├── build_publish_compute.yml │ ├── build_publish_scraper.yml │ ├── pr_validation.yml │ ├── quality_check.yml │ └── stale.yml ├── .gitignore ├── .vscode └── launch.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── Taskfile.yml ├── bases └── ecoindex │ ├── backend │ ├── VERSION │ ├── __init__.py │ ├── dependencies │ │ ├── __init__.py │ │ ├── bff.py │ │ ├── compute.py │ │ ├── dates.py │ │ ├── host.py │ │ ├── id.py │ │ ├── pagination.py │ │ ├── validation.py │ │ └── version.py │ ├── main.py │ ├── middlewares │ │ ├── cors.py │ │ └── exception_handler.py │ ├── models │ │ ├── __init__.py │ │ ├── dependencies_parameters │ │ │ ├── __init__.py │ │ │ ├── bff.py │ │ │ ├── compute.py │ │ │ ├── dates.py │ │ │ ├── host.py │ │ │ ├── id.py │ │ │ ├── pagination.py │ │ │ └── version.py │ │ └── parameters.py │ ├── routers │ │ ├── __init__.py │ │ ├── bff.py │ │ ├── compute.py │ │ ├── ecoindex.py │ │ ├── health.py │ │ ├── host.py │ │ └── tasks.py │ ├── scripts │ │ ├── __init__.py │ │ └── openapi.py │ ├── services │ │ ├── __init__.py │ │ ├── cache.py │ │ └── ecoindex.py │ └── utils │ │ └── __init__.py │ ├── cli │ ├── VERSION │ ├── __init__.py │ ├── app.py │ ├── arguments_handler.py │ ├── console_output.py │ ├── crawl.py │ ├── helper.py │ ├── report.py │ ├── sitemap.py │ └── template.html │ └── worker │ ├── __init__.py │ ├── health.py │ └── tasks.py ├── components └── ecoindex │ ├── compute │ ├── VERSION │ ├── __init__.py │ └── ecoindex.py │ ├── config │ ├── __init__.py │ └── settings.py │ ├── data │ ├── __init__.py │ ├── colors.py │ ├── grades.py │ ├── medians.py │ ├── quantiles.py │ └── targets.py │ ├── database │ ├── __init__.py │ ├── engine.py │ ├── exceptions │ │ └── quota.py │ ├── helper.py │ ├── models │ │ └── __init__.py │ └── repositories │ │ ├── __init__.py │ │ ├── ecoindex.py │ │ ├── host.py │ │ └── worker.py │ ├── exceptions │ ├── __init__.py │ ├── scraper.py │ └── worker.py │ ├── models │ ├── __init__.py │ ├── api.py │ ├── cli.py │ ├── compute.py │ ├── enums.py │ ├── response_examples.py │ ├── scraper.py │ ├── sort.py │ └── tasks.py │ ├── scraper │ ├── VERSION │ ├── __init__.py │ ├── helper.py │ └── scrap.py │ ├── scripts │ ├── __init__.py │ └── update_values.py │ ├── utils │ ├── __init__.py │ ├── cli_translations │ │ ├── en.yml │ │ └── fr.yml │ ├── files.py │ └── screenshots.py │ └── worker_component │ └── __init__.py ├── development ├── ecoindex_compute.py ├── ecoindex_scraper.py └── scraper_test.py ├── docs └── images │ └── ecoindex-python-fullstack.png ├── poetry.lock ├── poetry.toml ├── projects ├── ecoindex_api │ ├── .dockerignore │ ├── .env.template │ ├── .gitignore │ ├── README.md │ ├── Taskfile.yml │ ├── alembic.ini │ ├── alembic │ │ ├── README │ │ ├── env.py │ │ ├── script.py.mako │ │ └── versions │ │ │ ├── 5afa2faea43f_.py │ │ │ ├── 7eaafaa65b32_update_url_field_type_to_text.py │ │ │ ├── 826abb0c4222_add_ecoindex_version_field.py │ │ │ ├── e83263a5def4_add_index_id_and_host.py │ │ │ └── fd9a1f5662c8_first_migration.py │ ├── docker-compose.yml.template │ ├── docker │ │ ├── backend │ │ │ ├── dockerfile │ │ │ └── entrypoint.sh │ │ └── worker │ │ │ ├── dockerfile │ │ │ └── entrypoint.sh │ ├── openapi.json │ ├── poetry.lock │ ├── pyproject.toml │ └── screenshots │ │ └── .gitkeep ├── ecoindex_cli │ ├── .dockerignore │ ├── README.md │ ├── Taskfile.yml │ ├── doc │ │ └── 
report.png │ ├── dockerfile │ ├── poetry.lock │ └── pyproject.toml ├── ecoindex_compute │ ├── README.md │ ├── Taskfile.yml │ ├── poetry.lock │ └── pyproject.toml └── ecoindex_scraper │ ├── README.md │ ├── Taskfile.yml │ ├── dockerfile │ ├── poetry.lock │ └── pyproject.toml ├── pyproject.toml ├── tasks ├── DockerTaskfile.yml ├── PoetryTaskfile.yml ├── PypiTaskFile.yml └── QualityTaskFile.yml ├── test ├── bases │ └── ecoindex │ │ ├── backend │ │ └── __init__.py │ │ ├── cli │ │ ├── __init__.py │ │ ├── test_app.py │ │ ├── test_arguments_handler.py │ │ └── test_helper.py │ │ └── worker │ │ └── __init__.py └── components │ └── ecoindex │ ├── compute │ ├── __init__.py │ ├── test_ecoindex.py │ └── test_models.py │ ├── data │ └── __init__.py │ ├── exceptions │ └── __init__.py │ ├── models │ ├── __init__.py │ └── test_scraper.py │ ├── scraper │ ├── __init__.py │ └── test_scraper.py │ ├── scripts │ └── __init__.py │ ├── utils │ └── __init__.py │ └── worker │ └── __init__.py └── workspace.toml /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Ecoindex python full stack dev container", 3 | "image": "mcr.microsoft.com/devcontainers/python:3.12", 4 | "postCreateCommand": "pipx install poetry==1.8.5 && poetry self add poetry-multiproject-plugin && poetry self add poetry-polylith-plugin", 5 | "features": { 6 | "ghcr.io/audacioustux/devcontainers/taskfile": {}, 7 | "ghcr.io/devcontainers/features/docker-in-docker:2": { 8 | "installDockerBuildx": true, 9 | "version": "latest", 10 | "dockerDashComposeVersion": "v2" 11 | } 12 | }, 13 | "forwardPorts": [ 14 | 8000 15 | ], 16 | "customizations": { 17 | "vscode": { 18 | "extensions": [ 19 | "-ms-python.autopep8", 20 | "adrianwilczynski.alpine-js-intellisense", 21 | "adrianwilczynski.alpine-js-intellisense", 22 | "bierner.markdown-emoji", 23 | "charliermarsh.ruff", 24 | "Codeium.codeium", 25 | "github.vscode-github-actions", 26 | "Gruntfuggly.todo-tree", 27 | "mhutchie.git-graph", 28 | "ms-azuretools.vscode-docker", 29 | "ms-python.mypy-type-checker", 30 | "ms-python.python", 31 | "Perkovec.emoji", 32 | "samuelcolvin.jinjahtml", 33 | "tamasfe.even-better-toml", 34 | "ue.alphabetical-sorter", 35 | "yzhang.markdown-all-in-one", 36 | "esbenp.prettier-vscode", 37 | "ms-pyright.pyright", 38 | "-ms-python.vscode-pylance" 39 | ] 40 | } 41 | } 42 | } -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug report 3 | title: "[Bug]: " 4 | labels: [bug, triage] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to fill out this bug report! :heart: 10 | - type: textarea 11 | id: what-happened 12 | attributes: 13 | label: What happened? 14 | description: Also tell us, what did you expect to happen? 15 | placeholder: Tell us what you see! 16 | value: "A bug happened!" 17 | validations: 18 | required: true 19 | - type: dropdown 20 | id: project 21 | attributes: 22 | label: Project 23 | description: What project is concerned by this bug? 24 | options: 25 | - Ecoindex API 26 | - Ecoindex CLI 27 | - Ecoindex Compute 28 | - Ecoindex Scraper 29 | validations: 30 | required: true 31 | - type: dropdown 32 | id: os 33 | attributes: 34 | label: What OS do you use? 
35 |       multiple: true 36 |       options: 37 |         - Windows 38 |         - Linux 39 |         - Mac 40 |   - type: textarea 41 |     id: urls 42 |     attributes: 43 |       label: URLs 44 |       description: Can you provide one or more example URLs where you encounter this issue? 45 |     validations: 46 |       required: false 47 |   - type: textarea 48 |     id: logs 49 |     attributes: 50 |       label: Relevant log output 51 |       description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. 52 |       render: shell 53 |   - type: checkboxes 54 |     id: terms 55 |     attributes: 56 |       label: Code of Conduct 57 |       description: By submitting this issue, you agree to follow our [Code of Conduct](CODE_OF_CONDUCT.md) 58 |       options: 59 |         - label: I agree to follow this project's Code of Conduct 60 |           required: true 61 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-feature.yml: -------------------------------------------------------------------------------- 1 | name: New feature 2 | description: Request a new feature 3 | title: "[Feature]: " 4 | labels: [enhancement, triage] 5 | body: 6 |   - type: markdown 7 |     attributes: 8 |       value: | 9 |         Thanks for taking the time to request a new feature! :heart: 10 |   - type: textarea 11 |     id: what-feature 12 |     attributes: 13 |       label: What feature do you want? 14 |       description: Describe the feature you want to see in this project 15 |       placeholder: Tell us what you want! 16 |       value: "I want a new feature!" 17 |     validations: 18 |       required: true 19 |   - type: dropdown 20 |     id: project 21 |     attributes: 22 |       label: Project 23 |       description: Which project does this feature request concern? 24 |       options: 25 |         - Ecoindex API 26 |         - Ecoindex CLI 27 |         - Ecoindex Compute 28 |         - Ecoindex Scraper 29 |     validations: 30 |       required: true 31 |   - type: textarea 32 |     id: why-feature 33 |     attributes: 34 |       label: Why do you want this feature? 35 |       description: Tell us why you want this feature 36 |       placeholder: Tell us why you want this feature! 37 |       value: "I want this feature because..." 
38 | validations: 39 | required: true 40 | - type: checkboxes 41 | id: terms 42 | attributes: 43 | label: Code of Conduct 44 | description: By submitting this issue, you agree to follow our [Code of Conduct](CODE_OF_CONDUCT.md) 45 | options: 46 | - label: I agree to follow this project's Code of Conduct 47 | required: true 48 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | ci/cd: 2 | - .github/workflows/* 3 | 4 | documentation: 5 | - README.md 6 | - ./**/*.md 7 | 8 | tooling: 9 | - ./**/*TaskFile.yml 10 | 11 | tests: 12 | - test/** 13 | 14 | compute: 15 | - components/ecoindex/compute/** 16 | 17 | scraper: 18 | - components/ecoindex/scraper/** 19 | 20 | cli: 21 | - bases/ecoindex/cli/** 22 | 23 | api: 24 | - bases/ecoindex/backend/** 25 | - bases/ecoindex/worker/** 26 | 27 | components: 28 | - components -------------------------------------------------------------------------------- /.github/workflows/pr_validation.yml: -------------------------------------------------------------------------------- 1 | name: "Validate PR" 2 | 3 | on: 4 | pull_request: 5 | types: [opened, edited, synchronize, reopened] 6 | 7 | permissions: 8 | pull-requests: write 9 | 10 | jobs: 11 | title-lint: 12 | name: Validate PR title 13 | runs-on: ubuntu-latest 14 | concurrency: pr-lint 15 | steps: 16 | - uses: amannn/action-semantic-pull-request@v5 17 | env: 18 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 19 | 20 | triage: 21 | runs-on: ubuntu-latest 22 | concurrency: triage 23 | steps: 24 | - uses: actions/checkout@v4 25 | - uses: actions/labeler@v4 26 | with: 27 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 28 | 29 | size-label: 30 | runs-on: ubuntu-latest 31 | concurrency: size-label 32 | steps: 33 | - uses: actions/checkout@v4 34 | - name: size-label 35 | uses: "pascalgn/size-label-action@v0.5.0" 36 | env: 37 | GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 38 | with: 39 | sizes: > 40 | { 41 | "0": "XS", 42 | "20": "S", 43 | "50": "M", 44 | "250": "Too Large" 45 | } 46 | -------------------------------------------------------------------------------- /.github/workflows/quality_check.yml: -------------------------------------------------------------------------------- 1 | name: Validate project quality 2 | 3 | on: 4 | pull_request: 5 | types: [opened, edited, synchronize, reopened] 6 | push: 7 | branches: [main] 8 | 9 | permissions: 10 | contents: write 11 | pull-requests: write 12 | 13 | jobs: 14 | project-quality: 15 | name: Validate project quality 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Install Task 20 | uses: arduino/setup-task@v1 21 | with: 22 | version: 3.x 23 | repo-token: ${{ secrets.GITHUB_TOKEN }} 24 | - name: Install poetry and plugins 25 | run: | 26 | curl -sSL https://install.python-poetry.org | python - 27 | poetry config virtualenvs.create true 28 | poetry self add poetry-multiproject-plugin 29 | poetry self add poetry-polylith-plugin 30 | - name: Install dependencies 31 | run: | 32 | poetry install 33 | - name: Validate polylith project 34 | run: | 35 | task project-check 36 | - name: Validate code quality 37 | run: | 38 | task quality 39 | - name: Pytest coverage comment 40 | uses: MishaKav/pytest-coverage-comment@main 41 | with: 42 | pytest-xml-coverage-path: ./coverage.xml 43 | title: Branch coverage 44 | badge-title: Coverage PR ${{ github.event.pull_request.number }} 45 | report-only-changed-files: true 46 | 
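      # Added note (assumption, not in the original workflow): `task quality` is expected to write ./coverage.xml (coverage.xml also appears in .gitignore), which the pytest-coverage-comment step above reads via `pytest-xml-coverage-path` to post the coverage table on the pull request.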
-------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Stale bot 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0 * * *" 6 | 7 | permissions: 8 | contents: write 9 | issues: write 10 | pull-requests: write 11 | 12 | jobs: 13 | stale: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/stale@v9 17 | with: 18 | repo-token: ${{ secrets.GITHUB_TOKEN }} 19 | stale-issue-message: This issue has been marked as inactive because it hasn't been updated for 30 days. If it's not updated within 7 days, it will be automatically closed. To prevent it from being closed, you can add the `keep open` label. 20 | stale-pr-message: This pull request has been marked as inactive because it hasn't been updated for 30 days. If it's not updated within 7 days, it will be automatically closed. To prevent it from being closed, you can add the `keep open` label. 21 | exempt-pr-labels: "keep open" 22 | exempt-issue-labels: "keep open" 23 | close-issue-message: This issue has been closed because it's been inactive for 37 days. If you think this is a mistake, you can reopen it. 24 | close-pr-message: This pull request has been closed because it's been inactive for 37 days. If you think this is a mistake, you can reopen it. 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | dist 3 | *.webp 4 | *.sqlite3 5 | .coverage 6 | coverage.xml 7 | *.csv -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | 8 | { 9 | "name": "Python Debugger: FastAPI", 10 | "type": "debugpy", 11 | "request": "launch", 12 | "module": "uvicorn", 13 | "args": [ 14 | "ecoindex.backend.main:app", 15 | "--reload" 16 | ], 17 | "jinja": true 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 44 | 45 | [homepage]: https://www.contributor-covenant.org 46 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to ecoindex_python 2 | 3 | We love your input! We want to make contributing to this project as easy and transparent as possible, whether it's: 4 | 5 | - Reporting a bug 6 | - Discussing the current state of the code 7 | - Submitting a fix 8 | - Proposing new features 9 | - Becoming a maintainer 10 | 11 | ## We Develop with Github 12 | 13 | We use github to host code, to track issues and feature requests, as well as accept pull requests. 
14 | 15 | ## We Use [GitHub Flow](https://guides.github.com/introduction/flow/index.html), So All Code Changes Happen Through Pull Requests 16 | 17 | Pull requests are the best way to propose changes to the codebase (we use [GitHub Flow](https://guides.github.com/introduction/flow/index.html)). We actively welcome your pull requests: 18 | 19 | 1. Fork the repo and create your branch from `main`. 20 | 2. If you've added code that should be tested, add tests. 21 | 3. Ensure the test suite passes. 22 | 4. Make sure your code lints. 23 | 5. Issue that pull request! 24 | 25 | ## Any contributions you make will be under the Creative Commons License 26 | 27 | In short, when you submit code changes, your submissions are understood to be under the same [Creative Commons License](LICENSE) that covers the project. Feel free to contact the maintainers if that's a concern. 28 | 29 | ## Report bugs using GitHub's issues 30 | 31 | We use GitHub issues to track public bugs. Report a bug by opening a new issue; it's that easy! 32 | 33 | ## Write bug reports with detail, background, and sample code 34 | 35 | **Great Bug Reports** tend to have: 36 | 37 | - A quick summary and/or background 38 | - Steps to reproduce 39 |   - Be specific! 40 |   - Give sample code if you can. [An example question](http://stackoverflow.com/q/12488905/180626) includes sample code that *anyone* with a base R setup can run to reproduce what I was seeing 41 | - What you expected would happen 42 | - What actually happens 43 | - Notes (possibly including why you think this might be happening, or stuff you tried that didn't work) 44 | 45 | People *love* thorough bug reports. I'm not even kidding. 46 | 47 | ## Use a Consistent Coding Style 48 | 49 | We use [black](https://github.com/psf/black) for code formatting. 50 | 51 | ## License 52 | 53 | By contributing, you agree that your contributions will be licensed under its Creative Commons License. 
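To tie these guidelines together, here is a minimal sketch of the local loop before opening a pull request. It assumes you work from a fork and use the Taskfile targets that CI also runs (`task poetry:install`, `task project-check`, `task quality`); the fork URL and branch name below are placeholders:

```bash
# Hypothetical contribution workflow — adjust the fork URL and branch name.
git clone git@github.com:<your-user>/ecoindex_python_fullstack.git
cd ecoindex_python_fullstack
git checkout -b fix/my-change   # branch from main
task poetry:install             # install the main dependencies
task project-check              # validate the polylith structure
task quality                    # lint and run the test suite
git push origin fix/my-change   # then open your pull request on GitHub
```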
54 | 55 | ## References 56 | 57 | This document was adapted from the open-source contribution guidelines for [Facebook's Draft](https://github.com/facebook/draft-js/blob/a9316a723f9e918afde44dea68b5f9f39b7d9b00/CONTRIBUTING.md) 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ecoindex Python Fullstack 2 | 3 | [![Validate project quality](https://github.com/cnumr/ecoindex_python_fullstack/actions/workflows/quality_check.yml/badge.svg?branch=main)](https://github.com/cnumr/ecoindex_python_fullstack/actions/workflows/quality_check.yml) 4 | 5 | ![PyPI - Version](https://img.shields.io/pypi/v/ecoindex-compute?logo=pypi&label=ecoindex-compute) 6 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/ecoindex-compute?style=social&logo=pypi&label=ecoindex-compute) 7 | 8 | ![PyPI - Version](https://img.shields.io/pypi/v/ecoindex-scraper?logo=pypi&label=ecoindex-scraper) 9 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/ecoindex-scraper?style=social&logo=pypi&label=ecoindex-scraper) 10 | 11 | ![PyPI - Version](https://img.shields.io/pypi/v/ecoindex-cli?logo=pypi&label=ecoindex-cli) 12 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/ecoindex-cli?style=social&logo=pypi&label=ecoindex-cli) 13 | ![Docker Pulls](https://img.shields.io/docker/pulls/vvatelot/ecoindex-cli?style=social&logo=docker&label=CLI) 14 | 15 | ![Docker Pulls](https://img.shields.io/docker/pulls/vvatelot/ecoindex-api-worker?style=social&logo=docker&label=API%20Worker) 16 | ![Docker Pulls](https://img.shields.io/docker/pulls/vvatelot/ecoindex-api-backend?style=social&logo=docker&label=API%20Backend) 17 | 18 | - [Ecoindex Python Fullstack](#ecoindex-python-fullstack) 19 | - [Projects](#projects) 20 | - [Getting started](#getting-started) 21 | - [Requirements](#requirements) 22 | - [Installation](#installation) 23 | - [Usage](#usage) 24 | - [Disclaimer](#disclaimer) 25 | - [License](#license) 26 | - [Contributing](#contributing) 27 | - [Code of conduct](#code-of-conduct) 28 | 29 | This project is a polylith repository for the Ecoindex project. It is called "fullstack" because it contains all the components of the project, including the backend that is used in production, but also a CLI tool and modules that can be used in other projects. 30 | 31 | You can get more information about polylith in the [official documentation](https://polylith.gitbook.io/polylith) and in the [python-polylith tool](https://github.com/DavidVujic/python-polylith) that is used to manage this repository. 32 | 33 | ## Projects 34 | 35 | This repository contains the following projects: 36 | 37 | - [Ecoindex Compute](projects/ecoindex_compute/README.md): this is the base module that provides a simple interface to get the [Ecoindex](http://www.ecoindex.fr) based on 3 parameters: 38 | - The number of DOM elements in the page 39 | - The size of the page 40 | - The number of external requests of the page 41 | - [Ecoindex Scraper](projects/ecoindex_scraper/README.md): This module provides a simple interface to get the [Ecoindex](http://www.ecoindex.fr) based on a URL. It uses [Playwright](https://playwright.dev/) to get the DOM elements, size and requests of the page. 42 | - [Ecoindex CLI](projects/ecoindex_cli/README.md): This module provides a CLI tool to get the [Ecoindex](http://www.ecoindex.fr) based on a URL. It uses the [Ecoindex Scraper](projects/ecoindex_scraper/README.md) module. 
43 | - [Ecoindex API](projects/ecoindex_api/README.md): This module provides a REST API to get the [Ecoindex](http://www.ecoindex.fr) based on a URL. It uses the [Ecoindex Scraper](projects/ecoindex_scraper/README.md) module. 44 | 45 | Here is a diagram of the dependencies between the projects: 46 | 47 | ![Ecoindex Python Fullstack](docs/images/ecoindex-python-fullstack.png) 48 | 49 | ## Getting started 50 | 51 | ### Requirements 52 | 53 | - [Python 3.9+](https://www.python.org/downloads/) 54 | - [Poetry](https://python-poetry.org/docs/#installation) 55 | - [Task](https://taskfile.dev/#/installation) 56 | - [Docker](https://docs.docker.com/get-docker/) (optional) 57 | 58 | ### Installation 59 | 60 | To install main dependencies, run: 61 | 62 | ```bash 63 | task poetry:install 64 | ``` 65 | 66 | ### Usage 67 | 68 | Have a look at the task help: 69 | 70 | ```bash 71 | task --list 72 | ``` 73 | 74 | ## Disclaimer 75 | 76 | The LCA values used by [ecoindex](https://github.com/cnumr/ecoindex_monorepo) to evaluate environmental impacts are not under free license - ©Frédéric Bordage 77 | Please also refer to the mentions provided in the code files for specifics on the IP regime. 78 | 79 | ## [License](LICENSE) 80 | 81 | ## [Contributing](CONTRIBUTING.md) 82 | 83 | ## [Code of conduct](CODE_OF_CONDUCT.md) -------------------------------------------------------------------------------- /Taskfile.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | includes: 4 | api: 5 | taskfile: ./projects/ecoindex_api/Taskfile.yml 6 | dir: ./projects/ecoindex_api/ 7 | 8 | cli: 9 | taskfile: ./projects/ecoindex_cli/Taskfile.yml 10 | dir: ./projects/ecoindex_cli/ 11 | 12 | compute: 13 | taskfile: ./projects/ecoindex_compute/Taskfile.yml 14 | dir: ./projects/ecoindex_compute/ 15 | 16 | scraper: 17 | taskfile: ./projects/ecoindex_scraper/Taskfile.yml 18 | dir: ./projects/ecoindex_scraper/ 19 | 20 | poetry: ./tasks/PoetryTaskfile.yml 21 | 22 | quality: ./tasks/QualityTaskFile.yml 23 | 24 | tasks: 25 | project-check: 26 | desc: Check polylith structure project 27 | cmds: 28 | - poetry poly check 29 | 30 | project-sync: 31 | desc: Sync polylith structure project 32 | cmds: 33 | - poetry poly sync 34 | 35 | bump: 36 | desc: Bump the Main project version with rules:major, minor, patch, premajor, preminor, prepatch or prerelease. 
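      # Added note (assumption): the bump rule is taken from Task's CLI args — everything after `--` (e.g. `task bump -- minor`) populates {{.CLI_ARGS}} below.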
37 | cmds: 38 | - task: poetry:bump 39 | vars: 40 | VERSION_FILE_PATH: "VERSION" 41 | VERSION_RULE: "{{.CLI_ARGS}}" 42 | silent: true 43 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/VERSION: -------------------------------------------------------------------------------- 1 | 3.11.1 2 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import lru_cache 3 | 4 | 5 | @lru_cache 6 | def get_api_version() -> str: 7 | current_directory = os.path.dirname(os.path.realpath(__file__)) 8 | version_filename = os.path.join(current_directory, "VERSION") 9 | 10 | with open(version_filename, "r") as f: 11 | return (f.read()).strip() 12 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/dependencies/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/dependencies/bff.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from ecoindex.backend.models.dependencies_parameters.version import VersionParameter 4 | from ecoindex.backend.models.parameters import BffParameters 5 | from ecoindex.models.enums import Version 6 | from fastapi import Query 7 | from pydantic import AnyHttpUrl 8 | 9 | 10 | def get_bff_parameters( 11 | url: Annotated[AnyHttpUrl, Query(description="Url to be searched in database")], 12 | refresh: Annotated[ 13 | bool, 14 | Query( 15 | description="Force the refresh of the cache", 16 | ), 17 | ] = False, 18 | version: VersionParameter = Version.v1, 19 | ) -> BffParameters: 20 | return BffParameters( 21 | url=url, 22 | refresh=refresh, 23 | version=version, 24 | ) 25 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/dependencies/compute.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from ecoindex.backend.models.parameters import ComputeParameters 4 | from fastapi import Query 5 | 6 | 7 | def get_compute_parameters( 8 | dom: Annotated[ 9 | int, 10 | Query( 11 | default=..., 12 | description="Number of DOM nodes of the page", 13 | gt=0, 14 | example=204, 15 | ), 16 | ], 17 | size: Annotated[ 18 | float, 19 | Query( 20 | default=..., description="Total size of the page in Kb", gt=0, example=109 21 | ), 22 | ], 23 | requests: Annotated[ 24 | int, 25 | Query( 26 | default=..., description="Number of requests of the page", gt=0, example=5 27 | ), 28 | ], 29 | ) -> ComputeParameters: 30 | return ComputeParameters(dom=dom, size=size, requests=requests) 31 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/dependencies/dates.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from typing import Annotated 3 | 4 | from ecoindex.backend.models.parameters import DateRange 5 | from fastapi import Query 6 | 7 | 8 | def get_date_parameters( 9 | date_from: Annotated[ 10 | date | None, 11 | Query(description="Start date of the filter elements (example: 2020-01-01)"), 12 | ] = None, 13 | date_to: Annotated[ 14 | date | None, 15 | 
Query(description="End date of the filter elements (example: 2020-01-01)"), 16 | ] = None, 17 | ) -> DateRange: 18 | return DateRange(date_from=date_from, date_to=date_to) 19 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/dependencies/host.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from fastapi import Query 4 | 5 | 6 | def get_host_parameter( 7 | host: Annotated[ 8 | str | None, Query(description="Host name you want to filter (can be partial)") 9 | ] = None, 10 | ) -> str | None: 11 | return host 12 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/dependencies/id.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from uuid import UUID 3 | 4 | from fastapi import Path 5 | 6 | 7 | def get_id_parameter( 8 | id: Annotated[ 9 | UUID, 10 | Path(default=..., description="Unique identifier of the ecoindex analysis"), 11 | ] 12 | ) -> UUID: 13 | return id 14 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/dependencies/pagination.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from ecoindex.backend.models.parameters import Pagination 4 | from fastapi import Query 5 | 6 | 7 | def get_pagination_parameters( 8 | page: Annotated[int, Query(description="Page number", ge=1)] = 1, 9 | size: Annotated[ 10 | int, Query(description="Number of elements per page", ge=1, le=100) 11 | ] = 50, 12 | ) -> Pagination: 13 | return Pagination(page=page, size=size) 14 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/dependencies/validation.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from ecoindex.config.settings import Settings 4 | from fastapi import Header, HTTPException, status 5 | 6 | 7 | def validate_api_key_batch( 8 | api_key: Annotated[ 9 | str, 10 | Header(alias="X-Api-Key"), 11 | ], 12 | ): 13 | if not api_key: 14 | raise HTTPException( 15 | status_code=status.HTTP_403_FORBIDDEN, 16 | detail="Invalid API key", 17 | ) 18 | 19 | for authorized_api_key in Settings().API_KEYS_BATCH: 20 | if api_key == authorized_api_key["key"]: 21 | return authorized_api_key 22 | 23 | raise HTTPException( 24 | status_code=status.HTTP_403_FORBIDDEN, 25 | detail="Invalid API key", 26 | ) 27 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/dependencies/version.py: -------------------------------------------------------------------------------- 1 | from ecoindex.models.enums import Version 2 | from fastapi import Path 3 | 4 | 5 | def get_version_parameter( 6 | version: Version = Path( 7 | default=..., 8 | title="Engine version", 9 | description="Engine version used to run the analysis (v0 or v1)", 10 | example=Version.v1.value, 11 | ) 12 | ) -> Version: 13 | return version 14 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/main.py: -------------------------------------------------------------------------------- 1 | from ecoindex.backend import get_api_version 2 | from ecoindex.backend.routers import router 3 | from ecoindex.backend.services.cache import cache 4 | from 
ecoindex.config import Settings 5 | from ecoindex.database.engine import init_db 6 | from fastapi import FastAPI 7 | from fastapi.concurrency import asynccontextmanager 8 | from sentry_sdk import init as sentry_init 9 | 10 | 11 | def init_app(): 12 | cache.init() 13 | if Settings().GLITCHTIP_DSN: 14 | sentry_init(Settings().GLITCHTIP_DSN) 15 | 16 | @asynccontextmanager 17 | async def lifespan(app: FastAPI): 18 | await init_db() 19 | yield 20 | 21 | app = FastAPI( 22 | title="Ecoindex API", 23 | version=get_api_version(), 24 | description=( 25 | "Ecoindex API enables you to perform ecoindex analysis of given web pages" 26 | ), 27 | lifespan=lifespan, 28 | ) 29 | 30 | app.include_router(router) 31 | 32 | from ecoindex.backend.middlewares.cors import add_cors_middleware 33 | from ecoindex.backend.middlewares.exception_handler import handle_exceptions 34 | 35 | handle_exceptions(app) 36 | add_cors_middleware(app) 37 | 38 | return app 39 | 40 | 41 | app = init_app() 42 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/middlewares/cors.py: -------------------------------------------------------------------------------- 1 | from ecoindex.config import Settings 2 | from fastapi import FastAPI 3 | from fastapi.middleware.cors import CORSMiddleware 4 | 5 | 6 | def add_cors_middleware(app: FastAPI): 7 | app.add_middleware( 8 | CORSMiddleware, 9 | allow_credentials=Settings().CORS_ALLOWED_CREDENTIALS, 10 | allow_headers=Settings().CORS_ALLOWED_HEADERS, 11 | allow_methods=Settings().CORS_ALLOWED_METHODS, 12 | allow_origins=Settings().CORS_ALLOWED_ORIGINS, 13 | ) 14 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/middlewares/exception_handler.py: -------------------------------------------------------------------------------- 1 | from ecoindex.backend.utils import format_exception_response 2 | from ecoindex.database.exceptions.quota import QuotaExceededException 3 | from fastapi import FastAPI, Request, status 4 | from fastapi.responses import JSONResponse 5 | 6 | HTTP_520_ECOINDEX_TYPE_ERROR = 520 7 | HTTP_521_ECOINDEX_CONNECTION_ERROR = 521 8 | 9 | 10 | def handle_exceptions(app: FastAPI): 11 | @app.exception_handler(RuntimeError) 12 | async def handle_screenshot_not_found_exception(_: Request, exc: FileNotFoundError): 13 | return JSONResponse( 14 | content={"detail": str(exc)}, 15 | status_code=status.HTTP_404_NOT_FOUND, 16 | ) 17 | 18 | @app.exception_handler(TypeError) 19 | async def handle_resource_type_error(_: Request, exc: TypeError): 20 | return JSONResponse( 21 | content={"detail": exc.args[0]}, 22 | status_code=HTTP_520_ECOINDEX_TYPE_ERROR, 23 | ) 24 | 25 | @app.exception_handler(ConnectionError) 26 | async def handle_connection_error(_: Request, exc: ConnectionError): 27 | return JSONResponse( 28 | content={"detail": exc.args[0]}, 29 | status_code=HTTP_521_ECOINDEX_CONNECTION_ERROR, 30 | ) 31 | 32 | @app.exception_handler(QuotaExceededException) 33 | async def handle_quota_exceeded_exception(_: Request, exc: QuotaExceededException): 34 | return JSONResponse( 35 | status_code=status.HTTP_429_TOO_MANY_REQUESTS, 36 | content={"detail": exc.__dict__}, 37 | ) 38 | 39 | @app.exception_handler(Exception) 40 | async def handle_exception(_: Request, exc: Exception): 41 | exception_response = await format_exception_response(exception=exc) 42 | return JSONResponse( 43 | content={"detail": exception_response.model_dump()}, 44 | status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, 45 | ) 46 
| -------------------------------------------------------------------------------- /bases/ecoindex/backend/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/backend/models/__init__.py -------------------------------------------------------------------------------- /bases/ecoindex/backend/models/dependencies_parameters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/backend/models/dependencies_parameters/__init__.py -------------------------------------------------------------------------------- /bases/ecoindex/backend/models/dependencies_parameters/bff.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from ecoindex.backend.dependencies.bff import get_bff_parameters 4 | from ecoindex.backend.models.parameters import BffParameters 5 | from fastapi import Depends 6 | 7 | BffDepParameters = Annotated[BffParameters, Depends(get_bff_parameters)] 8 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/models/dependencies_parameters/compute.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from ecoindex.backend.dependencies.compute import get_compute_parameters 4 | from ecoindex.backend.models.parameters import ComputeParameters 5 | from fastapi import Depends 6 | 7 | ComputeDepParameters = Annotated[ComputeParameters, Depends(get_compute_parameters)] 8 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/models/dependencies_parameters/dates.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from ecoindex.backend.dependencies.dates import get_date_parameters 4 | from ecoindex.backend.models.parameters import DateRange 5 | from fastapi import Depends 6 | 7 | DateRangeParameters = Annotated[DateRange, Depends(get_date_parameters)] 8 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/models/dependencies_parameters/host.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from ecoindex.backend.dependencies.host import get_host_parameter 4 | from fastapi import Depends 5 | 6 | HostParameter = Annotated[str | None, Depends(get_host_parameter)] 7 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/models/dependencies_parameters/id.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from uuid import UUID 3 | 4 | from ecoindex.backend.dependencies.id import get_id_parameter 5 | from fastapi import Depends 6 | 7 | IdParameter = Annotated[UUID, Depends(get_id_parameter)] 8 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/models/dependencies_parameters/pagination.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from ecoindex.backend.dependencies.pagination import 
get_pagination_parameters 4 | from ecoindex.backend.models.parameters import Pagination 5 | from fastapi import Depends 6 | 7 | PaginationParameters = Annotated[Pagination, Depends(get_pagination_parameters)] 8 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/models/dependencies_parameters/version.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from ecoindex.backend.dependencies.version import get_version_parameter 4 | from ecoindex.models.enums import Version 5 | from fastapi import Depends 6 | 7 | VersionParameter = Annotated[Version, Depends(get_version_parameter)] 8 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/models/parameters.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | 3 | from ecoindex.models.enums import Version 4 | from pydantic import AnyHttpUrl, BaseModel 5 | 6 | 7 | class Pagination(BaseModel): 8 |     page: int = 1 9 |     size: int = 50 10 | 11 | 12 | class DateRange(BaseModel): 13 |     date_from: date | None = None 14 |     date_to: date | None = None 15 | 16 | 17 | class BffParameters(BaseModel): 18 |     url: AnyHttpUrl 19 |     refresh: bool = False 20 |     version: Version = Version.v1 21 | 22 | 23 | class ComputeParameters(BaseModel): 24 |     dom: int 25 |     size: float 26 |     requests: int 27 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/routers/__init__.py: -------------------------------------------------------------------------------- 1 | from ecoindex.backend.routers.bff import router as router_bff 2 | from ecoindex.backend.routers.compute import router as router_compute 3 | from ecoindex.backend.routers.ecoindex import router as router_ecoindex 4 | from ecoindex.backend.routers.health import router as router_health 5 | from ecoindex.backend.routers.host import router as router_host 6 | from ecoindex.backend.routers.tasks import router as router_task 7 | from fastapi import APIRouter 8 | 9 | router = APIRouter() 10 | 11 | router.include_router(router=router_bff) 12 | router.include_router(router=router_ecoindex) 13 | router.include_router(router=router_compute) 14 | router.include_router(router=router_host) 15 | router.include_router(router=router_task) 16 | router.include_router(router=router_health) 17 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/routers/bff.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from ecoindex.backend.models.dependencies_parameters.bff import BffDepParameters 4 | from ecoindex.backend.services.ecoindex import get_badge, get_latest_result_by_url 5 | from ecoindex.config.settings import Settings 6 | from ecoindex.database.engine import get_session 7 | from ecoindex.database.models import EcoindexSearchResults 8 | from ecoindex.models import example_file_not_found 9 | from ecoindex.models.enums import BadgeTheme 10 | from fastapi import APIRouter, Depends, HTTPException, Query, Response, status 11 | from fastapi.responses import RedirectResponse 12 | from sqlmodel.ext.asyncio.session import AsyncSession 13 | 14 | router = APIRouter(prefix="/{version}/ecoindexes", tags=["BFF"]) 15 | 16 | 17 | @router.get( 18 |     name="Get latest results", 19 |     path="/latest", 20 |     response_model=EcoindexSearchResults, 
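    # Added note: EcoindexSearchResults exposes a `count` of stored analyses; the handler below maps count == 0 to HTTP 404.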
21 |     response_description="Get latest results for a given url", 22 | ) 23 | async def get_latest_results( 24 |     response: Response, 25 |     parameters: BffDepParameters, 26 |     session: AsyncSession = Depends(get_session), 27 | ) -> EcoindexSearchResults: 28 |     """ 29 |     This returns the latest results for a given url. This feature is used by the Ecoindex 30 |     browser extension. By default, the results are cached for 7 days. 31 | 32 |     If the url is not found in the database, the response status code will be 404. 33 |     """ 34 |     latest_result = await get_latest_result_by_url( 35 |         session=session, 36 |         url=parameters.url, 37 |         refresh=parameters.refresh, 38 |         version=parameters.version, 39 |     ) 40 | 41 |     if latest_result.count == 0: 42 |         response.status_code = status.HTTP_404_NOT_FOUND 43 | 44 |     return latest_result 45 | 46 | 47 | @router.get( 48 |     name="Get badge", 49 |     path="/latest/badge", 50 |     response_description="Badge of the given url from [CDN V1](https://www.jsdelivr.com/package/gh/cnumr/ecoindex_badge)", 51 |     responses={status.HTTP_404_NOT_FOUND: example_file_not_found}, 52 | ) 53 | async def get_badge_endpoint( 54 |     parameters: BffDepParameters, 55 |     theme: Annotated[ 56 |         BadgeTheme, Query(description="Theme of the badge") 57 |     ] = BadgeTheme.light, 58 |     session: AsyncSession = Depends(get_session), 59 | ) -> Response: 60 |     """ 61 |     This returns the SVG badge of the given url. This feature is used by the Ecoindex 62 |     badge. By default, the results are cached for 7 days. 63 | 64 |     If the url is not found in the database, it will return a badge with the grade `?`. 65 |     """ 66 |     return Response( 67 |         content=await get_badge( 68 |             session=session, 69 |             url=parameters.url, 70 |             refresh=parameters.refresh, 71 |             version=parameters.version, 72 |             theme=theme.value, 73 |         ), 74 |         media_type="image/svg+xml", 75 |     ) 76 | 77 | 78 | @router.get( 79 |     name="Get latest results redirect", 80 |     path="/latest/redirect", 81 |     response_description="Redirect to the latest results for a given url", 82 | ) 83 | async def get_latest_result_redirect( 84 |     parameters: BffDepParameters, 85 |     session: AsyncSession = Depends(get_session), 86 | ) -> RedirectResponse: 87 |     """ 88 |     This redirects to the latest results on the frontend website for the given url. 89 |     This feature is used by the Ecoindex browser extension and badge. 90 | 91 |     If the url is not found in the database, the response status code will be 404. 
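    (Added note: the redirect target is built from `Settings().FRONTEND_BASE_URL` plus the id of the latest stored analysis, as shown in the function body below.)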
92 | """ 93 | latest_result = await get_latest_result_by_url( 94 | session=session, 95 | url=parameters.url, 96 | refresh=parameters.refresh, 97 | version=parameters.version, 98 | ) 99 | 100 | if latest_result.count == 0: 101 | raise HTTPException( 102 | status_code=status.HTTP_404_NOT_FOUND, 103 | detail=f"No analysis found for {parameters.url}", 104 | ) 105 | 106 | return RedirectResponse( 107 | url=f"{Settings().FRONTEND_BASE_URL}/resultat/?id={latest_result.latest_result.id}" # type: ignore 108 | ) 109 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/routers/compute.py: -------------------------------------------------------------------------------- 1 | from ecoindex.backend.models.dependencies_parameters.compute import ComputeDepParameters 2 | from ecoindex.compute.ecoindex import compute_ecoindex 3 | from ecoindex.models.compute import Ecoindex 4 | from fastapi import APIRouter 5 | 6 | router = APIRouter(prefix="/ecoindex", tags=["Ecoindex"]) 7 | 8 | 9 | @router.get( 10 | name="Compute ecoindex", 11 | path="/ecoindex", 12 | tags=["Ecoindex"], 13 | description=( 14 | "This returns the ecoindex computed based on the given parameters: " 15 | "DOM (number of DOM nodes), size (total size in Kb) and requests" 16 | ), 17 | ) 18 | async def compute_ecoindex_api(parameters: ComputeDepParameters) -> Ecoindex: 19 | return await compute_ecoindex( 20 | nodes=parameters.dom, size=parameters.size, requests=parameters.requests 21 | ) 22 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/routers/ecoindex.py: -------------------------------------------------------------------------------- 1 | from os import getcwd 2 | from typing import Annotated 3 | 4 | from ecoindex.backend.models.dependencies_parameters.dates import DateRangeParameters 5 | from ecoindex.backend.models.dependencies_parameters.host import HostParameter 6 | from ecoindex.backend.models.dependencies_parameters.id import IdParameter 7 | from ecoindex.backend.models.dependencies_parameters.pagination import ( 8 | PaginationParameters, 9 | ) 10 | from ecoindex.backend.models.dependencies_parameters.version import VersionParameter 11 | from ecoindex.backend.models.parameters import DateRange, Pagination 12 | from ecoindex.backend.utils import get_sort_parameters, get_status_code 13 | from ecoindex.database.engine import get_session 14 | from ecoindex.database.models import ( 15 | ApiEcoindex, 16 | PageApiEcoindexes, 17 | ) 18 | from ecoindex.database.repositories.ecoindex import ( 19 | get_count_analysis_db, 20 | get_ecoindex_result_by_id_db, 21 | get_ecoindex_result_list_db, 22 | ) 23 | from ecoindex.models import example_ecoindex_not_found, example_file_not_found 24 | from ecoindex.models.enums import Version 25 | from fastapi import APIRouter, Depends, HTTPException, Response, status 26 | from fastapi.params import Query 27 | from fastapi.responses import FileResponse 28 | from sqlmodel.ext.asyncio.session import AsyncSession 29 | 30 | router = APIRouter(prefix="/{version}/ecoindexes", tags=["Ecoindex"]) 31 | 32 | 33 | @router.get( 34 | name="Get ecoindex analysis list", 35 | path="", 36 | response_model=PageApiEcoindexes, 37 | response_description="List of corresponding ecoindex results", 38 | responses={ 39 | status.HTTP_206_PARTIAL_CONTENT: {"model": PageApiEcoindexes}, 40 | status.HTTP_404_NOT_FOUND: {"model": PageApiEcoindexes}, 41 | }, 42 | description=( 43 | "This returns a list of ecoindex analysis " 44 | 
"corresponding to query filters and the given version engine. " 45 | "The results are ordered by ascending date" 46 | ), 47 | ) 48 | async def get_ecoindex_analysis_list( 49 | response: Response, 50 | host: HostParameter, 51 | version: VersionParameter = Version.v1, 52 | date_range: DateRangeParameters = DateRange(), 53 | pagination: PaginationParameters = Pagination(), 54 | sort: Annotated[ 55 | list[str], 56 | Query( 57 | description=( 58 | "You can sort results using this param with the format " 59 | "`sort=param1:asc&sort=param2:desc`" 60 | ) 61 | ), 62 | ] = ["date:desc"], 63 | session: AsyncSession = Depends(get_session), 64 | ) -> PageApiEcoindexes: 65 | ecoindexes = await get_ecoindex_result_list_db( 66 | session=session, 67 | date_from=date_range.date_from, 68 | date_to=date_range.date_to, 69 | host=host, 70 | version=version, 71 | page=pagination.page, 72 | size=pagination.size, 73 | sort_params=await get_sort_parameters( 74 | query_params=sort, 75 | model=ApiEcoindex, # type: ignore 76 | ), 77 | ) 78 | total_results = await get_count_analysis_db( 79 | session=session, 80 | version=version, 81 | date_from=date_range.date_from, 82 | date_to=date_range.date_to, 83 | host=host, 84 | ) 85 | 86 | response.status_code = await get_status_code(items=ecoindexes, total=total_results) 87 | 88 | return PageApiEcoindexes( 89 | items=ecoindexes, 90 | total=total_results, 91 | page=pagination.page, 92 | size=pagination.size, 93 | ) 94 | 95 | 96 | @router.get( 97 | name="Get ecoindex analysis by id", 98 | path="/{id}", 99 | response_model=ApiEcoindex, 100 | response_description="Get one ecoindex result by its id", 101 | responses={status.HTTP_404_NOT_FOUND: example_ecoindex_not_found}, 102 | description="This returns an ecoindex given by its unique identifier", 103 | ) 104 | async def get_ecoindex_analysis_by_id( 105 | id: IdParameter, 106 | version: VersionParameter = Version.v1, 107 | session: AsyncSession = Depends(get_session), 108 | ) -> ApiEcoindex: 109 | ecoindex = await get_ecoindex_result_by_id_db( 110 | session=session, id=id, version=version 111 | ) 112 | 113 | if not ecoindex: 114 | raise HTTPException( 115 | status_code=status.HTTP_404_NOT_FOUND, 116 | detail=f"Analysis {id} not found for version {version.value}", 117 | ) 118 | return ecoindex 119 | 120 | 121 | @router.get( 122 | name="Get screenshot", 123 | path="/{id}/screenshot", 124 | description="This returns the screenshot of the webpage analysis if it exists", 125 | responses={status.HTTP_404_NOT_FOUND: example_file_not_found}, 126 | ) 127 | async def get_screenshot_endpoint( 128 | id: IdParameter, 129 | version: VersionParameter = Version.v1, 130 | ): 131 | return FileResponse( 132 | path=f"{getcwd()}/screenshots/{version.value}/{id}.webp", 133 | filename=f"{id}.webp", 134 | content_disposition_type="inline", 135 | media_type="image/webp", 136 | ) 137 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/routers/health.py: -------------------------------------------------------------------------------- 1 | from ecoindex.database.engine import get_session 2 | from ecoindex.models.api import HealthResponse 3 | from ecoindex.worker.health import is_worker_healthy 4 | from fastapi import APIRouter, Depends 5 | from sqlmodel.ext.asyncio.session import AsyncSession 6 | 7 | router = APIRouter(prefix="/health", tags=["Infra"]) 8 | 9 | 10 | @router.get( 11 | name="Health check", 12 | path="", 13 | description="This returns the health of the service", 14 | ) 15 | async def 
health_check(session: AsyncSession = Depends(get_session)) -> HealthResponse: 16 | return HealthResponse(database=session.is_active, workers=is_worker_healthy()) 17 | -------------------------------------------------------------------------------- /bases/ecoindex/backend/routers/host.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from ecoindex.backend.models.dependencies_parameters.dates import DateRangeParameters 4 | from ecoindex.backend.models.dependencies_parameters.host import HostParameter 5 | from ecoindex.backend.models.dependencies_parameters.pagination import ( 6 | PaginationParameters, 7 | ) 8 | from ecoindex.backend.models.dependencies_parameters.version import VersionParameter 9 | from ecoindex.backend.models.parameters import DateRange, Pagination 10 | from ecoindex.backend.utils import check_quota, get_status_code 11 | from ecoindex.database.engine import get_session 12 | from ecoindex.database.repositories.host import get_count_hosts_db, get_host_list_db 13 | from ecoindex.models.api import Host, PageHosts 14 | from ecoindex.models.enums import Version 15 | from ecoindex.models.response_examples import example_daily_limit_response 16 | from fastapi import Depends, Path, status 17 | from fastapi.param_functions import Query 18 | from fastapi.responses import Response 19 | from fastapi.routing import APIRouter 20 | from sqlmodel.ext.asyncio.session import AsyncSession 21 | 22 | router = APIRouter(prefix="/{version}/hosts", tags=["Host"]) 23 | 24 | 25 | @router.get( 26 | name="Get host list", 27 | path="", 28 | response_model=PageHosts, 29 | response_description="List ecoindex hosts", 30 | responses={ 31 | status.HTTP_206_PARTIAL_CONTENT: {"model": PageHosts}, 32 | status.HTTP_404_NOT_FOUND: {"model": PageHosts}, 33 | }, 34 | description=( 35 | "This returns a list of hosts that " 36 | "ran an ecoindex analysis order by most request made" 37 | ), 38 | ) 39 | async def get_host_list( 40 | response: Response, 41 | host: HostParameter, 42 | version: VersionParameter = Version.v1, 43 | date_range: DateRangeParameters = DateRange(), 44 | pagination: PaginationParameters = Pagination(), 45 | q: str = Query( 46 | default=None, 47 | description="Filter by partial host name (replaced by `host`)", 48 | deprecated=True, 49 | ), 50 | session: AsyncSession = Depends(get_session), 51 | ) -> PageHosts: 52 | hosts = await get_host_list_db( 53 | session=session, 54 | date_from=date_range.date_from, 55 | date_to=date_range.date_to, 56 | host=host or q, 57 | version=version, 58 | page=pagination.page, 59 | size=pagination.size, 60 | ) 61 | 62 | total_hosts = await get_count_hosts_db( 63 | session=session, 64 | version=version, 65 | q=q, 66 | date_from=date_range.date_from, 67 | date_to=date_range.date_to, 68 | ) 69 | 70 | response.status_code = await get_status_code(items=hosts, total=total_hosts) 71 | 72 | return PageHosts( 73 | items=hosts, total=total_hosts, page=pagination.page, size=pagination.size 74 | ) 75 | 76 | 77 | @router.get( 78 | name="Get host details", 79 | path="/{host}", 80 | response_description="Host details", 81 | responses={ 82 | status.HTTP_200_OK: {"model": Host}, 83 | status.HTTP_404_NOT_FOUND: {"model": Host}, 84 | status.HTTP_429_TOO_MANY_REQUESTS: example_daily_limit_response, 85 | }, 86 | description=( 87 | "This returns the details of a host. 
--------------------------------------------------------------------------------
/bases/ecoindex/backend/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/backend/scripts/__init__.py
--------------------------------------------------------------------------------
/bases/ecoindex/backend/scripts/openapi.py:
--------------------------------------------------------------------------------
 1 | from json import dumps
 2 | 
 3 | from ecoindex.backend.main import app
 4 | 
 5 | 
 6 | def main() -> None:
 7 |     openapi = app.openapi()
 8 | 
 9 |     print(dumps(openapi, indent=2, sort_keys=True))
10 | 
11 | 
12 | if __name__ == "__main__":
13 |     main()
14 | 
--------------------------------------------------------------------------------
/bases/ecoindex/backend/services/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/backend/services/__init__.py
--------------------------------------------------------------------------------
/bases/ecoindex/backend/services/cache.py:
--------------------------------------------------------------------------------
 1 | from hashlib import sha1
 2 | 
 3 | from ecoindex.config import Settings
 4 | from redis import Redis
 5 | 
 6 | 
 7 | class EcoindexCache:
 8 |     def __init__(self) -> None:
 9 |         # connect at construction time so the module-level `cache` below is usable directly
10 |         self._r = Redis(host=Settings().REDIS_CACHE_HOST, db=2)
11 | 
12 |     def set_cache_key(self, key: str):
13 |         self.cache_key = sha1(key.encode("utf-8")).hexdigest()
14 | 
15 |         return self
16 | 
17 |     async def get(
18 |         self,
19 |     ) -> str | None:
20 |         results = self._r.get(name=self.cache_key)
21 | 
22 |         if results:
23 |             return results.decode("utf-8")  # type: ignore
24 | 
25 |         return None
26 | 
27 |     async def set(self, data: str) -> None:
28 |         self._r.set(
29 |             name=self.cache_key,
30 |             value=data,
31 |             ex=60 * 60 * 24 * 7,  # set default expiration to 7 days
32 |         )
33 | 
34 | 
35 | cache = EcoindexCache()
36 | 
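A sketch of the intended call pattern for `EcoindexCache`, assuming a Redis instance is reachable at `REDIS_CACHE_HOST`; the key and payload are illustrative:

```python
from ecoindex.backend.services.cache import cache


async def cached_lookup() -> str | None:
    # `set_cache_key` returns self, so calls can be chained
    entry = cache.set_cache_key(key="ecoindex-www.ecoindex.fr/")

    if (hit := await entry.get()) is not None:
        return hit  # cache hit: the JSON string stored earlier

    await entry.set(data='{"count": 0}')  # miss: store with the 7-day expiration
    return None
```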
--------------------------------------------------------------------------------
/bases/ecoindex/backend/services/ecoindex.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | from ecoindex.backend.services.cache import cache
 4 | from ecoindex.database.models import EcoindexSearchResults
 5 | from ecoindex.database.repositories.ecoindex import get_ecoindex_result_list_db
 6 | from ecoindex.models.enums import Version
 7 | from ecoindex.models.sort import Sort
 8 | from pydantic import AnyHttpUrl
 9 | from requests import get
10 | from sqlmodel.ext.asyncio.session import AsyncSession
11 | 
12 | 
13 | async def get_latest_result_by_url(
14 |     session: AsyncSession, url: AnyHttpUrl, refresh: bool, version: Version
15 | ) -> EcoindexSearchResults:
16 |     """
17 |     Get the latest ecoindex result for a given url. This function will first try to find
18 |     an exact match for the url path, and if it doesn't find any, it will return the latest
19 |     results for the host.
20 | 
21 |     Results are cached for 1 week by default. If you want to force the refresh of the cache,
22 |     set the refresh parameter to True.
23 | 
24 |     params:
25 |         url: the url to search for
26 |         refresh: if True, will force the refresh of the cache
27 |         version: the version of the ecoindex to use
28 | 
29 |     returns:
30 |         EcoindexSearchResults: the results for the given url
31 |     """
32 |     ecoindex_cache = cache.set_cache_key(key=f"ecoindex-{url.host}/{url.path}")
33 |     cached_results = await ecoindex_cache.get()
34 | 
35 |     if not refresh and cached_results:
36 |         return EcoindexSearchResults(**json.loads(cached_results))
37 | 
38 |     ecoindexes = await get_ecoindex_result_list_db(
39 |         session=session,
40 |         host=str(url.host),
41 |         version=version,
42 |         size=20,
43 |         sort_params=[Sort(clause="date", sort="desc")],
44 |     )
45 | 
46 |     if not ecoindexes:
47 |         # EcoindexCache only exposes `set`: cache the empty result as its JSON dump
48 |         await ecoindex_cache.set(
49 |             data=EcoindexSearchResults(count=0).model_dump_json()
50 |         )
51 | 
52 |         return EcoindexSearchResults(count=0)
53 | 
54 |     exact_url_results = []
55 |     host_results = []
56 | 
57 |     for ecoindex in ecoindexes:
58 |         if ecoindex.get_url_path() == str(url.path):
59 |             exact_url_results.append(ecoindex)
60 |         else:
61 |             host_results.append(ecoindex)
62 | 
63 |     results = EcoindexSearchResults(
64 |         count=len(exact_url_results),
65 |         latest_result=exact_url_results[0] if exact_url_results else None,
66 |         older_results=exact_url_results[1:] if len(exact_url_results) > 1 else [],
67 |         host_results=host_results,
68 |     )
69 | 
70 |     await ecoindex_cache.set(
71 |         data=results.model_dump_json(),
72 |     )
73 | 
74 |     return results
75 | 
76 | 
77 | async def get_badge(
78 |     session: AsyncSession, url: AnyHttpUrl, refresh: bool, version: Version, theme: str
79 | ) -> str:
80 |     """
81 |     Get the badge for a given url. This function will use the method `get_latest_result_by_url`.
82 |     If the url is not found, it will return the badge for the grade "unknown".
83 | 
84 |     This returns the badge [hosted on jsdelivr.net](https://cdn.jsdelivr.net/gh/cnumr/ecoindex_badge@1/assets/svg/).
85 | 
86 |     params:
87 |         url: the url to search for
88 |         refresh: if True, will force the refresh of the cache
89 |         version: the version of the ecoindex to use
90 |         theme: the theme of the badge to use (light or dark)
91 | 
92 |     returns:
93 |         str: the badge image
94 |     """
95 |     results = await get_latest_result_by_url(
96 |         session=session, url=url, refresh=refresh, version=version
97 |     )
98 | 
99 |     grade = results.latest_result.grade if results.latest_result else "unknown"
100 |     ecoindex_cache = cache.set_cache_key(key=f"badge-{grade}-{theme}")
101 | 
102 |     cached_badge = await ecoindex_cache.get()
103 | 
104 |     if cached_badge:
105 |         return cached_badge
106 | 
107 |     base_url = f"https://cdn.jsdelivr.net/gh/cnumr/ecoindex_badge@1/assets/svg/{theme}/{grade}.svg"
108 | 
109 |     image = get(base_url).text
110 | 
111 |     await ecoindex_cache.set(data=image)
112 | 
113 |     return image
114 | 
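A sketch of how the search service above is typically driven; the session comes from `get_session` and the URL value is illustrative:

```python
from ecoindex.backend.services.ecoindex import get_latest_result_by_url
from ecoindex.models.enums import Version
from pydantic import AnyHttpUrl
from sqlmodel.ext.asyncio.session import AsyncSession


async def latest_grade(session: AsyncSession) -> str | None:
    results = await get_latest_result_by_url(
        session=session,
        url=AnyHttpUrl("https://www.ecoindex.fr/"),
        refresh=False,  # serve from the 7-day cache when possible
        version=Version.v1,
    )
    return results.latest_result.grade if results.latest_result else None
```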
--------------------------------------------------------------------------------
/bases/ecoindex/backend/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from json import loads
 3 | from uuid import UUID, uuid4
 4 | 
 5 | from ecoindex.config.settings import Settings
 6 | from ecoindex.database.exceptions.quota import QuotaExceededException
 7 | from ecoindex.database.repositories.ecoindex import (
 8 |     get_count_daily_request_per_host,
 9 |     get_latest_result,
10 | )
11 | from ecoindex.models.api import ExceptionResponse
12 | from ecoindex.models.sort import Sort
13 | from fastapi import HTTPException, status
14 | from pydantic import BaseModel
15 | from sqlmodel.ext.asyncio.session import AsyncSession
16 | 
17 | 
18 | async def format_exception_response(exception: Exception) -> ExceptionResponse:
19 |     return ExceptionResponse(
20 |         exception=type(exception).__name__,
21 |         args=[arg for arg in exception.args if arg] if exception.args else [],
22 |         message=exception.msg if hasattr(exception, "msg") else None,  # type: ignore
23 |     )
24 | 
25 | 
26 | async def new_uuid() -> UUID:
27 |     val = uuid4()
28 |     while val.hex[0] == "0":  # regenerate ids whose hex form starts with "0"
29 |         val = uuid4()
30 |     return val
31 | 
32 | 
33 | async def get_status_code(items: list, total: int) -> int:
34 |     if not items:
35 |         return status.HTTP_404_NOT_FOUND
36 | 
37 |     if total > len(items):
38 |         return status.HTTP_206_PARTIAL_CONTENT
39 | 
40 |     return status.HTTP_200_OK
41 | 
42 | 
43 | async def get_sort_parameters(query_params: list[str], model: BaseModel) -> list[Sort]:
44 |     validation_error = []
45 |     result = []
46 | 
47 |     for query_param in query_params:
48 |         pattern = re.compile(r"^\w+:(asc|desc)$")
49 | 
50 |         if not re.fullmatch(pattern, query_param):
51 |             validation_error.append(
52 |                 {
53 |                     "loc": ["query", "sort", query_param],
54 |                     "message": "this parameter does not respect the sort format",
55 |                     "type": "value_error.sort",
56 |                 }
57 |             )
58 |             continue
59 | 
60 |         sort_params = query_param.split(":")
61 | 
62 |         if sort_params[0] not in model.__fields__:
63 |             validation_error.append(
64 |                 {
65 |                     "loc": ["query", "sort", sort_params[0]],
66 |                     "message": "this parameter does not exist",
67 |                     "type": "value_error.sort",
68 |                 }
69 |             )
70 |             continue
71 | 
72 |         result.append(Sort(clause=sort_params[0], sort=sort_params[1]))  # type: ignore
73 | 
74 |     if validation_error:
75 |         raise HTTPException(
76 |             status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=validation_error
77 |         )
78 | 
79 |     return result
80 | 
81 | 
82 | async def check_quota(
83 |     session: AsyncSession,
84 |     host: str,
85 | ) -> int | None:
86 |     if not Settings().DAILY_LIMIT_PER_HOST:
87 |         return None
88 | 
89 |     count_daily_request_per_host = await get_count_daily_request_per_host(
90 |         session=session, host=host
91 |     )
92 | 
93 |     if count_daily_request_per_host >= Settings().DAILY_LIMIT_PER_HOST:
94 |         latest_result = await get_latest_result(session=session, host=host)
95 |         raise QuotaExceededException(
96 |             limit=Settings().DAILY_LIMIT_PER_HOST,
97 |             host=host,
98 |             latest_result=loads(latest_result.model_dump_json() or "{}"),  # type: ignore
99 |         )
100 | 
101 |     return Settings().DAILY_LIMIT_PER_HOST - count_daily_request_per_host
102 | 
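`get_sort_parameters` validates each clause against the model's fields before building `Sort` objects; a malformed clause surfaces as a 422. For instance:

```python
from ecoindex.backend.utils import get_sort_parameters
from ecoindex.database.models import ApiEcoindex


async def demo() -> None:
    sorts = await get_sort_parameters(["score:desc", "date:asc"], ApiEcoindex)
    # -> [Sort(clause="score", sort="desc"), Sort(clause="date", sort="asc")]

    # "height;asc" does not match ^\w+:(asc|desc)$ -> HTTPException (422)
    await get_sort_parameters(["height;asc"], ApiEcoindex)
```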
--------------------------------------------------------------------------------
/bases/ecoindex/cli/VERSION:
--------------------------------------------------------------------------------
1 | 2.30.0
2 | 
--------------------------------------------------------------------------------
/bases/ecoindex/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/cli/__init__.py
--------------------------------------------------------------------------------
/bases/ecoindex/cli/arguments_handler.py:
--------------------------------------------------------------------------------
 1 | from tempfile import NamedTemporaryFile
 2 | from typing import Set
 3 | from urllib.parse import urlparse, urlunparse
 4 | 
 5 | from ecoindex.cli.crawl import EcoindexSpider
 6 | from ecoindex.cli.helper import replace_localhost_with_hostdocker
 7 | from ecoindex.cli.sitemap import EcoindexSitemapSpider
 8 | from ecoindex.models import WindowSize
 9 | 
10 | from pydantic import AnyHttpUrl, validate_call
11 | from pydantic.types import FilePath
12 | from scrapy.crawler import CrawlerProcess
13 | 
14 | 
15 | @validate_call
16 | def validate_list_of_urls(urls: list[AnyHttpUrl]) -> Set[str]:
17 |     result = set()
18 | 
19 |     for url in urls:
20 |         splitted_url = str(url).split("?")
21 |         result.add(splitted_url[0])
22 | 
23 |     return result
24 | 
25 | 
26 | @validate_call
27 | def get_urls_from_file(urls_file: FilePath) -> Set[str]:
28 |     with open(urls_file) as fp:
29 |         urls_from_file = set()
30 | 
31 |         for url in fp.readlines():
32 |             url = url.strip()
33 | 
34 |             if url:
35 |                 urls_from_file.add(url)
36 | 
37 |     return validate_list_of_urls(urls_from_file)  # type: ignore
38 | 
39 | 
40 | def get_urls_recursive(main_url: str) -> Set[str]:
41 |     parsed_url = urlparse(main_url)
42 |     host_infos = replace_localhost_with_hostdocker(parsed_url.netloc)
43 |     netloc = host_infos.netloc
44 |     domain = host_infos.domain
45 |     main_url = f"{parsed_url.scheme}://{netloc}"
46 |     process = CrawlerProcess()
47 | 
48 |     with NamedTemporaryFile(mode="w+t") as temp_file:
49 |         process.crawl(
50 |             crawler_or_spidercls=EcoindexSpider,
51 |             allowed_domains=[domain],
52 |             start_urls=[main_url],
53 |             temp_file=temp_file,
54 |         )
55 |         process.start()
56 |         temp_file.seek(0)
57 |         urls = temp_file.readlines()
58 |     return validate_list_of_urls(urls)  # type: ignore
59 | 
60 | 
61 | def get_urls_from_sitemap(main_url: str) -> Set[str]:
62 |     process = CrawlerProcess()
63 |     if "sitemap" not in main_url or not main_url.endswith(".xml"):
64 |         raise ValueError("The provided url is not a valid sitemap url")
65 | 
66 |     with NamedTemporaryFile(mode="w+t") as temp_file:
67 |         process.crawl(
68 |             crawler_or_spidercls=EcoindexSitemapSpider,
69 |             sitemap_urls=[main_url],
70 |             temp_file=temp_file,
71 |         )
72 |         process.start()
73 |         temp_file.seek(0)
74 |         urls = []
75 |         str_urls = temp_file.readlines()
76 |         for url in str_urls:
77 |             urls.append(AnyHttpUrl(url))
78 | 
79 |     return validate_list_of_urls(urls)
80 | 
81 | 
82 | @validate_call
83 | def get_url_from_args(urls_arg: list[AnyHttpUrl]) -> set[AnyHttpUrl]:
84 |     urls_from_args = set()
85 |     for url in urls_arg:
86 |         parsed_url = urlparse(str(url))
87 |         host_infos = replace_localhost_with_hostdocker(parsed_url.netloc)
88 |         url = AnyHttpUrl(urlunparse((parsed_url.scheme, host_infos.netloc, parsed_url.path, parsed_url.params, parsed_url.query, parsed_url.fragment)))
89 |         urls_from_args.add(url)
90 | 
91 |     return urls_from_args
92 | 
93 | 
94 | def get_window_sizes_from_args(window_sizes: list[str]) -> list[WindowSize]:
95 |     result = []
96 |     errors = ""
97 |     for window_size in window_sizes:
98 |         try:
99 |             width, height = window_size.split(",")
100 |             result.append(WindowSize(width=int(width), height=int(height)))
101 |         except ValueError:
102 |             errors += f"🔥 `{window_size}` is not a valid window size. Must be of type `1920,1080`\n"
103 | 
104 |     if errors:
105 |         raise ValueError(errors)
106 | 
107 |     return result
108 | 
109 | 
110 | def get_file_prefix_input_file_logger_file(
111 |     urls: list[AnyHttpUrl],
112 |     urls_file: str | None = None,
113 |     tmp_folder: str = "/tmp/ecoindex-cli",
114 | ) -> tuple[str, str, str]:
115 |     """
116 |     Returns the file prefix, input file and logger file for the provided urls,
117 |     depending on how they were provided: if they come from an existing file,
118 |     we take the name of that file; otherwise we take the first provided url's domain.
119 |     """
120 |     if urls_file:
121 |         file_prefix = urls_file.split("/")[-1]
122 |         input_file = urls_file
123 |     else:
124 |         first_url = str(next(iter(urls)))
125 |         file_prefix = urlparse(first_url).netloc
126 |         input_file = f"{tmp_folder}/input/{file_prefix}.csv"
127 | 
128 |     return (file_prefix, input_file, f"{tmp_folder}/logs/{file_prefix}.log")
129 | 
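Two behaviours of the helpers above that are easy to miss: URLs are deduplicated with their query strings dropped, and window sizes must be `width,height` strings. A quick illustration:

```python
from ecoindex.cli.arguments_handler import (
    get_window_sizes_from_args,
    validate_list_of_urls,
)

urls = validate_list_of_urls(
    ["https://www.ecoindex.fr/?utm_source=newsletter", "https://www.ecoindex.fr/"]
)
# -> {"https://www.ecoindex.fr/"} : one entry, query string stripped

sizes = get_window_sizes_from_args(["1920,1080", "386,640"])
# -> [WindowSize(width=1920, height=1080), WindowSize(width=386, height=640)]

get_window_sizes_from_args(["1920x1080"])  # raises ValueError: not `width,height`
```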
--------------------------------------------------------------------------------
/bases/ecoindex/cli/console_output.py:
--------------------------------------------------------------------------------
 1 | from rich.console import Console
 2 | from rich.table import Table
 3 | 
 4 | 
 5 | def display_result_synthesis(total: int, count_errors: int) -> None:
 6 |     console = Console()
 7 | 
 8 |     table = Table(show_header=True)
 9 |     table.add_column("Total analysis")
10 |     table.add_column("Success", header_style="green")
11 |     table.add_column("Failed", header_style="red")
12 |     table.add_row(str(total), str(total - count_errors), str(count_errors))
13 | 
14 |     console.print(table)
15 | 
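What the synthesis table looks like for a finished run (rendering is approximate; rich draws the actual box characters):

```python
from ecoindex.cli.console_output import display_result_synthesis

# 10 analyses attempted, 2 of them failed:
display_result_synthesis(total=10, count_errors=2)
# Total analysis | Success | Failed
# 10             | 8       | 2
```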
--------------------------------------------------------------------------------
/bases/ecoindex/cli/crawl.py:
--------------------------------------------------------------------------------
 1 | from tempfile import NamedTemporaryFile
 2 | 
 3 | from scrapy.linkextractors import LinkExtractor
 4 | from scrapy.spiders import CrawlSpider, Rule
 5 | 
 6 | 
 7 | class EcoindexSpider(CrawlSpider):
 8 |     name = "EcoindexSpider"
 9 |     custom_settings = {"LOG_ENABLED": False}
10 |     rules = (Rule(LinkExtractor(), callback="parse_item", follow=True),)
11 | 
12 |     def __init__(
13 |         self,
14 |         allowed_domains: list[str],
15 |         start_urls: list[str],
16 |         temp_file: NamedTemporaryFile,  # type: ignore
17 |         *a,
18 |         **kw,
19 |     ):
20 |         self.links: set[str] = set()
21 |         self.allowed_domains = allowed_domains
22 |         self.start_urls = start_urls
23 |         self.temp_file = temp_file
24 |         super().__init__(*a, **kw)
25 | 
26 |     def parse_item(self, response):
27 |         self.temp_file.write(f"{response.url}\n")
28 | 
--------------------------------------------------------------------------------
/bases/ecoindex/cli/helper.py:
--------------------------------------------------------------------------------
 1 | from ecoindex.config import Settings
 2 | from ecoindex.models import CliHost
 3 | 
 4 | 
 5 | def replace_localhost_with_hostdocker(netloc: str) -> CliHost:
 6 |     if Settings().DOCKER_CONTAINER and "localhost" in netloc:
 7 |         domain = "host.docker.internal"
 8 |         netloc = netloc.replace("localhost", domain)
 9 |     elif "localhost" in netloc:
10 |         domain = "localhost"
11 |     else:
12 |         domain = netloc
13 | 
14 |     return CliHost(domain=domain, netloc=netloc)
15 | 
--------------------------------------------------------------------------------
/bases/ecoindex/cli/sitemap.py:
--------------------------------------------------------------------------------
 1 | from tempfile import NamedTemporaryFile
 2 | from scrapy.spiders import SitemapSpider
 3 | 
 4 | 
 5 | class EcoindexSitemapSpider(SitemapSpider):
 6 |     name = "EcoindexSitemapSpider"
 7 |     custom_settings = {"LOG_ENABLED": False}
 8 | 
 9 |     def __init__(
10 |         self,
11 |         sitemap_urls: list[str],
12 |         temp_file: NamedTemporaryFile,  # type: ignore
13 |         *a,
14 |         **kw,
15 |     ):
16 |         self.sitemap_urls = sitemap_urls
17 |         self.temp_file = temp_file
18 |         super().__init__(*a, **kw)
19 | 
20 |     def parse(self, response):
21 |         self.temp_file.write(f"{response.url}\n")
22 | 
--------------------------------------------------------------------------------
/bases/ecoindex/cli/template.html:
--------------------------------------------------------------------------------
[The HTML markup of this report template was lost during extraction; only its
Jinja-style placeholders survived. The file lays out the HTML report produced by
the CLI and references: {{ title }}, {{ site }}, {{ synthesis }}, {{ summary }},
{{ ecoindex_score }}, {{ ecoindex_body }}, {{ ecoindex_body_end_pre }}, {{ nb_page }},
{{ ecoindex_body_end_mid }}, {{ ecoindex_body_end_suf }}, {{ top10 }},
{{ best_pages }} / {{ best }}, {{ worst_pages }} / {{ worst }}, {{ indicators }},
{{ number_of_requests }} / {{ requests_body }} / {{ requests_comment }},
{{ pages_size }} / {{ size_body }} / {{ size_comment }}, and
{{ number_of_dom_nodes }} / {{ nodes_body }} / {{ nodes_comment }}.]
--------------------------------------------------------------------------------
/bases/ecoindex/worker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/worker/__init__.py
--------------------------------------------------------------------------------
/bases/ecoindex/worker/health.py:
--------------------------------------------------------------------------------
 1 | from ecoindex.models.api import HealthWorker, HealthWorkers
 2 | from ecoindex.worker.tasks import app
 3 | 
 4 | 
 5 | def is_worker_healthy() -> HealthWorkers:
 6 |     workers = []
 7 |     workers_ping = app.control.ping()
 8 | 
 9 |     for worker in workers_ping:
10 |         for name in worker:
11 |             workers.append(
12 |                 HealthWorker(name=name, healthy=True if "ok" in worker[name] else False)
13 |             )
14 | 
15 |     return HealthWorkers(
16 |         healthy=False if False in [w.healthy for w in workers] or not workers else True,
17 |         workers=workers,
18 |     )
19 | 
--------------------------------------------------------------------------------
/components/ecoindex/compute/VERSION:
--------------------------------------------------------------------------------
1 | 5.9.0
2 | 
--------------------------------------------------------------------------------
/components/ecoindex/compute/__init__.py:
--------------------------------------------------------------------------------
 1 | from ecoindex.compute.ecoindex import (
 2 |     compute_ecoindex,
 3 |     get_ecoindex,
 4 |     get_grade,
 5 |     get_greenhouse_gases_emmission,
 6 |     get_quantile,
 7 |     get_score,
 8 |     get_water_consumption,
 9 | )
10 | 
11 | __all__ = [
12 |     "compute_ecoindex",
13 |     "get_ecoindex",
14 |     "get_grade",
15 |     "get_greenhouse_gases_emmission",
16 |     "get_quantile",
17 |     "get_score",
18 |     "get_water_consumption",
19 | ]
20 | 
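A worked example of the scoring chain defined in `ecoindex.py` below, run on the reference medians from `ecoindex.data` (693 DOM nodes, 2420 KB, 78 requests). The expected figures are hand-derived from the quantile tables and would shift with a different reference dataset:

```python
from asyncio import run

from ecoindex.compute import compute_ecoindex

# Median page of the reference dataset: 693 DOM nodes, 2420 KB, 78 requests.
result = run(compute_ecoindex(nodes=693, size=2420, requests=78))

print(result.score, result.grade, result.ges, result.water)
# -> 44 Grade.D 2.12 3.18  (score on /100, gCO2e, cl of water)
```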
--------------------------------------------------------------------------------
/components/ecoindex/compute/ecoindex.py:
--------------------------------------------------------------------------------
 1 | from ecoindex.data import (  # noqa: F401
 2 |     A,
 3 |     B,
 4 |     C,
 5 |     D,
 6 |     E,
 7 |     F,
 8 |     G,
 9 |     quantiles_dom,
10 |     quantiles_req,
11 |     quantiles_size,
12 | )
13 | from ecoindex.models import Ecoindex
14 | from ecoindex.models.enums import Grade
15 | from typing_extensions import deprecated
16 | 
17 | 
18 | async def get_quantile(quantiles: list[int | float], value: int | float) -> float:
19 |     # linear interpolation of `value` inside its quantile bucket
20 |     for i in range(1, len(quantiles)):
21 |         if value < quantiles[i]:
22 |             return (
23 |                 i - 1 + (value - quantiles[i - 1]) / (quantiles[i] - quantiles[i - 1])
24 |             )
25 | 
26 |     return len(quantiles) - 1
27 | 
28 | 
29 | async def get_score(dom: int, size: float, requests: int) -> float:
30 |     # the DOM quantile weighs 3, requests 2 and size 1 in the final score
31 |     q_dom = await get_quantile(quantiles_dom, dom)  # type: ignore
32 |     q_size = await get_quantile(quantiles_size, size)
33 |     q_req = await get_quantile(quantiles_req, requests)  # type: ignore
34 | 
35 |     return round(100 - 5 * (3 * q_dom + 2 * q_req + q_size) / 6)
36 | 
37 | 
38 | @deprecated("Use compute_ecoindex instead")
39 | async def get_ecoindex(dom: int, size: float, requests: int) -> Ecoindex:
40 |     score = await get_score(dom=dom, size=size, requests=requests)
41 | 
42 |     return Ecoindex(
43 |         score=score,
44 |         grade=Grade(await get_grade(score)),
45 |         ges=await get_greenhouse_gases_emmission(score),
46 |         water=await get_water_consumption(score),
47 |     )
48 | 
49 | 
50 | async def compute_ecoindex(nodes: int, size: float, requests: int) -> Ecoindex:
51 |     return await get_ecoindex(
52 |         dom=nodes,
53 |         size=size,
54 |         requests=requests,
55 |     )
56 | 
57 | 
58 | async def get_grade(ecoindex: float) -> str:
59 |     # A..G are the grade thresholds imported from ecoindex.data (A=80 ... G=0)
60 |     for grade in "ABCDEF":
61 |         if ecoindex > globals()[grade]:
62 |             return grade
63 | 
64 |     return "G"
65 | 
66 | 
67 | async def get_greenhouse_gases_emmission(ecoindex: float) -> float:
68 |     return round(100 * (2 + 2 * (50 - ecoindex) / 100)) / 100
69 | 
70 | 
71 | async def get_water_consumption(ecoindex: float) -> float:
72 |     return round(100 * (3 + 3 * (50 - ecoindex) / 100)) / 100
73 | 
--------------------------------------------------------------------------------
/components/ecoindex/config/__init__.py:
--------------------------------------------------------------------------------
1 | from ecoindex.config.settings import Settings
2 | 
3 | __all__ = ["Settings"]
4 | 
--------------------------------------------------------------------------------
/components/ecoindex/config/settings.py:
--------------------------------------------------------------------------------
 1 | from pydantic_settings import BaseSettings, SettingsConfigDict
 2 | 
 3 | 
 4 | class Settings(BaseSettings):
 5 |     model_config = SettingsConfigDict(env_file=".env")
 6 | 
 7 |     API_KEYS_BATCH: list[
 8 |         dict[str, str]
 9 |     ] = []  # formatted as [{"key": "xxx", "name": "xxx", "description": "xxx", "source": "ecoindex.fr"}]
10 |     CORS_ALLOWED_CREDENTIALS: bool = True
11 |     CORS_ALLOWED_HEADERS: list = ["*"]
12 |     CORS_ALLOWED_METHODS: list = ["*"]
13 |     CORS_ALLOWED_ORIGINS: list = ["*"]
14 |     DAILY_LIMIT_PER_HOST: int = 0
15 |     DATABASE_URL: str = "sqlite+aiosqlite:///db.sqlite3"
16 |     DEBUG: bool = False
17 |     DOCKER_CONTAINER: bool = False
18 |     ENABLE_SCREENSHOT: bool = False
19 |     EXCLUDED_HOSTS: list[str] = ["localhost", "127.0.0.1"]
20 |     FRONTEND_BASE_URL: str = "https://www.ecoindex.fr"
21 |     GLITCHTIP_DSN: str = ""
22 |     REDIS_CACHE_HOST: str = "localhost"
23 |     SCREENSHOTS_GID: int | None = None
24 |     SCREENSHOTS_UID: int | None = None
25 |     TZ: str = "Europe/Paris"
26 |     WAIT_AFTER_SCROLL: int = 3
27 |     WAIT_BEFORE_SCROLL: int = 3
28 | 
--------------------------------------------------------------------------------
/components/ecoindex/data/__init__.py:
--------------------------------------------------------------------------------
 1 | from ecoindex.data.colors import A as A_color
 2 | from ecoindex.data.colors import B as B_color
 3 | from ecoindex.data.colors import C as C_color
 4 | from ecoindex.data.colors import D as D_color
 5 | from ecoindex.data.colors import E as E_color
 6 | from ecoindex.data.colors import F as F_color
 7 | from ecoindex.data.colors import G as G_color
 8 | from ecoindex.data.grades import A, B, C, D, E, F, G
 9 | from ecoindex.data.medians import (
10 |     median_dom,
11 |     median_req,
12 |     median_size,
13 | )
14 | from ecoindex.data.quantiles import (
15 |     quantiles_dom,
16 |     quantiles_req,
17 |     quantiles_size,
18 | )
19 | from ecoindex.data.targets import (
20 |     target_dom,
21 |     target_req,
22 |     target_size,
23 | )
24 | 
25 | __all__ = [
26 |     "A",
27 |     "B",
28 |     "C",
29 |     "D",
30 |     "E",
31 |     "F",
32 |     "G",
33 |     "A_color",
34 |     "B_color",
35 |     "C_color",
36 |     "D_color",
37 |     "E_color",
38 |     "F_color",
39 |     "G_color",
40 |     "median_dom",
41 |     "median_req",
42 |     "median_size",
43 |     "quantiles_dom",
44 |     "quantiles_req",
45 |     "quantiles_size",
46 |     "target_dom",
47 |     "target_req",
48 |     "target_size",
49 | ]
50 | 
--------------------------------------------------------------------------------
/components/ecoindex/data/colors.py:
-------------------------------------------------------------------------------- 1 | A = "#349A47" 2 | B = "#51B84B" 3 | C = "#CADB2A" 4 | D = "#F6EB15" 5 | E = "#FECD06" 6 | F = "#F99839" 7 | G = "#ED2124" 8 | -------------------------------------------------------------------------------- /components/ecoindex/data/grades.py: -------------------------------------------------------------------------------- 1 | A = 80 2 | B = 70 3 | C = 55 4 | D = 40 5 | E = 25 6 | F = 10 7 | G = 0 8 | -------------------------------------------------------------------------------- /components/ecoindex/data/medians.py: -------------------------------------------------------------------------------- 1 | median_dom = 693 2 | median_req = 78 3 | median_size = 2420 4 | -------------------------------------------------------------------------------- /components/ecoindex/data/quantiles.py: -------------------------------------------------------------------------------- 1 | quantiles_dom = [ 2 | 0, 3 | 47, 4 | 75, 5 | 159, 6 | 233, 7 | 298, 8 | 358, 9 | 417, 10 | 476, 11 | 537, 12 | 603, 13 | 674, 14 | 753, 15 | 843, 16 | 949, 17 | 1076, 18 | 1237, 19 | 1459, 20 | 1801, 21 | 2479, 22 | 594601, 23 | ] 24 | quantiles_req = [ 25 | 0, 26 | 2, 27 | 15, 28 | 25, 29 | 34, 30 | 42, 31 | 49, 32 | 56, 33 | 63, 34 | 70, 35 | 78, 36 | 86, 37 | 95, 38 | 105, 39 | 117, 40 | 130, 41 | 147, 42 | 170, 43 | 205, 44 | 281, 45 | 3920, 46 | ] 47 | quantiles_size = [ 48 | 0, 49 | 1.37, 50 | 144.7, 51 | 319.53, 52 | 479.46, 53 | 631.97, 54 | 783.38, 55 | 937.91, 56 | 1098.62, 57 | 1265.47, 58 | 1448.32, 59 | 1648.27, 60 | 1876.08, 61 | 2142.06, 62 | 2465.37, 63 | 2866.31, 64 | 3401.59, 65 | 4155.73, 66 | 5400.08, 67 | 8037.54, 68 | 223212.26, 69 | ] 70 | -------------------------------------------------------------------------------- /components/ecoindex/data/targets.py: -------------------------------------------------------------------------------- 1 | target_dom = 600 2 | target_req = 40 3 | target_size = 1024 4 | -------------------------------------------------------------------------------- /components/ecoindex/database/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/components/ecoindex/database/__init__.py -------------------------------------------------------------------------------- /components/ecoindex/database/engine.py: -------------------------------------------------------------------------------- 1 | from typing import AsyncGenerator 2 | 3 | from ecoindex.config import Settings 4 | from ecoindex.models.api import * # noqa: F401, F403 5 | from sqlalchemy.ext.asyncio import create_async_engine 6 | from sqlalchemy.orm import sessionmaker 7 | from sqlalchemy.pool import NullPool 8 | from sqlmodel import SQLModel 9 | from sqlmodel.ext.asyncio.session import AsyncSession 10 | 11 | engine = create_async_engine( 12 | Settings().DATABASE_URL, 13 | future=True, 14 | pool_pre_ping=True, 15 | poolclass=NullPool, 16 | echo=Settings().DEBUG, 17 | ) 18 | 19 | 20 | async def init_db(): 21 | async with engine.begin() as conn: 22 | await conn.run_sync(SQLModel.metadata.create_all) 23 | 24 | 25 | async def get_session() -> AsyncGenerator: 26 | async_session = sessionmaker( 27 | bind=engine, # type: ignore 28 | class_=AsyncSession, 29 | expire_on_commit=False, 30 | ) 31 | async with async_session() as session: # type: ignore 32 | yield session 33 | 
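`get_session` above is an async generator meant for FastAPI's `Depends`; outside the framework it has to be driven manually. A minimal sketch for one-off scripts, assuming `DATABASE_URL` points at a reachable database:

```python
from asyncio import run

from ecoindex.database.engine import get_session, init_db


async def main() -> None:
    await init_db()  # create the tables on a fresh database
    async for session in get_session():  # drive the generator by hand
        ...  # use the AsyncSession here


run(main())
```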
-------------------------------------------------------------------------------- /components/ecoindex/database/exceptions/quota.py: -------------------------------------------------------------------------------- 1 | from ecoindex.config import Settings 2 | from ecoindex.database.models import ApiEcoindex 3 | 4 | 5 | class QuotaExceededException(Exception): 6 | def __init__(self, limit: int, host: str, latest_result: ApiEcoindex) -> None: 7 | self.daily_limit_per_host = Settings().DAILY_LIMIT_PER_HOST 8 | self.limit = limit 9 | self.host = host 10 | self.latest_result = latest_result 11 | self.message = ( 12 | "You have already reached the daily limit " 13 | f"of {limit} requests for host {host} today" 14 | ) 15 | 16 | super().__init__(self.message) 17 | -------------------------------------------------------------------------------- /components/ecoindex/database/helper.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | 3 | from ecoindex.database.models import ApiEcoindex 4 | from sqlalchemy.engine.reflection import Inspector 5 | from sqlmodel.sql.expression import SelectOfScalar 6 | 7 | SelectOfScalar.inherit_cache = True # type: ignore 8 | 9 | 10 | def date_filter( 11 | statement: SelectOfScalar, 12 | date_from: date | None = None, 13 | date_to: date | None = None, 14 | ) -> SelectOfScalar: 15 | if date_from and ApiEcoindex.date: 16 | statement = statement.where(ApiEcoindex.date >= date_from) 17 | 18 | if date_to and ApiEcoindex.date: 19 | statement = statement.where(ApiEcoindex.date <= date_to) 20 | 21 | return statement 22 | 23 | 24 | def table_exists(conn, table_name) -> bool: 25 | inspector = Inspector.from_engine(conn) 26 | return table_name in inspector.get_table_names() 27 | 28 | 29 | def column_exists(conn, table_name, column_name) -> bool: 30 | inspector = Inspector.from_engine(conn) 31 | return column_name in [c["name"] for c in inspector.get_columns(table_name)] 32 | 33 | 34 | def index_exists(conn, table_name, index_name) -> bool: 35 | inspector = Inspector.from_engine(conn) 36 | return index_name in [i["name"] for i in inspector.get_indexes(table_name)] 37 | -------------------------------------------------------------------------------- /components/ecoindex/database/models/__init__.py: -------------------------------------------------------------------------------- 1 | from uuid import UUID 2 | 3 | from ecoindex.models.compute import Result 4 | from pydantic import BaseModel 5 | from sqlmodel import Field, SQLModel 6 | 7 | 8 | class ApiEcoindex(SQLModel, Result, table=True): # type: ignore 9 | id: UUID | None = Field( 10 | default=None, 11 | description="Analysis ID of type `UUID`", 12 | primary_key=True, 13 | index=True, 14 | ) 15 | host: str = Field( 16 | default=..., 17 | title="Web page host", 18 | description="Host name of the web page", 19 | index=True, 20 | ) 21 | version: int = Field( 22 | default=1, 23 | title="API version", 24 | description="Version number of the API used to run the test", 25 | ) 26 | initial_ranking: int | None = Field( 27 | default=..., 28 | title="Analysis rank", 29 | description=( 30 | "This is the initial rank of the analysis. " 31 | "This is an indicator of the ranking at the " 32 | "time of the analysis for a given version." 33 | ), 34 | ) 35 | initial_total_results: int | None = Field( 36 | default=..., 37 | title="Total number of analysis", 38 | description=( 39 | "This is the initial total number of analysis. 
" 40 | "This is an indicator of the total number of analysis " 41 | "at the time of the analysis for a given version." 42 | ), 43 | ) 44 | source: str | None = Field( 45 | default="ecoindex.fr", 46 | title="Source of the analysis", 47 | description="Source of the analysis", 48 | ) 49 | 50 | 51 | ApiEcoindexes = list[ApiEcoindex] 52 | 53 | 54 | class PageApiEcoindexes(BaseModel): 55 | items: list[ApiEcoindex] 56 | total: int 57 | page: int 58 | size: int 59 | 60 | 61 | class EcoindexSearchResults(BaseModel): 62 | count: int 63 | latest_result: ApiEcoindex | None = None 64 | older_results: list[ApiEcoindex] = [] 65 | host_results: list[ApiEcoindex] = [] 66 | -------------------------------------------------------------------------------- /components/ecoindex/database/repositories/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/components/ecoindex/database/repositories/__init__.py -------------------------------------------------------------------------------- /components/ecoindex/database/repositories/ecoindex.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from uuid import UUID 3 | 4 | from ecoindex.database.helper import date_filter 5 | from ecoindex.database.models import ApiEcoindex 6 | from ecoindex.models import Result 7 | from ecoindex.models.enums import Version 8 | from ecoindex.models.sort import Sort 9 | from sqlalchemy import func, text 10 | from sqlalchemy.sql.expression import asc, desc 11 | from sqlmodel import select 12 | from sqlmodel.ext.asyncio.session import AsyncSession 13 | 14 | 15 | async def get_count_analysis_db( 16 | session: AsyncSession, 17 | version: Version = Version.v1, 18 | host: str | None = None, 19 | date_from: date | None = None, 20 | date_to: date | None = None, 21 | ) -> int: 22 | statement = ( 23 | "SELECT count(*) FROM apiecoindex " 24 | f"WHERE version = {version.get_version_number()}" 25 | ) 26 | 27 | if host: 28 | statement += f" AND host = '{host}'" 29 | 30 | if date_from: 31 | statement += f" AND date >= '{date_from}'" 32 | 33 | if date_to: 34 | statement += f" AND date <= '{date_to}'" 35 | 36 | result = await session.exec(statement=text(statement)) # type: ignore 37 | 38 | return result.scalar_one() 39 | 40 | 41 | async def get_rank_analysis_db( 42 | session: AsyncSession, ecoindex: Result, version: Version = Version.v1 43 | ) -> int | None: 44 | statement = ( 45 | "SELECT ranking FROM (" 46 | "SELECT *, ROW_NUMBER() OVER (ORDER BY score DESC) ranking " 47 | "FROM apiecoindex " 48 | f"WHERE version={version.get_version_number()} " 49 | "ORDER BY score DESC) t " 50 | f"WHERE score <= {ecoindex.score} " 51 | "LIMIT 1;" 52 | ) 53 | 54 | result = await session.exec(text(statement)) # type: ignore 55 | 56 | return result.scalar_one_or_none() 57 | 58 | 59 | async def get_ecoindex_result_list_db( 60 | session: AsyncSession, 61 | version: Version = Version.v1, 62 | host: str | None = None, 63 | date_from: date | None = None, 64 | date_to: date | None = None, 65 | page: int = 1, 66 | size: int = 50, 67 | sort_params: list[Sort] = [], 68 | ) -> list[ApiEcoindex]: 69 | statement = ( 70 | select(ApiEcoindex) 71 | .where(ApiEcoindex.version == version.get_version_number()) 72 | .offset((page - 1) * size) 73 | .limit(size) 74 | ) 75 | 76 | if host: 77 | statement = statement.where(ApiEcoindex.host == host) 78 | statement = 
date_filter(statement=statement, date_from=date_from, date_to=date_to) 79 | 80 | for sort in sort_params: 81 | if sort.sort == "asc": 82 | sort_parameter = asc(sort.clause) # type: ignore 83 | elif sort.sort == "desc": 84 | sort_parameter = desc(sort.clause) 85 | 86 | statement = statement.order_by(sort_parameter) # type: ignore 87 | 88 | ecoindexes = await session.exec(statement) 89 | 90 | return [ecoindex for ecoindex in ecoindexes.all()] 91 | 92 | 93 | async def get_ecoindex_result_by_id_db( 94 | session: AsyncSession, id: UUID, version: Version = Version.v1 95 | ) -> ApiEcoindex | None: 96 | statement = ( 97 | select(ApiEcoindex) 98 | .where(ApiEcoindex.id == id) 99 | .where(ApiEcoindex.version == version.get_version_number()) 100 | ) 101 | 102 | ecoindex = await session.exec(statement) 103 | 104 | return ecoindex.one_or_none() 105 | 106 | 107 | async def get_count_daily_request_per_host(session: AsyncSession, host: str) -> int: 108 | statement = select(ApiEcoindex).where( 109 | func.date(ApiEcoindex.date) == date.today(), ApiEcoindex.host == host 110 | ) 111 | 112 | results = await session.exec(statement) 113 | 114 | return len(results.all()) 115 | 116 | 117 | async def get_latest_result(session: AsyncSession, host: str) -> ApiEcoindex | None: 118 | statement = ( 119 | select(ApiEcoindex) 120 | .where(ApiEcoindex.host == host) 121 | .order_by(text("date desc")) 122 | .limit(1) 123 | ) 124 | 125 | result = await session.exec(statement) 126 | 127 | return result.one_or_none() 128 | -------------------------------------------------------------------------------- /components/ecoindex/database/repositories/host.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | 3 | from ecoindex.database.helper import date_filter 4 | from ecoindex.database.models import ApiEcoindex 5 | from ecoindex.models.enums import Version 6 | from sqlalchemy import text 7 | from sqlmodel import select 8 | from sqlmodel.ext.asyncio.session import AsyncSession 9 | 10 | 11 | async def get_host_list_db( 12 | session: AsyncSession, 13 | version: Version = Version.v1, 14 | host: str | None = None, 15 | date_from: date | None = None, 16 | date_to: date | None = None, 17 | page: int = 1, 18 | size: int = 50, 19 | ) -> list[str]: 20 | statement = ( 21 | select(ApiEcoindex.host) 22 | .where(ApiEcoindex.version == version.get_version_number()) 23 | .offset(size * (page - 1)) 24 | .limit(size) 25 | ) 26 | 27 | if host: 28 | statement = statement.filter(ApiEcoindex.host.like(f"%{host}%")) # type: ignore 29 | 30 | statement = date_filter(statement=statement, date_from=date_from, date_to=date_to) 31 | 32 | statement = statement.group_by(ApiEcoindex.host).order_by(ApiEcoindex.host) 33 | 34 | hosts = await session.exec(statement=statement) 35 | 36 | return [str(host) for host in hosts.all()] 37 | 38 | 39 | async def get_count_hosts_db( 40 | session: AsyncSession, 41 | version: Version = Version.v1, 42 | name: str | None = None, 43 | q: str | None = None, 44 | date_from: date | None = None, 45 | date_to: date | None = None, 46 | group_by_host: bool = True, 47 | ) -> int: 48 | sub_statement = ( 49 | f"SELECT host FROM apiecoindex WHERE version = {version.get_version_number()}" 50 | ) 51 | if name: 52 | sub_statement += f" AND host = '{name}'" 53 | 54 | if q: 55 | sub_statement += f" AND host LIKE '%{q}%'" 56 | 57 | if date_from: 58 | sub_statement += f" AND date >= '{date_from}'" 59 | 60 | if date_to: 61 | sub_statement += f" AND date <= '{date_to}'" 62 | 63 | if 
group_by_host: 64 | sub_statement += " GROUP BY host" 65 | 66 | statement = f"SELECT count(*) FROM ({sub_statement}) t" 67 | 68 | result = await session.exec(statement=text(statement)) # type: ignore 69 | 70 | return result.scalar_one() 71 | -------------------------------------------------------------------------------- /components/ecoindex/database/repositories/worker.py: -------------------------------------------------------------------------------- 1 | from uuid import UUID 2 | 3 | from ecoindex.database.models import ApiEcoindex 4 | from ecoindex.database.repositories.ecoindex import ( 5 | get_count_analysis_db, 6 | get_rank_analysis_db, 7 | ) 8 | from ecoindex.models import Result 9 | from ecoindex.models.enums import Version 10 | from sqlmodel.ext.asyncio.session import AsyncSession 11 | 12 | 13 | async def save_ecoindex_result_db( 14 | session: AsyncSession, 15 | id: UUID, 16 | ecoindex_result: Result, 17 | version: Version = Version.v1, 18 | source: str | None = None, 19 | ) -> ApiEcoindex: 20 | ranking = await get_rank_analysis_db( 21 | session=session, ecoindex=ecoindex_result, version=version 22 | ) 23 | total_results = await get_count_analysis_db(session=session, version=version) 24 | 25 | db_ecoindex = ApiEcoindex( 26 | id=id, 27 | date=ecoindex_result.date, 28 | url=ecoindex_result.url, 29 | host=ecoindex_result.get_url_host(), 30 | width=ecoindex_result.width, 31 | height=ecoindex_result.height, 32 | size=ecoindex_result.size, 33 | nodes=ecoindex_result.nodes, 34 | requests=ecoindex_result.requests, 35 | grade=ecoindex_result.grade, 36 | score=ecoindex_result.score, 37 | ges=ecoindex_result.ges, 38 | water=ecoindex_result.water, 39 | page_type=ecoindex_result.page_type, 40 | version=version.get_version_number(), 41 | initial_ranking=ranking if ranking else total_results + 1, 42 | initial_total_results=total_results + 1, 43 | ecoindex_version=ecoindex_result.ecoindex_version, 44 | source=source, 45 | ) 46 | 47 | session.add(db_ecoindex) 48 | try: 49 | await session.commit() 50 | await session.refresh(db_ecoindex) 51 | except Exception: 52 | await session.rollback() 53 | raise 54 | finally: 55 | await session.close() 56 | 57 | return db_ecoindex 58 | -------------------------------------------------------------------------------- /components/ecoindex/exceptions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/components/ecoindex/exceptions/__init__.py -------------------------------------------------------------------------------- /components/ecoindex/exceptions/scraper.py: -------------------------------------------------------------------------------- 1 | class EcoindexScraperException(Exception): 2 | pass 3 | 4 | 5 | class EcoindexScraperStatusException(EcoindexScraperException): 6 | def __init__(self, url: str, status: int, message: str): 7 | self.message = message 8 | self.url = url 9 | self.status = status 10 | 11 | pass 12 | -------------------------------------------------------------------------------- /components/ecoindex/exceptions/worker.py: -------------------------------------------------------------------------------- 1 | class EcoindexContentTypeError(Exception): 2 | pass 3 | 4 | 5 | class EcoindexHostUnreachable(Exception): 6 | pass 7 | 8 | 9 | class EcoindexPageNotFound(Exception): 10 | pass 11 | 12 | 13 | class EcoindexStatusError(Exception): 14 | pass 15 | 16 | 17 | class EcoindexTimeout(Exception): 18 | 
pass 19 | -------------------------------------------------------------------------------- /components/ecoindex/models/__init__.py: -------------------------------------------------------------------------------- 1 | from ecoindex.models.cli import ( 2 | CliHost, 3 | ) 4 | from ecoindex.models.compute import ( 5 | Ecoindex, 6 | PageMetrics, 7 | PageType, 8 | Request, 9 | Result, 10 | ScreenShot, 11 | WebPage, 12 | WindowSize, 13 | ) 14 | from ecoindex.models.enums import ExportFormat, Language, Version 15 | from ecoindex.models.response_examples import ( 16 | example_daily_limit_response, 17 | example_ecoindex_not_found, 18 | example_file_not_found, 19 | example_page_listing_empty, 20 | ) 21 | from ecoindex.models.scraper import RequestItem, Requests 22 | from ecoindex.models.sort import Sort 23 | 24 | __all__ = [ 25 | "CliHost", 26 | "Ecoindex", 27 | "example_daily_limit_response", 28 | "example_ecoindex_not_found", 29 | "example_file_not_found", 30 | "example_page_listing_empty", 31 | "ExportFormat", 32 | "Language", 33 | "PageMetrics", 34 | "PageType", 35 | "Request", 36 | "RequestItem", 37 | "Requests", 38 | "Result", 39 | "ScreenShot", 40 | "Sort", 41 | "Version", 42 | "WebPage", 43 | "WindowSize", 44 | ] 45 | -------------------------------------------------------------------------------- /components/ecoindex/models/api.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class ApiHealth(BaseModel): 7 | database: bool = Field(default=..., title="Status of database") 8 | 9 | 10 | class BaseHost(BaseModel): 11 | name: str 12 | total_count: int 13 | 14 | 15 | class Host(BaseHost): 16 | remaining_daily_requests: int | None = None 17 | 18 | 19 | class PageHosts(BaseModel): 20 | items: list[str] 21 | total: int 22 | page: int 23 | size: int 24 | 25 | 26 | class ExceptionResponse(BaseModel): 27 | args: list[Any] 28 | exception: str 29 | message: str | None = None 30 | 31 | 32 | class HealthWorker(BaseModel): 33 | name: str = Field(default=..., title="Name of worker") 34 | healthy: bool = Field(default=..., title="Status of worker") 35 | 36 | 37 | class HealthWorkers(BaseModel): 38 | healthy: bool = Field(default=..., title="Global status of workers") 39 | workers: list[HealthWorker] = Field(default=..., title="List of workers") 40 | 41 | 42 | class HealthResponse(BaseModel): 43 | database: bool = Field(default=..., title="Status of database") 44 | workers: HealthWorkers = Field(default=..., title="Status of workers") 45 | -------------------------------------------------------------------------------- /components/ecoindex/models/cli.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | class CliHost(BaseModel): 4 | domain: str 5 | netloc: str 6 | 7 | -------------------------------------------------------------------------------- /components/ecoindex/models/compute.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | from functools import lru_cache 4 | from pathlib import Path 5 | from typing import Any 6 | 7 | from ecoindex.models.enums import Grade 8 | from pydantic import AnyHttpUrl, BaseModel, Field, field_validator 9 | 10 | PageType = str 11 | 12 | 13 | @lru_cache 14 | def get_compute_version() -> str: 15 | current_directory = os.path.dirname(os.path.realpath(__file__)) 16 | version_filename = 
os.path.join(current_directory, "..", "compute", "VERSION") 17 | 18 | with open(version_filename, "r") as f: 19 | return (f.read()).strip() 20 | 21 | 22 | class Ecoindex(BaseModel): 23 | grade: Grade | None = Field( 24 | default=None, 25 | title="Ecoindex grade", 26 | description="Is the corresponding ecoindex grade of the page (from A to G)", 27 | ) 28 | score: float | None = Field( 29 | default=None, 30 | title="Ecoindex score", 31 | description="Is the corresponding ecoindex score of the page (0 to 100)", 32 | ge=0, 33 | le=100, 34 | ) 35 | ges: float | None = Field( 36 | default=None, 37 | title="Ecoindex GES equivalent", 38 | description=( 39 | "Is the equivalent of greenhouse gases emission" " (in `gCO2e`) of the page" 40 | ), 41 | ge=0, 42 | ) 43 | water: float | None = Field( 44 | default=None, 45 | title="Ecoindex Water equivalent", 46 | description="Is the equivalent water consumption (in `cl`) of the page", 47 | ge=0, 48 | ) 49 | ecoindex_version: str | None = Field( 50 | default=get_compute_version(), 51 | title="Ecoindex version", 52 | description="Is the version of the ecoindex used to compute the score", 53 | ) 54 | 55 | 56 | class PageMetrics(BaseModel): 57 | size: float = Field( 58 | default=..., 59 | title="Page size", 60 | description=( 61 | "Is the size of the page and of the downloaded" 62 | " elements of the page in KB" 63 | ), 64 | ge=0, 65 | ) 66 | nodes: int = Field( 67 | default=..., 68 | title="Page nodes", 69 | description="Is the number of the DOM elements in the page", 70 | ge=0, 71 | ) 72 | requests: int = Field( 73 | default=..., 74 | title="Page requests", 75 | description="Is the number of external requests made by the page", 76 | ge=0, 77 | ) 78 | 79 | 80 | class WebPage(BaseModel): 81 | width: int | None = Field( 82 | default=1920, 83 | title="Page Width", 84 | description="Width of the simulated window in pixel", 85 | ge=100, 86 | le=3840, 87 | ) 88 | height: int | None = Field( 89 | default=1080, 90 | title="Page Height", 91 | description="Height of the simulated window in pixel", 92 | ge=50, 93 | le=2160, 94 | ) 95 | url: str = Field( 96 | default=..., 97 | title="Page url", 98 | description="Url of the analysed page", 99 | examples=["https://www.ecoindex.fr"], 100 | ) 101 | 102 | @field_validator("url") 103 | @classmethod 104 | def url_as_http_url(cls, v: str) -> str: 105 | url_object = AnyHttpUrl(url=v) # type: ignore 106 | assert url_object.scheme in {"http", "https"}, "scheme must be http or https" 107 | 108 | return url_object.unicode_string() 109 | 110 | def get_url_host(self) -> str: 111 | url_object = AnyHttpUrl(url=self.url) # type: ignore 112 | 113 | return str(url_object.host) 114 | 115 | def get_url_path(self) -> str: 116 | url_obect = AnyHttpUrl(url=self.url) # type: ignore 117 | 118 | return str(url_obect.path) 119 | 120 | 121 | class WindowSize(BaseModel): 122 | height: int = Field( 123 | default=..., 124 | title="Window height", 125 | description="Height of the simulated window in pixel", 126 | ) 127 | width: int = Field( 128 | default=..., 129 | title="Window width", 130 | description="Width of the simulated window in pixel", 131 | ) 132 | 133 | def __str__(self) -> str: 134 | return f"{self.width},{self.height}" 135 | 136 | 137 | class Result(Ecoindex, PageMetrics, WebPage): 138 | date: datetime | None = Field( 139 | default=None, title="Analysis datetime", description="Date of the analysis" 140 | ) 141 | page_type: PageType | None = Field( 142 | default=None, 143 | title="Page type", 144 | description="Is the type of the page, based 
on the [opengraph type tag](https://ogp.me/#types)",
145 |     )
146 | 
147 | 
148 | class ScreenShot(BaseModel):
149 |     id: str
150 |     folder: str
151 | 
152 |     def __init__(__pydantic_self__, **data: Any) -> None:
153 |         super().__init__(**data)
154 |         path = Path(__pydantic_self__.folder)
155 |         path.mkdir(parents=True, exist_ok=True)
156 | 
157 |     def __str__(self) -> str:
158 |         return f"{self.folder}/{self.id}"
159 | 
160 |     def get_png(self) -> str:
161 |         return f"{self.__str__()}.png"
162 | 
163 |     def get_webp(self) -> str:
164 |         return f"{self.__str__()}.webp"
165 | 
166 | 
167 | class Request(BaseModel):
168 |     url: str
169 |     type: str
170 |     size: float
171 | 
--------------------------------------------------------------------------------
/components/ecoindex/models/enums.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | 
 3 | 
 4 | class Version(str, Enum):
 5 |     v0 = "v0"
 6 |     v1 = "v1"
 7 | 
 8 |     def get_version_number(self) -> int:
 9 |         return int(self.value[1:])
10 | 
11 | 
12 | class ExportFormat(Enum):
13 |     csv = "csv"
14 |     json = "json"
15 | 
16 | 
17 | class Language(Enum):
18 |     fr = "fr"
19 |     en = "en"
20 | 
21 | 
22 | class TaskStatus(str, Enum):
23 |     FAILURE = "FAILURE"
24 |     PENDING = "PENDING"
25 |     SUCCESS = "SUCCESS"
26 | 
27 | 
28 | class BadgeTheme(str, Enum):
29 |     dark = "dark"
30 |     light = "light"
31 | 
32 | 
33 | class Grade(str, Enum):
34 |     A = "A"
35 |     B = "B"
36 |     C = "C"
37 |     D = "D"
38 |     E = "E"
39 |     F = "F"
40 |     G = "G"
41 | 
--------------------------------------------------------------------------------
/components/ecoindex/models/response_examples.py:
--------------------------------------------------------------------------------
 1 | example_ecoindex_not_found = {
 2 |     "description": "Not found",
 3 |     "content": {
 4 |         "application/json": {
 5 |             "example": {
 6 |                 "detail": (
 7 |                     "Analysis e9a4d5ea-b9c5-4440-a74a-cac229f7d672 "
 8 |                     "not found for version v1"
 9 |                 )
10 |             }
11 |         }
12 |     },
13 | }
14 | 
15 | example_file_not_found = {
16 |     "description": "Not found",
17 |     "content": {
18 |         "application/json": {
19 |             "example": {
20 |                 "detail": (
21 |                     "File at path screenshots/v0/"
22 |                     "550cdf8c-9c4c-4f8a-819d-cb69d0866fe1.webp does not exist."
23 |                 )
24 |             }
25 |         }
26 |     },
27 | }
28 | 
29 | example_page_listing_empty = {
30 |     "description": "Empty page",
31 |     "content": {
32 |         "application/json": {
33 |             "example": {
34 |                 "items": [],
35 |                 "total": 0,
36 |                 "page": 1,
37 |                 "size": 10,
38 |             }
39 |         }
40 |     },
41 | }
42 | 
43 | example_daily_limit_response = {
44 |     "description": "You have reached the daily limit",
45 |     "content": {
46 |         "application/json": {
47 |             "example": {
48 |                 "detail": {
49 |                     "daily_limit_per_host": 1,
50 |                     "limit": 1,
51 |                     "host": "www.ecoindex.fr",
52 |                     "latest_result": {
53 |                         "width": 1920,
54 |                         "height": 1080,
55 |                         "size": 107.178,
56 |                         "requests": 6,
57 |                         "score": 87,
58 |                         "water": 1.89,
59 |                         "date": "2023-01-05T12:06:57",
60 |                         "id": "be8c3612-545f-4e72-8880-13b8db74ff6e",
61 |                         "version": 1,
62 |                         "initial_ranking": 1,
63 |                         "url": "https://www.ecoindex.fr",
64 |                         "nodes": 201,
65 |                         "grade": "A",
66 |                         "ges": 1.26,
67 |                         "ecoindex_version": "5.4.1",
68 |                         "page_type": None,
69 |                         "host": "www.ecoindex.fr",
70 |                         "initial_total_results": 1,
71 |                     },
72 |                     "message": (
73 |                         "You have already reached the daily limit of 1 "
74 |                         "requests for host www.ecoindex.fr today"
75 |                     ),
76 |                 }
77 |             }
78 |         }
79 |     },
80 | }
81 | 
82 | example_host_unreachable = {
83 |     "description": "Host unreachable",
84 |     "content": {
85 |         "application/json": {
86 |             "example": {
87 |                 "detail": "The URL http://localhost is unreachable. Are you really sure of this url? 🤔",
88 |             }
89 |         }
90 |     },
91 | }
92 | 
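These dictionaries plug directly into FastAPI's `responses=` mapping, exactly as the routers above use them. A compact sketch; the route path and status code are illustrative:

```python
from ecoindex.models.response_examples import example_host_unreachable
from fastapi import APIRouter, status

router = APIRouter()


@router.get(
    "/probe",
    responses={status.HTTP_502_BAD_GATEWAY: example_host_unreachable},
)
async def probe() -> dict:
    return {"ok": True}
```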
🤔", 127 | } 128 | } 129 | }, 130 | } 131 | -------------------------------------------------------------------------------- /components/ecoindex/models/scraper.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class RequestItem(BaseModel): 5 | category: str 6 | mime_type: str 7 | size: float 8 | status: int 9 | url: str 10 | 11 | 12 | class MimetypeMetrics(BaseModel): 13 | total_count: int = 0 14 | total_size: float = 0 15 | 16 | 17 | class MimetypeAggregation(BaseModel): 18 | audio: MimetypeMetrics = MimetypeMetrics() 19 | css: MimetypeMetrics = MimetypeMetrics() 20 | font: MimetypeMetrics = MimetypeMetrics() 21 | html: MimetypeMetrics = MimetypeMetrics() 22 | image: MimetypeMetrics = MimetypeMetrics() 23 | javascript: MimetypeMetrics = MimetypeMetrics() 24 | other: MimetypeMetrics = MimetypeMetrics() 25 | video: MimetypeMetrics = MimetypeMetrics() 26 | 27 | @classmethod 28 | async def get_category_of_resource(cls, mimetype: str) -> str: 29 | mimetypes = [type for type in cls.model_fields.keys()] 30 | 31 | for type in mimetypes: 32 | if type in mimetype: 33 | return type 34 | 35 | return "other" 36 | 37 | 38 | class Requests(BaseModel): 39 | aggregation: MimetypeAggregation = MimetypeAggregation() 40 | items: list[RequestItem] = [] 41 | total_count: int = 0 42 | total_size: float = 0 43 | -------------------------------------------------------------------------------- /components/ecoindex/models/sort.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class Sort(BaseModel): 7 | clause: str 8 | sort: Literal["asc", "desc"] 9 | -------------------------------------------------------------------------------- /components/ecoindex/models/tasks.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from ecoindex.models import Result 4 | from pydantic import AnyHttpUrl, BaseModel, Field 5 | 6 | 7 | class QueueTaskError(BaseModel): 8 | detail: Any | None = Field( 9 | default=None, title="Detail object of the raised exception" 10 | ) 11 | exception: str = Field(default=..., title="Name of the exception that was raised") 12 | message: str = Field(default=..., title="Message of the exception") 13 | status_code: int | None = Field( 14 | default=None, title="Corresponding original HTTP status code sended by the API" 15 | ) 16 | url: AnyHttpUrl | None = Field(default=None, title="URL of the analyzed web page") 17 | 18 | 19 | class QueueTaskResult(BaseModel): 20 | status: str | None = Field( 21 | default=None, 22 | title="Status of the ecoindex analysis.", 23 | description=( 24 | "While the task is pending or the analysis is running, it is null." 25 | " But once the analysis is complete, it should return SUCCESS or FAILURE." 26 | ), 27 | ) 28 | detail: Result | None = Field( 29 | default=None, 30 | title="Result of the ecoindex analysis once it was successfuly completed", 31 | ) 32 | error: QueueTaskError | None = Field( 33 | default=None, title="Detail of the ecoindex error if it is not successful" 34 | ) 35 | 36 | 37 | class QueueTaskApi(BaseModel): 38 | id: str = Field( 39 | default=..., 40 | title=( 41 | "Identifier of the current. " 42 | "This identifier will become the identifier of the analysis" 43 | ), 44 | ) 45 | status: str = Field( 46 | default=..., 47 | title="Status of the current task. 
-------------------------------------------------------------------------------- /components/ecoindex/models/sort.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class Sort(BaseModel): 7 | clause: str 8 | sort: Literal["asc", "desc"] 9 | -------------------------------------------------------------------------------- /components/ecoindex/models/tasks.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from ecoindex.models import Result 4 | from pydantic import AnyHttpUrl, BaseModel, Field 5 | 6 | 7 | class QueueTaskError(BaseModel): 8 | detail: Any | None = Field( 9 | default=None, title="Detail object of the raised exception" 10 | ) 11 | exception: str = Field(default=..., title="Name of the exception that was raised") 12 | message: str = Field(default=..., title="Message of the exception") 13 | status_code: int | None = Field( 14 | default=None, title="Corresponding original HTTP status code sent by the API" 15 | ) 16 | url: AnyHttpUrl | None = Field(default=None, title="URL of the analyzed web page") 17 | 18 | 19 | class QueueTaskResult(BaseModel): 20 | status: str | None = Field( 21 | default=None, 22 | title="Status of the ecoindex analysis.", 23 | description=( 24 | "While the task is pending or the analysis is running, it is null." 25 | " But once the analysis is complete, it should return SUCCESS or FAILURE." 26 | ), 27 | ) 28 | detail: Result | None = Field( 29 | default=None, 30 | title="Result of the ecoindex analysis once it was successfully completed", 31 | ) 32 | error: QueueTaskError | None = Field( 33 | default=None, title="Detail of the ecoindex error if it is not successful" 34 | ) 35 | 36 | 37 | class QueueTaskApi(BaseModel): 38 | id: str = Field( 39 | default=..., 40 | title=( 41 | "Identifier of the current task. " 42 | "This identifier will become the identifier of the analysis" 43 | ), 44 | ) 45 | status: str = Field( 46 | default=..., 47 | title="Status of the current task. Can be PENDING, FAILURE, SUCCESS", 48 | ) 49 | ecoindex_result: QueueTaskResult | None = Field( 50 | default=None, title="Result of the Ecoindex analysis" 51 | ) 52 | task_error: Any | None = Field( 53 | default=None, 54 | title="Detail of the error encountered by the task in case of failure", 55 | ) 56 | 57 | 58 | class QueueTaskApiBatch(BaseModel): 59 | id: str = Field( 60 | default=..., 61 | title=( 62 | "Identifier of the current task. " 63 | "This identifier will become the identifier of the analysis" 64 | ), 65 | ) 66 | status: str = Field( 67 | default=..., 68 | title="Status of the current task. Can be PENDING, FAILURE, SUCCESS", 69 | ) 70 | task_error: Any | None = Field( 71 | default=None, 72 | title="Detail of the error encountered by the task in case of failure", 73 | ) 74 | -------------------------------------------------------------------------------- /components/ecoindex/scraper/VERSION: -------------------------------------------------------------------------------- 1 | 3.15.0 2 | -------------------------------------------------------------------------------- /components/ecoindex/scraper/__init__.py: -------------------------------------------------------------------------------- 1 | from ecoindex.scraper.scrap import EcoindexScraper 2 | 3 | __all__ = ["EcoindexScraper"] 4 | -------------------------------------------------------------------------------- /components/ecoindex/scraper/helper.py: -------------------------------------------------------------------------------- 1 | from asyncio import run 2 | from concurrent.futures import ThreadPoolExecutor, as_completed 3 | from typing import Generator 4 | 5 | from ecoindex.models.compute import Result, WindowSize 6 | from ecoindex.scraper.scrap import EcoindexScraper 7 | 8 | 9 | def run_page_analysis( 10 | url: str, 11 | window_size: WindowSize, 12 | wait_after_scroll: int = 3, 13 | wait_before_scroll: int = 3, 14 | logger=None, 15 | ) -> tuple[Result, bool]: 16 | """Run the page analysis and return the result and a boolean indicating if the analysis was successful""" 17 | scraper = EcoindexScraper( 18 | url=str(url), 19 | window_size=window_size, 20 | wait_after_scroll=wait_after_scroll, 21 | wait_before_scroll=wait_before_scroll, 22 | page_load_timeout=20, 23 | ) 24 | try: 25 | return (run(scraper.get_page_analysis()), True) 26 | except Exception as e: 27 | # logger defaults to None, so guard the call to avoid an AttributeError here 28 | if logger: 29 | logger.error(f"{url} -- {e.msg if hasattr(e, 'msg') else e}") 30 | 31 | return ( 32 | Result( 33 | url=url, 34 | water=0, 35 | width=window_size.width, 36 | height=window_size.height, 37 | size=0, 38 | nodes=0, 39 | requests=0, 40 | ), 41 | False, 42 | ) 43 | 44 | 45 | def bulk_analysis( 46 | max_workers, 47 | urls, 48 | window_sizes, 49 | wait_after_scroll: int = 0, 50 | wait_before_scroll: int = 0, 51 | logger=None, 52 | ) -> Generator[tuple[Result, bool], None, None]: 53 | with ThreadPoolExecutor(max_workers=max_workers) as executor: 54 | future_to_analysis = {} 55 | 56 | for url in urls: 57 | for window_size in window_sizes: 58 | future_to_analysis[ 59 | executor.submit( 60 | run_page_analysis, 61 | url, 62 | window_size, 63 | wait_after_scroll, 64 | wait_before_scroll, 65 | logger, 66 | ) 67 | ] = ( 68 | url, 69 | window_size, 70 | wait_after_scroll, 71 | wait_before_scroll, 72 | logger, 73 | ) 74 | 75 | for future in as_completed(future_to_analysis): 76 | yield future.result() 77 | -------------------------------------------------------------------------------- /components/ecoindex/scripts/__init__.py: 
-------------------------------------------------------------------------------- 1 | from ecoindex.scripts.update_values import update_values 2 | 3 | __all__ = ["update_values"] 4 | -------------------------------------------------------------------------------- /components/ecoindex/scripts/update_values.py: -------------------------------------------------------------------------------- 1 | from asyncio import run 2 | from json import dumps 3 | from os import getcwd 4 | 5 | from aiofile import async_open 6 | from requests import get 7 | 8 | 9 | async def update_values_async() -> None: 10 | response = get( 11 | "https://cdn.jsdelivr.net/gh/cnumr/ecoindex_reference@1/ecoindex_reference.json", 12 | ) 13 | 14 | data = response.json() 15 | data_folder = f"{getcwd()}/components/ecoindex/data/" 16 | 17 | async with async_open(f"{data_folder}quantiles.py", "w") as quantile_file: 18 | quantiles = f"quantiles_dom = {dumps(data['quantiles']['dom_size'])}\n" 19 | quantiles += f"quantiles_req = {dumps(data['quantiles']['nb_request'])}\n" 20 | quantiles += f"quantiles_size = {dumps(data['quantiles']['response_size'])}\n" 21 | 22 | await quantile_file.write(quantiles) 23 | 24 | async with async_open(f"{data_folder}targets.py", "w") as target_file: 25 | targets = f"target_dom = {dumps(data['targets']['dom_size'])}\n" 26 | targets += f"target_req = {dumps(data['targets']['nb_request'])}\n" 27 | targets += f"target_size = {dumps(data['targets']['response_size'])}\n" 28 | 29 | await target_file.write(targets) 30 | 31 | async with async_open(f"{data_folder}medians.py", "w") as median_file: 32 | medians = f"median_dom = {dumps(data['medians']['dom_size'])}\n" 33 | medians += f"median_req = {dumps(data['medians']['nb_request'])}\n" 34 | medians += f"median_size = {dumps(data['medians']['response_size'])}\n" 35 | 36 | await median_file.write(medians) 37 | 38 | async with async_open(f"{data_folder}grades.py", "w") as grades_file: 39 | grades = "" 40 | 41 | for grade in data["grades"]: 42 | grades += f"{grade['grade']} = {grade['value']}\n" 43 | 44 | await grades_file.write(grades) 45 | 46 | print("Values updated") 47 | 48 | 49 | def update_values() -> None: 50 | run(update_values_async()) 51 | -------------------------------------------------------------------------------- /components/ecoindex/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from ecoindex.utils.screenshots import convert_screenshot_to_webp, set_screenshot_rights 2 | 3 | __all__ = [ 4 | "convert_screenshot_to_webp", 5 | "set_screenshot_rights", 6 | ] 7 | -------------------------------------------------------------------------------- /components/ecoindex/utils/cli_translations/en.yml: -------------------------------------------------------------------------------- 1 | title: Ecoindex analysis for the website 2 | synthesis: Synthesis 3 | top10: Top 10 4 | indicators: Indicators 5 | all_data_title: All the data 6 | download: Download 7 | ecoindex_score: The ecoindex score 8 | best_pages: Best pages 9 | worst_pages: Worst pages 10 | number_of_requests: Number of requests 11 | pages_size: Pages size 12 | number_of_dom_nodes: Number of DOM nodes 13 | 14 | ecoindex_body: | 15 |
The ecoindex is an indicator developed by the "Collectif Numérique Responsable". 16 | It allows you to analyze an HTML page and deduce a score based on three physical criteria of a web page. 17 | The calculation of the ecoindex makes it possible to deduce a standardized indicator (from A to G) that describes the environmental footprint of a page. 18 | ecoindex_body_end_pre: This synthesis is based on the analysis of 19 | ecoindex_body_end_mid: pages of the website 20 | ecoindex_body_end_suf: More info on Ecoindex.fr
21 | 22 | requests_body: | 23 | The number of HTTP requests gives a good idea of the server load. 24 | To put it quickly and somewhat crudely, the greater the number of requests for the same page, the more servers will be needed to serve this page. 25 | 26 | size_body: | 27 | The weight of the data transferred (KB) reflects the effort required to transport the page to the browser. 28 | In this first version, we consider that the connection is Wi-Fi via ADSL. 29 | But in the next version we will take into account different types of connections, especially 4G. 30 | Indeed, a 4G connection requires up to 23 times more energy to transport the same amount of data as an ADSL connection. 31 | 32 | nodes_body: 33 | The number of DOM elements testifies to the complexity of the site and therefore, a priori, 34 | to the effort the browser must make to display the page. 35 | The more complex the site, the more power (especially CPU) it takes to display it. 36 | And the more it contributes to shortening the life of the computer it runs on. 37 | Conversely, a particularly simple and light site contributes to extending the life of the equipment because it requires little memory and a “small” processor. 38 | 39 | histograms: 40 | grade: 41 | title: Ecoindex per page distribution 42 | xlabel: Ecoindex 43 | ylabel: Number of pages 44 | size: 45 | title: Pages size per page distribution 46 | xlabel: Size of pages (KB) 47 | ylabel: Number of pages 48 | nodes: 49 | title: DOM nodes per page distribution 50 | xlabel: Number of DOM nodes 51 | ylabel: Number of pages 52 | requests: 53 | title: Requests per page distribution 54 | xlabel: Number of requests 55 | ylabel: Number of pages 56 | 57 | footer: Ecoindex_cli by the "collectif Numérique Responsable" 58 | 59 | good_result: 😃 Your result is great! 60 | bad_result: 😞 You could do better on this... 61 | better_than: is better than 62 | worse_than: is worse than 63 | my_median: My median 64 | target_median: Target median 65 | global_median: Global median 66 | -------------------------------------------------------------------------------- /components/ecoindex/utils/cli_translations/fr.yml: -------------------------------------------------------------------------------- 1 | title: Analyse écoindex pour le site 2 | synthesis: Synthèse 3 | top10: Top 10 4 | indicators: Indicateurs 5 | all_data_title: Toutes les données 6 | download: Télécharger 7 | ecoindex_score: Le score ecoindex 8 | best_pages: Meilleures pages 9 | worst_pages: Moins bonnes pages 10 | number_of_requests: Nombre de requêtes 11 | pages_size: Taille des pages 12 | number_of_dom_nodes: Nombre d'éléments de la page 13 | 14 | ecoindex_body: | 15 |
L'ecoindex est un indicateur développé par le collectif numérique responsable. 16 | Il permet d'analyser une page html et d'en déduire un score en se basant sur trois critères physiques d'une page web. 17 | Le calcul de l'ecoindex permet de déduire un indicateur normé (de A à G) permettant de décrire l'empreinte environnementale d'une page. 18 | ecoindex_body_end_pre: Cette synthèse est basée sur l'analyse de 19 | ecoindex_body_end_mid: pages du site internet 20 | ecoindex_body_end_suf: Plus d'infos sur Ecoindex.fr
21 | 22 | requests_body: | 23 | Le nombre de requêtes HTTP donne une bonne idée de la charge serveur. 24 | Pour le dire vite et de façon caricaturale, plus le nombre de requêtes est important pour une même page et plus il faudra de serveurs pour servir cette page. 25 | 26 | size_body: | 27 | Le poids des données transférées (Ko) témoigne des efforts à faire pour transporter la page jusqu’au navigateur. 28 | Dans cette première version, nous considérons que la connexion est de type Wi-Fi via ADSL. 29 | Mais dans la prochaine version nous prendrons en compte différents types de connexions, notamment 4G. 30 | En effet, une connexion 4G nécessite jusqu’à 23 fois plus d’énergie pour transporter la même quantité de données qu’une connexion ADSL. 31 | 32 | nodes_body: 33 | Le nombre d’éléments du DOM témoigne de la complexité du site et donc, a priori, 34 | des efforts à faire par le navigateur pour afficher la page. 35 | Plus le site est complexe et plus il faut de puissance (notamment CPU) pour l’afficher. 36 | Et plus on contribue à raccourcir la durée de vie de l’ordinateur sur lequel il s’exécute. 37 | A l’inverse, un site particulièrement simple et léger contribue à allonger la durée de vie de l’équipement car il nécessite peu de mémoire et un “petit” processeur. 38 | 39 | histograms: 40 | grade: 41 | title: Répartition écoindex 42 | xlabel: Ecoindex 43 | ylabel: Nombre de pages 44 | size: 45 | title: Répartition du poids des pages 46 | xlabel: Poids des pages (Ko) 47 | ylabel: Nombre de pages 48 | nodes: 49 | title: Répartition des éléments du DOM par page 50 | xlabel: Nombre d'éléments du DOM 51 | ylabel: Nombre de pages 52 | requests: 53 | title: Répartition des requêtes par page 54 | xlabel: Nombre de requêtes 55 | ylabel: Nombre de pages 56 | 57 | footer: Ecoindex_cli par le collectif Numérique Responsable 58 | 59 | good_result: 😃 Très bon résultat ! 60 | bad_result: 😞 Hum, on peut mieux faire... 
61 | better_than: ", c'est mieux que" 62 | worse_than: ", c'est moins bien que" 63 | my_median: Ma valeur médiane 64 | target_median: Valeur médiane cible 65 | global_median: Valeur médiane globale 66 | -------------------------------------------------------------------------------- /components/ecoindex/utils/files.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from csv import DictWriter 3 | from json import dump 4 | from os import makedirs 5 | from os.path import dirname, exists 6 | 7 | from ecoindex.models import ExportFormat, Language, Result 8 | from yaml import safe_load as load_yaml 9 | 10 | 11 | def create_folder(path: str) -> None: 12 | if not exists(path): 13 | makedirs(path) 14 | 15 | 16 | class File(ABC): 17 | def __init__( 18 | self, 19 | filename: str, 20 | results: list[Result], 21 | export_format: ExportFormat | None = ExportFormat.csv, 22 | ): 23 | self.filename = filename 24 | self.results = results 25 | self.export_format = export_format 26 | 27 | @abstractmethod 28 | def write(self) -> None: 29 | pass 30 | 31 | 32 | class CsvFile(File): 33 | def write(self) -> None: 34 | headers = self.results[0].__dict__ 35 | 36 | with open(self.filename, "w") as fp: 37 | writer = DictWriter(fp, fieldnames=headers) 38 | 39 | writer.writeheader() 40 | for ecoindex in self.results: 41 | writer.writerow(ecoindex.__dict__) 42 | 43 | 44 | class JsonFile(File): 45 | def write(self) -> None: 46 | with open(self.filename, "w") as fp: 47 | dump( 48 | obj=[ecoindex.__dict__ for ecoindex in self.results], 49 | fp=fp, 50 | indent=4, 51 | default=str, 52 | ) 53 | 54 | 55 | def write_results_to_file( 56 | filename: str, 57 | results: list[Result], 58 | export_format: ExportFormat | None = ExportFormat.csv, 59 | ) -> None: 60 | if export_format == ExportFormat.csv: 61 | CsvFile(filename=filename, results=results, export_format=export_format).write() 62 | 63 | if export_format == ExportFormat.json: 64 | JsonFile( 65 | filename=filename, results=results, export_format=export_format 66 | ).write() 67 | 68 | 69 | def write_urls_to_file(file_prefix: str, urls: list[str]) -> None: 70 | tmp_input_folder = "/tmp/ecoindex-cli/input" 71 | create_folder(tmp_input_folder) 72 | with open( 73 | file=f"{tmp_input_folder}/{file_prefix}.csv", mode="w" 74 | ) as input_urls_file: 75 | for url in urls: 76 | input_urls_file.write(f"{str(url).strip()}\n") 77 | 78 | 79 | def get_translations(language: Language) -> dict: 80 | filename = f"{dirname(__file__)}/cli_translations/{language.value}.yml" 81 | with open(filename) as fp: 82 | return load_yaml(fp) 83 | -------------------------------------------------------------------------------- /components/ecoindex/utils/screenshots.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ecoindex.models import ScreenShot 4 | from PIL import Image 5 | 6 | 7 | async def convert_screenshot_to_webp(screenshot: ScreenShot) -> None: 8 | image = Image.open(rf"{screenshot.get_png()}") 9 | width, height = image.size 10 | ratio = 800 / height if width > height else 600 / width 11 | 12 | image.convert("RGB").resize(size=(int(width * ratio), int(height * ratio))).save( 13 | rf"{screenshot.get_webp()}", 14 | format="webp", 15 | ) 16 | os.unlink(screenshot.get_png()) 17 | 18 | 19 | async def set_screenshot_rights( 20 | screenshot: ScreenShot, uid: int | None = None, gid: int | None = None 21 | ) -> None: 22 | if uid is not None and gid is not None:  # uid/gid 0 (root) are valid values 23 | 
os.chown(path=screenshot.get_webp(), uid=uid, gid=gid) 24 | -------------------------------------------------------------------------------- /components/ecoindex/worker_component/__init__.py: -------------------------------------------------------------------------------- 1 | from celery import Celery 2 | from ecoindex.config.settings import Settings 3 | 4 | app: Celery = Celery( 5 | "tasks", 6 | broker=f"redis://{Settings().REDIS_CACHE_HOST}:6379/0", 7 | backend=f"redis://{Settings().REDIS_CACHE_HOST}:6379/1", 8 | broker_connection_retry=True, 9 | broker_connection_retry_on_startup=True, 10 | broker_connection_max_retries=10, 11 | ) 12 | -------------------------------------------------------------------------------- /development/ecoindex_compute.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pprint import pprint 3 | 4 | from ecoindex.compute import compute_ecoindex 5 | 6 | ecoindex = asyncio.run(compute_ecoindex(nodes=100, size=100, requests=100)) 7 | pprint(ecoindex) 8 | -------------------------------------------------------------------------------- /development/ecoindex_scraper.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pprint import pprint 3 | from uuid import uuid1 4 | 5 | from ecoindex.models.compute import ScreenShot 6 | from ecoindex.scraper import EcoindexScraper 7 | 8 | scraper = EcoindexScraper( 9 | url="https://www.kiabi.com", 10 | screenshot=ScreenShot(id=str(uuid1()), folder="./screenshots"), 11 | ) 12 | 13 | result = asyncio.run(scraper.get_page_analysis()) 14 | all_requests = asyncio.run(scraper.get_all_requests()) 15 | requests_by_category = asyncio.run(scraper.get_requests_by_category()) 16 | 17 | pprint(result) 18 | -------------------------------------------------------------------------------- /development/scraper_test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from concurrent.futures import ThreadPoolExecutor, as_completed 3 | 4 | import pandas as pd 5 | from ecoindex.scraper import EcoindexScraper 6 | from haralyzer import HarParser 7 | from slugify import slugify 8 | 9 | 10 | async def get_page_analysis(url: str): 11 | scraper = EcoindexScraper(url=url) 12 | return ( 13 | await scraper.get_page_analysis(), 14 | await scraper.get_all_requests(), 15 | await scraper.get_requests_by_category(), 16 | scraper.har_temp_file_path, 17 | ) 18 | 19 | 20 | def run_page_analysis(url: str, index: int): 21 | analysis, requests, aggregation, har_file_path = asyncio.run(get_page_analysis(url)) 22 | 23 | return index, analysis, requests, aggregation, har_file_path 24 | 25 | 26 | with ThreadPoolExecutor(max_workers=8) as executor: 27 | future_to_analysis = {} 28 | 29 | urls = ["https://www.graphic-sud.com/", "https://federiconavarrete.com/"] 30 | i = 0 31 | 32 | for url in urls: 33 | print(f"Starting ecoindex {i} analysis") 34 | future_to_analysis[ 35 | executor.submit( 36 | run_page_analysis, 37 | url, 38 | i, 39 | ) 40 | ] = url 41 | i += 1 42 | 43 | for future in as_completed(future_to_analysis): 44 | try: 45 | index, analysis, requests, aggregation, har_file_path = future.result() 46 | 47 | har_parser = HarParser.from_file(har_file_path) 48 | for page in har_parser.pages: 49 | haralyzer_data = [ 50 | { 51 | "type": "audio", 52 | "count": len(page.audio_files), 53 | "size": page.audio_size_trans, 54 | }, 55 | { 56 | "type": "css", 57 | "count": len(page.css_files), 58 | "size": 
page.css_size_trans, 59 | }, 60 | { 61 | "type": "javascript", 62 | "count": len(page.js_files), 63 | "size": page.js_size_trans, 64 | }, 65 | {"type": "page", "count": 1, "size": page.page_size_trans}, 66 | { 67 | "type": "image", 68 | "count": len(page.image_files), 69 | "size": page.image_size_trans, 70 | }, 71 | { 72 | "type": "video", 73 | "count": len(page.video_files), 74 | "size": page.video_size_trans, 75 | }, 76 | { 77 | "type": "other", 78 | "count": len(page.text_files), 79 | "size": page.text_size_trans, 80 | }, 81 | {"type": "html", "count": len(page.html_files), "size": None}, 82 | { 83 | "type": "total", 84 | "count": len(page.entries), 85 | "size": page.page_size_trans, 86 | }, 87 | ] 88 | 89 | df_haralyzer = pd.DataFrame( 90 | haralyzer_data, columns=["type", "count", "size"] 91 | ) 92 | df_haralyzer["size"] = df_haralyzer["size"] / 1000 93 | 94 | flatten_aggregation = [ 95 | { 96 | "type": category, 97 | "count": item["total_count"], 98 | "size": item["total_size"], 99 | } 100 | for category, item in aggregation.model_dump().items() 101 | ] 102 | flatten_aggregation.append( 103 | { 104 | "type": "total", 105 | "count": analysis.requests, 106 | "size": analysis.size * 1000, 107 | } 108 | ) 109 | 110 | df = pd.DataFrame(flatten_aggregation, columns=["type", "count", "size"]) 111 | df["size"] = df["size"] / 1000 112 | 113 | joined_df = pd.merge( 114 | df, 115 | df_haralyzer, 116 | on="type", 117 | how="left", 118 | suffixes=("_ecoindex", "_haralyzer"), 119 | ) 120 | 121 | print() 122 | print(page.url) 123 | print(har_file_path) 124 | print(df) 125 | print(joined_df) 126 | print() 127 | 128 | joined_df.to_csv(f"joined_ecoindex_{slugify(page.url)}.csv", index=False) 129 | 130 | except Exception as e: 131 | print(e) 132 | -------------------------------------------------------------------------------- /docs/images/ecoindex-python-fullstack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/docs/images/ecoindex-python-fullstack.png -------------------------------------------------------------------------------- /poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | path = ".venv" 3 | in-project = true 4 | -------------------------------------------------------------------------------- /projects/ecoindex_api/.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !.env 3 | !api 4 | !alembic.ini 5 | !common 6 | !db 7 | !docker 8 | !pyproject.toml 9 | !poetry.lock 10 | !settings.py 11 | !worker 12 | !dist 13 | !alembic -------------------------------------------------------------------------------- /projects/ecoindex_api/.env.template: -------------------------------------------------------------------------------- 1 | # API_PORT=8001 2 | # API_VERSION=latest 3 | # DAILY_LIMIT_PER_HOST=10 4 | # DB_HOST=db 5 | # DB_NAME=ecoindex 6 | # DB_PASSWORD=ecoindex 7 | # DB_PORT=3306 8 | # DB_USER=ecoindex 9 | # DEBUG=1 10 | # ENABLE_SCREENSHOT=1 11 | # EXCLUDED_HOSTS='["localhost","127.0.0.1"]' 12 | # FLOWER_BASIC_AUTH=ecoindex:password 13 | # FLOWER_PORT=5555 14 | # GLITCHTIP_DSN= 15 | # REDIS_CACHE_HOST=redis 16 | # SCREENSHOTS_GID=1006 17 | # SCREENSHOTS_UID=1006 18 | # TZ=Europe/Paris 19 | # API_KEYS_BATCH='{"key": "random_key", "name":"Example key", "description": "This key is used to authenticate batch import of results as an example", 
"source": "ecoindex.fr"}' -------------------------------------------------------------------------------- /projects/ecoindex_api/.gitignore: -------------------------------------------------------------------------------- 1 | docker-compose.yml 2 | .env 3 | *.webp -------------------------------------------------------------------------------- /projects/ecoindex_api/Taskfile.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | includes: 4 | poetry: ../../tasks/PoetryTaskfile.yml 5 | docker: ../../tasks/DockerTaskfile.yml 6 | 7 | vars: 8 | PROJECT_NAME: api 9 | 10 | tasks: 11 | update-openapi: 12 | desc: Update the openapi.json file 13 | cmds: 14 | - echo "Update the openapi.json file" 15 | - poetry run update-openapi > openapi.json 16 | - echo "Done!" 17 | silent: true 18 | 19 | bump: 20 | desc: Bump the API version with rules:major, minor, patch, premajor, preminor, prepatch or prerelease. 21 | cmds: 22 | - task: poetry:bump 23 | vars: 24 | VERSION_FILE_PATH: "../../bases/ecoindex/backend/VERSION" 25 | VERSION_RULE: "{{.CLI_ARGS}}" 26 | silent: true 27 | 28 | docker-build-backend: 29 | desc: Build the backend docker image 30 | vars: 31 | VERSION: 32 | sh: poetry version -s 33 | cmds: 34 | - echo "Build the backend docker image with version {{.VERSION}}" 35 | - task: docker:build 36 | vars: 37 | VERSION: "{{.VERSION}}" 38 | NAME: "api-backend" 39 | OPTIONS: --build-arg="wheel=ecoindex_api-{{.VERSION}}-py3-none-any.whl" -f docker/backend/dockerfile 40 | silent: true 41 | 42 | docker-build-worker: 43 | desc: Build the worker docker image 44 | vars: 45 | VERSION: 46 | sh: poetry version -s 47 | cmds: 48 | - echo "Build the worker docker image with version {{.VERSION}}" 49 | - task: docker:build 50 | vars: 51 | VERSION: "{{.VERSION}}" 52 | NAME: "api-worker" 53 | OPTIONS: --build-arg="wheel=ecoindex_api-{{.VERSION}}-py3-none-any.whl" -f docker/worker/dockerfile 54 | silent: true 55 | 56 | docker-build-all: 57 | desc: Build poetry project and then build the docker images 58 | cmds: 59 | - task: poetry:build 60 | - task: docker-build-images 61 | silent: true 62 | 63 | docker-build-images: 64 | internal: true 65 | desc: Build the docker images 66 | deps: [docker-build-backend, docker-build-worker] 67 | cmds: 68 | - echo "Build the docker images" 69 | silent: true 70 | 71 | docker-push-backend: 72 | desc: Push the backend docker image 73 | vars: 74 | VERSION: 75 | sh: poetry version -s 76 | cmds: 77 | - echo "Push the backend docker image with version {{.VERSION}}" 78 | - task: docker:push 79 | vars: 80 | VERSION: "{{.VERSION}}" 81 | NAME: "api-backend" 82 | silent: true 83 | 84 | docker-push-worker: 85 | desc: Push the worker docker image 86 | vars: 87 | VERSION: 88 | sh: poetry version -s 89 | cmds: 90 | - echo "Push the worker docker image with version {{.VERSION}}" 91 | - task: docker:push 92 | vars: 93 | VERSION: "{{.VERSION}}" 94 | NAME: "api-worker" 95 | silent: true 96 | 97 | docker-push-all: 98 | desc: Push the docker images 99 | deps: [docker-push-backend, docker-push-worker] 100 | cmds: 101 | - echo "Push the docker images" 102 | silent: true 103 | 104 | docker-pull: 105 | desc: Pull the docker images 106 | cmds: 107 | - echo "Pull the docker images" 108 | - docker compose pull 109 | silent: true 110 | 111 | docker-up: 112 | desc: Start the docker-compose API 113 | deps: [init-env, init-docker-compose] 114 | cmds: 115 | - docker compose up {{.CLI_ARGS}} 116 | silent: true 117 | 118 | docker-down: 119 | desc: Stop the 
docker-compose API 120 | preconditions: 121 | - test -f docker-compose.yml 122 | cmds: 123 | - docker compose down {{.CLI_ARGS}} 124 | silent: true 125 | 126 | docker-exec: 127 | desc: Execute a command in the docker-compose API 128 | preconditions: 129 | - test -f docker-compose.yml 130 | - test -f .env 131 | cmds: 132 | - docker compose exec {{.CLI_ARGS}} 133 | silent: true 134 | 135 | docker-logs: 136 | desc: Show the logs of the docker-compose API 137 | preconditions: 138 | - test -f docker-compose.yml 139 | cmds: 140 | - docker compose logs {{.CLI_ARGS}} 141 | silent: true 142 | 143 | migration-create: 144 | desc: Create a new alembic migration 145 | cmds: 146 | - poetry run alembic revision --autogenerate -m "{{.CLI_ARGS}}" 147 | silent: true 148 | 149 | migration-upgrade: 150 | desc: Upgrade the database to the last migration 151 | cmds: 152 | - poetry run alembic upgrade head 153 | silent: true 154 | 155 | start-redis: 156 | internal: true 157 | cmds: 158 | - docker run --rm -p 6379:6379 -d redis:alpine 159 | status: 160 | - docker ps | grep redis 161 | silent: true 162 | 163 | start-worker: 164 | deps: [start-redis] 165 | cmds: 166 | - poetry run watchmedo auto-restart --directory=../.. --pattern=worker/*.py --recursive -- poetry run celery -- -A ecoindex.worker.tasks worker --loglevel=DEBUG --queues=ecoindex,ecoindex_batch -E 167 | silent: true 168 | 169 | start-backend: 170 | cmds: 171 | - poetry run uvicorn ecoindex.backend.main:app --host 0.0.0.0 --port 8000 --reload --reload-dir ../.. 172 | silent: true 173 | 174 | start-dev: 175 | deps: [start-backend, start-worker] 176 | desc: Start the backend and the worker 177 | cmds: 178 | - echo "Starting the backend and the worker" 179 | silent: true 180 | 181 | init-env: 182 | desc: Initialize the .env file 183 | preconditions: 184 | - test -f .env.template 185 | cmds: 186 | - echo "Initialize the .env file" 187 | - cp .env.template .env 188 | silent: true 189 | internal: true 190 | status: 191 | - test -f .env 192 | 193 | init-dev-project: 194 | desc: Initialize the project for development 195 | cmds: 196 | - echo "Initialize the project for development" 197 | - echo "Install poetry dependencies" 198 | - task: poetry:install 199 | - echo "Install playwright" 200 | - task: poetry:install-playwright 201 | - echo "Create the environment file" 202 | - task: init-env 203 | - echo "Create the database" 204 | - task: migration-upgrade 205 | - echo "All done!" 206 | silent: true 207 | 208 | init-docker-compose: 209 | desc: Initialize the docker-compose API 210 | internal: true 211 | cmds: 212 | - echo "Initialize the docker-compose API" 213 | - cp docker-compose.yml.template docker-compose.yml 214 | silent: true 215 | status: 216 | - test -f docker-compose.yml 217 | 218 | monitor-queues: 219 | desc: Show the queues of the docker-compose API 220 | cmds: 221 | - poetry run celery --app=ecoindex.worker.tasks events 222 | -------------------------------------------------------------------------------- /projects/ecoindex_api/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 
2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = ./alembic 6 | 7 | # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s 8 | # Uncomment the line below if you want the files to be prepended with date and time 9 | # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file 10 | # for all available tokens 11 | # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s 12 | 13 | # sys.path path, will be prepended to sys.path if present. 14 | # defaults to the current working directory. 15 | prepend_sys_path = . 16 | 17 | # timezone to use when rendering the date within the migration file 18 | # as well as the filename. 19 | # If specified, requires the python-dateutil library that can be 20 | # installed by adding `alembic[tz]` to the pip requirements 21 | # string value is passed to dateutil.tz.gettz() 22 | # leave blank for localtime 23 | # timezone = 24 | 25 | # max length of characters to apply to the 26 | # "slug" field 27 | # truncate_slug_length = 40 28 | 29 | # set to 'true' to run the environment during 30 | # the 'revision' command, regardless of autogenerate 31 | # revision_environment = false 32 | 33 | # set to 'true' to allow .pyc and .pyo files without 34 | # a source .py file to be detected as revisions in the 35 | # versions/ directory 36 | # sourceless = false 37 | 38 | # version location specification; This defaults 39 | # to db/alembic/versions. When using multiple version 40 | # directories, initial revisions must be specified with --version-path. 41 | # The path separator used here should be the separator specified by "version_path_separator" below. 42 | # version_locations = %(here)s/bar:%(here)s/bat:db/alembic/versions 43 | 44 | # version path separator; As mentioned above, this is the character used to split 45 | # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. 46 | # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. 47 | # Valid values for version_path_separator are: 48 | # 49 | # version_path_separator = : 50 | # version_path_separator = ; 51 | # version_path_separator = space 52 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects. 53 | 54 | # the output encoding used when revision files 55 | # are written from script.py.mako 56 | # output_encoding = utf-8 57 | 58 | sqlalchemy.url = 59 | 60 | 61 | [post_write_hooks] 62 | # post_write_hooks defines scripts or Python functions that are run 63 | # on newly generated revision scripts. 
See the documentation for further 64 | # detail and examples 65 | 66 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 67 | # hooks = black 68 | # black.type = console_scripts 69 | # black.entrypoint = black 70 | # black.options = -l 79 REVISION_SCRIPT_FILENAME 71 | 72 | # Logging configuration 73 | [loggers] 74 | keys = root,sqlalchemy,alembic 75 | 76 | [handlers] 77 | keys = console 78 | 79 | [formatters] 80 | keys = generic 81 | 82 | [logger_root] 83 | level = WARN 84 | handlers = console 85 | qualname = 86 | 87 | [logger_sqlalchemy] 88 | level = WARN 89 | handlers = 90 | qualname = sqlalchemy.engine 91 | 92 | [logger_alembic] 93 | level = INFO 94 | handlers = 95 | qualname = alembic 96 | 97 | [handler_console] 98 | class = StreamHandler 99 | args = (sys.stderr,) 100 | level = NOTSET 101 | formatter = generic 102 | 103 | [formatter_generic] 104 | format = %(levelname)-5.5s [%(name)s] %(message)s 105 | datefmt = %H:%M:%S 106 | -------------------------------------------------------------------------------- /projects/ecoindex_api/alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /projects/ecoindex_api/alembic/env.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from logging.config import fileConfig 3 | 4 | from alembic import context 5 | from ecoindex.config import Settings 6 | from ecoindex.models.api import * # noqa: F403 7 | from sqlalchemy import pool 8 | from sqlalchemy.engine import Connection 9 | from sqlalchemy.ext.asyncio import async_engine_from_config 10 | from sqlmodel import SQLModel 11 | 12 | # this is the Alembic Config object, which provides 13 | # access to the values within the .ini file in use. 14 | config = context.config 15 | 16 | config.set_main_option("sqlalchemy.url", Settings().DATABASE_URL) 17 | 18 | 19 | # Interpret the config file for Python logging. 20 | # This line sets up loggers basically. 21 | if config.config_file_name is not None: 22 | fileConfig(config.config_file_name) 23 | 24 | # add your model's MetaData object here 25 | # for 'autogenerate' support 26 | # from myapp import mymodel 27 | # target_metadata = mymodel.Base.metadata 28 | target_metadata = SQLModel.metadata 29 | 30 | # other values from the config, defined by the needs of env.py, 31 | # can be acquired: 32 | # my_important_option = config.get_main_option("my_important_option") 33 | # ... etc. 34 | 35 | 36 | def run_migrations_offline() -> None: 37 | """Run migrations in 'offline' mode. 38 | 39 | This configures the context with just a URL 40 | and not an Engine, though an Engine is acceptable 41 | here as well. By skipping the Engine creation 42 | we don't even need a DBAPI to be available. 43 | 44 | Calls to context.execute() here emit the given string to the 45 | script output. 
46 | 47 | """ 48 | url = config.get_main_option("sqlalchemy.url") 49 | context.configure( 50 | url=url, 51 | target_metadata=target_metadata, 52 | literal_binds=True, 53 | compare_type=True, 54 | dialect_opts={"paramstyle": "named"}, 55 | ) 56 | 57 | with context.begin_transaction(): 58 | context.run_migrations() 59 | 60 | 61 | def do_run_migrations(connection: Connection) -> None: 62 | context.configure(connection=connection, target_metadata=target_metadata) 63 | 64 | with context.begin_transaction(): 65 | context.run_migrations() 66 | 67 | 68 | async def run_async_migrations() -> None: 69 | """In this scenario we need to create an Engine 70 | and associate a connection with the context. 71 | 72 | """ 73 | 74 | connectable = async_engine_from_config( 75 | config.get_section(config.config_ini_section, {}), 76 | prefix="sqlalchemy.", 77 | poolclass=pool.NullPool, 78 | ) 79 | 80 | async with connectable.connect() as connection: 81 | await connection.run_sync(do_run_migrations) 82 | 83 | await connectable.dispose() 84 | 85 | 86 | def run_migrations_online() -> None: 87 | """Run migrations in 'online' mode.""" 88 | 89 | asyncio.run(run_async_migrations()) 90 | 91 | 92 | if context.is_offline_mode(): 93 | run_migrations_offline() 94 | else: 95 | run_migrations_online() 96 | -------------------------------------------------------------------------------- /projects/ecoindex_api/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | import sqlmodel 11 | ${imports if imports else ""} 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = ${repr(up_revision)} 15 | down_revision = ${repr(down_revision)} 16 | branch_labels = ${repr(branch_labels)} 17 | depends_on = ${repr(depends_on)} 18 | 19 | 20 | def upgrade() -> None: 21 | ${upgrades if upgrades else "pass"} 22 | 23 | 24 | def downgrade() -> None: 25 | ${downgrades if downgrades else "pass"} 26 | -------------------------------------------------------------------------------- /projects/ecoindex_api/alembic/versions/5afa2faea43f_.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Revision ID: 5afa2faea43f 4 | Revises: 7eaafaa65b32 5 | Create Date: 2025-01-14 14:12:47.013413 6 | 7 | """ 8 | import sqlalchemy as sa 9 | import sqlmodel 10 | from alembic import op 11 | 12 | revision = "5afa2faea43f" 13 | down_revision = "7eaafaa65b32" 14 | branch_labels = None 15 | depends_on = None 16 | 17 | 18 | def upgrade() -> None: 19 | op.add_column( 20 | "apiecoindex", 21 | sa.Column("source", sqlmodel.sql.sqltypes.AutoString(), nullable=True), # type: ignore 22 | ) 23 | 24 | 25 | def downgrade() -> None: 26 | op.drop_column("apiecoindex", "source") 27 | -------------------------------------------------------------------------------- /projects/ecoindex_api/alembic/versions/7eaafaa65b32_update_url_field_type_to_text.py: -------------------------------------------------------------------------------- 1 | """Update URL field type to text 2 | 3 | Revision ID: 7eaafaa65b32 4 | Revises: e83263a5def4 5 | Create Date: 2023-03-28 11:24:39.089063 6 | 7 | """ 8 | import sqlalchemy as sa 9 | from alembic import op 10 | 11 | revision = "7eaafaa65b32" 12 | down_revision = "e83263a5def4" 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade() -> None: 18 | with op.batch_alter_table("apiecoindex", schema=None) as batch_op: 19 | batch_op.alter_column( 20 | "url", 21 | existing_type=sa.String(length=2048), 22 | type_=sa.Text(), 23 | ) 24 | 25 | 26 | def downgrade() -> None: 27 | with op.batch_alter_table("apiecoindex", schema=None) as batch_op: 28 | batch_op.alter_column( 29 | "url", 30 | existing_type=sa.Text(), 31 | type_=sa.String(length=2048), 32 | ) 33 | -------------------------------------------------------------------------------- /projects/ecoindex_api/alembic/versions/826abb0c4222_add_ecoindex_version_field.py: -------------------------------------------------------------------------------- 1 | """Add ecoindex_version field 2 | 3 | Revision ID: 826abb0c4222 4 | Revises: fd9a1f5662c8 5 | Create Date: 2022-09-12 17:39:44.209071 6 | 7 | """ 8 | import sqlalchemy as sa 9 | import sqlmodel 10 | from alembic import op 11 | from ecoindex.database.helper import column_exists 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "826abb0c4222" 15 | down_revision = "fd9a1f5662c8" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade() -> None: 21 | if not column_exists(op.get_bind(), "apiecoindex", "ecoindex_version"): 22 | op.add_column( 23 | "apiecoindex", 24 | sa.Column( 25 | "ecoindex_version", sqlmodel.sql.sqltypes.AutoString(), nullable=True 26 | ), 27 | ) 28 | 29 | 30 | def downgrade() -> None: 31 | if column_exists(op.get_bind(), "apiecoindex", "ecoindex_version"): 32 | op.drop_column("apiecoindex", "ecoindex_version") 33 | -------------------------------------------------------------------------------- /projects/ecoindex_api/alembic/versions/e83263a5def4_add_index_id_and_host.py: -------------------------------------------------------------------------------- 1 | """Add index ID and host 2 | 3 | Revision ID: e83263a5def4 4 | Revises: 826abb0c4222 5 | Create Date: 2023-02-13 15:58:55.102285 6 | 7 | """ 8 | import sqlalchemy as sa 9 | import sqlmodel # noqa: F401 10 | from alembic import op 11 | from ecoindex.database.helper import index_exists 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "e83263a5def4" 15 | down_revision = "826abb0c4222" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade() -> None: 21 | with op.batch_alter_table("apiecoindex", schema=None) as batch_op: 22 | batch_op.alter_column( 23 | "id", 24 | existing_type=sqlmodel.sql.sqltypes.GUID(), 25 | nullable=False, 26 | ) 27 | batch_op.alter_column("version", existing_type=sa.INTEGER(), nullable=False) 28 | 29 | if not index_exists(op.get_bind(), "apiecoindex", "ix_apiecoindex_id"): 30 | op.create_index(op.f("ix_apiecoindex_id"), "apiecoindex", ["id"], unique=False) 31 | 32 | if not index_exists(op.get_bind(), "apiecoindex", "ix_apiecoindex_host"): 33 | op.create_index( 34 | op.f("ix_apiecoindex_host"), "apiecoindex", ["host"], unique=False 35 | ) 36 | 37 | 38 | def downgrade() -> None: 39 | if index_exists(op.get_bind(), "apiecoindex", "ix_apiecoindex_host"): 40 | op.drop_index(op.f("ix_apiecoindex_host"), table_name="apiecoindex") 41 | 42 | if index_exists(op.get_bind(), "apiecoindex", "ix_apiecoindex_id"): 43 | op.drop_index(op.f("ix_apiecoindex_id"), table_name="apiecoindex") 44 | 45 | with op.batch_alter_table("apiecoindex", schema=None) as batch_op: 46 | batch_op.alter_column("version", existing_type=sa.INTEGER(), nullable=True) 47 | batch_op.alter_column( 48 | "id", 49 | existing_type=sqlmodel.sql.sqltypes.GUID(), 50 | nullable=True, 51 | ) 52 | -------------------------------------------------------------------------------- /projects/ecoindex_api/alembic/versions/fd9a1f5662c8_first_migration.py: -------------------------------------------------------------------------------- 1 | """First migration 2 | 3 | Revision ID: fd9a1f5662c8 4 | Revises: 5 | Create Date: 2022-09-12 15:03:22.363502 6 | 7 | """ 8 | import sqlalchemy as sa 9 | import sqlmodel 10 | from alembic import op 11 | from ecoindex.database.helper import table_exists 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "fd9a1f5662c8" 15 | down_revision = None 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade() -> None: 21 | if not table_exists(op.get_bind(), "apiecoindex"): 22 | op.create_table( 23 | "apiecoindex", 24 | sa.Column("width", sa.Integer(), nullable=True), 25 | sa.Column("height", sa.Integer(), nullable=True), 26 | sa.Column("url", sqlmodel.sql.sqltypes.AutoString(), nullable=True), 27 | sa.Column("size", sa.Float(), nullable=False), 28 | sa.Column("nodes", sa.Integer(), nullable=False), 29 | sa.Column("requests", sa.Integer(), nullable=False), 30 | sa.Column("grade", sqlmodel.sql.sqltypes.AutoString(), nullable=True), 31 | sa.Column("score", sa.Float(), nullable=True), 32 | sa.Column("ges", sa.Float(), nullable=True), 33 | sa.Column("water", sa.Float(), nullable=True), 34 | sa.Column("date", sa.DateTime(), nullable=True), 35 | sa.Column("page_type", sqlmodel.sql.sqltypes.AutoString(), nullable=True), 36 | sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=True), 37 | sa.Column("host", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 38 | sa.Column("version", sa.Integer(), nullable=True), 39 | sa.Column("initial_ranking", sa.Integer(), nullable=False), 40 | sa.Column("initial_total_results", sa.Integer(), nullable=False), 41 | sa.PrimaryKeyConstraint("id"), 42 | ) 43 | 44 | 45 | def downgrade() -> None: 46 | if table_exists(op.get_bind(), "apiecoindex"): 47 | op.drop_table("apiecoindex") 48 | -------------------------------------------------------------------------------- /projects/ecoindex_api/docker-compose.yml.template: -------------------------------------------------------------------------------- 1 | services: 2 | db: 3 | image: mysql 4 | restart: always 5 | volumes: 6 | - db:/var/lib/mysql 7 | environment: 8 | MYSQL_DATABASE: ${DB_NAME:-ecoindex} 9 | MYSQL_USER: ${DB_USER:-ecoindex} 10 | MYSQL_PASSWORD: ${DB_PASSWORD:-ecoindex} 11 | MYSQL_ROOT_PASSWORD: ${DB_PASSWORD:-ecoindex} 12 | ports: 13 | - "${DB_PORT:-3306}:3306" 14 | healthcheck: 15 | test: mysqladmin ping -h 127.0.0.1 -u $$MYSQL_USER --password=$$MYSQL_PASSWORD 16 | timeout: 5s 17 | retries: 10 18 | interval: 2s 19 | 20 | backend: 21 | image: vvatelot/ecoindex-api-backend:${API_VERSION:-latest} 22 | restart: always 23 | env_file: 24 | - .env 25 | ports: 26 | - "${API_PORT:-8001}:8000" 27 | environment: 28 | DATABASE_URL: mysql+aiomysql://${DB_USER:-ecoindex}:${DB_PASSWORD:-ecoindex}@${DB_HOST:-db}/${DB_NAME:-ecoindex}?charset=utf8mb4 29 | DEBUG: ${DEBUG:-0} 30 | REDIS_CACHE_HOST: ${REDIS_CACHE_HOST:-redis} 31 | TZ: ${TZ:-Europe/Paris} 32 | depends_on: 33 | db: 34 | condition: service_healthy 35 | redis: 36 | condition: service_started 37 | volumes: 38 | - ./screenshots:/code/screenshots 39 | 40 | worker: 41 | image: vvatelot/ecoindex-api-worker:${API_VERSION:-latest} 42 | restart: always 43 | env_file: 44 | - .env 45 | environment: 46 | DATABASE_URL: mysql+aiomysql://${DB_USER:-ecoindex}:${DB_PASSWORD:-ecoindex}@${DB_HOST:-db}/${DB_NAME:-ecoindex}?charset=utf8mb4 47 | DEBUG: ${DEBUG:-0} 48 | REDIS_CACHE_HOST: ${REDIS_CACHE_HOST:-redis} 49 | TZ: ${TZ:-Europe/Paris} 50 | ENABLE_SCREENSHOT: ${ENABLE_SCREENSHOT:-0} 51 | depends_on: 52 | db: 53 | condition: service_healthy 54 | redis: 55 | condition: service_started 56 | volumes: 57 | - ./screenshots:/code/screenshots 58 | 59 | redis: 60 | image: redis:alpine 61 | restart: always 62 | command: redis-server --save 20 1 --loglevel warning 63 | volumes: 64 | - redis:/data 65 | 66 | flower: 67 | image: mher/flower 68 | ports: 69 | - 
"${FLOWER_PORT:-5555}:5555" 70 | environment: 71 | CELERY_BROKER_URL: redis://${REDIS_CACHE_HOST:-redis}/0 72 | FLOWER_BASIC_AUTH: ${FLOWER_BASIC_AUTH:-ecoindex:ecoindex} 73 | 74 | volumes: 75 | db: 76 | redis: 77 | -------------------------------------------------------------------------------- /projects/ecoindex_api/docker/backend/dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim AS requirements-stage 2 | 3 | WORKDIR /tmp 4 | 5 | RUN pip install poetry==1.8.5 6 | COPY pyproject.toml poetry.lock /tmp/ 7 | RUN poetry export --with=backend --output=requirements.txt --without-hashes 8 | 9 | 10 | FROM python:3.12-slim 11 | 12 | ARG wheel=ecoindex_api-3.1.0-py3-none-any.whl 13 | 14 | WORKDIR /code 15 | 16 | # Needed for git repositories dependencies 17 | RUN apt-get update && apt-get install -y --no-install-recommends git \ 18 | && rm -rf /var/lib/apt/lists/* 19 | 20 | COPY alembic.ini alembic.ini 21 | ADD alembic alembic 22 | 23 | COPY --from=requirements-stage /tmp/requirements.txt /code/requirements.txt 24 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt 25 | 26 | COPY dist/$wheel $wheel 27 | RUN pip install --no-cache-dir $wheel 28 | RUN pip install --no-cache-dir aiomysql gunicorn 29 | 30 | RUN rm -rf $wheel requirements.txt /tmp/dist /var/lib/{apt,dpkg,cache,log}/ 31 | 32 | COPY docker/backend/entrypoint.sh /usr/bin/entrypoint 33 | RUN chmod +x /usr/bin/entrypoint 34 | 35 | ENTRYPOINT [ "/usr/bin/entrypoint" ] -------------------------------------------------------------------------------- /projects/ecoindex_api/docker/backend/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | alembic upgrade head 4 | gunicorn ecoindex.backend.main:app --timeout 0 --workers 4 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000 -------------------------------------------------------------------------------- /projects/ecoindex_api/docker/worker/dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim AS requirements-stage 2 | 3 | WORKDIR /tmp 4 | 5 | RUN pip install poetry==1.8.5 6 | COPY pyproject.toml poetry.lock /tmp/ 7 | RUN poetry export --with=worker --output=requirements.txt --without-hashes 8 | 9 | 10 | FROM python:3.12-slim 11 | 12 | ARG wheel=ecoindex_api-3.1.0-py3-none-any.whl 13 | 14 | WORKDIR /code 15 | 16 | RUN apt-get update && apt-get install -y --no-install-recommends git \ 17 | && rm -rf /var/lib/apt/lists/* 18 | 19 | COPY alembic.ini alembic.ini 20 | ADD alembic alembic 21 | 22 | COPY --from=requirements-stage /tmp/requirements.txt /code/requirements.txt 23 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt 24 | 25 | COPY dist/$wheel $wheel 26 | RUN pip install --no-cache-dir $wheel 27 | RUN pip install --no-cache-dir aiomysql 28 | 29 | RUN playwright install chromium --with-deps 30 | 31 | RUN rm -rf $wheel requirements.txt /tmp/dist /var/lib/{apt,dpkg,cache,log}/ 32 | 33 | COPY docker/worker/entrypoint.sh /usr/bin/entrypoint 34 | RUN chmod +x /usr/bin/entrypoint 35 | 36 | ENTRYPOINT [ "/usr/bin/entrypoint" ] -------------------------------------------------------------------------------- /projects/ecoindex_api/docker/worker/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | celery -A ecoindex.worker.tasks worker --queues=ecoindex,ecoindex_batch 
-------------------------------------------------------------------------------- /projects/ecoindex_api/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "ecoindex_api" 3 | version = "3.11.1" 4 | description = "REST API to expose Ecoindex" 5 | authors = ['Vincent Vatelot '] 6 | license = "Creative Commons BY-NC-ND" 7 | homepage = "http://www.ecoindex.fr" 8 | repository = "https://github.com/cnumr/ecoindex_api" 9 | include = ["LICENSE"] 10 | 11 | packages = [ 12 | { include = "ecoindex/backend", from = "../../bases" }, 13 | { include = "ecoindex/compute", from = "../../components" }, 14 | { include = "ecoindex/config", from = "../../components" }, 15 | { include = "ecoindex/data", from = "../../components" }, 16 | { include = "ecoindex/database", from = "../../components" }, 17 | { include = "ecoindex/exceptions", from = "../../components" }, 18 | { include = "ecoindex/models", from = "../../components" }, 19 | { include = "ecoindex/scraper", from = "../../components" }, 20 | { include = "ecoindex/utils", from = "../../components" }, 21 | { include = "ecoindex/worker_component", from = "../../components" }, 22 | { include = "ecoindex/worker", from = "../../bases" }, 23 | ] 24 | 25 | [tool.poetry.dependencies] 26 | aiofile = "^3.8.8" 27 | alembic = "^1.12.1" 28 | celery = "^5.3.4" 29 | fastapi = "^0.109.1" 30 | pillow = "^10.3.0" 31 | playwright = "^1.39.0" 32 | playwright-stealth = "^1.0.6" 33 | pydantic = { version = ">=2.1.1,<=2.4.2", extras = ["email"] } 34 | pydantic-settings = "^2.0.3" 35 | python = "^3.10" 36 | pyyaml = "^6.0.1" 37 | redis = "^5.0.1" 38 | requests = "^2.32.2" 39 | sqlmodel = "^0.0.14" 40 | sentry-sdk = "^2.8.0" 41 | setuptools = "^75.6.0" 42 | cryptography = "^44.0.2" 43 | ua-generator = "^2.0.5" 44 | 45 | [tool.poetry.group.backend.dependencies] 46 | uvicorn = "^0.23.2" 47 | 48 | [tool.poetry.group.worker.dependencies] 49 | pillow = "^10.3.0" 50 | playwright = "^1.39.0" 51 | playwright-stealth = "^1.0.6" 52 | 53 | [tool.poetry.group.dev.dependencies] 54 | typing-extensions = "^4.8.0" 55 | aiosqlite = "^0.19.0" 56 | watchdog = "^6.0.0" 57 | 58 | [tool.poetry.scripts] 59 | update-openapi = "ecoindex.backend.scripts.openapi:main" 60 | 61 | [build-system] 62 | requires = ["poetry-core>=1.0.0"] 63 | build-backend = "poetry.core.masonry.api" 64 | -------------------------------------------------------------------------------- /projects/ecoindex_api/screenshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/projects/ecoindex_api/screenshots/.gitkeep -------------------------------------------------------------------------------- /projects/ecoindex_cli/.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .pytest_cache 3 | .ruff_cache 4 | .venv 5 | .vscode 6 | dockerfile -------------------------------------------------------------------------------- /projects/ecoindex_cli/Taskfile.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | includes: 4 | poetry: ../../tasks/PoetryTaskfile.yml 5 | docker: ../../tasks/DockerTaskfile.yml 6 | pypi: ../../tasks/PypiTaskFile.yml 7 | 8 | vars: 9 | PROJECT_NAME: cli 10 | 11 | tasks: 12 | bump: 13 | desc: Bump the CLI version with rules:major, minor, patch, premajor, preminor, prepatch or prerelease. 
14 | cmds: 15 | - task: poetry:bump 16 | vars: 17 | VERSION_FILE_PATH: "../../bases/ecoindex/cli/VERSION" 18 | VERSION_RULE: "{{.CLI_ARGS}}" 19 | silent: true 20 | 21 | docker-build: 22 | desc: Build the docker image 23 | deps: [poetry:build] 24 | vars: 25 | VERSION: 26 | sh: poetry version -s 27 | cmds: 28 | - task: docker:build 29 | vars: 30 | VERSION: "{{.VERSION}}" 31 | NAME: "{{.PROJECT_NAME}}" 32 | OPTIONS: --build-arg="wheel=ecoindex_cli-{{.VERSION}}-py3-none-any.whl" 33 | silent: true 34 | 35 | docker-push: 36 | desc: Push the docker image 37 | vars: 38 | VERSION: 39 | sh: poetry version -s 40 | cmds: 41 | - task: docker:push 42 | vars: 43 | VERSION: "{{.VERSION}}" 44 | NAME: "{{.PROJECT_NAME}}" 45 | silent: true -------------------------------------------------------------------------------- /projects/ecoindex_cli/doc/report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/projects/ecoindex_cli/doc/report.png -------------------------------------------------------------------------------- /projects/ecoindex_cli/dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim AS requirements-stage 2 | 3 | WORKDIR /tmp 4 | 5 | RUN pip install poetry==1.8.5 6 | COPY pyproject.toml poetry.lock /tmp/ 7 | RUN poetry export --output=requirements.txt --without-hashes 8 | 9 | 10 | FROM python:3.12-slim 11 | 12 | ARG wheel=ecoindex_cli-2.26.0a0-py3-none-any.whl 13 | ENV DOCKER_CONTAINER=True 14 | 15 | WORKDIR /code 16 | 17 | COPY --from=requirements-stage /tmp/requirements.txt /code/requirements.txt 18 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt 19 | 20 | COPY dist/$wheel $wheel 21 | RUN pip install --no-cache-dir $wheel 22 | 23 | RUN playwright install chromium --with-deps 24 | 25 | RUN rm -rf $wheel requirements.txt /tmp/dist /var/lib/{apt,dpkg,cache,log}/ -------------------------------------------------------------------------------- /projects/ecoindex_cli/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "ecoindex_cli" 3 | version = "2.30.0" 4 | description = "`ecoindex-cli` is a CLI tool that lets you run ecoindex tests on given pages" 5 | authors = ['Vincent Vatelot '] 6 | license = "Creative Commons BY-NC-ND" 7 | readme = "README.md" 8 | homepage = "http://www.ecoindex.fr" 9 | repository = "https://github.com/cnumr/ecoindex_cli" 10 | include = ["LICENSE"] 11 | packages = [ 12 | { include = "ecoindex/cli", from = "../../bases" }, 13 | { include = "ecoindex/compute", from = "../../components" }, 14 | { include = "ecoindex/config", from = "../../components" }, 15 | { include = "ecoindex/data", from = "../../components" }, 16 | { include = "ecoindex/exceptions", from = "../../components" }, 17 | { include = "ecoindex/models", from = "../../components" }, 18 | { include = "ecoindex/scraper", from = "../../components" }, 19 | { include = "ecoindex/utils", from = "../../components" }, 20 | ] 21 | 22 | [tool.poetry.dependencies] 23 | aiofile = "^3.8.8" 24 | click-spinner = "^0.1.10" 25 | jinja2 = "^3.1.2" 26 | loguru = "^0.7.2" 27 | matplotlib = "^3.8.0" 28 | pandas = "^2.1.2" 29 | playwright = "^1.39.0" 30 | playwright-stealth = "^1.0.6" 31 | pydantic = "^2.4.2" 32 | pydantic-settings = "^2.0.3" 33 | python = ">=3.10,<3.13" 34 | pyyaml = "^6.0.1" 35 | rich = "^13.6.0" 36 | scrapy = 
"^2.11.0" 37 | typer = "^0.9.0" 38 | ua-generator = "^2.0.5" 39 | 40 | [tool.poetry.scripts] 41 | ecoindex-cli = "ecoindex.cli.app:app" 42 | 43 | [build-system] 44 | requires = ["poetry-core>=1.0.0"] 45 | build-backend = "poetry.core.masonry.api" 46 | -------------------------------------------------------------------------------- /projects/ecoindex_compute/README.md: -------------------------------------------------------------------------------- 1 | # Ecoindex python 2 | 3 | [![Validate project quality](https://github.com/cnumr/ecoindex_python_fullstack/actions/workflows/quality_check.yml/badge.svg?branch=main)](https://github.com/cnumr/ecoindex_python_fullstack/actions/workflows/quality_check.yml) 4 | ![PyPI - Version](https://img.shields.io/pypi/v/ecoindex-compute?logo=pypi) 5 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/ecoindex-compute?style=social&logo=pypi) 6 | 7 | This basic module provides a simple interface to get the [Ecoindex](http://www.ecoindex.fr) based on 3 parameters: 8 | 9 | - The number of DOM elements in the page 10 | - The size of the page 11 | - The number of external requests of the page 12 | 13 | ## Requirements 14 | 15 | - Python ^3.10 with [pip](https://pip.pypa.io/en/stable/installation/) 16 | 17 | ## Install 18 | 19 | ```shell 20 | pip install ecoindex_compute 21 | ``` 22 | 23 | ## Use 24 | 25 | ### Compute ecoindex 26 | 27 | You can easily compute the ecoindex by calling the function `compute_ecoindex()`: 28 | 29 | ```python 30 | (function) compute_ecoindex: (dom: int, size: float, requests: int) -> Coroutine[Any, Any, Ecoindex] 31 | ``` 32 | 33 | Example: 34 | 35 | ```python 36 | import asyncio 37 | from pprint import pprint 38 | 39 | from ecoindex.compute import compute_ecoindex 40 | 41 | # Get ecoindex from DOM elements, size of page and requests of the page 42 | ecoindex = asyncio.run(compute_ecoindex(nodes=100, size=100, requests=100)) 43 | pprint(ecoindex) 44 | ``` 45 | 46 | Result example: 47 | 48 | ```python 49 | Ecoindex(grade='B', score=72.0, ges=1.56, water=2.34, ecoindex_version='3.0.0') 50 | ``` 51 | 52 | -------------------------------------------------------------------------------- /projects/ecoindex_compute/Taskfile.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | includes: 4 | poetry: ../../tasks/PoetryTaskfile.yml 5 | pypi: ../../tasks/PypiTaskFile.yml 6 | 7 | vars: 8 | PROJECT_NAME: compute 9 | 10 | tasks: 11 | bump: 12 | desc: Bump the compute version with rules:major, minor, patch, premajor, preminor, prepatch or prerelease. 
--------------------------------------------------------------------------------
/projects/ecoindex_compute/Taskfile.yml:
--------------------------------------------------------------------------------
version: '3'

includes:
  poetry: ../../tasks/PoetryTaskfile.yml
  pypi: ../../tasks/PypiTaskFile.yml

vars:
  PROJECT_NAME: compute

tasks:
  bump:
    desc: "Bump the compute version with one of the rules: major, minor, patch, premajor, preminor, prepatch or prerelease."
    cmds:
      - task: poetry:bump
        vars:
          VERSION_FILE_PATH: "../../components/ecoindex/compute/VERSION"
          VERSION_RULE: "{{.CLI_ARGS}}"
    silent: true
--------------------------------------------------------------------------------
/projects/ecoindex_compute/pyproject.toml:
--------------------------------------------------------------------------------
[tool.poetry]
name = "ecoindex_compute"
version = "5.9.0"
readme = "README.md"
description = "Ecoindex module provides a simple way to measure the Ecoindex score based on 3 parameters: the DOM elements of the page, the size of the page and the number of external requests of the page"
authors = ['Vincent Vatelot ']
license = "Creative Commons BY-NC-ND"
homepage = "http://www.ecoindex.fr"
repository = "https://github.com/cnumr/ecoindex_python"
include = ["LICENSE"]

packages = [
    { include = "ecoindex/compute", from = "../../components" },
    { include = "ecoindex/data", from = "../../components" },
    { include = "ecoindex/models", from = "../../components" },
]

[tool.poetry.dependencies]
aiofile = "^3.8.8"
pydantic = "^2.4.2"
python = "^3.10"
requests = "^2.31.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
/projects/ecoindex_scraper/README.md:
--------------------------------------------------------------------------------
# Ecoindex Scraper

[![Validate project quality](https://github.com/cnumr/ecoindex_python_fullstack/actions/workflows/quality_check.yml/badge.svg?branch=main)](https://github.com/cnumr/ecoindex_python_fullstack/actions/workflows/quality_check.yml)

![PyPI - Version](https://img.shields.io/pypi/v/ecoindex-scraper?logo=pypi)
![PyPI - Downloads](https://img.shields.io/pypi/dm/ecoindex-scraper?style=social&logo=pypi)

This module provides a simple interface to get the [Ecoindex](http://www.ecoindex.fr) of a given webpage using the [ecoindex-compute](https://pypi.org/project/ecoindex-compute/) module.

## Requirements

- Python ^3.10 with [pip](https://pip.pypa.io/en/stable/installation/)

## Install

```shell
pip install ecoindex-scraper
```

## Use

### Get a page analysis

You can run a page analysis by instantiating an `EcoindexScraper` with the page parameters and calling its `get_page_analysis()` coroutine:

```python
(class) EcoindexScraper: (url: AnyHttpUrl, window_size: WindowSize | None = WindowSize(width=1920, height=1080), wait_before_scroll: int | None = 1, wait_after_scroll: int | None = 1)
(method) get_page_analysis: () -> Coroutine[Any, Any, Result]
```

Example:

```python
import asyncio
from pprint import pprint

from ecoindex.scraper import EcoindexScraper

pprint(
    asyncio.run(
        EcoindexScraper(url="http://ecoindex.fr").get_page_analysis()
    )
)
```

Result example:

```python
Result(width=1920, height=1080, url=AnyHttpUrl('http://ecoindex.fr', ), size=549.253, nodes=52, requests=12, grade='A', score=90.0, ges=1.2, water=1.8, ecoindex_version='5.0.0', date=datetime.datetime(2022, 9, 12, 10, 54, 46, 773443), page_type=None)
```

> **Default behaviour:** By default, the page analysis simulates:
>
> - Window size of **1920x1080** pixels (can be set with parameter `window_size`)
> - Wait for **1 second when page is loaded** (can be set with parameter `wait_before_scroll`)
> - Scroll to the bottom of the page (if it is possible)
> - Wait for **1 second after** having scrolled to the bottom of the page (can be set with parameter `wait_after_scroll`)
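For example, a sketch that overrides these defaults (the viewport size and wait durations below are arbitrary values):

```python
import asyncio

from ecoindex.models import WindowSize
from ecoindex.scraper import EcoindexScraper

# Analyze with a phone-sized viewport and longer waits around the scroll
result = asyncio.run(
    EcoindexScraper(
        url="http://www.ecoindex.fr",
        window_size=WindowSize(width=390, height=844),
        wait_before_scroll=3,
        wait_after_scroll=3,
    ).get_page_analysis()
)
print(result.grade, result.score)
```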
### Get a page analysis and generate a screenshot

It is possible to generate a screenshot of the analyzed page by adding a `ScreenShot` property to the `EcoindexScraper` object.
You have to define an id (any string, though a unique id is recommended) and a path to the screenshot file (if the folder does not exist, it will be created).

```python
import asyncio
from pprint import pprint
from uuid import uuid1

from ecoindex.models import ScreenShot
from ecoindex.scraper import EcoindexScraper

pprint(
    asyncio.run(
        EcoindexScraper(
            url="http://www.ecoindex.fr/",
            screenshot=ScreenShot(id=str(uuid1()), folder="./screenshots"),
        ).get_page_analysis()
    )
)
```

## Async analysis

You can also run several analyses in parallel, here with a thread pool where each worker thread runs its own event loop:

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor, as_completed

from ecoindex.scraper import EcoindexScraper


def run_page_analysis(url):
    return asyncio.run(EcoindexScraper(url=url).get_page_analysis())


with ThreadPoolExecutor(max_workers=8) as executor:
    future_to_analysis = {}

    url = "https://www.ecoindex.fr"

    for i in range(10):
        future_to_analysis[
            executor.submit(
                run_page_analysis,
                url,
            )
        ] = url

    for future in as_completed(future_to_analysis):
        try:
            print(future.result())
        except Exception as e:
            print(e)
```
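Since `get_page_analysis()` is itself a coroutine, the same batch can also be driven from a single event loop with `asyncio.gather`, one scraper instance per analysis (assuming, as in the thread-based example above, that each instance manages its own browser session):

```python
import asyncio

from ecoindex.scraper import EcoindexScraper

urls = ["https://www.ecoindex.fr"] * 10


async def main() -> None:
    results = await asyncio.gather(
        *(EcoindexScraper(url=url).get_page_analysis() for url in urls),
        return_exceptions=True,  # collect failures instead of cancelling the batch
    )
    for url, result in zip(urls, results):
        print(url, result)


asyncio.run(main())
```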
## Get requests details from an analysis

You can get the details of the requests made by the page by calling the function `get_all_requests()`, and the aggregation of requests by category by calling the function `get_requests_by_category()`:

```python
import asyncio
from pprint import pprint

from ecoindex.scraper import EcoindexScraper

scraper = EcoindexScraper(url="http://www.ecoindex.fr")

result = asyncio.run(scraper.get_page_analysis())
all_requests = asyncio.run(scraper.get_all_requests())
requests_by_category = asyncio.run(scraper.get_requests_by_category())

pprint([request.model_dump() for request in all_requests])
# [{'category': 'html',
#   'mime_type': 'text/html; charset=iso-8859-1',
#   'size': 475.0,
#   'status': 301,
#   'url': 'http://www.ecoindex.fr/'},
#  {'category': 'html',
#   'mime_type': 'text/html',
#   'size': 7772.0,
#   'status': 200,
#   'url': 'https://www.ecoindex.fr/'},
#  {'category': 'css',
#   'mime_type': 'text/css',
#   'size': 9631.0,
#   'status': 200,
#   'url': 'https://www.ecoindex.fr/css/bundle.min.d38033feecefa0352173204171412aec01f58eee728df0ac5c917a396ca0bc14.css'},
#  {'category': 'javascript',
#   'mime_type': 'application/javascript',
#   'size': 9823.0,
#   'status': 200,
#   'url': 'https://www.ecoindex.fr/fr/js/bundle.8781a9ae8d87b4ebaa689167fc17b7d71193cf514eb8bb40aac9bf4548e14533.js'},
#  {'category': 'other',
#   'mime_type': 'x-unknown',
#   'size': 892.0,
#   'status': 200,
#   'url': 'https://www.ecoindex.fr/images/logo-neutral-it.webp'},
#  {'category': 'image',
#   'mime_type': 'image/svg+xml',
#   'size': 3298.0,
#   'status': 200,
#   'url': 'https://www.ecoindex.fr/images/logo-greenit.svg'}]

pprint(requests_by_category.model_dump())
# {'css': {'total_count': 1, 'total_size': 9631.0},
#  'font': {'total_count': 0, 'total_size': 0.0},
#  'html': {'total_count': 2, 'total_size': 8247.0},
#  'image': {'total_count': 1, 'total_size': 3298.0},
#  'javascript': {'total_count': 1, 'total_size': 9823.0},
#  'other': {'total_count': 1, 'total_size': 892.0},
#  'video': {'total_count': 0, 'total_size': 0.0}}
```
--------------------------------------------------------------------------------
/projects/ecoindex_scraper/Taskfile.yml:
--------------------------------------------------------------------------------
version: '3'

includes:
  poetry: ../../tasks/PoetryTaskfile.yml
  pypi: ../../tasks/PypiTaskFile.yml

vars:
  PROJECT_NAME: scraper

tasks:
  bump:
    desc: "Bump the scraper version with one of the rules: major, minor, patch, premajor, preminor, prepatch or prerelease."
    cmds:
      - task: poetry:bump
        vars:
          VERSION_FILE_PATH: "../../components/ecoindex/scraper/VERSION"
          VERSION_RULE: "{{.CLI_ARGS}}"
    silent: true
--------------------------------------------------------------------------------
/projects/ecoindex_scraper/dockerfile:
--------------------------------------------------------------------------------
FROM python:3.12-slim

WORKDIR /code
ENV PYTHONPATH="/code"

RUN pip install poetry
COPY ./ ./
RUN poetry install --only scraper

RUN poetry run playwright install chromium --with-deps

RUN rm -rf /tmp/dist /var/lib/{apt,dpkg,cache,log}/
--------------------------------------------------------------------------------
/projects/ecoindex_scraper/pyproject.toml:
--------------------------------------------------------------------------------
[tool.poetry]
name = "ecoindex_scraper"
version = "3.15.0"
readme = "README.md"
description = "Ecoindex_scraper module provides a way to scrape data from a given website while simulating a real web browser"
authors = ['Vincent Vatelot ']
license = "Creative Commons BY-NC-ND"
homepage = "http://www.ecoindex.fr"
repository = "https://github.com/cnumr/ecoindex_scrap_python"
include = ["LICENSE"]
packages = [
    { include = "ecoindex/compute", from = "../../components" },
    { include = "ecoindex/data", from = "../../components" },
    { include = "ecoindex/exceptions", from = "../../components" },
    { include = "ecoindex/models", from = "../../components" },
    { include = "ecoindex/scraper", from = "../../components" },
    { include = "ecoindex/utils", from = "../../components" },
]

[tool.poetry.dependencies]
playwright = "^1.39.0"
playwright-stealth = "^1.0.6"
pydantic = "^2.4.2"
python = "^3.10"
typing-extensions = "^4.8.0"
pyyaml = "^6.0.1"
pillow = "^10.1.0"
setuptools = ">=69.5.1,<71.0.0"
ua-generator = "^2.0.5"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
"ecoindex-monorepo" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Vincent Vatelot "] 6 | readme = "README.md" 7 | packages = [ 8 | { include = "development" }, 9 | { include = "ecoindex/backend", from = "bases" }, 10 | { include = "ecoindex/cli", from = "bases" }, 11 | { include = "ecoindex/compute", from = "components" }, 12 | { include = "ecoindex/config", from = "components" }, 13 | { include = "ecoindex/data", from = "components" }, 14 | { include = "ecoindex/database", from = "components" }, 15 | { include = "ecoindex/exceptions", from = "components" }, 16 | { include = "ecoindex/models", from = "components" }, 17 | { include = "ecoindex/scraper", from = "components" }, 18 | { include = "ecoindex/scripts", from = "components" }, 19 | { include = "ecoindex/utils", from = "components" }, 20 | { include = "ecoindex/worker_component", from = "components" }, 21 | { include = "ecoindex/worker", from = "bases" }, 22 | ] 23 | 24 | [tool.poetry.dependencies] 25 | aiofile = "^3.8.8" 26 | loguru = "^0.7.2" 27 | pydantic = "^2.4.2" 28 | pydantic-settings = "^2.0.3" 29 | python = ">=3.10,<3.13" 30 | redis = { extras = ["hiredis"], version = "^5.0.1" } 31 | requests = "^2.32.3" 32 | tomli = "^2.0.1" 33 | haralyzer = "^2.4.0" 34 | python-slugify = "^8.0.4" 35 | setuptools = "^74.0.0" 36 | cryptography = "^44.0.2" 37 | 38 | [tool.poetry.group.scraper.dependencies] 39 | pillow = "^10.3.0" 40 | playwright = "^1.39.0" 41 | playwright-stealth = "^1.0.6" 42 | 43 | [tool.poetry.group.cli.dependencies] 44 | click-spinner = "^0.1.10" 45 | jinja2 = "^3.1.3" 46 | matplotlib = "^3.8.0" 47 | pandas = "^2.1.2" 48 | pyyaml = "^6.0.1" 49 | rich = "^13.6.0" 50 | scrapy = "^2.11.1" 51 | typer = "^0.9.0" 52 | 53 | [tool.poetry.group.api.dependencies] 54 | aiosqlite = "^0.19.0" 55 | alembic = "^1.12.1" 56 | celery = "^5.3.4" 57 | redis = "^5.0.1" 58 | sqlmodel = "^0.0.14" 59 | sentry-sdk = "^2.8.0" 60 | ua-generator = "^2.0.3" 61 | 62 | [tool.poetry.group.api-backend.dependencies] 63 | fastapi = "^0.109.1" 64 | uvicorn = "^0.23.2" 65 | 66 | [tool.poetry.group.dev.dependencies] 67 | black = "^24.3.0" 68 | mypy = "^1.7.0" 69 | pytest = "^7.4.2" 70 | pytest-asyncio = "^0.21.1" 71 | pytest-cov = "^4.1.0" 72 | ruff = "^0.1.1" 73 | types-click-spinner = "^0.1.13.6" 74 | types-pyyaml = "^6.0.12.12" 75 | types-requests = "^2.31.0.10" 76 | watchdog = "^6.0.0" 77 | pytest-picked = "^0.5.0" 78 | 79 | [build-system] 80 | requires = ["poetry-core"] 81 | build-backend = "poetry.core.masonry.api" 82 | 83 | [tool.poetry.scripts] 84 | update-values = "ecoindex.scripts:update_values" 85 | 86 | [tool.pytest.ini_options] 87 | filterwarnings = ["ignore::DeprecationWarning"] 88 | 89 | [tool.mypy] 90 | mypy_path = ["bases", "components"] 91 | namespace_packages = true 92 | explicit_package_bases = true 93 | ignore_missing_imports = true 94 | disallow_untyped_defs = false 95 | exclude = ["test", "dist", "__pycache__"] 96 | 97 | [tool.coverage.run] 98 | omit = ["test/*"] 99 | 100 | [tool.coverage.report] 101 | skip_empty = true 102 | 103 | [tool.pyright] 104 | extraPaths = ["bases", "components"] 105 | -------------------------------------------------------------------------------- /tasks/DockerTaskfile.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | tasks: 4 | build: 5 | internal: true 6 | cmds: 7 | - echo "docker build -t {{.NAME}} {{.VERSION}} {{.OPTIONS}} ." 
8 | - docker build -t vvatelot/ecoindex-{{.NAME}}:{{.VERSION}} -t vvatelot/ecoindex-{{.NAME}}:latest {{.OPTIONS}} . 9 | silent: true 10 | 11 | push: 12 | internal: true 13 | cmds: 14 | - echo "docker push vvatelot/ecoindex-{{.NAME}}:{{.VERSION}}" 15 | - docker push vvatelot/ecoindex-{{.NAME}}:{{.VERSION}} 16 | - echo "docker push vvatelot/ecoindex-{{.NAME}}:latest" 17 | - docker push vvatelot/ecoindex-{{.NAME}}:latest 18 | silent: true 19 | -------------------------------------------------------------------------------- /tasks/PoetryTaskfile.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | tasks: 4 | default: 5 | desc: Run poetry 6 | cmds: 7 | - poetry {{.CLI_ARGS}} 8 | silent: true 9 | 10 | install: 11 | desc: Install the poetry project dependencies of {{.PROJECT_NAME}} 12 | cmds: 13 | - poetry install 14 | silent: true 15 | 16 | add: 17 | desc: Add a new dependency to {{.PROJECT_NAME}} 18 | cmds: 19 | - poetry add {{.CLI_ARGS}} 20 | silent: true 21 | 22 | remove: 23 | desc: Remove a dependency from {{.PROJECT_NAME}} 24 | cmds: 25 | - poetry remove {{.CLI_ARGS}} 26 | silent: true 27 | 28 | update: 29 | desc: Update dependencies of {{.PROJECT_NAME}} 30 | cmds: 31 | - poetry update 32 | silent: true 33 | 34 | build: 35 | desc: Build the poetry project {{.PROJECT_NAME}} dependencies, and then build the project distribution 36 | cmds: 37 | - poetry lock 38 | - poetry build-project 39 | silent: true 40 | 41 | bump: 42 | internal: true 43 | preconditions: 44 | - sh: '[[ "{{.CLI_ARGS}}" =~ ^(major|minor|patch|premajor|preminor|prepatch|prerelease)$ ]]' 45 | msg: "Invalid version rule: `{{.CLI_ARGS}}` must be major, minor, patch, premajor, preminor, prepatch or prerelease." 46 | cmds: 47 | - poetry version -s > /tmp/version-current-output-{{.PROJECT_NAME}} 48 | - poetry version {{.VERSION_RULE}} -s > /tmp/version-bump-output-{{.PROJECT_NAME}} 49 | - echo "Current version:$(cat /tmp/version-current-output-{{.PROJECT_NAME}})" 50 | - echo "Bumped version:$(cat /tmp/version-bump-output-{{.PROJECT_NAME}})" 51 | - echo "Update {{.VERSION_FILE_PATH}}" 52 | - echo "$(cat /tmp/version-bump-output-{{.PROJECT_NAME}})" > {{.VERSION_FILE_PATH}} 53 | silent: true 54 | 55 | version-short: 56 | desc: Show the current version of {{.PROJECT_NAME}} 57 | cmds: 58 | - poetry version -s 59 | silent: true 60 | 61 | install-playwright: 62 | desc: Install playwright 63 | cmds: 64 | - poetry run playwright install chromium --with-deps 65 | silent: true 66 | interactive: true 67 | -------------------------------------------------------------------------------- /tasks/PypiTaskFile.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | tasks: 4 | publish: 5 | desc: Publish the project {{.PROJECT_NAME}} to pypi 6 | cmds: 7 | - poetry publish 8 | silent: true -------------------------------------------------------------------------------- /tasks/QualityTaskFile.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | tasks: 4 | tests: 5 | desc: Run tests 6 | cmds: 7 | - poetry run pytest 8 | 9 | tests-coverage: 10 | desc: Run tests with coverage 11 | cmds: 12 | - poetry run pytest --cov-report "xml:coverage.xml" --cov=. test 13 | 14 | tests-coverage-branch: 15 | desc: Run tests with coverage for current branch 16 | cmds: 17 | - poetry run pytest --cov-report "xml:coverage.xml" --cov=. 
--picked --mode=branch test

  mypy:
    desc: Run mypy
    cmds:
      - poetry run mypy .

  ruff:
    desc: Run ruff
    cmds:
      - poetry run ruff .

  quality-branch:
    desc: Run quality checks
    deps: [tests-coverage-branch, mypy, ruff]
    cmds:
      - echo "Quality checks passed"

  default:
    desc: Run quality checks
    deps: [tests-coverage, mypy, ruff]
    cmds:
      - echo "Quality checks passed"
--------------------------------------------------------------------------------
/test/bases/ecoindex/backend/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/bases/ecoindex/backend/__init__.py
--------------------------------------------------------------------------------
/test/bases/ecoindex/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/bases/ecoindex/cli/__init__.py
--------------------------------------------------------------------------------
/test/bases/ecoindex/cli/test_app.py:
--------------------------------------------------------------------------------
from os import remove

from ecoindex.cli.app import app
from typer.testing import CliRunner

runner = CliRunner()


def test_analyze_no_args() -> None:
    result = runner.invoke(app=app, args=["analyze"])
    assert result.exit_code == 1
    assert "🔥 You must provide an url..." in result.stdout


def test_analyze_not_valid_url() -> None:
    invalid_url = "url"
    result = runner.invoke(app=app, args=["analyze", "--url", invalid_url])
    assert result.exit_code == 1
    assert (
        "Input should be a valid URL, relative URL without a base [type=url_parsing, input_value='url', input_type=str]"
        in result.stdout
    )


def test_analyze_one_invalid_url() -> None:
    valid_url = "https://www.test.com"
    invalid_url = "dummy"
    result = runner.invoke(
        app=app, args=["analyze", "--url", valid_url, "--url", invalid_url], input="n\n"
    )
    assert result.exit_code == 1
    assert (
        "Input should be a valid URL, relative URL without a base [type=url_parsing, input_value='dummy', input_type=str]"
        in result.stdout
    )


def test_analyze_one_valid_url() -> None:
    domain = "www.test.com"
    valid_url = f"https://{domain}"
    result = runner.invoke(app=app, args=["analyze", "--url", valid_url], input="n\n")
    assert "There are 1 url(s), do you want to process?" in result.stdout
    assert result.exit_code == 1
    assert "Aborted" in result.stdout
    assert f"📁️ Urls recorded in file `input/{domain}.csv`" in result.stdout
    remove(f"/tmp/ecoindex-cli/input/{domain}.csv")


def test_analyze_string_window_size() -> None:
    invalid_window_size = "window"
    result = runner.invoke(
        app=app, args=["analyze", "--window-size", invalid_window_size]
    )
    assert result.exit_code == 1
    assert (
        f"🔥 `{invalid_window_size}` is not a valid window size. 
Must be of type `1920,1080`" 57 | in result.stdout 58 | ) 59 | 60 | 61 | def test_analyze_one_invalid_window_size() -> None: 62 | valid_window_size = "1920,1080" 63 | invalid_window_size = "1920,height" 64 | result = runner.invoke( 65 | app=app, 66 | args=[ 67 | "analyze", 68 | "--window-size", 69 | valid_window_size, 70 | "--window-size", 71 | invalid_window_size, 72 | ], 73 | ) 74 | assert result.exit_code == 1 75 | assert ( 76 | f"🔥 `{invalid_window_size}` is not a valid window size. Must be of type `1920,1080`" 77 | in result.stdout 78 | ) 79 | 80 | 81 | def test_analyze_abort_recursive() -> None: 82 | result = runner.invoke(app=app, args=["analyze", "--recursive"], input="n\n") 83 | assert ( 84 | "You are about to perform a recursive website scraping. This can take a long time. Are you sure to want to proceed?" 85 | in result.stdout 86 | ) 87 | assert "Aborted" in result.stdout 88 | assert result.exit_code == 1 89 | 90 | 91 | def test_analyze_abort_sitemap() -> None: 92 | domain = "www.test.com" 93 | valid_url = f"https://{domain}/sitemap.xml" 94 | result = runner.invoke( 95 | app=app, args=["analyze", "--sitemap", valid_url], input="n\n" 96 | ) 97 | assert ( 98 | "You are about to read urls from a website sitemap. This can take a long time. Are you sure to want to proceed?" 99 | in result.stdout 100 | ) 101 | assert "Aborted" in result.stdout 102 | assert result.exit_code == 1 103 | 104 | 105 | def test_invalid_sitemap() -> None: 106 | domain = "www.test.com" 107 | invalid_sitemap = f"https://{domain}" 108 | result = runner.invoke( 109 | app=app, args=["analyze", "--sitemap", invalid_sitemap], input="y\n" 110 | ) 111 | assert ( 112 | "You are about to read urls from a website sitemap. This can take a long time. Are you sure to want to proceed?" 113 | in result.stdout 114 | ) 115 | assert "Aborted" not in result.stdout 116 | assert "The provided url is not a valid sitemap url" in result.stdout 117 | assert result.exit_code == 1 118 | 119 | 120 | def test_no_interaction() -> None: 121 | result = runner.invoke(app=app, args=["analyze", "--recursive", "--no-interaction"]) 122 | assert "[Y/n]" not in result.stdout 123 | assert result.exit_code == 1 124 | 125 | 126 | def test_unauthorized_export_format() -> None: 127 | result = runner.invoke(app=app, args=["analyze", "--export-format", "txt"]) 128 | assert result.exit_code == 2 129 | assert "'txt' is not one of 'csv', 'json'." 
in result.stdout 130 | -------------------------------------------------------------------------------- /test/bases/ecoindex/cli/test_arguments_handler.py: -------------------------------------------------------------------------------- 1 | from ecoindex.cli.arguments_handler import ( 2 | get_file_prefix_input_file_logger_file, 3 | get_url_from_args, 4 | get_urls_from_file, 5 | get_window_sizes_from_args, 6 | ) 7 | from ecoindex.models import WindowSize 8 | from pydantic import ValidationError 9 | from pytest import raises 10 | 11 | 12 | def test_urls_all_valid_from_args(): 13 | urls = ( 14 | "http://test.com/", 15 | "https://test.com/", 16 | "https://www.dummy.com/page/", 17 | "https://www.dummy.com/another_page", 18 | "http://localhost:8000/", 19 | ) 20 | valid_urls = get_url_from_args(urls_arg=urls) 21 | assert len(valid_urls) == 5 22 | for url in valid_urls: 23 | assert str(url) in urls 24 | 25 | 26 | def test_urls_invalid_from_args(): 27 | urls = "test.com" 28 | with raises(ValidationError): 29 | get_url_from_args(urls) 30 | 31 | 32 | def test_validate_valid_window_size(): 33 | assert get_window_sizes_from_args(["1024,768"]) == [ 34 | WindowSize(width=1024, height=768) 35 | ] 36 | 37 | 38 | def test_validate_invalid_window_size(): 39 | with raises(ValueError): 40 | get_window_sizes_from_args(("800x600",)) 41 | 42 | with raises(ValueError): 43 | get_window_sizes_from_args(("width,600",)) 44 | 45 | with raises(ValueError): 46 | get_window_sizes_from_args(("600",)) 47 | 48 | 49 | def test_get_file_prefix_input_file_logger_file(): 50 | urls = ("http://test.com", "https://test.com", "https://www.dummy.com/page/") 51 | assert get_file_prefix_input_file_logger_file(urls=urls) == ( 52 | "test.com", 53 | "/tmp/ecoindex-cli/input/test.com.csv", 54 | "/tmp/ecoindex-cli/logs/test.com.log", 55 | ) 56 | 57 | assert get_file_prefix_input_file_logger_file( 58 | urls=urls, urls_file="/home/user/my_urls.csv" 59 | ) == ( 60 | "my_urls.csv", 61 | "/home/user/my_urls.csv", 62 | "/tmp/ecoindex-cli/logs/my_urls.csv.log", 63 | ) 64 | 65 | 66 | def test_read_file_with_empty_lines(): 67 | urls = ("http://test.com", "https://test.com", "https://www.dummy.com/page/", "") 68 | with open(file="/tmp/ecoindex-cli/input/test.com.csv", mode="w") as f: 69 | f.write("\n".join(urls)) 70 | 71 | validated_urls = get_urls_from_file( 72 | urls_file="/tmp/ecoindex-cli/input/test.com.csv" 73 | ) 74 | assert len(validated_urls) == 3 75 | assert "" not in validated_urls 76 | -------------------------------------------------------------------------------- /test/bases/ecoindex/cli/test_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib.parse import urlparse 3 | from ecoindex.cli.helper import replace_localhost_with_hostdocker 4 | from ecoindex.models.cli import CliHost 5 | 6 | 7 | def test_replace_localhost_with_hostdocker(): 8 | assert replace_localhost_with_hostdocker( 9 | urlparse("https://test.com/page/").netloc 10 | ) == CliHost(domain="test.com", netloc="test.com") 11 | 12 | assert replace_localhost_with_hostdocker( 13 | urlparse("https://localhost:8000/page/").netloc 14 | ) == CliHost(domain="localhost", netloc="localhost:8000") 15 | 16 | os.environ["DOCKER_CONTAINER"] = "true" 17 | assert replace_localhost_with_hostdocker( 18 | urlparse("https://localhost:8000/page/").netloc 19 | ) == CliHost(domain="host.docker.internal", netloc="host.docker.internal:8000") -------------------------------------------------------------------------------- 
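The helper exercised by the tests above rewrites `localhost` so that a CLI running inside a Docker container can still reach services bound on the host. The real implementation lives in `bases/ecoindex/cli/helper.py` (not reproduced in this section); the following is only a minimal sketch of the behavior these tests pin down, not a copy of the actual code:

```python
import os

from ecoindex.models.cli import CliHost


def replace_localhost_with_hostdocker(netloc: str) -> CliHost:
    # Hypothetical sketch: inside a container (DOCKER_CONTAINER set), localhost
    # points at the container itself, so swap in host.docker.internal to reach
    # the host machine instead.
    if os.environ.get("DOCKER_CONTAINER") and "localhost" in netloc:
        netloc = netloc.replace("localhost", "host.docker.internal")
    return CliHost(domain=netloc.split(":")[0], netloc=netloc)
```
--------------------------------------------------------------------------------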
/test/bases/ecoindex/worker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/bases/ecoindex/worker/__init__.py -------------------------------------------------------------------------------- /test/components/ecoindex/compute/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/compute/__init__.py -------------------------------------------------------------------------------- /test/components/ecoindex/compute/test_ecoindex.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ecoindex.compute import ( 3 | get_ecoindex, 4 | get_grade, 5 | get_greenhouse_gases_emmission, 6 | get_quantile, 7 | get_score, 8 | get_water_consumption, 9 | ) 10 | from ecoindex.data import quantiles_dom, quantiles_req, quantiles_size 11 | from ecoindex.models import Ecoindex 12 | 13 | 14 | @pytest.mark.asyncio 15 | class TestAsyncGroup: 16 | async def test_get_quantiles(self): 17 | assert await get_quantile(quantiles_size, 2500) == 14.086372025739513 18 | assert await get_quantile(quantiles_dom, 150) == 2.892857142857143 19 | assert await get_quantile(quantiles_req, 23) == 2.8 20 | assert await get_quantile(quantiles_size, 310182.902) == 20 21 | 22 | async def test_get_score(self): 23 | assert await get_score(dom=100, requests=100, size=100) == 72 24 | assert await get_score(dom=100, requests=100, size=1000) == 67 25 | assert await get_score(dom=100, requests=100, size=10000) == 58 26 | assert await get_score(dom=200, requests=200, size=10000) == 46 27 | assert await get_score(dom=2355, requests=267, size=2493) == 10 28 | assert await get_score(dom=240, requests=20, size=331) == 83 29 | 30 | async def test_get_ecoindex(self): 31 | assert await get_ecoindex(dom=100, requests=100, size=100) == Ecoindex( 32 | score=72, 33 | grade="B", 34 | ges=1.56, 35 | water=2.34, 36 | ) 37 | 38 | async def test_get_grade(self): 39 | assert await get_grade(2) == "G" 40 | assert await get_grade(25) == "F" 41 | assert await get_grade(10) == "G" 42 | assert await get_grade(50.2) == "D" 43 | assert await get_grade(100) == "A" 44 | 45 | async def test_get_greenhouse_gases_emission(self): 46 | assert await get_greenhouse_gases_emmission(2) == 2.96 47 | assert await get_greenhouse_gases_emmission(10) == 2.8 48 | assert await get_greenhouse_gases_emmission(50) == 2 49 | assert await get_greenhouse_gases_emmission(70) == 1.6 50 | 51 | async def test_get_water_consumption(self): 52 | assert await get_water_consumption(2) == 4.44 53 | assert await get_water_consumption(10) == 4.2 54 | assert await get_water_consumption(50) == 3 55 | assert await get_water_consumption(70) == 2.4 56 | 57 | async def test_get_ecoindex_out_of_range(self): 58 | assert await get_ecoindex(dom=2240, requests=100, size=310182.902) == Ecoindex( 59 | score=16, 60 | grade="F", 61 | ges=2.68, 62 | water=4.02, 63 | ) 64 | -------------------------------------------------------------------------------- /test/components/ecoindex/compute/test_models.py: -------------------------------------------------------------------------------- 1 | from os import rmdir 2 | from os.path import isdir 3 | 4 | from ecoindex.models import Ecoindex, Result, ScreenShot, WebPage 5 | from pydantic import 
ValidationError 6 | from pytest import raises 7 | 8 | 9 | def test_model_webpage_no_url() -> None: 10 | with raises(ValidationError) as error: 11 | WebPage() 12 | 13 | assert ( 14 | "1 validation error for WebPage\n" 15 | "url\n " 16 | "Field required [type=missing, input_value={}, input_type=dict]\n" 17 | ) in str(error.value) 18 | 19 | 20 | def test_model_webpage_invalid_url() -> None: 21 | with raises(ValidationError) as error: 22 | WebPage(url="toto") 23 | 24 | assert ( 25 | "1 validation error for WebPage\n" 26 | "url\n " 27 | "Input should be a valid URL, relative URL without a base " 28 | "[type=url_parsing, input_value='toto', input_type=str]\n" 29 | ) in str(error.value) 30 | 31 | with raises(ValidationError): 32 | WebPage(url="about:config") 33 | 34 | 35 | def test_model_webpage_wrong_size() -> None: 36 | with raises(ValidationError) as error: 37 | WebPage(url="https://www.google.fr", width=0, height=0) 38 | 39 | assert ( 40 | "2 validation errors for WebPage\nwidth\n " 41 | "Input should be greater than or equal to 100 [type=greater_than_equal, input_value=0, input_type=int]" 42 | ) in str(error.value) 43 | assert ( 44 | "height\n " 45 | "Input should be greater than or equal to 50 [type=greater_than_equal, input_value=0, input_type=int]" 46 | ) in str(error.value) 47 | 48 | 49 | def test_model_webpage_default_size() -> None: 50 | webpage = WebPage(url="https://www.google.fr") 51 | assert webpage.height == 1080 52 | assert webpage.width == 1920 53 | 54 | 55 | def test_model_valid(): 56 | valid_ecoindex = Ecoindex(grade="A", score=99.9, ges=0.6, water=0.1) 57 | assert valid_ecoindex.grade == "A" 58 | assert valid_ecoindex.score == 99.9 59 | assert valid_ecoindex.ges == 0.6 60 | assert valid_ecoindex.water == 0.1 61 | assert valid_ecoindex.ecoindex_version not in [None, ""] 62 | 63 | 64 | def test_model_invalid(): 65 | with raises(ValidationError) as error: 66 | Ecoindex(grade="dummy", score="dummy") 67 | 68 | assert "2 validation errors for Ecoindex" in str(error.value) 69 | 70 | 71 | def test_ecoindex_model_empty(): 72 | ecoindex = Ecoindex() 73 | assert ecoindex.ges is None 74 | assert ecoindex.grade is None 75 | assert ecoindex.score is None 76 | assert ecoindex.water is None 77 | 78 | 79 | def test_result_model(): 80 | result = Result( 81 | size=119, 82 | nodes=45, 83 | requests=8, 84 | url="http://www.myurl.com", 85 | width=1920, 86 | height=1080, 87 | grade="A", 88 | score=89, 89 | ges=1.22, 90 | water=1.89, 91 | ) 92 | assert result.page_type is None 93 | assert result.size == 119 94 | assert result.nodes == 45 95 | assert result.requests == 8 96 | assert result.width == 1920 97 | assert result.height == 1080 98 | assert result.grade == "A" 99 | assert result.score == 89 100 | assert result.ges == 1.22 101 | assert result.water == 1.89 102 | assert result.ecoindex_version is not None 103 | 104 | 105 | def test_screenshot_model(): 106 | id = "screenshot_test_id" 107 | folder = "./screenshot_test" 108 | 109 | screenshot = ScreenShot(id=id, folder=folder) 110 | 111 | assert isdir(folder) is True 112 | assert screenshot.id == id 113 | assert screenshot.folder == folder 114 | assert screenshot.get_png() == f"{folder}/{id}.png" 115 | assert screenshot.get_webp() == f"{folder}/{id}.webp" 116 | 117 | rmdir(folder) 118 | assert isdir(folder) is False 119 | 120 | 121 | if __name__ == "__main__": 122 | test_model_webpage_no_url() 123 | test_model_webpage_invalid_url() 124 | test_model_webpage_wrong_size() 125 | test_model_webpage_default_size() 126 | test_model_valid() 127 | 
test_model_invalid() 128 | test_ecoindex_model_empty() 129 | test_result_model() 130 | test_screenshot_model() 131 | -------------------------------------------------------------------------------- /test/components/ecoindex/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/data/__init__.py -------------------------------------------------------------------------------- /test/components/ecoindex/exceptions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/exceptions/__init__.py -------------------------------------------------------------------------------- /test/components/ecoindex/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/models/__init__.py -------------------------------------------------------------------------------- /test/components/ecoindex/models/test_scraper.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ecoindex.models.scraper import MimetypeAggregation 3 | 4 | 5 | @pytest.mark.asyncio 6 | async def test_get_category_of_resource_video() -> None: 7 | mime_type = "video/mp4" 8 | assert await MimetypeAggregation.get_category_of_resource(mime_type) == "video" 9 | 10 | 11 | @pytest.mark.asyncio 12 | async def test_get_category_of_resource_image() -> None: 13 | mime_type = "image/png" 14 | assert await MimetypeAggregation.get_category_of_resource(mime_type) == "image" 15 | 16 | 17 | @pytest.mark.asyncio 18 | async def test_get_category_of_resource_font() -> None: 19 | mime_type = "font/woff2" 20 | assert await MimetypeAggregation.get_category_of_resource(mime_type) == "font" 21 | 22 | 23 | @pytest.mark.asyncio 24 | async def test_get_category_of_resource_css() -> None: 25 | mime_type = "text/css" 26 | assert await MimetypeAggregation.get_category_of_resource(mime_type) == "css" 27 | 28 | 29 | @pytest.mark.asyncio 30 | async def test_get_category_of_resource_javascript() -> None: 31 | mime_type = "application/javascript" 32 | assert await MimetypeAggregation.get_category_of_resource(mime_type) == "javascript" 33 | 34 | 35 | @pytest.mark.asyncio 36 | async def test_get_category_of_resource_other() -> None: 37 | mime_type = "application/pdf" 38 | assert await MimetypeAggregation.get_category_of_resource(mime_type) == "other" 39 | 40 | 41 | if __name__ == "__main__": 42 | pytest.main() 43 | -------------------------------------------------------------------------------- /test/components/ecoindex/scraper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/scraper/__init__.py -------------------------------------------------------------------------------- /test/components/ecoindex/scraper/test_scraper.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from ecoindex.exceptions.scraper import EcoindexScraperStatusException 4 | from ecoindex.models import ScreenShot, 
WindowSize 5 | from ecoindex.scraper import EcoindexScraper 6 | 7 | 8 | def test_scraper_init(): 9 | url = "https://www.example.com" 10 | scraper = EcoindexScraper(url=url) # type: ignore 11 | assert scraper.url == url 12 | assert scraper.window_size == WindowSize(width=1920, height=1080) 13 | assert scraper.wait_before_scroll == 1 14 | assert scraper.wait_after_scroll == 1 15 | assert scraper.screenshot is None 16 | assert scraper.screenshot_uid is None 17 | assert scraper.screenshot_gid is None 18 | assert scraper.page_load_timeout == 20 19 | 20 | 21 | def test_scraper_init_with_options(): 22 | url = "https://www.example.com" 23 | window_size = WindowSize(width=800, height=600) 24 | wait_before_scroll = 2 25 | wait_after_scroll = 2 26 | screenshot_uid = 123 27 | screenshot_gid = 456 28 | page_load_timeout = 30 29 | screenshot_id = "123" 30 | screenshot_folder = "/tmp/screenshots" 31 | 32 | scraper = EcoindexScraper( 33 | url=url, # type: ignore 34 | window_size=window_size, 35 | wait_before_scroll=wait_before_scroll, 36 | wait_after_scroll=wait_after_scroll, 37 | screenshot=ScreenShot(id=screenshot_id, folder=screenshot_folder), 38 | screenshot_uid=screenshot_uid, 39 | screenshot_gid=screenshot_gid, 40 | page_load_timeout=page_load_timeout, 41 | ) 42 | 43 | assert scraper.url == url 44 | assert scraper.window_size == window_size 45 | assert scraper.wait_before_scroll == wait_before_scroll 46 | assert scraper.wait_after_scroll == wait_after_scroll 47 | assert scraper.screenshot.get_png() == f"{screenshot_folder}/{screenshot_id}.png" # type: ignore 48 | assert scraper.screenshot.get_webp() == f"{screenshot_folder}/{screenshot_id}.webp" # type: ignore 49 | assert scraper.screenshot_gid == screenshot_gid 50 | assert scraper.page_load_timeout == page_load_timeout 51 | 52 | 53 | def test_get_request_size(): 54 | mock_stripped_har_entry = ( 55 | { 56 | "request": { 57 | "url": "https://www.ecoindex.fr/", 58 | }, 59 | "response": { 60 | "status": 200, 61 | "headers": [ 62 | {"name": "content-length", "value": "7347"}, 63 | ], 64 | "content": { 65 | "mimeType": "text/html", 66 | }, 67 | "_transferSize": 7772, 68 | }, 69 | }, 70 | { 71 | "request": { 72 | "url": "https://www.ecoindex.fr/", 73 | }, 74 | "response": { 75 | "status": 200, 76 | "headers": [ 77 | {"name": "content-length", "value": "7347"}, 78 | ], 79 | "content": { 80 | "mimeType": "text/html", 81 | }, 82 | "_transferSize": -1, 83 | }, 84 | }, 85 | { 86 | "request": { 87 | "url": "https://www.ecoindex.fr/", 88 | }, 89 | "response": { 90 | "status": 206, 91 | "headers": [ 92 | {"name": "Content-Length", "value": "7347"}, 93 | ], 94 | "content": { 95 | "mimeType": "text/html", 96 | }, 97 | "_transferSize": -1, 98 | }, 99 | }, 100 | ) 101 | url = "https://www.example.com" 102 | window_size = WindowSize(width=800, height=600) 103 | wait_before_scroll = 2 104 | wait_after_scroll = 2 105 | screenshot_uid = 123 106 | screenshot_gid = 456 107 | page_load_timeout = 30 108 | screenshot_id = "123" 109 | screenshot_folder = "/tmp/screenshots" 110 | 111 | scraper = EcoindexScraper( 112 | url=url, # type: ignore 113 | window_size=window_size, 114 | wait_before_scroll=wait_before_scroll, 115 | wait_after_scroll=wait_after_scroll, 116 | screenshot=ScreenShot(id=screenshot_id, folder=screenshot_folder), 117 | screenshot_uid=screenshot_uid, 118 | screenshot_gid=screenshot_gid, 119 | page_load_timeout=page_load_timeout, 120 | ) 121 | assert scraper.get_request_size(mock_stripped_har_entry[0]) == 7772 122 | assert 
scraper.get_request_size(mock_stripped_har_entry[1]) == len(
        json.dumps(mock_stripped_har_entry[1]["response"]).encode("utf-8")
    )
    assert scraper.get_request_size(mock_stripped_har_entry[2]) == len(
        json.dumps(mock_stripped_har_entry[2]["response"]).encode("utf-8")
    )


def test_check_page_response():
    mock_stripped_har_entry = (
        {
            "response": {
                "status": 200,
                "headers": {"content-type": "audio/mpeg"},
            }
        },
        {
            "response": {
                "status": 404,
                "headers": {"content-type": "text/html"},
                "status_text": "Not Found",
            }
        },
        {
            "response": {
                "status": 200,
                "headers": {"content-type": "text/html"},
            }
        },
    )
    url = "https://www.example.com"
    window_size = WindowSize(width=800, height=600)
    wait_before_scroll = 2
    wait_after_scroll = 2
    screenshot_uid = 123
    screenshot_gid = 456
    page_load_timeout = 30
    screenshot_id = "123"
    screenshot_folder = "/tmp/screenshots"

    scraper = EcoindexScraper(
        url=url,  # type: ignore
        window_size=window_size,
        wait_before_scroll=wait_before_scroll,
        wait_after_scroll=wait_after_scroll,
        screenshot=ScreenShot(id=screenshot_id, folder=screenshot_folder),
        screenshot_uid=screenshot_uid,
        screenshot_gid=screenshot_gid,
        page_load_timeout=page_load_timeout,
    )
    try:
        scraper.check_page_response(mock_stripped_har_entry[0])
    except TypeError as e:
        assert str(e) == str(
            {
                "mimetype": "audio/mpeg",
                "message": (
                    "This resource is not " "a standard page with mimeType 'text/html'"
                ),
            }
        )

    try:
        scraper.check_page_response(mock_stripped_har_entry[1])
    except EcoindexScraperStatusException as e:
        assert str(e) == str(
            {
                "url": "https://www.example.com",
                "status": 404,
                "message": mock_stripped_har_entry[1]["response"]["status_text"],
            }
        )

    assert scraper.check_page_response(mock_stripped_har_entry[2]) is None
--------------------------------------------------------------------------------
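A tighter way to express the two failure checks above is `pytest.raises`, which fails the test when no exception is raised at all (a bare `try`/`except` passes silently in that case). A sketch, assuming `check_page_response` is synchronous and raises exactly the exceptions asserted in the test:

```python
import pytest

from ecoindex.exceptions.scraper import EcoindexScraperStatusException
from ecoindex.scraper import EcoindexScraper


def test_check_page_response_failures():
    scraper = EcoindexScraper(url="https://www.example.com")  # type: ignore

    # A non-HTML top-level resource is rejected with a TypeError
    with pytest.raises(TypeError):
        scraper.check_page_response(
            {"response": {"status": 200, "headers": {"content-type": "audio/mpeg"}}}
        )

    # A non-2xx status raises the dedicated scraper exception
    with pytest.raises(EcoindexScraperStatusException):
        scraper.check_page_response(
            {
                "response": {
                    "status": 404,
                    "headers": {"content-type": "text/html"},
                    "status_text": "Not Found",
                }
            }
        )
```
--------------------------------------------------------------------------------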
/test/components/ecoindex/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/scripts/__init__.py
--------------------------------------------------------------------------------
/test/components/ecoindex/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/utils/__init__.py
--------------------------------------------------------------------------------
/test/components/ecoindex/worker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/worker/__init__.py
--------------------------------------------------------------------------------
/workspace.toml:
--------------------------------------------------------------------------------
[tool.polylith]
namespace = "ecoindex"
git_tag_pattern = "stable-*"

[tool.polylith.structure]
theme = "loose"

[tool.polylith.resources]
brick_docs_enabled = false

[tool.polylith.test]
enabled = true
--------------------------------------------------------------------------------