├── .devcontainer
│   └── devcontainer.json
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug-report.yml
│   │   └── new-feature.yml
│   ├── labeler.yml
│   └── workflows
│       ├── build_publish_api.yml
│       ├── build_publish_cli.yml
│       ├── build_publish_compute.yml
│       ├── build_publish_scraper.yml
│       ├── pr_validation.yml
│       ├── quality_check.yml
│       └── stale.yml
├── .gitignore
├── .vscode
│   └── launch.json
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── Taskfile.yml
├── bases
│   └── ecoindex
│       ├── backend
│       │   ├── VERSION
│       │   ├── __init__.py
│       │   ├── dependencies
│       │   │   ├── __init__.py
│       │   │   ├── bff.py
│       │   │   ├── compute.py
│       │   │   ├── dates.py
│       │   │   ├── host.py
│       │   │   ├── id.py
│       │   │   ├── pagination.py
│       │   │   ├── validation.py
│       │   │   └── version.py
│       │   ├── main.py
│       │   ├── middlewares
│       │   │   ├── cors.py
│       │   │   └── exception_handler.py
│       │   ├── models
│       │   │   ├── __init__.py
│       │   │   ├── dependencies_parameters
│       │   │   │   ├── __init__.py
│       │   │   │   ├── bff.py
│       │   │   │   ├── compute.py
│       │   │   │   ├── dates.py
│       │   │   │   ├── host.py
│       │   │   │   ├── id.py
│       │   │   │   ├── pagination.py
│       │   │   │   └── version.py
│       │   │   └── parameters.py
│       │   ├── routers
│       │   │   ├── __init__.py
│       │   │   ├── bff.py
│       │   │   ├── compute.py
│       │   │   ├── ecoindex.py
│       │   │   ├── health.py
│       │   │   ├── host.py
│       │   │   └── tasks.py
│       │   ├── scripts
│       │   │   ├── __init__.py
│       │   │   └── openapi.py
│       │   ├── services
│       │   │   ├── __init__.py
│       │   │   ├── cache.py
│       │   │   └── ecoindex.py
│       │   └── utils
│       │       └── __init__.py
│       ├── cli
│       │   ├── VERSION
│       │   ├── __init__.py
│       │   ├── app.py
│       │   ├── arguments_handler.py
│       │   ├── console_output.py
│       │   ├── crawl.py
│       │   ├── helper.py
│       │   ├── report.py
│       │   ├── sitemap.py
│       │   └── template.html
│       └── worker
│           ├── __init__.py
│           ├── health.py
│           └── tasks.py
├── components
│   └── ecoindex
│       ├── compute
│       │   ├── VERSION
│       │   ├── __init__.py
│       │   └── ecoindex.py
│       ├── config
│       │   ├── __init__.py
│       │   └── settings.py
│       ├── data
│       │   ├── __init__.py
│       │   ├── colors.py
│       │   ├── grades.py
│       │   ├── medians.py
│       │   ├── quantiles.py
│       │   └── targets.py
│       ├── database
│       │   ├── __init__.py
│       │   ├── engine.py
│       │   ├── exceptions
│       │   │   └── quota.py
│       │   ├── helper.py
│       │   ├── models
│       │   │   └── __init__.py
│       │   └── repositories
│       │       ├── __init__.py
│       │       ├── ecoindex.py
│       │       ├── host.py
│       │       └── worker.py
│       ├── exceptions
│       │   ├── __init__.py
│       │   ├── scraper.py
│       │   └── worker.py
│       ├── models
│       │   ├── __init__.py
│       │   ├── api.py
│       │   ├── cli.py
│       │   ├── compute.py
│       │   ├── enums.py
│       │   ├── response_examples.py
│       │   ├── scraper.py
│       │   ├── sort.py
│       │   └── tasks.py
│       ├── scraper
│       │   ├── VERSION
│       │   ├── __init__.py
│       │   ├── helper.py
│       │   └── scrap.py
│       ├── scripts
│       │   ├── __init__.py
│       │   └── update_values.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── cli_translations
│       │   │   ├── en.yml
│       │   │   └── fr.yml
│       │   ├── files.py
│       │   └── screenshots.py
│       └── worker_component
│           └── __init__.py
├── development
│   ├── ecoindex_compute.py
│   ├── ecoindex_scraper.py
│   └── scraper_test.py
├── docs
│   └── images
│       └── ecoindex-python-fullstack.png
├── poetry.lock
├── poetry.toml
├── projects
│   ├── ecoindex_api
│   │   ├── .dockerignore
│   │   ├── .env.template
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── Taskfile.yml
│   │   ├── alembic.ini
│   │   ├── alembic
│   │   │   ├── README
│   │   │   ├── env.py
│   │   │   ├── script.py.mako
│   │   │   └── versions
│   │   │       ├── 5afa2faea43f_.py
│   │   │       ├── 7eaafaa65b32_update_url_field_type_to_text.py
│   │   │       ├── 826abb0c4222_add_ecoindex_version_field.py
│   │   │       ├── e83263a5def4_add_index_id_and_host.py
│   │   │       └── fd9a1f5662c8_first_migration.py
│   │   ├── docker-compose.yml.template
│   │   ├── docker
│   │   │   ├── backend
│   │   │   │   ├── dockerfile
│   │   │   │   └── entrypoint.sh
│   │   │   └── worker
│   │   │       ├── dockerfile
│   │   │       └── entrypoint.sh
│   │   ├── openapi.json
│   │   ├── poetry.lock
│   │   ├── pyproject.toml
│   │   └── screenshots
│   │       └── .gitkeep
│   ├── ecoindex_cli
│   │   ├── .dockerignore
│   │   ├── README.md
│   │   ├── Taskfile.yml
│   │   ├── doc
│   │   │   └── report.png
│   │   ├── dockerfile
│   │   ├── poetry.lock
│   │   └── pyproject.toml
│   ├── ecoindex_compute
│   │   ├── README.md
│   │   ├── Taskfile.yml
│   │   ├── poetry.lock
│   │   └── pyproject.toml
│   └── ecoindex_scraper
│       ├── README.md
│       ├── Taskfile.yml
│       ├── dockerfile
│       ├── poetry.lock
│       └── pyproject.toml
├── pyproject.toml
├── tasks
│   ├── DockerTaskfile.yml
│   ├── PoetryTaskfile.yml
│   ├── PypiTaskFile.yml
│   └── QualityTaskFile.yml
├── test
│   ├── bases
│   │   └── ecoindex
│   │       ├── backend
│   │       │   └── __init__.py
│   │       ├── cli
│   │       │   ├── __init__.py
│   │       │   ├── test_app.py
│   │       │   ├── test_arguments_handler.py
│   │       │   └── test_helper.py
│   │       └── worker
│   │           └── __init__.py
│   └── components
│       └── ecoindex
│           ├── compute
│           │   ├── __init__.py
│           │   ├── test_ecoindex.py
│           │   └── test_models.py
│           ├── data
│           │   └── __init__.py
│           ├── exceptions
│           │   └── __init__.py
│           ├── models
│           │   ├── __init__.py
│           │   └── test_scraper.py
│           ├── scraper
│           │   ├── __init__.py
│           │   └── test_scraper.py
│           ├── scripts
│           │   └── __init__.py
│           ├── utils
│           │   └── __init__.py
│           └── worker
│               └── __init__.py
└── workspace.toml
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Ecoindex python full stack dev container",
3 | "image": "mcr.microsoft.com/devcontainers/python:3.12",
4 | "postCreateCommand": "pipx install poetry==1.8.5 && poetry self add poetry-multiproject-plugin && poetry self add poetry-polylith-plugin",
5 | "features": {
6 | "ghcr.io/audacioustux/devcontainers/taskfile": {},
7 | "ghcr.io/devcontainers/features/docker-in-docker:2": {
8 | "installDockerBuildx": true,
9 | "version": "latest",
10 | "dockerDashComposeVersion": "v2"
11 | }
12 | },
13 | "forwardPorts": [
14 | 8000
15 | ],
16 | "customizations": {
17 | "vscode": {
18 | "extensions": [
19 | "-ms-python.autopep8",
20 | "adrianwilczynski.alpine-js-intellisense",
21 | "adrianwilczynski.alpine-js-intellisense",
22 | "bierner.markdown-emoji",
23 | "charliermarsh.ruff",
24 | "Codeium.codeium",
25 | "github.vscode-github-actions",
26 | "Gruntfuggly.todo-tree",
27 | "mhutchie.git-graph",
28 | "ms-azuretools.vscode-docker",
29 | "ms-python.mypy-type-checker",
30 | "ms-python.python",
31 | "Perkovec.emoji",
32 | "samuelcolvin.jinjahtml",
33 | "tamasfe.even-better-toml",
34 | "ue.alphabetical-sorter",
35 | "yzhang.markdown-all-in-one",
36 | "esbenp.prettier-vscode",
37 | "ms-pyright.pyright",
38 | "-ms-python.vscode-pylance"
39 | ]
40 | }
41 | }
42 | }
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.yml:
--------------------------------------------------------------------------------
1 | name: Bug Report
2 | description: File a bug report
3 | title: "[Bug]: "
4 | labels: [bug, triage]
5 | body:
6 | - type: markdown
7 | attributes:
8 | value: |
9 | Thanks for taking the time to fill out this bug report! :heart:
10 | - type: textarea
11 | id: what-happened
12 | attributes:
13 | label: What happened?
14 | description: Also tell us, what did you expect to happen?
15 | placeholder: Tell us what you see!
16 | value: "A bug happened!"
17 | validations:
18 | required: true
19 | - type: dropdown
20 | id: project
21 | attributes:
22 | label: Project
23 | description: Which project is affected by this bug?
24 | options:
25 | - Ecoindex API
26 | - Ecoindex CLI
27 | - Ecoindex Compute
28 | - Ecoindex Scraper
29 | validations:
30 | required: true
31 | - type: dropdown
32 | id: os
33 | attributes:
34 | label: What OS do you use?
35 | multiple: true
36 | options:
37 | - Windows
38 | - Linux
39 | - Mac
40 | - type: textarea
41 | id: urls
42 | attributes:
43 | label: URLs
44 | description: Can you provide one or more example URLs where you encounter this issue?
45 | validations:
46 | required: false
47 | - type: textarea
48 | id: logs
49 | attributes:
50 | label: Relevant log output
51 | description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
52 | render: shell
53 | - type: checkboxes
54 | id: terms
55 | attributes:
56 | label: Code of Conduct
57 | description: By submitting this issue, you agree to follow our [Code of Conduct](CODE_OF_CONDUCT.md)
58 | options:
59 | - label: I agree to follow this project's Code of Conduct
60 | required: true
61 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/new-feature.yml:
--------------------------------------------------------------------------------
1 | name: New feature
2 | description: Request a new feature
3 | title: "[Feature]: "
4 | labels: [enhancement, triage]
5 | body:
6 | - type: markdown
7 | attributes:
8 | value: |
9 | Thanks for taking the time to request a new feature! :heart:
10 | - type: textarea
11 | id: what-feature
12 | attributes:
13 | label: What feature do you want?
14 | description: Describe the feature you want to see in this project
15 | placeholder: Tell us what you want!
16 | value: "I want a new feature!"
17 | validations:
18 | required: true
19 | - type: dropdown
20 | id: project
21 | attributes:
22 | label: Project
23 | description: Which project is affected by this feature request?
24 | options:
25 | - Ecoindex API
26 | - Ecoindex CLI
27 | - Ecoindex Compute
28 | - Ecoindex Scraper
29 | validations:
30 | required: true
31 | - type: textarea
32 | id: why-feature
33 | attributes:
34 | label: Why do you want this feature?
35 | description: Tell us why you want this feature
36 | placeholder: Tell us why you want this feature!
37 | value: "I want this feature because..."
38 | validations:
39 | required: true
40 | - type: checkboxes
41 | id: terms
42 | attributes:
43 | label: Code of Conduct
44 | description: By submitting this issue, you agree to follow our [Code of Conduct](CODE_OF_CONDUCT.md)
45 | options:
46 | - label: I agree to follow this project's Code of Conduct
47 | required: true
48 |
--------------------------------------------------------------------------------
/.github/labeler.yml:
--------------------------------------------------------------------------------
1 | ci/cd:
2 | - .github/workflows/*
3 |
4 | documentation:
5 | - README.md
6 | - ./**/*.md
7 |
8 | tooling:
9 | - ./**/*TaskFile.yml
10 |
11 | tests:
12 | - test/**
13 |
14 | compute:
15 | - components/ecoindex/compute/**
16 |
17 | scraper:
18 | - components/ecoindex/scraper/**
19 |
20 | cli:
21 | - bases/ecoindex/cli/**
22 |
23 | api:
24 | - bases/ecoindex/backend/**
25 | - bases/ecoindex/worker/**
26 |
27 | components:
28 | - components
--------------------------------------------------------------------------------
/.github/workflows/pr_validation.yml:
--------------------------------------------------------------------------------
1 | name: "Validate PR"
2 |
3 | on:
4 | pull_request:
5 | types: [opened, edited, synchronize, reopened]
6 |
7 | permissions:
8 | pull-requests: write
9 |
10 | jobs:
11 | title-lint:
12 | name: Validate PR title
13 | runs-on: ubuntu-latest
14 | concurrency: pr-lint
15 | steps:
16 | - uses: amannn/action-semantic-pull-request@v5
17 | env:
18 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
19 |
20 | triage:
21 | runs-on: ubuntu-latest
22 | concurrency: triage
23 | steps:
24 | - uses: actions/checkout@v4
25 | - uses: actions/labeler@v4
26 | with:
27 | repo-token: "${{ secrets.GITHUB_TOKEN }}"
28 |
29 | size-label:
30 | runs-on: ubuntu-latest
31 | concurrency: size-label
32 | steps:
33 | - uses: actions/checkout@v4
34 | - name: size-label
35 | uses: "pascalgn/size-label-action@v0.5.0"
36 | env:
37 | GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
38 | with:
39 | sizes: >
40 | {
41 | "0": "XS",
42 | "20": "S",
43 | "50": "M",
44 | "250": "Too Large"
45 | }
46 |
--------------------------------------------------------------------------------
/.github/workflows/quality_check.yml:
--------------------------------------------------------------------------------
1 | name: Validate project quality
2 |
3 | on:
4 | pull_request:
5 | types: [opened, edited, synchronize, reopened]
6 | push:
7 | branches: [main]
8 |
9 | permissions:
10 | contents: write
11 | pull-requests: write
12 |
13 | jobs:
14 | project-quality:
15 | name: Validate project quality
16 | runs-on: ubuntu-latest
17 | steps:
18 | - uses: actions/checkout@v4
19 | - name: Install Task
20 | uses: arduino/setup-task@v1
21 | with:
22 | version: 3.x
23 | repo-token: ${{ secrets.GITHUB_TOKEN }}
24 | - name: Install poetry and plugins
25 | run: |
26 | curl -sSL https://install.python-poetry.org | python -
27 | poetry config virtualenvs.create true
28 | poetry self add poetry-multiproject-plugin
29 | poetry self add poetry-polylith-plugin
30 | - name: Install dependencies
31 | run: |
32 | poetry install
33 | - name: Validate polylith project
34 | run: |
35 | task project-check
36 | - name: Validate code quality
37 | run: |
38 | task quality
39 | - name: Pytest coverage comment
40 | uses: MishaKav/pytest-coverage-comment@main
41 | with:
42 | pytest-xml-coverage-path: ./coverage.xml
43 | title: Branch coverage
44 | badge-title: Coverage PR ${{ github.event.pull_request.number }}
45 | report-only-changed-files: true
46 |
--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | name: Stale bot
2 |
3 | on:
4 | schedule:
5 | - cron: "0 0 * * *"
6 |
7 | permissions:
8 | contents: write
9 | issues: write
10 | pull-requests: write
11 |
12 | jobs:
13 | stale:
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: actions/stale@v9
17 | with:
18 | repo-token: ${{ secrets.GITHUB_TOKEN }}
19 | stale-issue-message: This issue has been marked as inactive because it hasn't been updated for 30 days. If it's not updated within 7 days, it will be automatically closed. To prevent it from being closed, you can add the `keep open` label.
20 | stale-pr-message: This pull request has been marked as inactive because it hasn't been updated for 30 days. If it's not updated within 7 days, it will be automatically closed. To prevent it from being closed, you can add the `keep open` label.
21 | exempt-pr-labels: "keep open"
22 | exempt-issue-labels: "keep open"
23 | close-issue-message: This issue has been closed because it's been inactive for 37 days. If you think this is a mistake, you can reopen it.
24 | close-pr-message: This pull request has been closed because it's been inactive for 37 days. If you think this is a mistake, you can reopen it.
25 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | dist
3 | *.webp
4 | *.sqlite3
5 | .coverage
6 | coverage.xml
7 | *.csv
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | // Use IntelliSense to learn about possible attributes.
3 | // Hover to view descriptions of existing attributes.
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5 | "version": "0.2.0",
6 | "configurations": [
7 |
8 | {
9 | "name": "Python Debugger: FastAPI",
10 | "type": "debugpy",
11 | "request": "launch",
12 | "module": "uvicorn",
13 | "args": [
14 | "ecoindex.backend.main:app",
15 | "--reload"
16 | ],
17 | "jinja": true
18 | }
19 | ]
20 | }
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | ## Our Standards
8 |
9 | Examples of behavior that contributes to creating a positive environment include:
10 |
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 |
17 | Examples of unacceptable behavior by participants include:
18 |
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 |
25 | ## Our Responsibilities
26 |
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 |
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 |
31 | ## Scope
32 |
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 |
35 | ## Enforcement
36 |
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 |
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 |
41 | ## Attribution
42 |
43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
44 |
45 | [homepage]: https://www.contributor-covenant.org
46 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to ecoindex_python
2 |
3 | We love your input! We want to make contributing to this project as easy and transparent as possible, whether it's:
4 |
5 | - Reporting a bug
6 | - Discussing the current state of the code
7 | - Submitting a fix
8 | - Proposing new features
9 | - Becoming a maintainer
10 |
11 | ## We Develop with GitHub
12 |
13 | We use GitHub to host code, track issues and feature requests, and accept pull requests.
14 |
15 | ## We Use [GitHub Flow](https://guides.github.com/introduction/flow/index.html), So All Code Changes Happen Through Pull Requests
16 |
17 | Pull requests are the best way to propose changes to the codebase (we use [GitHub Flow](https://guides.github.com/introduction/flow/index.html)). We actively welcome your pull requests:
18 |
19 | 1. Fork the repo and create your branch from `main`.
20 | 2. If you've added code that should be tested, add tests.
21 | 3. Ensure the test suite passes.
22 | 4. Make sure your code lints.
23 | 5. Issue that pull request!
24 |
25 | ## Any contributions you make will be under the Creative Commons Software License
26 |
27 | In short, when you submit code changes, your submissions are understood to be under the same [Creative Commons License](LICENSE) that covers the project. Feel free to contact the maintainers if that's a concern.
28 |
29 | ## Report bugs using GitHub's issues
30 |
31 | We use GitHub issues to track public bugs. Report a bug by opening a new issue; it's that easy!
32 |
33 | ## Write bug reports with detail, background, and sample code
34 |
35 | **Great Bug Reports** tend to have:
36 |
37 | - A quick summary and/or background
38 | - Steps to reproduce
39 | - Be specific!
40 | - Give sample code if you can. [An example question](http://stackoverflow.com/q/12488905/180626) includes sample code that *anyone* with a base R setup can run to reproduce what I was seeing
41 | - What you expected would happen
42 | - What actually happens
43 | - Notes (possibly including why you think this might be happening, or stuff you tried that didn't work)
44 |
45 | People *love* thorough bug reports. I'm not even kidding.
46 |
47 | ## Use a Consistent Coding Style
48 |
49 | We use [black](https://github.com/psf/black) for code formatting.
50 |
51 | ## License
52 |
53 | By contributing, you agree that your contributions will be licensed under the project's Creative Commons License.
54 |
55 | ## References
56 |
57 | This document was adapted from the open-source contribution guidelines for [Facebook's Draft](https://github.com/facebook/draft-js/blob/a9316a723f9e918afde44dea68b5f9f39b7d9b00/CONTRIBUTING.md)
58 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Ecoindex Python Fullstack
2 |
3 | [](https://github.com/cnumr/ecoindex_python_fullstack/actions/workflows/quality_check.yml)
4 |
5 | 
6 | 
7 |
8 | 
9 | 
10 |
11 | 
12 | 
13 | 
14 |
15 | 
16 | 
17 |
18 | - [Ecoindex Python Fullstack](#ecoindex-python-fullstack)
19 | - [Projects](#projects)
20 | - [Getting started](#getting-started)
21 | - [Requirements](#requirements)
22 | - [Installation](#installation)
23 | - [Usage](#usage)
24 | - [Disclaimer](#disclaimer)
25 | - [License](#license)
26 | - [Contributing](#contributing)
27 | - [Code of conduct](#code-of-conduct)
28 |
29 | This project is a polylith repository for the Ecoindex project. It is called "fullstack" because it contains all the components of the project: the backend that is used in production, as well as a CLI tool and modules that can be used in other projects.
30 |
31 | You can get more information about polylith in the [official documentation](https://polylith.gitbook.io/polylith) and in the [python-polylith tool](https://github.com/DavidVujic/python-polylith) that is used to manage this repository.
32 |
33 | ## Projects
34 |
35 | This repository contains the following projects:
36 |
37 | - [Ecoindex Compute](projects/ecoindex_compute/README.md): this is the base module that provides a simple interface to get the [Ecoindex](http://www.ecoindex.fr) based on 3 parameters:
38 | - The number of DOM elements in the page
39 | - The size of the page
40 | - The number of external requests of the page
41 | - [Ecoindex Scraper](projects/ecoindex_scraper/README.md): This module provides a simple interface to get the [Ecoindex](http://www.ecoindex.fr) based on a URL. It uses [Playwright](https://playwright.dev/) to get the DOM elements, size and requests of the page.
42 | - [Ecoindex CLI](projects/ecoindex_cli/README.md): This module provides a CLI tool to get the [Ecoindex](http://www.ecoindex.fr) based on a URL. It uses the [Ecoindex Scraper](projects/ecoindex_scraper/README.md) module.
43 | - [Ecoindex API](projects/ecoindex_api/README.md): This module provides a REST API to get the [Ecoindex](http://www.ecoindex.fr) based on a URL. It uses the [Ecoindex Scraper](projects/ecoindex_scraper/README.md) module.
44 |
45 | Here is a diagram of the dependencies between the projects:
46 |
47 | 
48 |
49 | ## Getting started
50 |
51 | ### Requirements
52 |
53 | - [Python 3.9+](https://www.python.org/downloads/)
54 | - [Poetry](https://python-poetry.org/docs/#installation)
55 | - [Task](https://taskfile.dev/#/installation)
56 | - [Docker](https://docs.docker.com/get-docker/) (optional)
57 |
58 | ### Installation
59 |
60 | To install main dependencies, run:
61 |
62 | ```bash
63 | task poetry:install
64 | ```
65 |
66 | ### Usage
67 |
68 | Have a look at the task help:
69 |
70 | ```bash
71 | task --list
72 | ```
73 |
74 | ## Disclaimer
75 |
76 | The LCA values used by [ecoindex](https://github.com/cnumr/ecoindex_monorepo) to evaluate environmental impacts are not under a free license - ©Frédéric Bordage
77 | Please also refer to the mentions provided in the code files for specifics on the IP regime.
78 |
79 | ## [License](LICENSE)
80 |
81 | ## [Contributing](CONTRIBUTING.md)
82 |
83 | ## [Code of conduct](CODE_OF_CONDUCT.md)
--------------------------------------------------------------------------------
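
For a quick feel of the compute base the README describes, here is a minimal sketch of calling it directly. The import path and keyword arguments mirror `bases/ecoindex/backend/routers/compute.py` shown later in this dump; the input values are only illustrative.

```python
# A minimal sketch of calling the compute base directly; the import and the
# keyword arguments mirror bases/ecoindex/backend/routers/compute.py.
import asyncio

from ecoindex.compute.ecoindex import compute_ecoindex


async def main() -> None:
    # The same three inputs the API exposes: DOM node count, page size in Kb,
    # and number of HTTP requests.
    result = await compute_ecoindex(nodes=204, size=109, requests=5)
    print(result)  # an Ecoindex model instance


asyncio.run(main())
```
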
/Taskfile.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 |
3 | includes:
4 | api:
5 | taskfile: ./projects/ecoindex_api/Taskfile.yml
6 | dir: ./projects/ecoindex_api/
7 |
8 | cli:
9 | taskfile: ./projects/ecoindex_cli/Taskfile.yml
10 | dir: ./projects/ecoindex_cli/
11 |
12 | compute:
13 | taskfile: ./projects/ecoindex_compute/Taskfile.yml
14 | dir: ./projects/ecoindex_compute/
15 |
16 | scraper:
17 | taskfile: ./projects/ecoindex_scraper/Taskfile.yml
18 | dir: ./projects/ecoindex_scraper/
19 |
20 | poetry: ./tasks/PoetryTaskfile.yml
21 |
22 | quality: ./tasks/QualityTaskFile.yml
23 |
24 | tasks:
25 | project-check:
26 | desc: Check polylith structure project
27 | cmds:
28 | - poetry poly check
29 |
30 | project-sync:
31 | desc: Sync polylith structure project
32 | cmds:
33 | - poetry poly sync
34 |
35 | bump:
36 | desc: Bump the main project version with one of the rules major, minor, patch, premajor, preminor, prepatch or prerelease.
37 | cmds:
38 | - task: poetry:bump
39 | vars:
40 | VERSION_FILE_PATH: "VERSION"
41 | VERSION_RULE: "{{.CLI_ARGS}}"
42 | silent: true
43 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/VERSION:
--------------------------------------------------------------------------------
1 | 3.11.1
2 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | from functools import lru_cache
3 |
4 |
5 | @lru_cache
6 | def get_api_version() -> str:
7 | current_directory = os.path.dirname(os.path.realpath(__file__))
8 | version_filename = os.path.join(current_directory, "VERSION")
9 |
10 | with open(version_filename, "r") as f:
11 | return (f.read()).strip()
12 |
--------------------------------------------------------------------------------
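
A quick sketch of using this helper, assuming the `bases/` packages are importable (e.g. after `task poetry:install` at the workspace root):

```python
# The VERSION file sits next to the package, so this works from any cwd;
# @lru_cache means the file is read only once per process.
from ecoindex.backend import get_api_version

print(get_api_version())  # "3.11.1" at the time of this dump
```
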
/bases/ecoindex/backend/dependencies/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/dependencies/bff.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from ecoindex.backend.models.dependencies_parameters.version import VersionParameter
4 | from ecoindex.backend.models.parameters import BffParameters
5 | from ecoindex.models.enums import Version
6 | from fastapi import Query
7 | from pydantic import AnyHttpUrl
8 |
9 |
10 | def get_bff_parameters(
11 | url: Annotated[AnyHttpUrl, Query(description="Url to be searched in database")],
12 | refresh: Annotated[
13 | bool,
14 | Query(
15 | description="Force the refresh of the cache",
16 | ),
17 | ] = False,
18 | version: VersionParameter = Version.v1,
19 | ) -> BffParameters:
20 | return BffParameters(
21 | url=url,
22 | refresh=refresh,
23 | version=version,
24 | )
25 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/dependencies/compute.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from ecoindex.backend.models.parameters import ComputeParameters
4 | from fastapi import Query
5 |
6 |
7 | def get_compute_parameters(
8 | dom: Annotated[
9 | int,
10 | Query(
11 | default=...,
12 | description="Number of DOM nodes of the page",
13 | gt=0,
14 | example=204,
15 | ),
16 | ],
17 | size: Annotated[
18 | float,
19 | Query(
20 | default=..., description="Total size of the page in Kb", gt=0, example=109
21 | ),
22 | ],
23 | requests: Annotated[
24 | int,
25 | Query(
26 | default=..., description="Number of requests of the page", gt=0, example=5
27 | ),
28 | ],
29 | ) -> ComputeParameters:
30 | return ComputeParameters(dom=dom, size=size, requests=requests)
31 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/dependencies/dates.py:
--------------------------------------------------------------------------------
1 | from datetime import date
2 | from typing import Annotated
3 |
4 | from ecoindex.backend.models.parameters import DateRange
5 | from fastapi import Query
6 |
7 |
8 | def get_date_parameters(
9 | date_from: Annotated[
10 | date | None,
11 | Query(description="Start date of the filter elements (example: 2020-01-01)"),
12 | ] = None,
13 | date_to: Annotated[
14 | date | None,
15 | Query(description="End date of the filter elements (example: 2020-01-01)"),
16 | ] = None,
17 | ) -> DateRange:
18 | return DateRange(date_from=date_from, date_to=date_to)
19 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/dependencies/host.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from fastapi import Query
4 |
5 |
6 | def get_host_parameter(
7 | host: Annotated[
8 | str | None, Query(description="Host name you want to filter (can be partial)")
9 | ] = None,
10 | ) -> str | None:
11 | return host
12 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/dependencies/id.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 | from uuid import UUID
3 |
4 | from fastapi import Path
5 |
6 |
7 | def get_id_parameter(
8 | id: Annotated[
9 | UUID,
10 | Path(default=..., description="Unique identifier of the ecoindex analysis"),
11 | ]
12 | ) -> UUID:
13 | return id
14 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/dependencies/pagination.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from ecoindex.backend.models.parameters import Pagination
4 | from fastapi import Query
5 |
6 |
7 | def get_pagination_parameters(
8 | page: Annotated[int, Query(description="Page number", ge=1)] = 1,
9 | size: Annotated[
10 | int, Query(description="Number of elements per page", ge=1, le=100)
11 | ] = 50,
12 | ) -> Pagination:
13 | return Pagination(page=page, size=size)
14 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/dependencies/validation.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from ecoindex.config.settings import Settings
4 | from fastapi import Header, HTTPException, status
5 |
6 |
7 | def validate_api_key_batch(
8 | api_key: Annotated[
9 | str,
10 | Header(alias="X-Api-Key"),
11 | ],
12 | ):
13 | if not api_key:
14 | raise HTTPException(
15 | status_code=status.HTTP_403_FORBIDDEN,
16 | detail="Invalid API key",
17 | )
18 |
19 | for authorized_api_key in Settings().API_KEYS_BATCH:
20 | if api_key == authorized_api_key["key"]:
21 | return authorized_api_key
22 |
23 | raise HTTPException(
24 | status_code=status.HTTP_403_FORBIDDEN,
25 | detail="Invalid API key",
26 | )
27 |
--------------------------------------------------------------------------------
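
A hedged sketch of attaching this check to a route; the router and endpoint below are hypothetical, only `validate_api_key_batch` comes from the file above:

```python
# Hypothetical route protected by the API-key dependency above.
from ecoindex.backend.dependencies.validation import validate_api_key_batch
from fastapi import APIRouter, Depends

router = APIRouter()


@router.post("/tasks/batch", dependencies=[Depends(validate_api_key_batch)])
async def create_batch_tasks() -> dict:
    # Only reached when the X-Api-Key header matches an entry in
    # Settings().API_KEYS_BATCH; otherwise the dependency raises a 403.
    return {"status": "accepted"}
```
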
/bases/ecoindex/backend/dependencies/version.py:
--------------------------------------------------------------------------------
1 | from ecoindex.models.enums import Version
2 | from fastapi import Path
3 |
4 |
5 | def get_version_parameter(
6 | version: Version = Path(
7 | default=...,
8 | title="Engine version",
9 | description="Engine version used to run the analysis (v0 or v1)",
10 | example=Version.v1.value,
11 | )
12 | ) -> Version:
13 | return version
14 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/main.py:
--------------------------------------------------------------------------------
1 | from ecoindex.backend import get_api_version
2 | from ecoindex.backend.routers import router
3 | from ecoindex.backend.services.cache import cache
4 | from ecoindex.config import Settings
5 | from ecoindex.database.engine import init_db
6 | from fastapi import FastAPI
7 | from fastapi.concurrency import asynccontextmanager
8 | from sentry_sdk import init as sentry_init
9 |
10 |
11 | def init_app():
12 | cache.init()
13 | if Settings().GLITCHTIP_DSN:
14 | sentry_init(Settings().GLITCHTIP_DSN)
15 |
16 | @asynccontextmanager
17 | async def lifespan(app: FastAPI):
18 | await init_db()
19 | yield
20 |
21 | app = FastAPI(
22 | title="Ecoindex API",
23 | version=get_api_version(),
24 | description=(
25 | "Ecoindex API enables you to perform ecoindex analysis of given web pages"
26 | ),
27 | lifespan=lifespan,
28 | )
29 |
30 | app.include_router(router)
31 |
32 | from ecoindex.backend.middlewares.cors import add_cors_middleware
33 | from ecoindex.backend.middlewares.exception_handler import handle_exceptions
34 |
35 | handle_exceptions(app)
36 | add_cors_middleware(app)
37 |
38 | return app
39 |
40 |
41 | app = init_app()
42 |
--------------------------------------------------------------------------------
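
A minimal way to serve this app locally, equivalent to the VS Code launch configuration shown earlier (host and port are assumptions; the devcontainer forwards port 8000):

```python
# Programmatic equivalent of `python -m uvicorn ecoindex.backend.main:app --reload`.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("ecoindex.backend.main:app", host="127.0.0.1", port=8000, reload=True)
```
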
/bases/ecoindex/backend/middlewares/cors.py:
--------------------------------------------------------------------------------
1 | from ecoindex.config import Settings
2 | from fastapi import FastAPI
3 | from fastapi.middleware.cors import CORSMiddleware
4 |
5 |
6 | def add_cors_middleware(app: FastAPI):
7 | app.add_middleware(
8 | CORSMiddleware,
9 | allow_credentials=Settings().CORS_ALLOWED_CREDENTIALS,
10 | allow_headers=Settings().CORS_ALLOWED_HEADERS,
11 | allow_methods=Settings().CORS_ALLOWED_METHODS,
12 | allow_origins=Settings().CORS_ALLOWED_ORIGINS,
13 | )
14 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/middlewares/exception_handler.py:
--------------------------------------------------------------------------------
1 | from ecoindex.backend.utils import format_exception_response
2 | from ecoindex.database.exceptions.quota import QuotaExceededException
3 | from fastapi import FastAPI, Request, status
4 | from fastapi.responses import JSONResponse
5 |
6 | HTTP_520_ECOINDEX_TYPE_ERROR = 520
7 | HTTP_521_ECOINDEX_CONNECTION_ERROR = 521
8 |
9 |
10 | def handle_exceptions(app: FastAPI):
11 | @app.exception_handler(RuntimeError)
12 | async def handle_screenshot_not_found_exception(_: Request, exc: RuntimeError):
13 | return JSONResponse(
14 | content={"detail": str(exc)},
15 | status_code=status.HTTP_404_NOT_FOUND,
16 | )
17 |
18 | @app.exception_handler(TypeError)
19 | async def handle_resource_type_error(_: Request, exc: TypeError):
20 | return JSONResponse(
21 | content={"detail": exc.args[0]},
22 | status_code=HTTP_520_ECOINDEX_TYPE_ERROR,
23 | )
24 |
25 | @app.exception_handler(ConnectionError)
26 | async def handle_connection_error(_: Request, exc: ConnectionError):
27 | return JSONResponse(
28 | content={"detail": exc.args[0]},
29 | status_code=HTTP_521_ECOINDEX_CONNECTION_ERROR,
30 | )
31 |
32 | @app.exception_handler(QuotaExceededException)
33 | async def handle_quota_exceeded_exception(_: Request, exc: QuotaExceededException):
34 | return JSONResponse(
35 | status_code=status.HTTP_429_TOO_MANY_REQUESTS,
36 | content={"detail": exc.__dict__},
37 | )
38 |
39 | @app.exception_handler(Exception)
40 | async def handle_exception(_: Request, exc: Exception):
41 | exception_response = await format_exception_response(exception=exc)
42 | return JSONResponse(
43 | content={"detail": exception_response.model_dump()},
44 | status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
45 | )
46 |
--------------------------------------------------------------------------------
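
Callers have to recognize the two custom status codes registered above; a hedged client-side sketch (`httpx`, the host, and the query are illustrative assumptions):

```python
# Map the API's non-standard error codes back to human-readable causes.
import httpx

CUSTOM_ERRORS = {
    520: "ecoindex type error",
    521: "ecoindex connection error",
}

response = httpx.get(
    "http://localhost:8000/v1/ecoindexes/latest",
    params={"url": "https://www.example.com"},
)
print(CUSTOM_ERRORS.get(response.status_code, response.reason_phrase))
```
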
/bases/ecoindex/backend/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/backend/models/__init__.py
--------------------------------------------------------------------------------
/bases/ecoindex/backend/models/dependencies_parameters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/backend/models/dependencies_parameters/__init__.py
--------------------------------------------------------------------------------
/bases/ecoindex/backend/models/dependencies_parameters/bff.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from ecoindex.backend.dependencies.bff import get_bff_parameters
4 | from ecoindex.backend.models.parameters import BffParameters
5 | from fastapi import Depends
6 |
7 | BffDepParameters = Annotated[BffParameters, Depends(get_bff_parameters)]
8 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/models/dependencies_parameters/compute.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from ecoindex.backend.dependencies.compute import get_compute_parameters
4 | from ecoindex.backend.models.parameters import ComputeParameters
5 | from fastapi import Depends
6 |
7 | ComputeDepParameters = Annotated[ComputeParameters, Depends(get_compute_parameters)]
8 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/models/dependencies_parameters/dates.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from ecoindex.backend.dependencies.dates import get_date_parameters
4 | from ecoindex.backend.models.parameters import DateRange
5 | from fastapi import Depends
6 |
7 | DateRangeParameters = Annotated[DateRange, Depends(get_date_parameters)]
8 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/models/dependencies_parameters/host.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from ecoindex.backend.dependencies.host import get_host_parameter
4 | from fastapi import Depends
5 |
6 | HostParameter = Annotated[str | None, Depends(get_host_parameter)]
7 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/models/dependencies_parameters/id.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 | from uuid import UUID
3 |
4 | from ecoindex.backend.dependencies.id import get_id_parameter
5 | from fastapi import Depends
6 |
7 | IdParameter = Annotated[UUID, Depends(get_id_parameter)]
8 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/models/dependencies_parameters/pagination.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from ecoindex.backend.dependencies.pagination import get_pagination_parameters
4 | from ecoindex.backend.models.parameters import Pagination
5 | from fastapi import Depends
6 |
7 | PaginationParameters = Annotated[Pagination, Depends(get_pagination_parameters)]
8 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/models/dependencies_parameters/version.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from ecoindex.backend.dependencies.version import get_version_parameter
4 | from ecoindex.models.enums import Version
5 | from fastapi import Depends
6 |
7 | VersionParameter = Annotated[Version, Depends(get_version_parameter)]
8 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/models/parameters.py:
--------------------------------------------------------------------------------
1 | from datetime import date
2 |
3 | from ecoindex.models.enums import Version
4 | from pydantic import AnyHttpUrl, BaseModel
5 |
6 |
7 | class Pagination(BaseModel):
8 | page: int = 1
9 | size: int = 50
10 |
11 |
12 | class DateRange(BaseModel):
13 | date_from: date | None = None
14 | date_to: date | None = None
15 |
16 |
17 | class BffParameters(BaseModel):
18 | url: AnyHttpUrl
19 | refresh: bool = False
20 | version: Version = Version.v1
21 |
22 |
23 | class ComputeParameters(BaseModel):
24 | dom: int
25 | size: float
26 | requests: int
27 |
--------------------------------------------------------------------------------
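
A small sketch of how these models behave when instantiated (Pydantic v2 assumed, consistent with the `model_dump()` call in the exception handler above):

```python
# Defaults match what the dependency functions document: refresh off, engine v1,
# 50 results per page starting at page 1.
from ecoindex.backend.models.parameters import BffParameters, Pagination

params = BffParameters(url="https://www.example.com")  # str coerced to AnyHttpUrl
print(params.refresh, params.version)  # False Version.v1

print(Pagination())  # page=1 size=50
```
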
/bases/ecoindex/backend/routers/__init__.py:
--------------------------------------------------------------------------------
1 | from ecoindex.backend.routers.bff import router as router_bff
2 | from ecoindex.backend.routers.compute import router as router_compute
3 | from ecoindex.backend.routers.ecoindex import router as router_ecoindex
4 | from ecoindex.backend.routers.health import router as router_health
5 | from ecoindex.backend.routers.host import router as router_host
6 | from ecoindex.backend.routers.tasks import router as router_task
7 | from fastapi import APIRouter
8 |
9 | router = APIRouter()
10 |
11 | router.include_router(router=router_bff)
12 | router.include_router(router=router_ecoindex)
13 | router.include_router(router=router_compute)
14 | router.include_router(router=router_host)
15 | router.include_router(router=router_task)
16 | router.include_router(router=router_health)
17 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/routers/bff.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from ecoindex.backend.models.dependencies_parameters.bff import BffDepParameters
4 | from ecoindex.backend.services.ecoindex import get_badge, get_latest_result_by_url
5 | from ecoindex.config.settings import Settings
6 | from ecoindex.database.engine import get_session
7 | from ecoindex.database.models import EcoindexSearchResults
8 | from ecoindex.models import example_file_not_found
9 | from ecoindex.models.enums import BadgeTheme
10 | from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
11 | from fastapi.responses import RedirectResponse
12 | from sqlmodel.ext.asyncio.session import AsyncSession
13 |
14 | router = APIRouter(prefix="/{version}/ecoindexes", tags=["BFF"])
15 |
16 |
17 | @router.get(
18 | name="Get latest results",
19 | path="/latest",
20 | response_model=EcoindexSearchResults,
21 | response_description="Get latest results for a given url",
22 | )
23 | async def get_latest_results(
24 | response: Response,
25 | parameters: BffDepParameters,
26 | session: AsyncSession = Depends(get_session),
27 | ) -> EcoindexSearchResults:
28 | """
29 | This returns the latest results for a given url. This feature is used by the Ecoindex
30 | browser extension. By default, the results are cached for 7 days.
31 |
32 | If the url is not found in the database, the response status code will be 404.
33 | """
34 | latest_result = await get_latest_result_by_url(
35 | session=session,
36 | url=parameters.url,
37 | refresh=parameters.refresh,
38 | version=parameters.version,
39 | )
40 |
41 | if latest_result.count == 0:
42 | response.status_code = status.HTTP_404_NOT_FOUND
43 |
44 | return latest_result
45 |
46 |
47 | @router.get(
48 | name="Get badge",
49 | path="/latest/badge",
50 | response_description="Badge of the given url from [CDN V1](https://www.jsdelivr.com/package/gh/cnumr/ecoindex_badge)",
51 | responses={status.HTTP_404_NOT_FOUND: example_file_not_found},
52 | )
53 | async def get_badge_endpoint(
54 | parameters: BffDepParameters,
55 | theme: Annotated[
56 | BadgeTheme, Query(description="Theme of the badge")
57 | ] = BadgeTheme.light,
58 | session: AsyncSession = Depends(get_session),
59 | ) -> Response:
60 | """
61 | This returns the SVG badge of the given url. This feature is used by the Ecoindex
62 | badge. By default, the results are cached for 7 days.
63 |
64 | If the url is not found in the database, it will return a badge with the grade `?`.
65 | """
66 | return Response(
67 | content=await get_badge(
68 | session=session,
69 | url=parameters.url,
70 | refresh=parameters.refresh,
71 | version=parameters.version,
72 | theme=theme.value,
73 | ),
74 | media_type="image/svg+xml",
75 | )
76 |
77 |
78 | @router.get(
79 | name="Get latest results redirect",
80 | path="/latest/redirect",
81 | response_description="Redirect to the latest results for a given url",
82 | )
83 | async def get_latest_result_redirect(
84 | parameters: BffDepParameters,
85 | session: AsyncSession = Depends(get_session),
86 | ) -> RedirectResponse:
87 | """
88 | This redirects to the latest results on the frontend website for the given url.
89 | This feature is used by the Ecoindex browser extension and badge.
90 |
91 | If the url is not found in the database, the response status code will be 404.
92 | """
93 | latest_result = await get_latest_result_by_url(
94 | session=session,
95 | url=parameters.url,
96 | refresh=parameters.refresh,
97 | version=parameters.version,
98 | )
99 |
100 | if latest_result.count == 0:
101 | raise HTTPException(
102 | status_code=status.HTTP_404_NOT_FOUND,
103 | detail=f"No analysis found for {parameters.url}",
104 | )
105 |
106 | return RedirectResponse(
107 | url=f"{Settings().FRONTEND_BASE_URL}/resultat/?id={latest_result.latest_result.id}" # type: ignore
108 | )
109 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/routers/compute.py:
--------------------------------------------------------------------------------
1 | from ecoindex.backend.models.dependencies_parameters.compute import ComputeDepParameters
2 | from ecoindex.compute.ecoindex import compute_ecoindex
3 | from ecoindex.models.compute import Ecoindex
4 | from fastapi import APIRouter
5 |
6 | router = APIRouter(prefix="/ecoindex", tags=["Ecoindex"])
7 |
8 |
9 | @router.get(
10 | name="Compute ecoindex",
11 | path="/ecoindex",
12 | tags=["Ecoindex"],
13 | description=(
14 | "This returns the ecoindex computed based on the given parameters: "
15 | "DOM (number of DOM nodes), size (total size in Kb) and requests"
16 | ),
17 | )
18 | async def compute_ecoindex_api(parameters: ComputeDepParameters) -> Ecoindex:
19 | return await compute_ecoindex(
20 | nodes=parameters.dom, size=parameters.size, requests=parameters.requests
21 | )
22 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/routers/ecoindex.py:
--------------------------------------------------------------------------------
1 | from os import getcwd
2 | from typing import Annotated
3 |
4 | from ecoindex.backend.models.dependencies_parameters.dates import DateRangeParameters
5 | from ecoindex.backend.models.dependencies_parameters.host import HostParameter
6 | from ecoindex.backend.models.dependencies_parameters.id import IdParameter
7 | from ecoindex.backend.models.dependencies_parameters.pagination import (
8 | PaginationParameters,
9 | )
10 | from ecoindex.backend.models.dependencies_parameters.version import VersionParameter
11 | from ecoindex.backend.models.parameters import DateRange, Pagination
12 | from ecoindex.backend.utils import get_sort_parameters, get_status_code
13 | from ecoindex.database.engine import get_session
14 | from ecoindex.database.models import (
15 | ApiEcoindex,
16 | PageApiEcoindexes,
17 | )
18 | from ecoindex.database.repositories.ecoindex import (
19 | get_count_analysis_db,
20 | get_ecoindex_result_by_id_db,
21 | get_ecoindex_result_list_db,
22 | )
23 | from ecoindex.models import example_ecoindex_not_found, example_file_not_found
24 | from ecoindex.models.enums import Version
25 | from fastapi import APIRouter, Depends, HTTPException, Response, status
26 | from fastapi.params import Query
27 | from fastapi.responses import FileResponse
28 | from sqlmodel.ext.asyncio.session import AsyncSession
29 |
30 | router = APIRouter(prefix="/{version}/ecoindexes", tags=["Ecoindex"])
31 |
32 |
33 | @router.get(
34 | name="Get ecoindex analysis list",
35 | path="",
36 | response_model=PageApiEcoindexes,
37 | response_description="List of corresponding ecoindex results",
38 | responses={
39 | status.HTTP_206_PARTIAL_CONTENT: {"model": PageApiEcoindexes},
40 | status.HTTP_404_NOT_FOUND: {"model": PageApiEcoindexes},
41 | },
42 | description=(
43 | "This returns a list of ecoindex analysis "
44 | "corresponding to query filters and the given version engine. "
45 | "The results are ordered by ascending date"
46 | ),
47 | )
48 | async def get_ecoindex_analysis_list(
49 | response: Response,
50 | host: HostParameter,
51 | version: VersionParameter = Version.v1,
52 | date_range: DateRangeParameters = DateRange(),
53 | pagination: PaginationParameters = Pagination(),
54 | sort: Annotated[
55 | list[str],
56 | Query(
57 | description=(
58 | "You can sort results using this param with the format "
59 | "`sort=param1:asc&sort=param2:desc`"
60 | )
61 | ),
62 | ] = ["date:desc"],
63 | session: AsyncSession = Depends(get_session),
64 | ) -> PageApiEcoindexes:
65 | ecoindexes = await get_ecoindex_result_list_db(
66 | session=session,
67 | date_from=date_range.date_from,
68 | date_to=date_range.date_to,
69 | host=host,
70 | version=version,
71 | page=pagination.page,
72 | size=pagination.size,
73 | sort_params=await get_sort_parameters(
74 | query_params=sort,
75 | model=ApiEcoindex, # type: ignore
76 | ),
77 | )
78 | total_results = await get_count_analysis_db(
79 | session=session,
80 | version=version,
81 | date_from=date_range.date_from,
82 | date_to=date_range.date_to,
83 | host=host,
84 | )
85 |
86 | response.status_code = await get_status_code(items=ecoindexes, total=total_results)
87 |
88 | return PageApiEcoindexes(
89 | items=ecoindexes,
90 | total=total_results,
91 | page=pagination.page,
92 | size=pagination.size,
93 | )
94 |
95 |
96 | @router.get(
97 | name="Get ecoindex analysis by id",
98 | path="/{id}",
99 | response_model=ApiEcoindex,
100 | response_description="Get one ecoindex result by its id",
101 | responses={status.HTTP_404_NOT_FOUND: example_ecoindex_not_found},
102 | description="This returns an ecoindex given by its unique identifier",
103 | )
104 | async def get_ecoindex_analysis_by_id(
105 | id: IdParameter,
106 | version: VersionParameter = Version.v1,
107 | session: AsyncSession = Depends(get_session),
108 | ) -> ApiEcoindex:
109 | ecoindex = await get_ecoindex_result_by_id_db(
110 | session=session, id=id, version=version
111 | )
112 |
113 | if not ecoindex:
114 | raise HTTPException(
115 | status_code=status.HTTP_404_NOT_FOUND,
116 | detail=f"Analysis {id} not found for version {version.value}",
117 | )
118 | return ecoindex
119 |
120 |
121 | @router.get(
122 | name="Get screenshot",
123 | path="/{id}/screenshot",
124 | description="This returns the screenshot of the webpage analysis if it exists",
125 | responses={status.HTTP_404_NOT_FOUND: example_file_not_found},
126 | )
127 | async def get_screenshot_endpoint(
128 | id: IdParameter,
129 | version: VersionParameter = Version.v1,
130 | ):
131 | return FileResponse(
132 | path=f"{getcwd()}/screenshots/{version.value}/{id}.webp",
133 | filename=f"{id}.webp",
134 | content_disposition_type="inline",
135 | media_type="image/webp",
136 | )
137 |
--------------------------------------------------------------------------------
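
A hedged example of calling the list endpoint with the documented multi-sort syntax (`httpx`, the host, and the `score` sort field are assumptions):

```python
# Repeated `sort` keys produce `sort=date:desc&sort=score:asc`, the format the
# endpoint's docstring describes.
import httpx

response = httpx.get(
    "http://localhost:8000/v1/ecoindexes",
    params=[("sort", "date:desc"), ("sort", "score:asc"), ("page", 1), ("size", 10)],
)
page = response.json()  # a PageApiEcoindexes payload
print(response.status_code, page["total"], len(page["items"]))
```
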
/bases/ecoindex/backend/routers/health.py:
--------------------------------------------------------------------------------
1 | from ecoindex.database.engine import get_session
2 | from ecoindex.models.api import HealthResponse
3 | from ecoindex.worker.health import is_worker_healthy
4 | from fastapi import APIRouter, Depends
5 | from sqlmodel.ext.asyncio.session import AsyncSession
6 |
7 | router = APIRouter(prefix="/health", tags=["Infra"])
8 |
9 |
10 | @router.get(
11 | name="Health check",
12 | path="",
13 | description="This returns the health of the service",
14 | )
15 | async def health_check(session: AsyncSession = Depends(get_session)) -> HealthResponse:
16 | return HealthResponse(database=session.is_active, workers=is_worker_healthy())
17 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/routers/host.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from ecoindex.backend.models.dependencies_parameters.dates import DateRangeParameters
4 | from ecoindex.backend.models.dependencies_parameters.host import HostParameter
5 | from ecoindex.backend.models.dependencies_parameters.pagination import (
6 | PaginationParameters,
7 | )
8 | from ecoindex.backend.models.dependencies_parameters.version import VersionParameter
9 | from ecoindex.backend.models.parameters import DateRange, Pagination
10 | from ecoindex.backend.utils import check_quota, get_status_code
11 | from ecoindex.database.engine import get_session
12 | from ecoindex.database.repositories.host import get_count_hosts_db, get_host_list_db
13 | from ecoindex.models.api import Host, PageHosts
14 | from ecoindex.models.enums import Version
15 | from ecoindex.models.response_examples import example_daily_limit_response
16 | from fastapi import Depends, Path, status
17 | from fastapi.param_functions import Query
18 | from fastapi.responses import Response
19 | from fastapi.routing import APIRouter
20 | from sqlmodel.ext.asyncio.session import AsyncSession
21 |
22 | router = APIRouter(prefix="/{version}/hosts", tags=["Host"])
23 |
24 |
25 | @router.get(
26 | name="Get host list",
27 | path="",
28 | response_model=PageHosts,
29 | response_description="List ecoindex hosts",
30 | responses={
31 | status.HTTP_206_PARTIAL_CONTENT: {"model": PageHosts},
32 | status.HTTP_404_NOT_FOUND: {"model": PageHosts},
33 | },
34 | description=(
35 | "This returns a list of hosts that "
36 | "ran an ecoindex analysis order by most request made"
37 | ),
38 | )
39 | async def get_host_list(
40 | response: Response,
41 | host: HostParameter,
42 | version: VersionParameter = Version.v1,
43 | date_range: DateRangeParameters = DateRange(),
44 | pagination: PaginationParameters = Pagination(),
45 | q: str | None = Query(
46 | default=None,
47 | description="Filter by partial host name (replaced by `host`)",
48 | deprecated=True,
49 | ),
50 | session: AsyncSession = Depends(get_session),
51 | ) -> PageHosts:
52 | hosts = await get_host_list_db(
53 | session=session,
54 | date_from=date_range.date_from,
55 | date_to=date_range.date_to,
56 | host=host or q,
57 | version=version,
58 | page=pagination.page,
59 | size=pagination.size,
60 | )
61 |
62 | total_hosts = await get_count_hosts_db(
63 | session=session,
64 | version=version,
65 | q=q,
66 | date_from=date_range.date_from,
67 | date_to=date_range.date_to,
68 | )
69 |
70 | response.status_code = await get_status_code(items=hosts, total=total_hosts)
71 |
72 | return PageHosts(
73 | items=hosts, total=total_hosts, page=pagination.page, size=pagination.size
74 | )
75 |
76 |
77 | @router.get(
78 | name="Get host details",
79 | path="/{host}",
80 | response_description="Host details",
81 | responses={
82 | status.HTTP_200_OK: {"model": Host},
83 | status.HTTP_404_NOT_FOUND: {"model": Host},
84 | status.HTTP_429_TOO_MANY_REQUESTS: example_daily_limit_response,
85 | },
86 | description=(
87 | "This returns the details of a host. If no no quota is set, "
88 | "remaining_daily_requests will be null"
89 | ),
90 | )
91 | async def get_daily_remaining(
92 | host: Annotated[str, Path(..., description="Exact matching host name")],
93 | version: VersionParameter = Version.v1,
94 | session: AsyncSession = Depends(get_session),
95 | ) -> Host:
96 | return Host(
97 | name=host,
98 | remaining_daily_requests=await check_quota(session=session, host=host),
99 | total_count=await get_count_hosts_db(
100 | session=session, name=host, version=version, group_by_host=False
101 | ),
102 | )
103 |
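A minimal sketch of how these two endpoints are called. The base URL is hypothetical; the paths follow the `/{version}/hosts` prefix declared on the router:

```python
import requests  # already a project dependency (see services/ecoindex.py)

base = "http://localhost:8000"  # hypothetical local deployment

# paginated host list, filtered by partial host name
print(requests.get(f"{base}/v1/hosts", params={"host": "ecoindex", "page": 1, "size": 50}).json())

# details (including remaining daily quota, if a quota is configured) for one host
print(requests.get(f"{base}/v1/hosts/www.ecoindex.fr").json())
```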
--------------------------------------------------------------------------------
/bases/ecoindex/backend/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/backend/scripts/__init__.py
--------------------------------------------------------------------------------
/bases/ecoindex/backend/scripts/openapi.py:
--------------------------------------------------------------------------------
1 | from json import dumps
2 |
3 | from ecoindex.backend.main import app
4 |
5 |
6 | def main() -> None:
7 | openapi = app.openapi()
8 |
9 | print(dumps(openapi, indent=2, sort_keys=True))
10 |
11 |
12 | if __name__ == "__main__":
13 | main()
14 |
--------------------------------------------------------------------------------
/bases/ecoindex/backend/services/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/backend/services/__init__.py
--------------------------------------------------------------------------------
/bases/ecoindex/backend/services/cache.py:
--------------------------------------------------------------------------------
1 | from hashlib import sha1
2 |
3 | from ecoindex.config import Settings
4 | from redis import Redis
5 |
6 |
7 | class EcoindexCache:
8 |     def __init__(self) -> None:
9 | self._r = Redis(host=Settings().REDIS_CACHE_HOST, db=2)
10 |
11 |     def set_cache_key(self, key: str) -> "EcoindexCache":
12 | self.cache_key = sha1(key.encode("utf-8")).hexdigest()
13 |
14 | return self
15 |
16 | async def get(
17 | self,
18 | ) -> str | None:
19 | results = self._r.get(name=self.cache_key)
20 |
21 | if results:
22 | return results.decode("utf-8") # type: ignore
23 |
24 | return None
25 |
26 | async def set(self, data: str) -> None:
27 | self._r.set(
28 | name=self.cache_key,
29 | value=data,
30 | ex=60 * 60 * 24 * 7, # set default expiration to 7 days
31 | )
32 |
33 |
34 | cache = EcoindexCache()
35 |
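A minimal usage sketch of this cache, assuming a Redis server is reachable at `Settings().REDIS_CACHE_HOST` (db 2); the key string below is illustrative:

```python
import asyncio

from ecoindex.backend.services.cache import cache


async def demo() -> None:
    # the key is hashed with sha1, so any string is a valid cache key
    entry = cache.set_cache_key(key="ecoindex-www.ecoindex.fr//")
    await entry.set(data='{"count": 0}')  # stored with a 7-day TTL
    print(await entry.get())  # '{"count": 0}'


asyncio.run(demo())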
--------------------------------------------------------------------------------
/bases/ecoindex/backend/services/ecoindex.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from ecoindex.backend.services.cache import cache
4 | from ecoindex.database.models import EcoindexSearchResults
5 | from ecoindex.database.repositories.ecoindex import get_ecoindex_result_list_db
6 | from ecoindex.models.enums import Version
7 | from ecoindex.models.sort import Sort
8 | from pydantic import AnyHttpUrl
9 | from requests import get
10 | from sqlmodel.ext.asyncio.session import AsyncSession
11 |
12 |
13 | async def get_latest_result_by_url(
14 | session: AsyncSession, url: AnyHttpUrl, refresh: bool, version: Version
15 | ) -> EcoindexSearchResults:
16 | """
17 | Get the latest ecoindex result for a given url. This function will first try to find
18 | an exact match for the url path, and if it doesn't find any, it will return the latest
19 | result for the host.
20 |
21 | Results are cached for 1 week by default. If you want to force the refresh of the cache,
22 | set the refresh parameter to True.
23 |
24 | params:
25 | url: the url to search for
26 | refresh: if True, will force the refresh of the cache
27 | version: the version of the ecoindex to use
28 |
29 | returns:
30 | EcoindexSearchResults: the results for the given url
31 | """
32 | ecoindex_cache = cache.set_cache_key(key=f"ecoindex-{url.host}/{url.path}")
33 | cached_results = await ecoindex_cache.get()
34 |
35 | if not refresh and cached_results:
36 | return EcoindexSearchResults(**json.loads(cached_results))
37 |
38 | ecoindexes = await get_ecoindex_result_list_db(
39 | session=session,
40 | host=str(url.host),
41 | version=version,
42 | size=20,
43 | sort_params=[Sort(clause="date", sort="desc")],
44 | )
45 |
46 | if not ecoindexes:
47 |         await ecoindex_cache.set(
48 |             data=EcoindexSearchResults(count=0).model_dump_json()
49 |         )
50 |
51 | return EcoindexSearchResults(count=0)
52 |
53 | exact_url_results = []
54 | host_results = []
55 |
56 | for ecoindex in ecoindexes:
57 | if ecoindex.get_url_path() == str(url.path):
58 | exact_url_results.append(ecoindex)
59 | else:
60 | host_results.append(ecoindex)
61 |
62 | results = EcoindexSearchResults(
63 | count=len(exact_url_results),
64 | latest_result=exact_url_results[0] if exact_url_results else None,
65 | older_results=exact_url_results[1:] if len(exact_url_results) > 1 else [],
66 | host_results=host_results,
67 | )
68 |
69 | await ecoindex_cache.set(
70 | data=results.model_dump_json(),
71 | )
72 |
73 | return results
74 |
75 |
76 | async def get_badge(
77 | session: AsyncSession, url: AnyHttpUrl, refresh: bool, version: Version, theme: str
78 | ) -> str:
79 | """
80 | Get the badge for a given url. This function will use the method `get_latest_result_by_url`.
81 | If the url is not found, it will return the badge for the grade "unknown".
82 |
83 |     This returns the badge [hosted on jsdelivr.net](https://cdn.jsdelivr.net/gh/cnumr/ecoindex_badge@1/assets/svg/).
84 |
85 | params:
86 | url: the url to search for
87 | refresh: if True, will force the refresh of the cache
88 | version: the version of the ecoindex to use
89 | theme: the theme of the badge to use (light or dark)
90 |
91 | returns:
92 | str: the badge image
93 | """
94 | results = await get_latest_result_by_url(
95 | session=session, url=url, refresh=refresh, version=version
96 | )
97 |
98 | grade = results.latest_result.grade if results.latest_result else "unknown"
99 | ecoindex_cache = cache.set_cache_key(key=f"badge-{grade}-{theme}")
100 |
101 | cached_badge = await ecoindex_cache.get()
102 |
103 | if cached_badge:
104 | return cached_badge
105 |
106 | base_url = f"https://cdn.jsdelivr.net/gh/cnumr/ecoindex_badge@1/assets/svg/{theme}/{grade}.svg"
107 |
108 | image = get(base_url).text
109 |
110 | await ecoindex_cache.set(data=image)
111 |
112 | return image
113 |
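For reference, a small sketch of how `get_badge` assembles the CDN URL it fetches; the grade and theme values here are illustrative:

```python
# Illustrative values; URLs with no known analysis fall back to the
# "unknown" grade badge.
grade, theme = "B", "dark"
badge_url = (
    "https://cdn.jsdelivr.net/gh/cnumr/ecoindex_badge@1"
    f"/assets/svg/{theme}/{grade}.svg"
)
print(badge_url)
```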
--------------------------------------------------------------------------------
/bases/ecoindex/backend/utils/__init__.py:
--------------------------------------------------------------------------------
1 | import re
2 | from json import loads
3 | from uuid import UUID, uuid4
4 |
5 | from ecoindex.config.settings import Settings
6 | from ecoindex.database.exceptions.quota import QuotaExceededException
7 | from ecoindex.database.repositories.ecoindex import (
8 | get_count_daily_request_per_host,
9 | get_latest_result,
10 | )
11 | from ecoindex.models.api import ExceptionResponse
12 | from ecoindex.models.sort import Sort
13 | from fastapi import HTTPException, status
14 | from pydantic import BaseModel
15 | from sqlmodel.ext.asyncio.session import AsyncSession
16 |
17 |
18 | async def format_exception_response(exception: Exception) -> ExceptionResponse:
19 | return ExceptionResponse(
20 | exception=type(exception).__name__,
21 | args=[arg for arg in exception.args if arg] if exception.args else [],
22 | message=exception.msg if hasattr(exception, "msg") else None, # type: ignore
23 | )
24 |
25 |
26 | async def new_uuid() -> UUID:
27 | val = uuid4()
28 | while val.hex[0] == "0":
29 | val = uuid4()
30 | return val
31 |
32 |
33 | async def get_status_code(items: list, total: int) -> int:
34 | if not items:
35 | return status.HTTP_404_NOT_FOUND
36 |
37 | if total > len(items):
38 | return status.HTTP_206_PARTIAL_CONTENT
39 |
40 | return status.HTTP_200_OK
41 |
42 |
43 | async def get_sort_parameters(query_params: list[str], model: BaseModel) -> list[Sort]:
44 | validation_error = []
45 | result = []
46 |
47 |     pattern = re.compile(r"^\w+:(asc|desc)$")
48 | 
49 |     for query_param in query_params:
50 |         if not pattern.fullmatch(query_param):
51 | validation_error.append(
52 | {
53 | "loc": ["query", "sort", query_param],
54 | "message": "this parameter does not respect the sort format",
55 | "type": "value_error.sort",
56 | }
57 | )
58 | continue
59 |
60 | sort_params = query_param.split(":")
61 |
62 | if sort_params[0] not in model.__fields__:
63 | validation_error.append(
64 | {
65 | "loc": ["query", "sort", sort_params[0]],
66 | "message": "this parameter does not exist",
67 | "type": "value_error.sort",
68 | }
69 | )
70 | continue
71 |
72 | result.append(Sort(clause=sort_params[0], sort=sort_params[1])) # type: ignore
73 |
74 | if validation_error:
75 | raise HTTPException(
76 | status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=validation_error
77 | )
78 |
79 | return result
80 |
81 |
82 | async def check_quota(
83 | session: AsyncSession,
84 | host: str,
85 | ) -> int | None:
86 | if not Settings().DAILY_LIMIT_PER_HOST:
87 | return None
88 |
89 | count_daily_request_per_host = await get_count_daily_request_per_host(
90 | session=session, host=host
91 | )
92 |
93 | if count_daily_request_per_host >= Settings().DAILY_LIMIT_PER_HOST:
94 | latest_result = await get_latest_result(session=session, host=host)
95 | raise QuotaExceededException(
96 | limit=Settings().DAILY_LIMIT_PER_HOST,
97 | host=host,
98 |             latest_result=loads(latest_result.model_dump_json()) if latest_result else None,  # type: ignore
99 | )
100 |
101 | return Settings().DAILY_LIMIT_PER_HOST - count_daily_request_per_host
102 |
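A minimal sketch of the `field:direction` format that `get_sort_parameters` accepts; `DummyModel` is a hypothetical stand-in for the real result model:

```python
import asyncio

from ecoindex.backend.utils import get_sort_parameters
from pydantic import BaseModel


class DummyModel(BaseModel):
    date: str = ""
    score: float = 0


sorts = asyncio.run(get_sort_parameters(["date:desc", "score:asc"], DummyModel))
print(sorts)  # roughly: [Sort(clause='date', sort='desc'), Sort(clause='score', sort='asc')]
# "date:down" or "unknown_field:asc" would raise an HTTP 422 instead
```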
--------------------------------------------------------------------------------
/bases/ecoindex/cli/VERSION:
--------------------------------------------------------------------------------
1 | 2.30.0
2 |
--------------------------------------------------------------------------------
/bases/ecoindex/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/cli/__init__.py
--------------------------------------------------------------------------------
/bases/ecoindex/cli/arguments_handler.py:
--------------------------------------------------------------------------------
1 | from tempfile import NamedTemporaryFile
2 | from typing import Set
3 | from urllib.parse import urlparse, urlunparse
4 |
5 | from ecoindex.cli.crawl import EcoindexSpider
6 | from ecoindex.cli.helper import replace_localhost_with_hostdocker
7 | from ecoindex.cli.sitemap import EcoindexSitemapSpider
8 | from ecoindex.models import WindowSize
9 |
10 | from pydantic import AnyHttpUrl, validate_call
11 | from pydantic.types import FilePath
12 | from scrapy.crawler import CrawlerProcess
13 |
14 |
15 | @validate_call
16 | def validate_list_of_urls(urls: list[AnyHttpUrl]) -> Set[str]:
17 | result = set()
18 |
19 | for url in urls:
20 | splitted_url = str(url).split("?")
21 | result.add(splitted_url[0])
22 |
23 | return result
24 |
25 |
26 | @validate_call
27 | def get_urls_from_file(urls_file: FilePath) -> Set[str]:
28 | with open(urls_file) as fp:
29 | urls_from_file = set()
30 |
31 | for url in fp.readlines():
32 | url = url.strip()
33 |
34 | if url:
35 | urls_from_file.add(url)
36 |
37 | return validate_list_of_urls(urls_from_file) # type: ignore
38 |
39 |
40 | def get_urls_recursive(main_url: str) -> Set[str]:
41 | parsed_url = urlparse(main_url)
42 | host_infos = replace_localhost_with_hostdocker(parsed_url.netloc)
43 | netloc = host_infos.netloc
44 | domain = host_infos.domain
45 | main_url = f"{parsed_url.scheme}://{netloc}"
46 | process = CrawlerProcess()
47 |
48 | with NamedTemporaryFile(mode="w+t") as temp_file:
49 | process.crawl(
50 | crawler_or_spidercls=EcoindexSpider,
51 | allowed_domains=[domain],
52 | start_urls=[main_url],
53 | temp_file=temp_file,
54 | )
55 | process.start()
56 | temp_file.seek(0)
57 | urls = temp_file.readlines()
58 | return validate_list_of_urls(urls) # type: ignore
59 |
60 |
61 | def get_urls_from_sitemap(main_url: str) -> Set[str]:
62 | process = CrawlerProcess()
63 | if "sitemap" not in main_url or not main_url.endswith(".xml"):
64 | raise ValueError("The provided url is not a valid sitemap url")
65 |
66 | with NamedTemporaryFile(mode="w+t") as temp_file:
67 | process.crawl(
68 | crawler_or_spidercls=EcoindexSitemapSpider,
69 | sitemap_urls=[main_url],
70 | temp_file=temp_file,
71 | )
72 | process.start()
73 | temp_file.seek(0)
74 | urls = list()
75 | str_urls = temp_file.readlines()
76 | for url in str_urls:
77 | urls.append(AnyHttpUrl(url))
78 |
79 | return validate_list_of_urls(urls)
80 |
81 |
82 | @validate_call
83 | def get_url_from_args(urls_arg: list[AnyHttpUrl]) -> set[AnyHttpUrl]:
84 | urls_from_args = set()
85 | for url in urls_arg:
86 | parsed_url = urlparse(str(url))
87 | host_infos = replace_localhost_with_hostdocker(parsed_url.netloc)
88 | url = AnyHttpUrl(urlunparse((parsed_url.scheme, host_infos.netloc, parsed_url.path, parsed_url.params, parsed_url.query, parsed_url.fragment)))
89 | urls_from_args.add(url)
90 |
91 | return urls_from_args
92 |
93 |
94 | def get_window_sizes_from_args(window_sizes: list[str]) -> list[WindowSize]:
95 | result = []
96 | errors = ""
97 | for window_size in window_sizes:
98 | try:
99 | width, height = window_size.split(",")
100 | result.append(WindowSize(width=int(width), height=int(height)))
101 | except ValueError:
102 | errors += f"🔥 `{window_size}` is not a valid window size. Must be of type `1920,1080`\n"
103 |
104 | if errors:
105 | raise ValueError(errors)
106 |
107 | return result
108 |
109 |
110 | def get_file_prefix_input_file_logger_file(
111 | urls: list[AnyHttpUrl],
112 | urls_file: str | None = None,
113 | tmp_folder: str = "/tmp/ecoindex-cli",
114 | ) -> tuple[str, str, str]:
115 | """
116 |     Returns the file prefix, input file and logger file based on the provided
117 |     urls and how they were provided: if they come from an existing csv file, we
118 |     take the name of that file; otherwise we take the first provided url's domain
119 | """
120 | if urls_file:
121 | file_prefix = urls_file.split("/")[-1]
122 | input_file = urls_file
123 | else:
124 | first_url = str(next(iter(urls)))
125 | file_prefix = urlparse(first_url).netloc
126 | input_file = f"{tmp_folder}/input/{file_prefix}.csv"
127 |
128 | return (file_prefix, input_file, f"{tmp_folder}/logs/{file_prefix}.log")
129 |
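A short sketch of the URL and window-size validation helpers above (outputs shown in comments are approximate):

```python
from ecoindex.cli.arguments_handler import (
    get_window_sizes_from_args,
    validate_list_of_urls,
)

# query strings are stripped and duplicates collapse into a set
print(validate_list_of_urls(["https://www.ecoindex.fr/?utm_source=x"]))
# {'https://www.ecoindex.fr/'}

print(get_window_sizes_from_args(["1920,1080"]))  # [WindowSize(height=1080, width=1920)]
get_window_sizes_from_args(["1920x1080"])         # raises ValueError with a 🔥 message
```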
--------------------------------------------------------------------------------
/bases/ecoindex/cli/console_output.py:
--------------------------------------------------------------------------------
1 | from rich.console import Console
2 | from rich.table import Table
3 |
4 |
5 | def display_result_synthesis(total: int, count_errors: int) -> None:
6 | console = Console()
7 |
8 | table = Table(show_header=True)
9 | table.add_column("Total analysis")
10 | table.add_column("Success", header_style="green")
11 | table.add_column("Failed", header_style="red")
12 | table.add_row(str(total), str(total - count_errors), str(count_errors))
13 |
14 | console.print(table)
15 |
--------------------------------------------------------------------------------
/bases/ecoindex/cli/crawl.py:
--------------------------------------------------------------------------------
1 | from tempfile import NamedTemporaryFile
2 |
3 | from scrapy.linkextractors import LinkExtractor
4 | from scrapy.spiders import CrawlSpider, Rule
5 |
6 |
7 | class EcoindexSpider(CrawlSpider):
8 | name = "EcoindexSpider"
9 | custom_settings = {"LOG_ENABLED": False}
10 | rules = (Rule(LinkExtractor(), callback="parse_item", follow=True),)
11 |
12 | def __init__(
13 | self,
14 | allowed_domains: list[str],
15 | start_urls: list[str],
16 | temp_file: NamedTemporaryFile, # type: ignore
17 | *a,
18 | **kw,
19 | ):
20 | self.links: set[str] = set()
21 | self.allowed_domains = allowed_domains
22 | self.start_urls = start_urls
23 | self.temp_file = temp_file
24 | super().__init__(*a, **kw)
25 |
26 | def parse_item(self, response):
27 | self.temp_file.write(f"{response.url}\n")
28 |
--------------------------------------------------------------------------------
/bases/ecoindex/cli/helper.py:
--------------------------------------------------------------------------------
1 | from ecoindex.config import Settings
2 | from ecoindex.models import CliHost
3 |
4 |
5 | def replace_localhost_with_hostdocker(netloc: str) -> CliHost:
6 | if Settings().DOCKER_CONTAINER and "localhost" in netloc:
7 | domain = "host.docker.internal"
8 | netloc = netloc.replace("localhost", domain)
9 | elif "localhost" in netloc:
10 | domain = "localhost"
11 | else:
12 | domain = netloc
13 |
14 | return CliHost(domain=domain, netloc=netloc)
15 |
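A short sketch, assuming `DOCKER_CONTAINER` is not set in the environment:

```python
from ecoindex.cli.helper import replace_localhost_with_hostdocker

print(replace_localhost_with_hostdocker("localhost:8000"))
# domain='localhost' netloc='localhost:8000'
# with DOCKER_CONTAINER=True, the netloc would become 'host.docker.internal:8000'
```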
--------------------------------------------------------------------------------
/bases/ecoindex/cli/sitemap.py:
--------------------------------------------------------------------------------
1 | from tempfile import NamedTemporaryFile
2 | from scrapy.spiders import SitemapSpider
3 |
4 |
5 | class EcoindexSitemapSpider(SitemapSpider):
6 | name = "EcoindexSitemapSpider"
7 | custom_settings = {"LOG_ENABLED": False}
8 |
9 | def __init__(
10 | self,
11 | sitemap_urls: list[str],
12 | temp_file: NamedTemporaryFile, # type: ignore
13 | *a,
14 | **kw,
15 | ):
16 | self.sitemap_urls = sitemap_urls
17 | self.temp_file = temp_file
18 | super().__init__(*a, **kw)
19 |
20 | def parse(self, response):
21 | self.temp_file.write(f"{response.url}\n")
22 |
23 |
--------------------------------------------------------------------------------
/bases/ecoindex/cli/template.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/cli/template.html
--------------------------------------------------------------------------------
/bases/ecoindex/worker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/bases/ecoindex/worker/__init__.py
--------------------------------------------------------------------------------
/bases/ecoindex/worker/health.py:
--------------------------------------------------------------------------------
1 | from ecoindex.models.api import HealthWorker, HealthWorkers
2 | from ecoindex.worker.tasks import app
3 |
4 |
5 | def is_worker_healthy() -> HealthWorkers:
6 | workers = []
7 | workers_ping = app.control.ping()
8 |
9 | for worker in workers_ping:
10 | for name in worker:
11 | workers.append(
12 |                 HealthWorker(name=name, healthy="ok" in worker[name])
13 | )
14 |
15 | return HealthWorkers(
16 |         healthy=bool(workers) and all(w.healthy for w in workers),
17 | workers=workers,
18 | )
19 |
--------------------------------------------------------------------------------
/components/ecoindex/compute/VERSION:
--------------------------------------------------------------------------------
1 | 5.9.0
2 |
--------------------------------------------------------------------------------
/components/ecoindex/compute/__init__.py:
--------------------------------------------------------------------------------
1 | from ecoindex.compute.ecoindex import (
2 | compute_ecoindex,
3 | get_ecoindex,
4 | get_grade,
5 | get_greenhouse_gases_emmission,
6 | get_quantile,
7 | get_score,
8 | get_water_consumption,
9 | )
10 |
11 | __all__ = [
12 | "compute_ecoindex",
13 | "get_ecoindex",
14 | "get_grade",
15 | "get_greenhouse_gases_emmission",
16 | "get_quantile",
17 | "get_score",
18 | "get_water_consumption",
19 | ]
20 |
--------------------------------------------------------------------------------
/components/ecoindex/compute/ecoindex.py:
--------------------------------------------------------------------------------
1 | from ecoindex.data import ( # noqa: F401
2 | A,
3 | B,
4 | C,
5 | D,
6 | E,
7 | F,
8 | G,
9 | quantiles_dom,
10 | quantiles_req,
11 | quantiles_size,
12 | )
13 | from ecoindex.models import Ecoindex
14 | from ecoindex.models.enums import Grade
15 | from typing_extensions import deprecated
16 |
17 |
18 | async def get_quantile(quantiles: list[int | float], value: int | float) -> float:
19 | for i in range(1, len(quantiles)):
20 | if value < quantiles[i]:
21 | return (
22 | i - 1 + (value - quantiles[i - 1]) / (quantiles[i] - quantiles[i - 1])
23 | )
24 |
25 | return len(quantiles) - 1
26 |
27 |
28 | async def get_score(dom: int, size: float, requests: int) -> float:
29 | q_dom = await get_quantile(quantiles_dom, dom) # type: ignore
30 | q_size = await get_quantile(quantiles_size, size)
31 | q_req = await get_quantile(quantiles_req, requests) # type: ignore
32 |
33 | return round(100 - 5 * (3 * q_dom + 2 * q_req + q_size) / 6)
34 |
35 |
36 | @deprecated("Use compute_ecoindex instead")
37 | async def get_ecoindex(dom: int, size: float, requests: int) -> Ecoindex:
38 | score = await get_score(dom=dom, size=size, requests=requests)
39 |
40 | return Ecoindex(
41 | score=score,
42 | grade=Grade(await get_grade(score)),
43 | ges=await get_greenhouse_gases_emmission(score),
44 | water=await get_water_consumption(score),
45 | )
46 |
47 |
48 | async def compute_ecoindex(nodes: int, size: float, requests: int) -> Ecoindex:
49 | return await get_ecoindex(
50 | dom=nodes,
51 | size=size,
52 | requests=requests,
53 | )
54 |
55 |
56 | async def get_grade(ecoindex: float) -> str:
57 | for grade in "ABCDEF":
58 | if ecoindex > globals()[grade]:
59 | return grade
60 |
61 | return "G"
62 |
63 |
64 | async def get_greenhouse_gases_emmission(ecoindex: float) -> float:
65 | return round(100 * (2 + 2 * (50 - ecoindex) / 100)) / 100
66 |
67 |
68 | async def get_water_consumption(ecoindex: float) -> float:
69 | return round(100 * (3 + 3 * (50 - ecoindex) / 100)) / 100
70 |
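A worked example, feeding the dataset medians (693 DOM nodes, 2420 KB, 78 requests, from `ecoindex/data/medians.py`) through the computation. The expected output was derived by hand from the quantiles, so treat it as approximate:

```python
import asyncio

from ecoindex.compute import compute_ecoindex

# the "median page" of the dataset
result = asyncio.run(compute_ecoindex(nodes=693, size=2420, requests=78))
print(result.score, result.grade, result.ges, result.water)
# roughly: 44 Grade.D 2.12 3.18
```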
--------------------------------------------------------------------------------
/components/ecoindex/config/__init__.py:
--------------------------------------------------------------------------------
1 | from ecoindex.config.settings import Settings
2 |
3 | __all__ = ["Settings"]
4 |
--------------------------------------------------------------------------------
/components/ecoindex/config/settings.py:
--------------------------------------------------------------------------------
1 | from pydantic_settings import BaseSettings, SettingsConfigDict
2 |
3 |
4 | class Settings(BaseSettings):
5 | model_config = SettingsConfigDict(env_file=".env")
6 |
7 | API_KEYS_BATCH: list[
8 | dict[str, str]
9 |     ] = []  # formatted as [{"key": "xxx", "name": "xxx", "description": "xxx", "source": "ecoindex.fr"}]
10 | CORS_ALLOWED_CREDENTIALS: bool = True
11 | CORS_ALLOWED_HEADERS: list = ["*"]
12 | CORS_ALLOWED_METHODS: list = ["*"]
13 | CORS_ALLOWED_ORIGINS: list = ["*"]
14 | DAILY_LIMIT_PER_HOST: int = 0
15 | DATABASE_URL: str = "sqlite+aiosqlite:///db.sqlite3"
16 | DEBUG: bool = False
17 | DOCKER_CONTAINER: bool = False
18 | ENABLE_SCREENSHOT: bool = False
19 | EXCLUDED_HOSTS: list[str] = ["localhost", "127.0.0.1"]
20 | FRONTEND_BASE_URL: str = "https://www.ecoindex.fr"
21 | GLITCHTIP_DSN: str = ""
22 | REDIS_CACHE_HOST: str = "localhost"
23 | SCREENSHOTS_GID: int | None = None
24 | SCREENSHOTS_UID: int | None = None
25 | TZ: str = "Europe/Paris"
26 | WAIT_AFTER_SCROLL: int = 3
27 | WAIT_BEFORE_SCROLL: int = 3
28 |
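Since `Settings` is a pydantic `BaseSettings`, every field can be overridden from the environment or a local `.env` file; a minimal sketch (values are illustrative):

```python
import os

from ecoindex.config import Settings

os.environ["DAILY_LIMIT_PER_HOST"] = "10"
os.environ["REDIS_CACHE_HOST"] = "redis"

settings = Settings()
print(settings.DAILY_LIMIT_PER_HOST, settings.REDIS_CACHE_HOST)  # 10 redis
```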
--------------------------------------------------------------------------------
/components/ecoindex/data/__init__.py:
--------------------------------------------------------------------------------
1 | from ecoindex.data.colors import A as A_color
2 | from ecoindex.data.colors import B as B_color
3 | from ecoindex.data.colors import C as C_color
4 | from ecoindex.data.colors import D as D_color
5 | from ecoindex.data.colors import E as E_color
6 | from ecoindex.data.colors import F as F_color
7 | from ecoindex.data.colors import G as G_color
8 | from ecoindex.data.grades import A, B, C, D, E, F, G
9 | from ecoindex.data.medians import (
10 | median_dom,
11 | median_req,
12 | median_size,
13 | )
14 | from ecoindex.data.quantiles import (
15 | quantiles_dom,
16 | quantiles_req,
17 | quantiles_size,
18 | )
19 | from ecoindex.data.targets import (
20 | target_dom,
21 | target_req,
22 | target_size,
23 | )
24 |
25 | __all__ = [
26 | "A",
27 | "B",
28 | "C",
29 | "D",
30 | "E",
31 | "F",
32 | "G",
33 | "A_color",
34 | "B_color",
35 | "C_color",
36 | "D_color",
37 | "E_color",
38 | "F_color",
39 | "G_color",
40 | "median_dom",
41 | "median_req",
42 | "median_size",
43 | "quantiles_dom",
44 | "quantiles_req",
45 | "quantiles_size",
46 | "target_dom",
47 | "target_req",
48 | "target_size",
49 | ]
50 |
--------------------------------------------------------------------------------
/components/ecoindex/data/colors.py:
--------------------------------------------------------------------------------
1 | A = "#349A47"
2 | B = "#51B84B"
3 | C = "#CADB2A"
4 | D = "#F6EB15"
5 | E = "#FECD06"
6 | F = "#F99839"
7 | G = "#ED2124"
8 |
--------------------------------------------------------------------------------
/components/ecoindex/data/grades.py:
--------------------------------------------------------------------------------
1 | A = 80
2 | B = 70
3 | C = 55
4 | D = 40
5 | E = 25
6 | F = 10
7 | G = 0
8 |
--------------------------------------------------------------------------------
/components/ecoindex/data/medians.py:
--------------------------------------------------------------------------------
1 | median_dom = 693
2 | median_req = 78
3 | median_size = 2420
4 |
--------------------------------------------------------------------------------
/components/ecoindex/data/quantiles.py:
--------------------------------------------------------------------------------
1 | quantiles_dom = [
2 | 0,
3 | 47,
4 | 75,
5 | 159,
6 | 233,
7 | 298,
8 | 358,
9 | 417,
10 | 476,
11 | 537,
12 | 603,
13 | 674,
14 | 753,
15 | 843,
16 | 949,
17 | 1076,
18 | 1237,
19 | 1459,
20 | 1801,
21 | 2479,
22 | 594601,
23 | ]
24 | quantiles_req = [
25 | 0,
26 | 2,
27 | 15,
28 | 25,
29 | 34,
30 | 42,
31 | 49,
32 | 56,
33 | 63,
34 | 70,
35 | 78,
36 | 86,
37 | 95,
38 | 105,
39 | 117,
40 | 130,
41 | 147,
42 | 170,
43 | 205,
44 | 281,
45 | 3920,
46 | ]
47 | quantiles_size = [
48 | 0,
49 | 1.37,
50 | 144.7,
51 | 319.53,
52 | 479.46,
53 | 631.97,
54 | 783.38,
55 | 937.91,
56 | 1098.62,
57 | 1265.47,
58 | 1448.32,
59 | 1648.27,
60 | 1876.08,
61 | 2142.06,
62 | 2465.37,
63 | 2866.31,
64 | 3401.59,
65 | 4155.73,
66 | 5400.08,
67 | 8037.54,
68 | 223212.26,
69 | ]
70 |
--------------------------------------------------------------------------------
/components/ecoindex/data/targets.py:
--------------------------------------------------------------------------------
1 | target_dom = 600
2 | target_req = 40
3 | target_size = 1024
4 |
--------------------------------------------------------------------------------
/components/ecoindex/database/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/components/ecoindex/database/__init__.py
--------------------------------------------------------------------------------
/components/ecoindex/database/engine.py:
--------------------------------------------------------------------------------
1 | from typing import AsyncGenerator
2 |
3 | from ecoindex.config import Settings
4 | from ecoindex.models.api import * # noqa: F401, F403
5 | from sqlalchemy.ext.asyncio import create_async_engine
6 | from sqlalchemy.orm import sessionmaker
7 | from sqlalchemy.pool import NullPool
8 | from sqlmodel import SQLModel
9 | from sqlmodel.ext.asyncio.session import AsyncSession
10 |
11 | engine = create_async_engine(
12 | Settings().DATABASE_URL,
13 | future=True,
14 | pool_pre_ping=True,
15 | poolclass=NullPool,
16 | echo=Settings().DEBUG,
17 | )
18 |
19 |
20 | async def init_db():
21 | async with engine.begin() as conn:
22 | await conn.run_sync(SQLModel.metadata.create_all)
23 |
24 |
25 | async def get_session() -> AsyncGenerator:
26 | async_session = sessionmaker(
27 | bind=engine, # type: ignore
28 | class_=AsyncSession,
29 | expire_on_commit=False,
30 | )
31 | async with async_session() as session: # type: ignore
32 | yield session
33 |
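A minimal sketch of how this generator is meant to be consumed as a FastAPI dependency, mirroring the routers above (the route path is hypothetical):

```python
from ecoindex.database.engine import get_session
from fastapi import APIRouter, Depends
from sqlmodel.ext.asyncio.session import AsyncSession

router = APIRouter()


@router.get("/ping-db")
async def ping_db(session: AsyncSession = Depends(get_session)) -> dict:
    # the session is opened per request and closed when the generator exits
    return {"db_session_open": session.is_active}
```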
--------------------------------------------------------------------------------
/components/ecoindex/database/exceptions/quota.py:
--------------------------------------------------------------------------------
1 | from ecoindex.config import Settings
2 | from ecoindex.database.models import ApiEcoindex
3 |
4 |
5 | class QuotaExceededException(Exception):
6 | def __init__(self, limit: int, host: str, latest_result: ApiEcoindex) -> None:
7 | self.daily_limit_per_host = Settings().DAILY_LIMIT_PER_HOST
8 | self.limit = limit
9 | self.host = host
10 | self.latest_result = latest_result
11 | self.message = (
12 | "You have already reached the daily limit "
13 | f"of {limit} requests for host {host} today"
14 | )
15 |
16 | super().__init__(self.message)
17 |
--------------------------------------------------------------------------------
/components/ecoindex/database/helper.py:
--------------------------------------------------------------------------------
1 | from datetime import date
2 |
3 | from ecoindex.database.models import ApiEcoindex
4 | from sqlalchemy.engine.reflection import Inspector
5 | from sqlmodel.sql.expression import SelectOfScalar
6 |
7 | SelectOfScalar.inherit_cache = True # type: ignore
8 |
9 |
10 | def date_filter(
11 | statement: SelectOfScalar,
12 | date_from: date | None = None,
13 | date_to: date | None = None,
14 | ) -> SelectOfScalar:
15 | if date_from and ApiEcoindex.date:
16 | statement = statement.where(ApiEcoindex.date >= date_from)
17 |
18 | if date_to and ApiEcoindex.date:
19 | statement = statement.where(ApiEcoindex.date <= date_to)
20 |
21 | return statement
22 |
23 |
24 | def table_exists(conn, table_name) -> bool:
25 | inspector = Inspector.from_engine(conn)
26 | return table_name in inspector.get_table_names()
27 |
28 |
29 | def column_exists(conn, table_name, column_name) -> bool:
30 | inspector = Inspector.from_engine(conn)
31 | return column_name in [c["name"] for c in inspector.get_columns(table_name)]
32 |
33 |
34 | def index_exists(conn, table_name, index_name) -> bool:
35 | inspector = Inspector.from_engine(conn)
36 | return index_name in [i["name"] for i in inspector.get_indexes(table_name)]
37 |
--------------------------------------------------------------------------------
/components/ecoindex/database/models/__init__.py:
--------------------------------------------------------------------------------
1 | from uuid import UUID
2 |
3 | from ecoindex.models.compute import Result
4 | from pydantic import BaseModel
5 | from sqlmodel import Field, SQLModel
6 |
7 |
8 | class ApiEcoindex(SQLModel, Result, table=True): # type: ignore
9 | id: UUID | None = Field(
10 | default=None,
11 | description="Analysis ID of type `UUID`",
12 | primary_key=True,
13 | index=True,
14 | )
15 | host: str = Field(
16 | default=...,
17 | title="Web page host",
18 | description="Host name of the web page",
19 | index=True,
20 | )
21 | version: int = Field(
22 | default=1,
23 | title="API version",
24 | description="Version number of the API used to run the test",
25 | )
26 | initial_ranking: int | None = Field(
27 | default=...,
28 | title="Analysis rank",
29 | description=(
30 | "This is the initial rank of the analysis. "
31 | "This is an indicator of the ranking at the "
32 | "time of the analysis for a given version."
33 | ),
34 | )
35 | initial_total_results: int | None = Field(
36 | default=...,
37 | title="Total number of analysis",
38 | description=(
39 | "This is the initial total number of analysis. "
40 | "This is an indicator of the total number of analysis "
41 | "at the time of the analysis for a given version."
42 | ),
43 | )
44 | source: str | None = Field(
45 | default="ecoindex.fr",
46 | title="Source of the analysis",
47 | description="Source of the analysis",
48 | )
49 |
50 |
51 | ApiEcoindexes = list[ApiEcoindex]
52 |
53 |
54 | class PageApiEcoindexes(BaseModel):
55 | items: list[ApiEcoindex]
56 | total: int
57 | page: int
58 | size: int
59 |
60 |
61 | class EcoindexSearchResults(BaseModel):
62 | count: int
63 | latest_result: ApiEcoindex | None = None
64 | older_results: list[ApiEcoindex] = []
65 | host_results: list[ApiEcoindex] = []
66 |
--------------------------------------------------------------------------------
/components/ecoindex/database/repositories/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/components/ecoindex/database/repositories/__init__.py
--------------------------------------------------------------------------------
/components/ecoindex/database/repositories/ecoindex.py:
--------------------------------------------------------------------------------
1 | from datetime import date
2 | from uuid import UUID
3 |
4 | from ecoindex.database.helper import date_filter
5 | from ecoindex.database.models import ApiEcoindex
6 | from ecoindex.models import Result
7 | from ecoindex.models.enums import Version
8 | from ecoindex.models.sort import Sort
9 | from sqlalchemy import func, text
10 | from sqlalchemy.sql.expression import asc, desc
11 | from sqlmodel import select
12 | from sqlmodel.ext.asyncio.session import AsyncSession
13 |
14 |
15 | async def get_count_analysis_db(
16 | session: AsyncSession,
17 | version: Version = Version.v1,
18 | host: str | None = None,
19 | date_from: date | None = None,
20 | date_to: date | None = None,
21 | ) -> int:
22 |     statement = "SELECT count(*) FROM apiecoindex WHERE version = :version"
23 |     # bind user-supplied values instead of interpolating them into the SQL string
24 |     params: dict = {"version": version.get_version_number()}
25 | 
26 |     if host:
27 |         statement += " AND host = :host"
28 |         params["host"] = host
29 |     if date_from:
30 |         statement += " AND date >= :date_from"
31 |         params["date_from"] = date_from
32 |     if date_to:
33 |         statement += " AND date <= :date_to"
34 |         params["date_to"] = date_to
35 | 
36 |     result = await session.exec(statement=text(statement).bindparams(**params))  # type: ignore
37 |
38 | return result.scalar_one()
39 |
40 |
41 | async def get_rank_analysis_db(
42 | session: AsyncSession, ecoindex: Result, version: Version = Version.v1
43 | ) -> int | None:
44 | statement = (
45 | "SELECT ranking FROM ("
46 | "SELECT *, ROW_NUMBER() OVER (ORDER BY score DESC) ranking "
47 | "FROM apiecoindex "
48 | f"WHERE version={version.get_version_number()} "
49 | "ORDER BY score DESC) t "
50 | f"WHERE score <= {ecoindex.score} "
51 | "LIMIT 1;"
52 | )
53 |
54 | result = await session.exec(text(statement)) # type: ignore
55 |
56 | return result.scalar_one_or_none()
57 |
58 |
59 | async def get_ecoindex_result_list_db(
60 | session: AsyncSession,
61 | version: Version = Version.v1,
62 | host: str | None = None,
63 | date_from: date | None = None,
64 | date_to: date | None = None,
65 | page: int = 1,
66 | size: int = 50,
67 | sort_params: list[Sort] = [],
68 | ) -> list[ApiEcoindex]:
69 | statement = (
70 | select(ApiEcoindex)
71 | .where(ApiEcoindex.version == version.get_version_number())
72 | .offset((page - 1) * size)
73 | .limit(size)
74 | )
75 |
76 | if host:
77 | statement = statement.where(ApiEcoindex.host == host)
78 | statement = date_filter(statement=statement, date_from=date_from, date_to=date_to)
79 |
80 | for sort in sort_params:
81 | if sort.sort == "asc":
82 | sort_parameter = asc(sort.clause) # type: ignore
83 | elif sort.sort == "desc":
84 | sort_parameter = desc(sort.clause)
85 |
86 | statement = statement.order_by(sort_parameter) # type: ignore
87 |
88 | ecoindexes = await session.exec(statement)
89 |
90 |     return list(ecoindexes.all())
91 |
92 |
93 | async def get_ecoindex_result_by_id_db(
94 | session: AsyncSession, id: UUID, version: Version = Version.v1
95 | ) -> ApiEcoindex | None:
96 | statement = (
97 | select(ApiEcoindex)
98 | .where(ApiEcoindex.id == id)
99 | .where(ApiEcoindex.version == version.get_version_number())
100 | )
101 |
102 | ecoindex = await session.exec(statement)
103 |
104 | return ecoindex.one_or_none()
105 |
106 |
107 | async def get_count_daily_request_per_host(session: AsyncSession, host: str) -> int:
108 | statement = select(ApiEcoindex).where(
109 | func.date(ApiEcoindex.date) == date.today(), ApiEcoindex.host == host
110 | )
111 |
112 | results = await session.exec(statement)
113 |
114 | return len(results.all())
115 |
116 |
117 | async def get_latest_result(session: AsyncSession, host: str) -> ApiEcoindex | None:
118 | statement = (
119 | select(ApiEcoindex)
120 | .where(ApiEcoindex.host == host)
121 | .order_by(text("date desc"))
122 | .limit(1)
123 | )
124 |
125 | result = await session.exec(statement)
126 |
127 | return result.one_or_none()
128 |
--------------------------------------------------------------------------------
/components/ecoindex/database/repositories/host.py:
--------------------------------------------------------------------------------
1 | from datetime import date
2 |
3 | from ecoindex.database.helper import date_filter
4 | from ecoindex.database.models import ApiEcoindex
5 | from ecoindex.models.enums import Version
6 | from sqlalchemy import text
7 | from sqlmodel import select
8 | from sqlmodel.ext.asyncio.session import AsyncSession
9 |
10 |
11 | async def get_host_list_db(
12 | session: AsyncSession,
13 | version: Version = Version.v1,
14 | host: str | None = None,
15 | date_from: date | None = None,
16 | date_to: date | None = None,
17 | page: int = 1,
18 | size: int = 50,
19 | ) -> list[str]:
20 | statement = (
21 | select(ApiEcoindex.host)
22 | .where(ApiEcoindex.version == version.get_version_number())
23 | .offset(size * (page - 1))
24 | .limit(size)
25 | )
26 |
27 | if host:
28 | statement = statement.filter(ApiEcoindex.host.like(f"%{host}%")) # type: ignore
29 |
30 | statement = date_filter(statement=statement, date_from=date_from, date_to=date_to)
31 |
32 | statement = statement.group_by(ApiEcoindex.host).order_by(ApiEcoindex.host)
33 |
34 | hosts = await session.exec(statement=statement)
35 |
36 | return [str(host) for host in hosts.all()]
37 |
38 |
39 | async def get_count_hosts_db(
40 | session: AsyncSession,
41 | version: Version = Version.v1,
42 | name: str | None = None,
43 | q: str | None = None,
44 | date_from: date | None = None,
45 | date_to: date | None = None,
46 | group_by_host: bool = True,
47 | ) -> int:
48 |     sub_statement = "SELECT host FROM apiecoindex WHERE version = :version"
49 |     params: dict = {"version": version.get_version_number()}
50 |     # bind user-supplied values instead of interpolating them into the SQL string
51 |     if name:
52 |         sub_statement += " AND host = :name"
53 |         params["name"] = name
54 |     if q:
55 |         sub_statement += " AND host LIKE :q"
56 |         params["q"] = f"%{q}%"
57 |     if date_from:
58 |         sub_statement += " AND date >= :date_from"
59 |         params["date_from"] = date_from
60 |     if date_to:
61 |         sub_statement += " AND date <= :date_to"
62 |         params["date_to"] = date_to
63 |     if group_by_host:
64 |         sub_statement += " GROUP BY host"
65 | 
66 |     statement = f"SELECT count(*) FROM ({sub_statement}) t"
67 | 
68 |     result = await session.exec(statement=text(statement).bindparams(**params))  # type: ignore
69 |
70 | return result.scalar_one()
71 |
--------------------------------------------------------------------------------
/components/ecoindex/database/repositories/worker.py:
--------------------------------------------------------------------------------
1 | from uuid import UUID
2 |
3 | from ecoindex.database.models import ApiEcoindex
4 | from ecoindex.database.repositories.ecoindex import (
5 | get_count_analysis_db,
6 | get_rank_analysis_db,
7 | )
8 | from ecoindex.models import Result
9 | from ecoindex.models.enums import Version
10 | from sqlmodel.ext.asyncio.session import AsyncSession
11 |
12 |
13 | async def save_ecoindex_result_db(
14 | session: AsyncSession,
15 | id: UUID,
16 | ecoindex_result: Result,
17 | version: Version = Version.v1,
18 | source: str | None = None,
19 | ) -> ApiEcoindex:
20 | ranking = await get_rank_analysis_db(
21 | session=session, ecoindex=ecoindex_result, version=version
22 | )
23 | total_results = await get_count_analysis_db(session=session, version=version)
24 |
25 | db_ecoindex = ApiEcoindex(
26 | id=id,
27 | date=ecoindex_result.date,
28 | url=ecoindex_result.url,
29 | host=ecoindex_result.get_url_host(),
30 | width=ecoindex_result.width,
31 | height=ecoindex_result.height,
32 | size=ecoindex_result.size,
33 | nodes=ecoindex_result.nodes,
34 | requests=ecoindex_result.requests,
35 | grade=ecoindex_result.grade,
36 | score=ecoindex_result.score,
37 | ges=ecoindex_result.ges,
38 | water=ecoindex_result.water,
39 | page_type=ecoindex_result.page_type,
40 | version=version.get_version_number(),
41 | initial_ranking=ranking if ranking else total_results + 1,
42 | initial_total_results=total_results + 1,
43 | ecoindex_version=ecoindex_result.ecoindex_version,
44 | source=source,
45 | )
46 |
47 | session.add(db_ecoindex)
48 | try:
49 | await session.commit()
50 | await session.refresh(db_ecoindex)
51 | except Exception:
52 | await session.rollback()
53 | raise
54 | finally:
55 | await session.close()
56 |
57 | return db_ecoindex
58 |
--------------------------------------------------------------------------------
/components/ecoindex/exceptions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/components/ecoindex/exceptions/__init__.py
--------------------------------------------------------------------------------
/components/ecoindex/exceptions/scraper.py:
--------------------------------------------------------------------------------
1 | class EcoindexScraperException(Exception):
2 | pass
3 |
4 |
5 | class EcoindexScraperStatusException(EcoindexScraperException):
6 | def __init__(self, url: str, status: int, message: str):
7 | self.message = message
8 | self.url = url
9 | self.status = status
10 | 
11 |         super().__init__(self.message)
12 |
--------------------------------------------------------------------------------
/components/ecoindex/exceptions/worker.py:
--------------------------------------------------------------------------------
1 | class EcoindexContentTypeError(Exception):
2 | pass
3 |
4 |
5 | class EcoindexHostUnreachable(Exception):
6 | pass
7 |
8 |
9 | class EcoindexPageNotFound(Exception):
10 | pass
11 |
12 |
13 | class EcoindexStatusError(Exception):
14 | pass
15 |
16 |
17 | class EcoindexTimeout(Exception):
18 | pass
19 |
--------------------------------------------------------------------------------
/components/ecoindex/models/__init__.py:
--------------------------------------------------------------------------------
1 | from ecoindex.models.cli import (
2 | CliHost,
3 | )
4 | from ecoindex.models.compute import (
5 | Ecoindex,
6 | PageMetrics,
7 | PageType,
8 | Request,
9 | Result,
10 | ScreenShot,
11 | WebPage,
12 | WindowSize,
13 | )
14 | from ecoindex.models.enums import ExportFormat, Language, Version
15 | from ecoindex.models.response_examples import (
16 | example_daily_limit_response,
17 | example_ecoindex_not_found,
18 | example_file_not_found,
19 | example_page_listing_empty,
20 | )
21 | from ecoindex.models.scraper import RequestItem, Requests
22 | from ecoindex.models.sort import Sort
23 |
24 | __all__ = [
25 | "CliHost",
26 | "Ecoindex",
27 | "example_daily_limit_response",
28 | "example_ecoindex_not_found",
29 | "example_file_not_found",
30 | "example_page_listing_empty",
31 | "ExportFormat",
32 | "Language",
33 | "PageMetrics",
34 | "PageType",
35 | "Request",
36 | "RequestItem",
37 | "Requests",
38 | "Result",
39 | "ScreenShot",
40 | "Sort",
41 | "Version",
42 | "WebPage",
43 | "WindowSize",
44 | ]
45 |
--------------------------------------------------------------------------------
/components/ecoindex/models/api.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from pydantic import BaseModel, Field
4 |
5 |
6 | class ApiHealth(BaseModel):
7 | database: bool = Field(default=..., title="Status of database")
8 |
9 |
10 | class BaseHost(BaseModel):
11 | name: str
12 | total_count: int
13 |
14 |
15 | class Host(BaseHost):
16 | remaining_daily_requests: int | None = None
17 |
18 |
19 | class PageHosts(BaseModel):
20 | items: list[str]
21 | total: int
22 | page: int
23 | size: int
24 |
25 |
26 | class ExceptionResponse(BaseModel):
27 | args: list[Any]
28 | exception: str
29 | message: str | None = None
30 |
31 |
32 | class HealthWorker(BaseModel):
33 | name: str = Field(default=..., title="Name of worker")
34 | healthy: bool = Field(default=..., title="Status of worker")
35 |
36 |
37 | class HealthWorkers(BaseModel):
38 | healthy: bool = Field(default=..., title="Global status of workers")
39 | workers: list[HealthWorker] = Field(default=..., title="List of workers")
40 |
41 |
42 | class HealthResponse(BaseModel):
43 | database: bool = Field(default=..., title="Status of database")
44 | workers: HealthWorkers = Field(default=..., title="Status of workers")
45 |
--------------------------------------------------------------------------------
/components/ecoindex/models/cli.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel
2 |
3 | class CliHost(BaseModel):
4 | domain: str
5 | netloc: str
6 |
7 |
--------------------------------------------------------------------------------
/components/ecoindex/models/compute.py:
--------------------------------------------------------------------------------
1 | import os
2 | from datetime import datetime
3 | from functools import lru_cache
4 | from pathlib import Path
5 | from typing import Any
6 |
7 | from ecoindex.models.enums import Grade
8 | from pydantic import AnyHttpUrl, BaseModel, Field, field_validator
9 |
10 | PageType = str
11 |
12 |
13 | @lru_cache
14 | def get_compute_version() -> str:
15 | current_directory = os.path.dirname(os.path.realpath(__file__))
16 | version_filename = os.path.join(current_directory, "..", "compute", "VERSION")
17 |
18 | with open(version_filename, "r") as f:
19 | return (f.read()).strip()
20 |
21 |
22 | class Ecoindex(BaseModel):
23 | grade: Grade | None = Field(
24 | default=None,
25 | title="Ecoindex grade",
26 | description="Is the corresponding ecoindex grade of the page (from A to G)",
27 | )
28 | score: float | None = Field(
29 | default=None,
30 | title="Ecoindex score",
31 | description="Is the corresponding ecoindex score of the page (0 to 100)",
32 | ge=0,
33 | le=100,
34 | )
35 | ges: float | None = Field(
36 | default=None,
37 | title="Ecoindex GES equivalent",
38 | description=(
39 | "Is the equivalent of greenhouse gases emission" " (in `gCO2e`) of the page"
40 | ),
41 | ge=0,
42 | )
43 | water: float | None = Field(
44 | default=None,
45 | title="Ecoindex Water equivalent",
46 | description="Is the equivalent water consumption (in `cl`) of the page",
47 | ge=0,
48 | )
49 | ecoindex_version: str | None = Field(
50 | default=get_compute_version(),
51 | title="Ecoindex version",
52 | description="Is the version of the ecoindex used to compute the score",
53 | )
54 |
55 |
56 | class PageMetrics(BaseModel):
57 | size: float = Field(
58 | default=...,
59 | title="Page size",
60 | description=(
61 | "Is the size of the page and of the downloaded"
62 | " elements of the page in KB"
63 | ),
64 | ge=0,
65 | )
66 | nodes: int = Field(
67 | default=...,
68 | title="Page nodes",
69 | description="Is the number of the DOM elements in the page",
70 | ge=0,
71 | )
72 | requests: int = Field(
73 | default=...,
74 | title="Page requests",
75 | description="Is the number of external requests made by the page",
76 | ge=0,
77 | )
78 |
79 |
80 | class WebPage(BaseModel):
81 | width: int | None = Field(
82 | default=1920,
83 | title="Page Width",
84 | description="Width of the simulated window in pixel",
85 | ge=100,
86 | le=3840,
87 | )
88 | height: int | None = Field(
89 | default=1080,
90 | title="Page Height",
91 | description="Height of the simulated window in pixel",
92 | ge=50,
93 | le=2160,
94 | )
95 | url: str = Field(
96 | default=...,
97 | title="Page url",
98 | description="Url of the analysed page",
99 | examples=["https://www.ecoindex.fr"],
100 | )
101 |
102 | @field_validator("url")
103 | @classmethod
104 | def url_as_http_url(cls, v: str) -> str:
105 | url_object = AnyHttpUrl(url=v) # type: ignore
106 | assert url_object.scheme in {"http", "https"}, "scheme must be http or https"
107 |
108 | return url_object.unicode_string()
109 |
110 | def get_url_host(self) -> str:
111 | url_object = AnyHttpUrl(url=self.url) # type: ignore
112 |
113 | return str(url_object.host)
114 |
115 | def get_url_path(self) -> str:
116 |         url_object = AnyHttpUrl(url=self.url)  # type: ignore
117 | 
118 |         return str(url_object.path)
119 |
120 |
121 | class WindowSize(BaseModel):
122 | height: int = Field(
123 | default=...,
124 | title="Window height",
125 | description="Height of the simulated window in pixel",
126 | )
127 | width: int = Field(
128 | default=...,
129 | title="Window width",
130 | description="Width of the simulated window in pixel",
131 | )
132 |
133 | def __str__(self) -> str:
134 | return f"{self.width},{self.height}"
135 |
136 |
137 | class Result(Ecoindex, PageMetrics, WebPage):
138 | date: datetime | None = Field(
139 | default=None, title="Analysis datetime", description="Date of the analysis"
140 | )
141 | page_type: PageType | None = Field(
142 | default=None,
143 | title="Page type",
144 | description="Is the type of the page, based ton the [opengraph type tag](https://ogp.me/#types)",
145 | )
146 |
147 |
148 | class ScreenShot(BaseModel):
149 | id: str
150 | folder: str
151 |
152 | def __init__(__pydantic_self__, **data: Any) -> None:
153 | super().__init__(**data)
154 | path = Path(__pydantic_self__.folder)
155 | path.mkdir(parents=True, exist_ok=True)
156 |
157 | def __str__(self) -> str:
158 | return f"{self.folder}/{self.id}"
159 |
160 | def get_png(self) -> str:
161 | return f"{self.__str__()}.png"
162 |
163 | def get_webp(self) -> str:
164 | return f"{self.__str__()}.webp"
165 |
166 |
167 | class Request(BaseModel):
168 | url: str
169 | type: str
170 | size: float
171 |
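A short sketch of the `WebPage` URL validation and its host/path helpers:

```python
from ecoindex.models.compute import WebPage

page = WebPage(url="https://www.ecoindex.fr/a-propos/")
print(page.get_url_host())       # www.ecoindex.fr
print(page.get_url_path())       # /a-propos/
print(page.width, page.height)   # 1920 1080 (defaults)

WebPage(url="ftp://example.org")  # rejected: scheme must be http or https
```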
--------------------------------------------------------------------------------
/components/ecoindex/models/enums.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 |
4 | class Version(str, Enum):
5 | v0 = "v0"
6 | v1 = "v1"
7 |
8 | def get_version_number(self) -> int:
9 | return int(self.value[1:])
10 |
11 |
12 | class ExportFormat(Enum):
13 | csv = "csv"
14 | json = "json"
15 |
16 |
17 | class Language(Enum):
18 | fr = "fr"
19 | en = "en"
20 |
21 |
22 | class TaskStatus(str, Enum):
23 | FAILURE = "FAILURE"
24 | PENDING = "PENDING"
25 | SUCCESS = "SUCCESS"
26 |
27 |
28 | class BadgeTheme(str, Enum):
29 | dark = "dark"
30 | light = "light"
31 |
32 |
33 | class Grade(str, Enum):
34 | A = "A"
35 | B = "B"
36 | C = "C"
37 | D = "D"
38 | E = "E"
39 | F = "F"
40 | G = "G"
41 |
--------------------------------------------------------------------------------
/components/ecoindex/models/response_examples.py:
--------------------------------------------------------------------------------
1 | example_ecoindex_not_found = {
2 | "description": "Not found",
3 | "content": {
4 | "application/json": {
5 | "example": {
6 | "detail": (
7 | "Analysis e9a4d5ea-b9c5-4440-a74a-cac229f7d672 "
8 | "not found for version v1"
9 | )
10 | }
11 | }
12 | },
13 | }
14 |
15 | example_file_not_found = {
16 | "description": "Not found",
17 | "content": {
18 | "application/json": {
19 | "example": {
20 | "detail": (
21 | "File at path screenshots/v0/"
22 | "550cdf8c-9c4c-4f8a-819d-cb69d0866fe1.webp does not exist."
23 | )
24 | }
25 | }
26 | },
27 | }
28 |
29 | example_page_listing_empty = {
30 | "description": "Empty page",
31 | "content": {
32 | "application/json": {
33 | "example": {
34 | "items": [],
35 | "total": 0,
36 | "page": 1,
37 | "size": 10,
38 | }
39 | }
40 | },
41 | }
42 |
43 | example_daily_limit_response = {
44 | "description": "You have reached the daily limit",
45 | "content": {
46 | "application/json": {
47 | "example": {
48 | "detail": {
49 | "daily_limit_per_host": 1,
50 | "limit": 1,
51 | "host": "www.ecoindex.fr",
52 | "latest_result": {
53 | "width": 1920,
54 | "height": 1080,
55 | "size": 107.178,
56 | "requests": 6,
57 | "score": 87,
58 | "water": 1.89,
59 | "date": "2023-01-05T12:06:57",
60 | "id": "be8c3612-545f-4e72-8880-13b8db74ff6e",
61 | "version": 1,
62 | "initial_ranking": 1,
63 | "url": "https://www.ecoindex.fr",
64 | "nodes": 201,
65 | "grade": "A",
66 | "ges": 1.26,
67 | "ecoindex_version": "5.4.1",
68 | "page_type": None,
69 | "host": "www.ecoindex.fr",
70 | "initial_total_results": 1,
71 | },
72 | "message": (
73 | "You have already reached the daily limit of 1 "
74 | "requests for host www.ecoindex.fr today"
75 | ),
76 | }
77 | }
78 | }
79 | },
80 | }
81 |
121 | example_host_unreachable = {
122 | "description": "Host unreachable",
123 | "content": {
124 | "application/json": {
125 | "example": {
126 | "detail": "The URL http://localhost is unreachable. Are you really sure of this url? 🤔",
127 | }
128 | }
129 | },
130 | }
131 |
--------------------------------------------------------------------------------
/components/ecoindex/models/scraper.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel
2 |
3 |
4 | class RequestItem(BaseModel):
5 | category: str
6 | mime_type: str
7 | size: float
8 | status: int
9 | url: str
10 |
11 |
12 | class MimetypeMetrics(BaseModel):
13 | total_count: int = 0
14 | total_size: float = 0
15 |
16 |
17 | class MimetypeAggregation(BaseModel):
18 | audio: MimetypeMetrics = MimetypeMetrics()
19 | css: MimetypeMetrics = MimetypeMetrics()
20 | font: MimetypeMetrics = MimetypeMetrics()
21 | html: MimetypeMetrics = MimetypeMetrics()
22 | image: MimetypeMetrics = MimetypeMetrics()
23 | javascript: MimetypeMetrics = MimetypeMetrics()
24 | other: MimetypeMetrics = MimetypeMetrics()
25 | video: MimetypeMetrics = MimetypeMetrics()
26 |
27 | @classmethod
28 | async def get_category_of_resource(cls, mimetype: str) -> str:
29 | mimetypes = [type for type in cls.model_fields.keys()]
30 |
31 | for type in mimetypes:
32 | if type in mimetype:
33 | return type
34 |
35 | return "other"
36 |
37 |
38 | class Requests(BaseModel):
39 | aggregation: MimetypeAggregation = MimetypeAggregation()
40 | items: list[RequestItem] = []
41 | total_count: int = 0
42 | total_size: float = 0
43 |
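A minimal sketch of the category lookup: the first field name of `MimetypeAggregation` contained in the mime type wins, and anything else falls back to `other`:

```python
import asyncio

from ecoindex.models.scraper import MimetypeAggregation

print(asyncio.run(MimetypeAggregation.get_category_of_resource("text/javascript")))  # javascript
print(asyncio.run(MimetypeAggregation.get_category_of_resource("image/webp")))       # image
print(asyncio.run(MimetypeAggregation.get_category_of_resource("application/pdf")))  # other
```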
--------------------------------------------------------------------------------
/components/ecoindex/models/sort.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 |
3 | from pydantic import BaseModel
4 |
5 |
6 | class Sort(BaseModel):
7 | clause: str
8 | sort: Literal["asc", "desc"]
9 |
--------------------------------------------------------------------------------
/components/ecoindex/models/tasks.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from ecoindex.models import Result
4 | from pydantic import AnyHttpUrl, BaseModel, Field
5 |
6 |
7 | class QueueTaskError(BaseModel):
8 | detail: Any | None = Field(
9 | default=None, title="Detail object of the raised exception"
10 | )
11 | exception: str = Field(default=..., title="Name of the exception that was raised")
12 | message: str = Field(default=..., title="Message of the exception")
13 | status_code: int | None = Field(
14 |         default=None, title="Corresponding original HTTP status code sent by the API"
15 | )
16 | url: AnyHttpUrl | None = Field(default=None, title="URL of the analyzed web page")
17 |
18 |
19 | class QueueTaskResult(BaseModel):
20 | status: str | None = Field(
21 | default=None,
22 | title="Status of the ecoindex analysis.",
23 | description=(
24 | "While the task is pending or the analysis is running, it is null."
25 | " But once the analysis is complete, it should return SUCCESS or FAILURE."
26 | ),
27 | )
28 | detail: Result | None = Field(
29 | default=None,
30 |         title="Result of the ecoindex analysis once it was successfully completed",
31 | )
32 | error: QueueTaskError | None = Field(
33 | default=None, title="Detail of the ecoindex error if it is not successful"
34 | )
35 |
36 |
37 | class QueueTaskApi(BaseModel):
38 | id: str = Field(
39 | default=...,
40 | title=(
41 |             "Identifier of the current task. "
42 | "This identifier will become the identifier of the analysis"
43 | ),
44 | )
45 | status: str = Field(
46 | default=...,
47 | title="Status of the current task. Can be PENDING, FAILURE, SUCCESS",
48 | )
49 | ecoindex_result: QueueTaskResult | None = Field(
50 | default=None, title="Result of the Ecoindex analysis"
51 | )
52 | task_error: Any | None = Field(
53 | default=None,
54 | title="Detail of the error encountered by the task in case of Failure",
55 | )
56 |
57 |
58 | class QueueTaskApiBatch(BaseModel):
59 | id: str = Field(
60 | default=...,
61 | title=(
62 |             "Identifier of the current task. "
63 | "This identifier will become the identifier of the analysis"
64 | ),
65 | )
66 | status: str = Field(
67 | default=...,
68 | title="Status of the current task. Can be PENDING, FAILURE, SUCCESS",
69 | )
70 | task_error: Any | None = Field(
71 | default=None,
72 | title="Detail of the error encountered by the task in case of Failure",
73 | )
74 |
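A hedged sketch of a failed task serialized through these models; every value below is invented for illustration, including the exception name:

```python
from ecoindex.models.tasks import QueueTaskApi, QueueTaskError, QueueTaskResult

error = QueueTaskError(
    exception="ScraperError",  # hypothetical exception name
    message="Page returned HTTP 404",
    status_code=404,
    url="https://www.example.com/missing",
)

task = QueueTaskApi(
    id="be8c3612-545f-4e72-8880-13b8db74ff6e",
    status="FAILURE",
    ecoindex_result=QueueTaskResult(status="FAILURE", error=error),
)
print(task.model_dump_json(indent=2))
```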
--------------------------------------------------------------------------------
/components/ecoindex/scraper/VERSION:
--------------------------------------------------------------------------------
1 | 3.15.0
2 |
--------------------------------------------------------------------------------
/components/ecoindex/scraper/__init__.py:
--------------------------------------------------------------------------------
1 | from ecoindex.scraper.scrap import EcoindexScraper
2 |
3 | __all__ = ["EcoindexScraper"]
4 |
--------------------------------------------------------------------------------
/components/ecoindex/scraper/helper.py:
--------------------------------------------------------------------------------
1 | from asyncio import run
2 | from concurrent.futures import ThreadPoolExecutor, as_completed
3 | from typing import Generator
4 |
5 | from ecoindex.models.compute import Result, WindowSize
6 | from ecoindex.scraper.scrap import EcoindexScraper
7 |
8 |
9 | def run_page_analysis(
10 | url: str,
11 | window_size: WindowSize,
12 | wait_after_scroll: int = 3,
13 | wait_before_scroll: int = 3,
14 | logger=None,
15 | ) -> tuple[Result, bool]:
16 | """Run the page analysis and return the result and a boolean indicating if the analysis was successful"""
17 | scraper = EcoindexScraper(
18 | url=str(url),
19 | window_size=window_size,
20 | wait_after_scroll=wait_after_scroll,
21 | wait_before_scroll=wait_before_scroll,
22 | page_load_timeout=20,
23 | )
24 | try:
25 | return (run(scraper.get_page_analysis()), True)
26 | except Exception as e:
27 |         if logger: logger.error(f"{url} -- {e.msg if hasattr(e, 'msg') else e}")
28 |
29 | return (
30 | Result(
31 | url=url,
32 | water=0,
33 | width=window_size.width,
34 | height=window_size.height,
35 | size=0,
36 | nodes=0,
37 | requests=0,
38 | ),
39 | False,
40 | )
41 |
42 |
43 | def bulk_analysis(
44 | max_workers,
45 | urls,
46 | window_sizes,
47 | wait_after_scroll: int = 0,
48 | wait_before_scroll: int = 0,
49 | logger=None,
50 | ) -> Generator[tuple[Result, bool], None, None]:
51 | with ThreadPoolExecutor(max_workers=max_workers) as executor:
52 | future_to_analysis = {}
53 |
54 | for url in urls:
55 | for window_size in window_sizes:
56 | future_to_analysis[
57 | executor.submit(
58 | run_page_analysis,
59 | url,
60 | window_size,
61 | wait_after_scroll,
62 | wait_before_scroll,
63 | logger,
64 | )
65 | ] = (
66 | url,
67 | window_size,
68 | wait_after_scroll,
69 | wait_before_scroll,
70 | logger,
71 | )
72 |
73 | for future in as_completed(future_to_analysis):
74 | yield future.result()
75 |
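A hypothetical driver for `bulk_analysis`; the URLs, worker count, and logger are placeholders. Each yielded tuple is a `Result` plus the success flag from `run_page_analysis`:

```python
import logging

from ecoindex.models.compute import WindowSize
from ecoindex.scraper.helper import bulk_analysis

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ecoindex")

analyses = bulk_analysis(
    max_workers=2,
    urls=["https://www.ecoindex.fr", "https://www.example.com"],
    window_sizes=[WindowSize(width=1920, height=1080)],
    wait_after_scroll=3,
    wait_before_scroll=3,
    logger=logger,
)

for result, success in analyses:
    print(result.url, "ok" if success else "failed")
```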
--------------------------------------------------------------------------------
/components/ecoindex/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | from ecoindex.scripts.update_values import update_values
2 |
3 | __all__ = ["update_values"]
4 |
--------------------------------------------------------------------------------
/components/ecoindex/scripts/update_values.py:
--------------------------------------------------------------------------------
1 | from asyncio import run
2 | from json import dumps
3 | from os import getcwd
4 |
5 | from aiofile import async_open
6 | from requests import get
7 |
8 |
9 | async def update_values_async() -> None:
10 | response = get(
11 | "https://cdn.jsdelivr.net/gh/cnumr/ecoindex_reference@1/ecoindex_reference.json",
12 | )
13 |
14 | data = response.json()
15 | data_folder = f"{getcwd()}/components/ecoindex/data/"
16 |
17 | async with async_open(f"{data_folder}quantiles.py", "w") as quantile_file:
18 | quantiles = f"quantiles_dom = {dumps(data['quantiles']['dom_size'])}\n"
19 | quantiles += f"quantiles_req = {dumps(data['quantiles']['nb_request'])}\n"
20 | quantiles += f"quantiles_size = {dumps(data['quantiles']['response_size'])}\n"
21 |
22 | await quantile_file.write(quantiles)
23 |
24 | async with async_open(f"{data_folder}targets.py", "w") as target_file:
25 | targets = f"target_dom = {dumps(data['targets']['dom_size'])}\n"
26 | targets += f"target_req = {dumps(data['targets']['nb_request'])}\n"
27 | targets += f"target_size = {dumps(data['targets']['response_size'])}\n"
28 |
29 | await target_file.write(targets)
30 |
31 | async with async_open(f"{data_folder}medians.py", "w") as median_file:
32 | medians = f"median_dom = {dumps(data['medians']['dom_size'])}\n"
33 | medians += f"median_req = {dumps(data['medians']['nb_request'])}\n"
34 | medians += f"median_size = {dumps(data['medians']['response_size'])}\n"
35 |
36 | await median_file.write(medians)
37 |
38 | async with async_open(f"{data_folder}grades.py", "w") as grades_file:
39 | grades = ""
40 |
41 | for grade in data["grades"]:
42 | grades += f"{grade['grade']} = {grade['value']}\n"
43 |
44 | await grades_file.write(grades)
45 |
46 | print("Values updated")
47 |
48 |
49 | def update_values() -> None:
50 | run(update_values_async())
51 |
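For orientation, the script rewrites the modules under `components/ecoindex/data/` as flat assignments; the generated files have roughly this shape (values truncated and purely illustrative, the real ones come from the `ecoindex_reference` JSON):

```python
# quantiles.py (illustrative values, not the published reference data)
quantiles_dom = [0, 47, 75, 159, 233, 298, 358, 417, 476, 537, 603]
quantiles_req = [0, 2, 15, 25, 34, 42, 49, 56, 63, 70, 78]
quantiles_size = [0, 1.4, 144.7, 319.5, 479.5, 632.0, 783.4, 937.9]
```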
--------------------------------------------------------------------------------
/components/ecoindex/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from ecoindex.utils.screenshots import convert_screenshot_to_webp, set_screenshot_rights
2 |
3 | __all__ = [
4 | "convert_screenshot_to_webp",
5 | "set_screenshot_rights",
6 | ]
7 |
--------------------------------------------------------------------------------
/components/ecoindex/utils/cli_translations/en.yml:
--------------------------------------------------------------------------------
1 | title: Ecoindex analysis for the website
2 | synthesis: Synthesis
3 | top10: Top 10
4 | indicators: Indicators
5 | all_data_title: All the data
6 | download: Download
7 | ecoindex_score: The ecoindex score
8 | best_pages: Best pages
9 | worst_pages: Worst pages
10 | number_of_requests: Number of requests
11 | pages_size: Pages size
12 | number_of_dom_nodes: Number of DOM nodes
13 |
14 | ecoindex_body: |
15 | The ecoindex is an indicator developed by the "Collectif Numérique Responsable".
16 | It allows you to analyze an HTML page and deduce a score based on three physical criteria of a web page.
17 | The calculation of the ecoindex yields a standardized indicator (from A to G) that describes the environmental footprint of a page.
18 | ecoindex_body_end_pre: This synthesis is based on the analysis of
19 | ecoindex_body_end_mid: pages of the website
20 | ecoindex_body_end_suf: More info on Ecoindex.fr
21 |
22 | requests_body: |
23 | The number of HTTP requests gives a good idea of the server load.
24 | To put it briefly and somewhat caricaturally, the more requests a single page needs, the more servers it takes to serve that page.
25 |
26 | size_body: |
27 | The weight of the data transferred (KB) reflects the effort required to transport the page to the browser.
28 | In this first version, we consider that the connection is Wi-Fi via ADSL.
29 | But in the next version we will take into account different types of connections, especially 4G.
30 | Indeed, a 4G connection requires up to 23 times more energy to transport the same amount of data as an ADSL connection.
31 |
32 | nodes_body:
33 | The number of DOM elements reflects the complexity of the site and therefore, a priori,
34 | the effort the browser must make to display the page.
35 | The more complex the site, the more power (especially CPU) it takes to display it,
36 | and the more it contributes to shortening the life of the computer it runs on.
37 | Conversely, a particularly simple and light site contributes to extending the life of the equipment because it requires little memory and a “small” processor.
38 |
39 | histograms:
40 | grade:
41 | title: Ecoindex per page distribution
42 | xlabel: Ecoindex
43 | ylabel: Number of pages
44 | size:
45 | title: Pages size per page distribution
46 | xlabel: Size of pages (KB)
47 | ylabel: Number of pages
48 | nodes:
49 | title: DOM nodes per page distribution
50 | xlabel: Number of DOM nodes
51 | ylabel: Number of pages
52 | requests:
53 | title: Requests per page distribution
54 | xlabel: Number of requests
55 | ylabel: Number of pages
56 |
57 | footer: Ecoindex_cli by the "collectif Numérique Responsable"
58 |
59 | good_result: 😃 Your result is great!
60 | bad_result: 😞 You could do better on this...
61 | better_than: is better than
62 | worse_than: is worse than
63 | my_median: My median
64 | target_median: Target median
65 | global_median: Global median
66 |
--------------------------------------------------------------------------------
/components/ecoindex/utils/cli_translations/fr.yml:
--------------------------------------------------------------------------------
1 | title: Analyse écoindex pour le site
2 | synthesis: Synthèse
3 | top10: Top 10
4 | indicators: Indicateurs
5 | all_data_title: Toutes les données
6 | download: Télécharger
7 | ecoindex_score: Le score ecoindex
8 | best_pages: Meilleures pages
9 | worst_pages: Moins bonnes pages
10 | number_of_requests: Nombre de requêtes
11 | pages_size: Taille des pages
12 | number_of_dom_nodes: Nombre d'éléments de la page
13 |
14 | ecoindex_body: |
15 | L'ecoindex est un indicateur développé par le collectif numérique responsable.
16 | Il permet d'analyser une page HTML et d'en déduire un score en se basant sur trois critères physiques d'une page web.
17 | Le calcul de l'ecoindex permet de déduire un indicateur normé (de A à G) permettant de décrire l'empreinte environnementale d'une page.
18 | ecoindex_body_end_pre: Cette synthèse est basée sur l'analyse de
19 | ecoindex_body_end_mid: pages du site internet
20 | ecoindex_body_end_suf: Plus d'infos sur Ecoindex.fr
21 |
22 | requests_body: |
23 | Le nombre de requêtes HTTP donne une bonne idée de la charge serveur.
24 | Pour le dire vite et de façon caricaturale, plus le nombre de requêtes est important pour une même page et plus il faudra de serveurs pour servir cette page.
25 |
26 | size_body: |
27 | Le poids des données transférées (Ko) témoigne des efforts à faire pour transporter la page jusqu'au navigateur.
28 | Dans cette première version, nous considérons que la connexion est de type Wi-Fi via ADSL.
29 | Mais dans la prochaine version nous prendrons en compte différents types de connexions, notamment 4G.
30 | En effet, une connexion 4G nécessite jusqu’à 23 fois plus d’énergie pour transporter la même quantité de données qu’une connexion ADSL.
31 |
32 | nodes_body:
33 | Le nombre d'éléments du DOM témoigne de la complexité du site et donc, a priori,
34 | des efforts à faire par le navigateur pour afficher la page.
35 | Plus le site est complexe et plus il faut de puissance (notamment CPU) pour l’afficher.
36 | Et plus on contribue à raccourcir la durée de vie de l’ordinateur sur lequel il s’exécute.
37 | A l’inverse, un site particulièrement simple et léger contribue à allonger la durée de vie de l’équipement car il nécessite peu de mémoire et un “petit” processeur.
38 |
39 | histograms:
40 | grade:
41 | title: Répartition écoindex
42 | xlabel: Ecoindex
43 | ylabel: Nombre de pages
44 | size:
45 | title: Répartition du poids des pages
46 | xlabel: Poids des pages (Ko)
47 | ylabel: Nombre de pages
48 | nodes:
49 | title: Répartition des éléments du DOM par page
50 | xlabel: Nombre d'éléments du DOM
51 | ylabel: Nombre de pages
52 | requests:
53 | title: Répartition des requêtes par page
54 | xlabel: Nombre de requêtes
55 | ylabel: Nombre de pages
56 |
57 | footer: Ecoindex_cli par le collectif Numérique Responsable
58 |
59 | good_result: 😃 Très bon résultat !
60 | bad_result: 😞 Hum, on peut mieux faire...
61 | better_than: ", c'est mieux que"
62 | worse_than: ", c'est moins bien que"
63 | my_median: Ma valeur médiane
64 | target_median: Valeur médiane cible
65 | global_median: Valeur médiane globale
66 |
--------------------------------------------------------------------------------
/components/ecoindex/utils/files.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from csv import DictWriter
3 | from json import dump
4 | from os import makedirs
5 | from os.path import dirname, exists
6 |
7 | from ecoindex.models import ExportFormat, Language, Result
8 | from yaml import safe_load as load_yaml
9 |
10 |
11 | def create_folder(path: str) -> None:
12 | if not exists(path):
13 | makedirs(path)
14 |
15 |
16 | class File(ABC):
17 | def __init__(
18 | self,
19 | filename: str,
20 | results: list[Result],
21 | export_format: ExportFormat | None = ExportFormat.csv,
22 | ):
23 | self.filename = filename
24 | self.results = results
25 | self.export_format = export_format
26 |
27 | @abstractmethod
28 | def write(self) -> None:
29 | pass
30 |
31 |
32 | class CsvFile(File):
33 | def write(self) -> None:
34 | headers = self.results[0].__dict__
35 |
36 | with open(self.filename, "w") as fp:
37 | writer = DictWriter(fp, fieldnames=headers)
38 |
39 | writer.writeheader()
40 | for ecoindex in self.results:
41 | writer.writerow(ecoindex.__dict__)
42 |
43 |
44 | class JsonFile(File):
45 | def write(self) -> None:
46 | with open(self.filename, "w") as fp:
47 | dump(
48 | obj=[ecoindex.__dict__ for ecoindex in self.results],
49 | fp=fp,
50 | indent=4,
51 | default=str,
52 | )
53 |
54 |
55 | def write_results_to_file(
56 | filename: str,
57 | results: list[Result],
58 | export_format: ExportFormat | None = ExportFormat.csv,
59 | ) -> None:
60 | if export_format == ExportFormat.csv:
61 | CsvFile(filename=filename, results=results, export_format=export_format).write()
62 |
63 | if export_format == ExportFormat.json:
64 | JsonFile(
65 | filename=filename, results=results, export_format=export_format
66 | ).write()
67 |
68 |
69 | def write_urls_to_file(file_prefix: str, urls: list[str]) -> None:
70 | tmp_input_folder = "/tmp/ecoindex-cli/input"
71 | create_folder(tmp_input_folder)
72 | with open(
73 | file=f"{tmp_input_folder}/{file_prefix}.csv", mode="w"
74 | ) as input_urls_file:
75 | for url in urls:
76 | input_urls_file.write(f"{str(url).strip()}\n")
77 |
78 |
79 | def get_translations(language: Language) -> dict:
80 | filename = f"{dirname(__file__)}/cli_translations/{language.value}.yml"
81 | with open(filename) as fp:
82 | return load_yaml(fp)
83 |
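A minimal usage sketch; the `Result` values are invented and mirror the fields that `run_page_analysis` populates:

```python
from ecoindex.models import ExportFormat, Result
from ecoindex.utils.files import write_results_to_file

results = [
    Result(
        url="https://www.ecoindex.fr",
        width=1920,
        height=1080,
        size=107.178,
        nodes=201,
        requests=6,
        water=1.89,
    )
]

write_results_to_file(
    filename="/tmp/ecoindex-results.json",
    results=results,
    export_format=ExportFormat.json,
)
```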
--------------------------------------------------------------------------------
/components/ecoindex/utils/screenshots.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from ecoindex.models import ScreenShot
4 | from PIL import Image
5 |
6 |
7 | async def convert_screenshot_to_webp(screenshot: ScreenShot) -> None:
8 | image = Image.open(rf"{screenshot.get_png()}")
9 | width, height = image.size
10 | ratio = 800 / height if width > height else 600 / width
11 |
12 | image.convert("RGB").resize(size=(int(width * ratio), int(height * ratio))).save(
13 | rf"{screenshot.get_webp()}",
14 | format="webp",
15 | )
16 | os.unlink(screenshot.get_png())
17 |
18 |
19 | async def set_screenshot_rights(
20 | screenshot: ScreenShot, uid: int | None = None, gid: int | None = None
21 | ) -> None:
22 |     if uid is not None and gid is not None:
23 | os.chown(path=screenshot.get_webp(), uid=uid, gid=gid)
24 |
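A hedged sketch of the screenshot post-processing flow; the folder, id, and uid/gid are placeholders (1006 matches the defaults in the API's `.env.template`):

```python
import asyncio
from uuid import uuid4

from ecoindex.models import ScreenShot
from ecoindex.utils import convert_screenshot_to_webp, set_screenshot_rights

screenshot = ScreenShot(id=str(uuid4()), folder="./screenshots")

# assumes a PNG was previously written at screenshot.get_png()
asyncio.run(convert_screenshot_to_webp(screenshot))
asyncio.run(set_screenshot_rights(screenshot, uid=1006, gid=1006))
```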
--------------------------------------------------------------------------------
/components/ecoindex/worker_component/__init__.py:
--------------------------------------------------------------------------------
1 | from celery import Celery
2 | from ecoindex.config.settings import Settings
3 |
4 | app: Celery = Celery(
5 | "tasks",
6 | broker=f"redis://{Settings().REDIS_CACHE_HOST}:6379/0",
7 | backend=f"redis://{Settings().REDIS_CACHE_HOST}:6379/1",
8 | broker_connection_retry=True,
9 | broker_connection_retry_on_startup=True,
10 | broker_connection_max_retries=10,
11 | )
12 |
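Illustrative only: any task module can hang work off this shared `app`; the real tasks live in `bases/ecoindex/worker/tasks.py`, and the task below is a made-up ping:

```python
from ecoindex.worker_component import app

@app.task(name="debug_ping")  # hypothetical task, for illustration
def debug_ping() -> str:
    return "pong"
```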
--------------------------------------------------------------------------------
/development/ecoindex_compute.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pprint import pprint
3 |
4 | from ecoindex.compute import compute_ecoindex
5 |
6 | ecoindex = asyncio.run(compute_ecoindex(nodes=100, size=100, requests=100))
7 | pprint(ecoindex)
8 |
--------------------------------------------------------------------------------
/development/ecoindex_scraper.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pprint import pprint
3 | from uuid import uuid1
4 |
5 | from ecoindex.models.compute import ScreenShot
6 | from ecoindex.scraper import EcoindexScraper
7 |
8 | scraper = EcoindexScraper(
9 | url="https://www.kiabi.com",
10 | screenshot=ScreenShot(id=str(uuid1()), folder="./screenshots"),
11 | )
12 |
13 | result = asyncio.run(scraper.get_page_analysis())
14 | all_requests = asyncio.run(scraper.get_all_requests())
15 | requests_by_category = asyncio.run(scraper.get_requests_by_category())
16 |
17 | pprint(result)
18 |
--------------------------------------------------------------------------------
/development/scraper_test.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from concurrent.futures import ThreadPoolExecutor, as_completed
3 |
4 | import pandas as pd
5 | from ecoindex.scraper import EcoindexScraper
6 | from haralyzer import HarParser
7 | from slugify import slugify
8 |
9 |
10 | async def get_page_analysis(url: str):
11 | scraper = EcoindexScraper(url=url)
12 | return (
13 | await scraper.get_page_analysis(),
14 | await scraper.get_all_requests(),
15 | await scraper.get_requests_by_category(),
16 | scraper.har_temp_file_path,
17 | )
18 |
19 |
20 | def run_page_analysis(url: str, index: int):
21 | analysis, requests, aggregation, har_file_path = asyncio.run(get_page_analysis(url))
22 |
23 | return index, analysis, requests, aggregation, har_file_path
24 |
25 |
26 | with ThreadPoolExecutor(max_workers=8) as executor:
27 | future_to_analysis = {}
28 |
29 | urls = ["https://www.graphic-sud.com/", "https://federiconavarrete.com/"]
30 | i = 0
31 |
32 | for url in urls:
33 | print(f"Starting ecoindex {i} analysis")
34 | future_to_analysis[
35 | executor.submit(
36 | run_page_analysis,
37 | url,
38 | i,
39 | )
40 | ] = url
41 | i += 1
42 |
43 | for future in as_completed(future_to_analysis):
44 | try:
45 | index, analysis, requests, aggregation, har_file_path = future.result()
46 |
47 | har_parser = HarParser.from_file(har_file_path)
48 | for page in har_parser.pages:
49 | haralyzer_data = [
50 | {
51 | "type": "audio",
52 | "count": len(page.audio_files),
53 | "size": page.audio_size_trans,
54 | },
55 | {
56 | "type": "css",
57 | "count": len(page.css_files),
58 | "size": page.css_size_trans,
59 | },
60 | {
61 | "type": "javascript",
62 | "count": len(page.js_files),
63 | "size": page.js_size_trans,
64 | },
65 | {"type": "page", "count": 1, "size": page.page_size_trans},
66 | {
67 | "type": "image",
68 | "count": len(page.image_files),
69 | "size": page.image_size_trans,
70 | },
71 | {
72 | "type": "video",
73 | "count": len(page.video_files),
74 | "size": page.video_size_trans,
75 | },
76 | {
77 | "type": "other",
78 | "count": len(page.text_files),
79 | "size": page.text_size_trans,
80 | },
81 | {"type": "html", "count": len(page.html_files), "size": None},
82 | {
83 | "type": "total",
84 | "count": len(page.entries),
85 | "size": page.page_size_trans,
86 | },
87 | ]
88 |
89 | df_haralyzer = pd.DataFrame(
90 | haralyzer_data, columns=["type", "count", "size"]
91 | )
92 | df_haralyzer["size"] = df_haralyzer["size"] / 1000
93 |
94 | flatten_aggregation = [
95 | {
96 | "type": type,
97 | "count": item["total_count"],
98 | "size": item["total_size"],
99 | }
100 | for type, item in aggregation.model_dump().items()
101 | ]
102 | flatten_aggregation.append(
103 | {
104 | "type": "total",
105 | "count": analysis.requests,
106 | "size": analysis.size * 1000,
107 | }
108 | )
109 |
110 | df = pd.DataFrame(flatten_aggregation, columns=["type", "count", "size"])
111 | df["size"] = df["size"] / 1000
112 |
113 | joinned_df = pd.merge(
114 | df,
115 | df_haralyzer,
116 | on="type",
117 | how="left",
118 | suffixes=("_ecoindex", "_haralyzer"),
119 | )
120 |
121 | print()
122 | print(page.url)
123 | print(har_file_path)
124 | print(df)
125 | print(joinned_df)
126 | print()
127 |
128 | joinned_df.to_csv(f"joinned_ecoindex_{slugify(page.url)}.csv", index=False)
129 |
130 | except Exception as e:
131 | print(e)
132 |
--------------------------------------------------------------------------------
/docs/images/ecoindex-python-fullstack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/docs/images/ecoindex-python-fullstack.png
--------------------------------------------------------------------------------
/poetry.toml:
--------------------------------------------------------------------------------
1 | [virtualenvs]
2 | path = ".venv"
3 | in-project = true
4 |
--------------------------------------------------------------------------------
/projects/ecoindex_api/.dockerignore:
--------------------------------------------------------------------------------
1 | *
2 | !.env
3 | !api
4 | !alembic.ini
5 | !common
6 | !db
7 | !docker
8 | !pyproject.toml
9 | !poetry.lock
10 | !settings.py
11 | !worker
12 | !dist
13 | !alembic
--------------------------------------------------------------------------------
/projects/ecoindex_api/.env.template:
--------------------------------------------------------------------------------
1 | # API_PORT=8001
2 | # API_VERSION=latest
3 | # DAILY_LIMIT_PER_HOST=10
4 | # DB_HOST=db
5 | # DB_NAME=ecoindex
6 | # DB_PASSWORD=ecoindex
7 | # DB_PORT=3306
8 | # DB_USER=ecoindex
9 | # DEBUG=1
10 | # ENABLE_SCREENSHOT=1
11 | # EXCLUDED_HOSTS='["localhost","127.0.0.1"]'
12 | # FLOWER_BASIC_AUTH=ecoindex:password
13 | # FLOWER_PORT=5555
14 | # GLITCHTIP_DSN=
15 | # REDIS_CACHE_HOST=redis
16 | # SCREENSHOTS_GID=1006
17 | # SCREENSHOTS_UID=1006
18 | # TZ=Europe/Paris
19 | # API_KEYS_BATCH='{"key": "random_key", "name":"Example key", "description": "This key is used to authenticate batch import of results as an example", "source": "ecoindex.fr"}'
--------------------------------------------------------------------------------
/projects/ecoindex_api/.gitignore:
--------------------------------------------------------------------------------
1 | docker-compose.yml
2 | .env
3 | *.webp
--------------------------------------------------------------------------------
/projects/ecoindex_api/Taskfile.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 |
3 | includes:
4 | poetry: ../../tasks/PoetryTaskfile.yml
5 | docker: ../../tasks/DockerTaskfile.yml
6 |
7 | vars:
8 | PROJECT_NAME: api
9 |
10 | tasks:
11 | update-openapi:
12 | desc: Update the openapi.json file
13 | cmds:
14 | - echo "Update the openapi.json file"
15 | - poetry run update-openapi > openapi.json
16 | - echo "Done!"
17 | silent: true
18 |
19 | bump:
20 |     desc: Bump the API version with one of the rules major, minor, patch, premajor, preminor, prepatch or prerelease.
21 | cmds:
22 | - task: poetry:bump
23 | vars:
24 | VERSION_FILE_PATH: "../../bases/ecoindex/backend/VERSION"
25 | VERSION_RULE: "{{.CLI_ARGS}}"
26 | silent: true
27 |
28 | docker-build-backend:
29 | desc: Build the backend docker image
30 | vars:
31 | VERSION:
32 | sh: poetry version -s
33 | cmds:
34 | - echo "Build the backend docker image with version {{.VERSION}}"
35 | - task: docker:build
36 | vars:
37 | VERSION: "{{.VERSION}}"
38 | NAME: "api-backend"
39 | OPTIONS: --build-arg="wheel=ecoindex_api-{{.VERSION}}-py3-none-any.whl" -f docker/backend/dockerfile
40 | silent: true
41 |
42 | docker-build-worker:
43 | desc: Build the worker docker image
44 | vars:
45 | VERSION:
46 | sh: poetry version -s
47 | cmds:
48 | - echo "Build the worker docker image with version {{.VERSION}}"
49 | - task: docker:build
50 | vars:
51 | VERSION: "{{.VERSION}}"
52 | NAME: "api-worker"
53 | OPTIONS: --build-arg="wheel=ecoindex_api-{{.VERSION}}-py3-none-any.whl" -f docker/worker/dockerfile
54 | silent: true
55 |
56 | docker-build-all:
57 | desc: Build poetry project and then build the docker images
58 | cmds:
59 | - task: poetry:build
60 | - task: docker-build-images
61 | silent: true
62 |
63 | docker-build-images:
64 | internal: true
65 | desc: Build the docker images
66 | deps: [docker-build-backend, docker-build-worker]
67 | cmds:
68 | - echo "Build the docker images"
69 | silent: true
70 |
71 | docker-push-backend:
72 | desc: Push the backend docker image
73 | vars:
74 | VERSION:
75 | sh: poetry version -s
76 | cmds:
77 | - echo "Push the backend docker image with version {{.VERSION}}"
78 | - task: docker:push
79 | vars:
80 | VERSION: "{{.VERSION}}"
81 | NAME: "api-backend"
82 | silent: true
83 |
84 | docker-push-worker:
85 | desc: Push the worker docker image
86 | vars:
87 | VERSION:
88 | sh: poetry version -s
89 | cmds:
90 | - echo "Push the worker docker image with version {{.VERSION}}"
91 | - task: docker:push
92 | vars:
93 | VERSION: "{{.VERSION}}"
94 | NAME: "api-worker"
95 | silent: true
96 |
97 | docker-push-all:
98 | desc: Push the docker images
99 | deps: [docker-push-backend, docker-push-worker]
100 | cmds:
101 | - echo "Push the docker images"
102 | silent: true
103 |
104 | docker-pull:
105 | desc: Pull the docker images
106 | cmds:
107 | - echo "Pull the docker images"
108 | - docker compose pull
109 | silent: true
110 |
111 | docker-up:
112 | desc: Start the docker-compose API
113 | deps: [init-env, init-docker-compose]
114 | cmds:
115 | - docker compose up {{.CLI_ARGS}}
116 | silent: true
117 |
118 | docker-down:
119 | desc: Stop the docker-compose API
120 | preconditions:
121 | - test -f docker-compose.yml
122 | cmds:
123 | - docker compose down {{.CLI_ARGS}}
124 | silent: true
125 |
126 | docker-exec:
127 | desc: Execute a command in the docker-compose API
128 | preconditions:
129 | - test -f docker-compose.yml
130 | - test -f .env
131 | cmds:
132 | - docker compose exec {{.CLI_ARGS}}
133 | silent: true
134 |
135 | docker-logs:
136 | desc: Show the logs of the docker-compose API
137 | preconditions:
138 | - test -f docker-compose.yml
139 | cmds:
140 | - docker compose logs {{.CLI_ARGS}}
141 | silent: true
142 |
143 | migration-create:
144 | desc: Create a new alembic migration
145 | cmds:
146 | - poetry run alembic revision --autogenerate -m "{{.CLI_ARGS}}"
147 | silent: true
148 |
149 | migration-upgrade:
150 | desc: Upgrade the database to the last migration
151 | cmds:
152 | - poetry run alembic upgrade head
153 | silent: true
154 |
155 | start-redis:
156 | internal: true
157 | cmds:
158 | - docker run --rm -p 6379:6379 -d redis:alpine
159 | status:
160 | - docker ps | grep redis
161 | silent: true
162 |
163 | start-worker:
164 | deps: [start-redis]
165 | cmds:
166 | - poetry run watchmedo auto-restart --directory=../.. --pattern=worker/*.py --recursive -- poetry run celery -- -A ecoindex.worker.tasks worker --loglevel=DEBUG --queues=ecoindex,ecoindex_batch -E
167 | silent: true
168 |
169 | start-backend:
170 | cmds:
171 | - poetry run uvicorn ecoindex.backend.main:app --host 0.0.0.0 --port 8000 --reload --reload-dir ../..
172 | silent: true
173 |
174 | start-dev:
175 | deps: [start-backend, start-worker]
176 | desc: Start the backend and the worker
177 | cmds:
178 | - echo "Starting the backend and the worker"
179 | silent: true
180 |
181 | init-env:
182 | desc: Initialize the .env file
183 | preconditions:
184 | - test -f .env.template
185 | cmds:
186 | - echo "Initialize the .env file"
187 | - cp .env.template .env
188 | silent: true
189 | internal: true
190 | status:
191 | - test -f .env
192 |
193 | init-dev-project:
194 | desc: Initialize the project for development
195 | cmds:
196 | - echo "Initialize the project for development"
197 | - echo "Install poetry dependencies"
198 | - task: poetry:install
199 | - echo "Install playwright"
200 | - task: poetry:install-playwright
201 | - echo "Create the environment file"
202 | - task: init-env
203 | - echo "Create the database"
204 | - task: migration-upgrade
205 | - echo "All done!"
206 | silent: true
207 |
208 | init-docker-compose:
209 | desc: Initialize the docker-compose API
210 | internal: true
211 | cmds:
212 | - echo "Initialize the docker-compose API"
213 | - cp docker-compose.yml.template docker-compose.yml
214 | silent: true
215 | status:
216 | - test -f docker-compose.yml
217 |
218 | monitor-queues:
219 | desc: Show the queues of the docker-compose API
220 | cmds:
221 | - poetry run celery --app=ecoindex.worker.tasks events
222 |
--------------------------------------------------------------------------------
/projects/ecoindex_api/alembic.ini:
--------------------------------------------------------------------------------
1 | # A generic, single database configuration.
2 |
3 | [alembic]
4 | # path to migration scripts
5 | script_location = ./alembic
6 |
7 | # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
8 | # Uncomment the line below if you want the files to be prepended with date and time
9 | # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
10 | # for all available tokens
11 | # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
12 |
13 | # sys.path path, will be prepended to sys.path if present.
14 | # defaults to the current working directory.
15 | prepend_sys_path = .
16 |
17 | # timezone to use when rendering the date within the migration file
18 | # as well as the filename.
19 | # If specified, requires the python-dateutil library that can be
20 | # installed by adding `alembic[tz]` to the pip requirements
21 | # string value is passed to dateutil.tz.gettz()
22 | # leave blank for localtime
23 | # timezone =
24 |
25 | # max length of characters to apply to the
26 | # "slug" field
27 | # truncate_slug_length = 40
28 |
29 | # set to 'true' to run the environment during
30 | # the 'revision' command, regardless of autogenerate
31 | # revision_environment = false
32 |
33 | # set to 'true' to allow .pyc and .pyo files without
34 | # a source .py file to be detected as revisions in the
35 | # versions/ directory
36 | # sourceless = false
37 |
38 | # version location specification; This defaults
39 | # to db/alembic/versions. When using multiple version
40 | # directories, initial revisions must be specified with --version-path.
41 | # The path separator used here should be the separator specified by "version_path_separator" below.
42 | # version_locations = %(here)s/bar:%(here)s/bat:db/alembic/versions
43 |
44 | # version path separator; As mentioned above, this is the character used to split
45 | # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
46 | # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
47 | # Valid values for version_path_separator are:
48 | #
49 | # version_path_separator = :
50 | # version_path_separator = ;
51 | # version_path_separator = space
52 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
53 |
54 | # the output encoding used when revision files
55 | # are written from script.py.mako
56 | # output_encoding = utf-8
57 |
58 | sqlalchemy.url =
59 |
60 |
61 | [post_write_hooks]
62 | # post_write_hooks defines scripts or Python functions that are run
63 | # on newly generated revision scripts. See the documentation for further
64 | # detail and examples
65 |
66 | # format using "black" - use the console_scripts runner, against the "black" entrypoint
67 | # hooks = black
68 | # black.type = console_scripts
69 | # black.entrypoint = black
70 | # black.options = -l 79 REVISION_SCRIPT_FILENAME
71 |
72 | # Logging configuration
73 | [loggers]
74 | keys = root,sqlalchemy,alembic
75 |
76 | [handlers]
77 | keys = console
78 |
79 | [formatters]
80 | keys = generic
81 |
82 | [logger_root]
83 | level = WARN
84 | handlers = console
85 | qualname =
86 |
87 | [logger_sqlalchemy]
88 | level = WARN
89 | handlers =
90 | qualname = sqlalchemy.engine
91 |
92 | [logger_alembic]
93 | level = INFO
94 | handlers =
95 | qualname = alembic
96 |
97 | [handler_console]
98 | class = StreamHandler
99 | args = (sys.stderr,)
100 | level = NOTSET
101 | formatter = generic
102 |
103 | [formatter_generic]
104 | format = %(levelname)-5.5s [%(name)s] %(message)s
105 | datefmt = %H:%M:%S
106 |
--------------------------------------------------------------------------------
/projects/ecoindex_api/alembic/README:
--------------------------------------------------------------------------------
1 | Generic single-database configuration.
--------------------------------------------------------------------------------
/projects/ecoindex_api/alembic/env.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from logging.config import fileConfig
3 |
4 | from alembic import context
5 | from ecoindex.config import Settings
6 | from ecoindex.models.api import * # noqa: F403
7 | from sqlalchemy import pool
8 | from sqlalchemy.engine import Connection
9 | from sqlalchemy.ext.asyncio import async_engine_from_config
10 | from sqlmodel import SQLModel
11 |
12 | # this is the Alembic Config object, which provides
13 | # access to the values within the .ini file in use.
14 | config = context.config
15 |
16 | config.set_main_option("sqlalchemy.url", Settings().DATABASE_URL)
17 |
18 |
19 | # Interpret the config file for Python logging.
20 | # This line sets up loggers basically.
21 | if config.config_file_name is not None:
22 | fileConfig(config.config_file_name)
23 |
24 | # add your model's MetaData object here
25 | # for 'autogenerate' support
26 | # from myapp import mymodel
27 | # target_metadata = mymodel.Base.metadata
28 | target_metadata = SQLModel.metadata
29 |
30 | # other values from the config, defined by the needs of env.py,
31 | # can be acquired:
32 | # my_important_option = config.get_main_option("my_important_option")
33 | # ... etc.
34 |
35 |
36 | def run_migrations_offline() -> None:
37 | """Run migrations in 'offline' mode.
38 |
39 | This configures the context with just a URL
40 | and not an Engine, though an Engine is acceptable
41 | here as well. By skipping the Engine creation
42 | we don't even need a DBAPI to be available.
43 |
44 | Calls to context.execute() here emit the given string to the
45 | script output.
46 |
47 | """
48 | url = config.get_main_option("sqlalchemy.url")
49 | context.configure(
50 | url=url,
51 | target_metadata=target_metadata,
52 | literal_binds=True,
53 | compare_type=True,
54 | dialect_opts={"paramstyle": "named"},
55 | )
56 |
57 | with context.begin_transaction():
58 | context.run_migrations()
59 |
60 |
61 | def do_run_migrations(connection: Connection) -> None:
62 | context.configure(connection=connection, target_metadata=target_metadata)
63 |
64 | with context.begin_transaction():
65 | context.run_migrations()
66 |
67 |
68 | async def run_async_migrations() -> None:
69 | """In this scenario we need to create an Engine
70 | and associate a connection with the context.
71 |
72 | """
73 |
74 | connectable = async_engine_from_config(
75 | config.get_section(config.config_ini_section, {}),
76 | prefix="sqlalchemy.",
77 | poolclass=pool.NullPool,
78 | )
79 |
80 | async with connectable.connect() as connection:
81 | await connection.run_sync(do_run_migrations)
82 |
83 | await connectable.dispose()
84 |
85 |
86 | def run_migrations_online() -> None:
87 | """Run migrations in 'online' mode."""
88 |
89 | asyncio.run(run_async_migrations())
90 |
91 |
92 | if context.is_offline_mode():
93 | run_migrations_offline()
94 | else:
95 | run_migrations_online()
96 |
--------------------------------------------------------------------------------
/projects/ecoindex_api/alembic/script.py.mako:
--------------------------------------------------------------------------------
1 | """${message}
2 |
3 | Revision ID: ${up_revision}
4 | Revises: ${down_revision | comma,n}
5 | Create Date: ${create_date}
6 |
7 | """
8 | from alembic import op
9 | import sqlalchemy as sa
10 | import sqlmodel
11 | ${imports if imports else ""}
12 |
13 | # revision identifiers, used by Alembic.
14 | revision = ${repr(up_revision)}
15 | down_revision = ${repr(down_revision)}
16 | branch_labels = ${repr(branch_labels)}
17 | depends_on = ${repr(depends_on)}
18 |
19 |
20 | def upgrade() -> None:
21 | ${upgrades if upgrades else "pass"}
22 |
23 |
24 | def downgrade() -> None:
25 | ${downgrades if downgrades else "pass"}
26 |
--------------------------------------------------------------------------------
/projects/ecoindex_api/alembic/versions/5afa2faea43f_.py:
--------------------------------------------------------------------------------
1 | """
2 |
3 | Revision ID: 5afa2faea43f
4 | Revises: 7eaafaa65b32
5 | Create Date: 2025-01-14 14:12:47.013413
6 |
7 | """
8 | import sqlalchemy as sa
9 | import sqlmodel
10 | from alembic import op
11 |
12 | revision = "5afa2faea43f"
13 | down_revision = "7eaafaa65b32"
14 | branch_labels = None
15 | depends_on = None
16 |
17 |
18 | def upgrade() -> None:
19 | op.add_column(
20 | "apiecoindex",
21 | sa.Column("source", sqlmodel.sql.sqltypes.AutoString(), nullable=True), # type: ignore
22 | )
23 |
24 |
25 | def downgrade() -> None:
26 | op.drop_column("apiecoindex", "source")
27 |
--------------------------------------------------------------------------------
/projects/ecoindex_api/alembic/versions/7eaafaa65b32_update_url_field_type_to_text.py:
--------------------------------------------------------------------------------
1 | """Update URL field type to text
2 |
3 | Revision ID: 7eaafaa65b32
4 | Revises: e83263a5def4
5 | Create Date: 2023-03-28 11:24:39.089063
6 |
7 | """
8 | import sqlalchemy as sa
9 | from alembic import op
10 |
11 | revision = "7eaafaa65b32"
12 | down_revision = "e83263a5def4"
13 | branch_labels = None
14 | depends_on = None
15 |
16 |
17 | def upgrade() -> None:
18 | with op.batch_alter_table("apiecoindex", schema=None) as batch_op:
19 | batch_op.alter_column(
20 | "url",
21 | existing_type=sa.String(length=2048),
22 | type_=sa.Text(),
23 | )
24 |
25 |
26 | def downgrade() -> None:
27 | with op.batch_alter_table("apiecoindex", schema=None) as batch_op:
28 | batch_op.alter_column(
29 | "url",
30 | existing_type=sa.Text(),
31 | type_=sa.String(length=2048),
32 | )
33 |
--------------------------------------------------------------------------------
/projects/ecoindex_api/alembic/versions/826abb0c4222_add_ecoindex_version_field.py:
--------------------------------------------------------------------------------
1 | """Add ecoindex_version field
2 |
3 | Revision ID: 826abb0c4222
4 | Revises: fd9a1f5662c8
5 | Create Date: 2022-09-12 17:39:44.209071
6 |
7 | """
8 | import sqlalchemy as sa
9 | import sqlmodel
10 | from alembic import op
11 | from ecoindex.database.helper import column_exists
12 |
13 | # revision identifiers, used by Alembic.
14 | revision = "826abb0c4222"
15 | down_revision = "fd9a1f5662c8"
16 | branch_labels = None
17 | depends_on = None
18 |
19 |
20 | def upgrade() -> None:
21 | if not column_exists(op.get_bind(), "apiecoindex", "ecoindex_version"):
22 | op.add_column(
23 | "apiecoindex",
24 | sa.Column(
25 | "ecoindex_version", sqlmodel.sql.sqltypes.AutoString(), nullable=True
26 | ),
27 | )
28 |
29 |
30 | def downgrade() -> None:
31 | if column_exists(op.get_bind(), "apiecoindex", "ecoindex_version"):
32 | op.drop_column("apiecoindex", "ecoindex_version")
33 |
--------------------------------------------------------------------------------
/projects/ecoindex_api/alembic/versions/e83263a5def4_add_index_id_and_host.py:
--------------------------------------------------------------------------------
1 | """Add index ID and host
2 |
3 | Revision ID: e83263a5def4
4 | Revises: 826abb0c4222
5 | Create Date: 2023-02-13 15:58:55.102285
6 |
7 | """
8 | import sqlalchemy as sa
9 | import sqlmodel # noqa: F401
10 | from alembic import op
11 | from ecoindex.database.helper import index_exists
12 |
13 | # revision identifiers, used by Alembic.
14 | revision = "e83263a5def4"
15 | down_revision = "826abb0c4222"
16 | branch_labels = None
17 | depends_on = None
18 |
19 |
20 | def upgrade() -> None:
21 | with op.batch_alter_table("apiecoindex", schema=None) as batch_op:
22 | batch_op.alter_column(
23 | "id",
24 | existing_type=sqlmodel.sql.sqltypes.GUID(),
25 | nullable=False,
26 | )
27 | batch_op.alter_column("version", existing_type=sa.INTEGER(), nullable=False)
28 |
29 | if not index_exists(op.get_bind(), "apiecoindex", "ix_apiecoindex_id"):
30 | op.create_index(op.f("ix_apiecoindex_id"), "apiecoindex", ["id"], unique=False)
31 |
32 | if not index_exists(op.get_bind(), "apiecoindex", "ix_apiecoindex_host"):
33 | op.create_index(
34 | op.f("ix_apiecoindex_host"), "apiecoindex", ["host"], unique=False
35 | )
36 |
37 |
38 | def downgrade() -> None:
39 | if index_exists(op.get_bind(), "apiecoindex", "ix_apiecoindex_host"):
40 | op.drop_index(op.f("ix_apiecoindex_host"), table_name="apiecoindex")
41 |
42 | if index_exists(op.get_bind(), "apiecoindex", "ix_apiecoindex_id"):
43 | op.drop_index(op.f("ix_apiecoindex_id"), table_name="apiecoindex")
44 |
45 | with op.batch_alter_table("apiecoindex", schema=None) as batch_op:
46 | batch_op.alter_column("version", existing_type=sa.INTEGER(), nullable=True)
47 | batch_op.alter_column(
48 | "id",
49 | existing_type=sqlmodel.sql.sqltypes.GUID(),
50 | nullable=True,
51 | )
52 |
--------------------------------------------------------------------------------
/projects/ecoindex_api/alembic/versions/fd9a1f5662c8_first_migration.py:
--------------------------------------------------------------------------------
1 | """First migration
2 |
3 | Revision ID: fd9a1f5662c8
4 | Revises:
5 | Create Date: 2022-09-12 15:03:22.363502
6 |
7 | """
8 | import sqlalchemy as sa
9 | import sqlmodel
10 | from alembic import op
11 | from ecoindex.database.helper import table_exists
12 |
13 | # revision identifiers, used by Alembic.
14 | revision = "fd9a1f5662c8"
15 | down_revision = None
16 | branch_labels = None
17 | depends_on = None
18 |
19 |
20 | def upgrade() -> None:
21 | if not table_exists(op.get_bind(), "apiecoindex"):
22 | op.create_table(
23 | "apiecoindex",
24 | sa.Column("width", sa.Integer(), nullable=True),
25 | sa.Column("height", sa.Integer(), nullable=True),
26 | sa.Column("url", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
27 | sa.Column("size", sa.Float(), nullable=False),
28 | sa.Column("nodes", sa.Integer(), nullable=False),
29 | sa.Column("requests", sa.Integer(), nullable=False),
30 | sa.Column("grade", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
31 | sa.Column("score", sa.Float(), nullable=True),
32 | sa.Column("ges", sa.Float(), nullable=True),
33 | sa.Column("water", sa.Float(), nullable=True),
34 | sa.Column("date", sa.DateTime(), nullable=True),
35 | sa.Column("page_type", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
36 | sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=True),
37 | sa.Column("host", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
38 | sa.Column("version", sa.Integer(), nullable=True),
39 | sa.Column("initial_ranking", sa.Integer(), nullable=False),
40 | sa.Column("initial_total_results", sa.Integer(), nullable=False),
41 | sa.PrimaryKeyConstraint("id"),
42 | )
43 |
44 |
45 | def downgrade() -> None:
46 | if table_exists(op.get_bind(), "apiecoindex"):
47 | op.drop_table("apiecoindex")
48 |
--------------------------------------------------------------------------------
/projects/ecoindex_api/docker-compose.yml.template:
--------------------------------------------------------------------------------
1 | services:
2 | db:
3 | image: mysql
4 | restart: always
5 | volumes:
6 | - db:/var/lib/mysql
7 | environment:
8 | MYSQL_DATABASE: ${DB_NAME:-ecoindex}
9 | MYSQL_USER: ${DB_USER:-ecoindex}
10 | MYSQL_PASSWORD: ${DB_PASSWORD:-ecoindex}
11 | MYSQL_ROOT_PASSWORD: ${DB_PASSWORD:-ecoindex}
12 | ports:
13 | - "${DB_PORT:-3306}:3306"
14 | healthcheck:
15 | test: mysqladmin ping -h 127.0.0.1 -u $$MYSQL_USER --password=$$MYSQL_PASSWORD
16 | timeout: 5s
17 | retries: 10
18 | interval: 2s
19 |
20 | backend:
21 | image: vvatelot/ecoindex-api-backend:${API_VERSION:-latest}
22 | restart: always
23 | env_file:
24 | - .env
25 | ports:
26 | - "${API_PORT:-8001}:8000"
27 | environment:
28 | DATABASE_URL: mysql+aiomysql://${DB_USER:-ecoindex}:${DB_PASSWORD:-ecoindex}@${DB_HOST:-db}/${DB_NAME:-ecoindex}?charset=utf8mb4
29 | DEBUG: ${DEBUG:-0}
30 | REDIS_CACHE_HOST: ${REDIS_CACHE_HOST:-redis}
31 | TZ: ${TZ:-Europe/Paris}
32 | depends_on:
33 | db:
34 | condition: service_healthy
35 | redis:
36 | condition: service_started
37 | volumes:
38 | - ./screenshots:/code/screenshots
39 |
40 | worker:
41 | image: vvatelot/ecoindex-api-worker:${API_VERSION:-latest}
42 | restart: always
43 | env_file:
44 | - .env
45 | environment:
46 | DATABASE_URL: mysql+aiomysql://${DB_USER:-ecoindex}:${DB_PASSWORD:-ecoindex}@${DB_HOST:-db}/${DB_NAME:-ecoindex}?charset=utf8mb4
47 | DEBUG: ${DEBUG:-0}
48 | REDIS_CACHE_HOST: ${REDIS_CACHE_HOST:-redis}
49 | TZ: ${TZ:-Europe/Paris}
50 | ENABLE_SCREENSHOT: ${ENABLE_SCREENSHOT:-0}
51 | depends_on:
52 | db:
53 | condition: service_healthy
54 | redis:
55 | condition: service_started
56 | volumes:
57 | - ./screenshots:/code/screenshots
58 |
59 | redis:
60 | image: redis:alpine
61 | restart: always
62 | command: redis-server --save 20 1 --loglevel warning
63 | volumes:
64 | - redis:/data
65 |
66 | flower:
67 | image: mher/flower
68 | ports:
69 | - "${FLOWER_PORT:-5555}:5555"
70 | environment:
71 | CELERY_BROKER_URL: redis://${REDIS_CACHE_HOST:-redis}/0
72 | FLOWER_BASIC_AUTH: ${FLOWER_BASIC_AUTH:-ecoindex:ecoindex}
73 |
74 | volumes:
75 | db:
76 | redis:
77 |
--------------------------------------------------------------------------------
/projects/ecoindex_api/docker/backend/dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.12-slim AS requirements-stage
2 |
3 | WORKDIR /tmp
4 |
5 | RUN pip install poetry==1.8.5
6 | COPY pyproject.toml poetry.lock /tmp/
7 | RUN poetry export --with=backend --output=requirements.txt --without-hashes
8 |
9 |
10 | FROM python:3.12-slim
11 |
12 | ARG wheel=ecoindex_api-3.1.0-py3-none-any.whl
13 |
14 | WORKDIR /code
15 |
16 | # Needed for git repositories dependencies
17 | RUN apt-get update && apt-get install -y --no-install-recommends git \
18 | && rm -rf /var/lib/apt/lists/*
19 |
20 | COPY alembic.ini alembic.ini
21 | ADD alembic alembic
22 |
23 | COPY --from=requirements-stage /tmp/requirements.txt /code/requirements.txt
24 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
25 |
26 | COPY dist/$wheel $wheel
27 | RUN pip install --no-cache-dir $wheel
28 | RUN pip install --no-cache-dir aiomysql gunicorn
29 |
30 | RUN rm -rf $wheel requirements.txt /tmp/dist /var/lib/{apt,dpkg,cache,log}/
31 |
32 | COPY docker/backend/entrypoint.sh /usr/bin/entrypoint
33 | RUN chmod +x /usr/bin/entrypoint
34 |
35 | ENTRYPOINT [ "/usr/bin/entrypoint" ]
--------------------------------------------------------------------------------
/projects/ecoindex_api/docker/backend/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | alembic upgrade head
4 | gunicorn ecoindex.backend.main:app --timeout 0 --workers 4 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000
--------------------------------------------------------------------------------
/projects/ecoindex_api/docker/worker/dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.12-slim AS requirements-stage
2 |
3 | WORKDIR /tmp
4 |
5 | RUN pip install poetry==1.8.5
6 | COPY pyproject.toml poetry.lock /tmp/
7 | RUN poetry export --with=worker --output=requirements.txt --without-hashes
8 |
9 |
10 | FROM python:3.12-slim
11 |
12 | ARG wheel=ecoindex_api-3.1.0-py3-none-any.whl
13 |
14 | WORKDIR /code
15 |
16 | RUN apt-get update && apt-get install -y --no-install-recommends git \
17 | && rm -rf /var/lib/apt/lists/*
18 |
19 | COPY alembic.ini alembic.ini
20 | ADD alembic alembic
21 |
22 | COPY --from=requirements-stage /tmp/requirements.txt /code/requirements.txt
23 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
24 |
25 | COPY dist/$wheel $wheel
26 | RUN pip install --no-cache-dir $wheel
27 | RUN pip install --no-cache-dir aiomysql
28 |
29 | RUN playwright install chromium --with-deps
30 |
31 | RUN rm -rf $wheel requirements.txt /tmp/dist /var/lib/{apt,dpkg,cache,log}/
32 |
33 | COPY docker/worker/entrypoint.sh /usr/bin/entrypoint
34 | RUN chmod +x /usr/bin/entrypoint
35 |
36 | ENTRYPOINT [ "/usr/bin/entrypoint" ]
--------------------------------------------------------------------------------
/projects/ecoindex_api/docker/worker/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | celery -A ecoindex.worker.tasks worker --queues=ecoindex,ecoindex_batch
--------------------------------------------------------------------------------
/projects/ecoindex_api/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "ecoindex_api"
3 | version = "3.11.1"
4 | description = "REST API to expose Ecoindex"
5 | authors = ['Vincent Vatelot ']
6 | license = "Creative Commons BY-NC-ND"
7 | homepage = "http://www.ecoindex.fr"
8 | repository = "https://github.com/cnumr/ecoindex_api"
9 | include = ["LICENSE"]
10 |
11 | packages = [
12 | { include = "ecoindex/backend", from = "../../bases" },
13 | { include = "ecoindex/compute", from = "../../components" },
14 | { include = "ecoindex/config", from = "../../components" },
15 | { include = "ecoindex/data", from = "../../components" },
16 | { include = "ecoindex/database", from = "../../components" },
17 | { include = "ecoindex/exceptions", from = "../../components" },
18 | { include = "ecoindex/models", from = "../../components" },
19 | { include = "ecoindex/scraper", from = "../../components" },
20 | { include = "ecoindex/utils", from = "../../components" },
21 | { include = "ecoindex/worker_component", from = "../../components" },
22 | { include = "ecoindex/worker", from = "../../bases" },
23 | ]
24 |
25 | [tool.poetry.dependencies]
26 | aiofile = "^3.8.8"
27 | alembic = "^1.12.1"
28 | celery = "^5.3.4"
29 | fastapi = "^0.109.1"
30 | pillow = "^10.3.0"
31 | playwright = "^1.39.0"
32 | playwright-stealth = "^1.0.6"
33 | pydantic = { version = ">=2.1.1,<=2.4.2", extras = ["email"] }
34 | pydantic-settings = "^2.0.3"
35 | python = "^3.10"
36 | pyyaml = "^6.0.1"
37 | redis = "^5.0.1"
38 | requests = "^2.32.2"
39 | sqlmodel = "^0.0.14"
40 | sentry-sdk = "^2.8.0"
41 | setuptools = "^75.6.0"
42 | cryptography = "^44.0.2"
43 | ua-generator = "^2.0.5"
44 |
45 | [tool.poetry.group.backend.dependencies]
46 | uvicorn = "^0.23.2"
47 |
48 | [tool.poetry.group.worker.dependencies]
49 | pillow = "^10.3.0"
50 | playwright = "^1.39.0"
51 | playwright-stealth = "^1.0.6"
52 |
53 | [tool.poetry.group.dev.dependencies]
54 | typing-extensions = "^4.8.0"
55 | aiosqlite = "^0.19.0"
56 | watchdog = "^6.0.0"
57 |
58 | [tool.poetry.scripts]
59 | update-openapi = "ecoindex.backend.scripts.openapi:main"
60 |
61 | [build-system]
62 | requires = ["poetry-core>=1.0.0"]
63 | build-backend = "poetry.core.masonry.api"
64 |
--------------------------------------------------------------------------------
/projects/ecoindex_api/screenshots/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/projects/ecoindex_api/screenshots/.gitkeep
--------------------------------------------------------------------------------
/projects/ecoindex_cli/.dockerignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .pytest_cache
3 | .ruff_cache
4 | .venv
5 | .vscode
6 | dockerfile
--------------------------------------------------------------------------------
/projects/ecoindex_cli/Taskfile.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | includes:
4 | poetry: ../../tasks/PoetryTaskfile.yml
5 | docker: ../../tasks/DockerTaskfile.yml
6 | pypi: ../../tasks/PypiTaskFile.yml
7 |
8 | vars:
9 | PROJECT_NAME: cli
10 |
11 | tasks:
12 | bump:
13 |     desc: Bump the CLI version with one of the rules major, minor, patch, premajor, preminor, prepatch or prerelease.
14 | cmds:
15 | - task: poetry:bump
16 | vars:
17 | VERSION_FILE_PATH: "../../bases/ecoindex/cli/VERSION"
18 | VERSION_RULE: "{{.CLI_ARGS}}"
19 | silent: true
20 |
21 | docker-build:
22 | desc: Build the docker image
23 | deps: [poetry:build]
24 | vars:
25 | VERSION:
26 | sh: poetry version -s
27 | cmds:
28 | - task: docker:build
29 | vars:
30 | VERSION: "{{.VERSION}}"
31 | NAME: "{{.PROJECT_NAME}}"
32 | OPTIONS: --build-arg="wheel=ecoindex_cli-{{.VERSION}}-py3-none-any.whl"
33 | silent: true
34 |
35 | docker-push:
36 | desc: Push the docker image
37 | vars:
38 | VERSION:
39 | sh: poetry version -s
40 | cmds:
41 | - task: docker:push
42 | vars:
43 | VERSION: "{{.VERSION}}"
44 | NAME: "{{.PROJECT_NAME}}"
45 | silent: true
--------------------------------------------------------------------------------
/projects/ecoindex_cli/doc/report.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/projects/ecoindex_cli/doc/report.png
--------------------------------------------------------------------------------
/projects/ecoindex_cli/dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.12-slim AS requirements-stage
2 |
3 | WORKDIR /tmp
4 |
5 | RUN pip install poetry==1.8.5
6 | COPY pyproject.toml poetry.lock /tmp/
7 | RUN poetry export --output=requirements.txt --without-hashes
8 |
9 |
10 | FROM python:3.12-slim
11 |
12 | ARG wheel=ecoindex_cli-2.26.0a0-py3-none-any.whl
13 | ENV DOCKER_CONTAINER=True
14 |
15 | WORKDIR /code
16 |
17 | COPY --from=requirements-stage /tmp/requirements.txt /code/requirements.txt
18 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
19 |
20 | COPY dist/$wheel $wheel
21 | RUN pip install --no-cache-dir $wheel
22 |
23 | RUN playwright install chromium --with-deps
24 |
25 | RUN rm -rf $wheel requirements.txt /tmp/dist /var/lib/apt /var/lib/dpkg /var/lib/cache /var/lib/log
--------------------------------------------------------------------------------
/projects/ecoindex_cli/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "ecoindex_cli"
3 | version = "2.30.0"
4 | description = "`ecoindex-cli` is a CLI tool that lets you run ecoindex tests on given pages"
5 | authors = ['Vincent Vatelot ']
6 | license = "Creative Commons BY-NC-ND"
7 | readme = "README.md"
8 | homepage = "http://www.ecoindex.fr"
9 | repository = "https://github.com/cnumr/ecoindex_cli"
10 | include = ["LICENSE"]
11 | packages = [
12 | { include = "ecoindex/cli", from = "../../bases" },
13 | { include = "ecoindex/compute", from = "../../components" },
14 | { include = "ecoindex/config", from = "../../components" },
15 | { include = "ecoindex/data", from = "../../components" },
16 | { include = "ecoindex/exceptions", from = "../../components" },
17 | { include = "ecoindex/models", from = "../../components" },
18 | { include = "ecoindex/scraper", from = "../../components" },
19 | { include = "ecoindex/utils", from = "../../components" },
20 | ]
21 |
22 | [tool.poetry.dependencies]
23 | aiofile = "^3.8.8"
24 | click-spinner = "^0.1.10"
25 | jinja2 = "^3.1.2"
26 | loguru = "^0.7.2"
27 | matplotlib = "^3.8.0"
28 | pandas = "^2.1.2"
29 | playwright = "^1.39.0"
30 | playwright-stealth = "^1.0.6"
31 | pydantic = "^2.4.2"
32 | pydantic-settings = "^2.0.3"
33 | python = ">=3.10,<3.13"
34 | pyyaml = "^6.0.1"
35 | rich = "^13.6.0"
36 | scrapy = "^2.11.0"
37 | typer = "^0.9.0"
38 | ua-generator = "^2.0.5"
39 |
40 | [tool.poetry.scripts]
41 | ecoindex-cli = "ecoindex.cli.app:app"
42 |
43 | [build-system]
44 | requires = ["poetry-core>=1.0.0"]
45 | build-backend = "poetry.core.masonry.api"
46 |
--------------------------------------------------------------------------------
/projects/ecoindex_compute/README.md:
--------------------------------------------------------------------------------
1 | # Ecoindex Python
2 |
3 | [](https://github.com/cnumr/ecoindex_python_fullstack/actions/workflows/quality_check.yml)
4 | 
5 | 
6 |
7 | This basic module provides a simple interface to get the [Ecoindex](http://www.ecoindex.fr) based on 3 parameters:
8 |
9 | - The number of DOM elements in the page
10 | - The size of the page
11 | - The number of external requests of the page
12 |
13 | ## Requirements
14 |
15 | - Python ^3.10 with [pip](https://pip.pypa.io/en/stable/installation/)
16 |
17 | ## Install
18 |
19 | ```shell
20 | pip install ecoindex_compute
21 | ```
22 |
23 | ## Use
24 |
25 | ### Compute ecoindex
26 |
27 | You can easily compute the ecoindex by calling the function `compute_ecoindex()`:
28 |
29 | ```python
30 | (function) compute_ecoindex: (nodes: int, size: float, requests: int) -> Coroutine[Any, Any, Ecoindex]
31 | ```
32 |
33 | Example:
34 |
35 | ```python
36 | import asyncio
37 | from pprint import pprint
38 |
39 | from ecoindex.compute import compute_ecoindex
40 |
41 | # Get ecoindex from DOM elements, size of page and requests of the page
42 | ecoindex = asyncio.run(compute_ecoindex(nodes=100, size=100, requests=100))
43 | pprint(ecoindex)
44 | ```
45 |
46 | Result example:
47 |
48 | ```python
49 | Ecoindex(grade='B', score=72.0, ges=1.56, water=2.34, ecoindex_version='3.0.0')
50 | ```
51 |
52 |
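53 | ### Use the underlying functions
54 | 
55 | The package also exposes finer-grained helpers (`get_score`, `get_grade`, `get_ecoindex`, ...). A minimal sketch, assuming the signatures exercised by the project's test suite:
56 | 
57 | ```python
58 | import asyncio
59 | 
60 | from ecoindex.compute import get_ecoindex, get_grade, get_score
61 | 
62 | 
63 | async def main() -> None:
64 |     # 0-100 score computed from the DOM node count, request count and page size
65 |     score = await get_score(dom=100, requests=100, size=100)
66 |     print(score)  # 72
67 | 
68 |     # Letter grade derived from the score
69 |     print(await get_grade(score))  # B
70 | 
71 |     # Full Ecoindex object (grade, score, ges, water)
72 |     print(await get_ecoindex(dom=100, requests=100, size=100))
73 | 
74 | 
75 | asyncio.run(main())
76 | ```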
--------------------------------------------------------------------------------
/projects/ecoindex_compute/Taskfile.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | includes:
4 | poetry: ../../tasks/PoetryTaskfile.yml
5 | pypi: ../../tasks/PypiTaskFile.yml
6 |
7 | vars:
8 | PROJECT_NAME: compute
9 |
10 | tasks:
11 | bump:
12 |     desc: "Bump the compute version with rules: major, minor, patch, premajor, preminor, prepatch or prerelease."
13 | cmds:
14 | - task: poetry:bump
15 | vars:
16 | VERSION_FILE_PATH: "../../components/ecoindex/compute/VERSION"
17 | VERSION_RULE: "{{.CLI_ARGS}}"
18 | silent: true
--------------------------------------------------------------------------------
/projects/ecoindex_compute/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "ecoindex_compute"
3 | version = "5.9.0"
4 | readme = "README.md"
5 | description = "Ecoindex module provides a simple way to measure the Ecoindex score based on 3 parameters: the number of DOM elements in the page, the size of the page and the number of external requests of the page"
6 | authors = ['Vincent Vatelot ']
7 | license = "Creative Commons BY-NC-ND"
8 | homepage = "http://www.ecoindex.fr"
9 | repository = "https://github.com/cnumr/ecoindex_python"
10 | include = ["LICENSE"]
11 |
12 | packages = [
13 | { include = "ecoindex/compute", from = "../../components" },
14 | { include = "ecoindex/data", from = "../../components" },
15 | { include = "ecoindex/models", from = "../../components" },
16 | ]
17 |
18 | [tool.poetry.dependencies]
19 | aiofile = "^3.8.8"
20 | pydantic = "^2.4.2"
21 | python = "^3.10"
22 | requests = "^2.31.0"
23 |
24 | [build-system]
25 | requires = ["poetry-core>=1.0.0"]
26 | build-backend = "poetry.core.masonry.api"
27 |
--------------------------------------------------------------------------------
/projects/ecoindex_scraper/README.md:
--------------------------------------------------------------------------------
1 | # Ecoindex Scraper
2 |
3 | [](https://github.com/cnumr/ecoindex_python_fullstack/actions/workflows/quality_check.yml)
4 |
5 | 
6 | 
7 |
8 | This module provides a simple interface to get the [Ecoindex](http://www.ecoindex.fr) of a given webpage, using the [ecoindex-compute](https://pypi.org/project/ecoindex-compute/) module.
9 |
10 | ## Requirements
11 |
12 | - Python ^3.10 with [pip](https://pip.pypa.io/en/stable/installation/)
13 |
14 | ## Install
15 |
16 | ```shell
17 | pip install ecoindex-scraper
18 | ```
19 |
20 | ## Use
21 |
22 | ### Get a page analysis
23 |
24 | You can run a page analysis by instantiating `EcoindexScraper` and calling its `get_page_analysis()` method, which returns a `Result`. The analysis parameters are passed to the constructor:
25 | 
26 | ```python
27 | EcoindexScraper(url: AnyHttpUrl, window_size: WindowSize | None = WindowSize(width=1920, height=1080), wait_before_scroll: int | None = 1, wait_after_scroll: int | None = 1)
28 | ```
29 |
30 | Example:
31 |
32 | ```python
33 | import asyncio
34 | from pprint import pprint
35 |
36 | from ecoindex.scraper import EcoindexScraper
37 |
38 | pprint(
39 | asyncio.run(
40 | EcoindexScraper(url="http://ecoindex.fr").get_page_analysis()
41 | )
42 | )
43 | ```
44 |
45 | Result example:
46 |
47 | ```python
48 | Result(width=1920, height=1080, url=AnyHttpUrl('http://ecoindex.fr'), size=549.253, nodes=52, requests=12, grade='A', score=90.0, ges=1.2, water=1.8, ecoindex_version='5.0.0', date=datetime.datetime(2022, 9, 12, 10, 54, 46, 773443), page_type=None)
49 | ```
50 |
51 | > **Default behaviour:** By default, the page analysis simulates:
52 | >
53 | > - A window size of **1920x1080** pixels (can be set with the parameter `window_size`)
54 | > - A wait of **1 second once the page is loaded** (can be set with the parameter `wait_before_scroll`)
55 | > - A scroll to the bottom of the page (when possible)
56 | > - A wait of **1 second after** scrolling to the bottom of the page (can be set with the parameter `wait_after_scroll`)
57 |
58 | ### Get a page analysis and generate a screenshot
59 |
60 | It is possible to generate a screenshot of the analyzed page by passing a `ScreenShot` object to the `EcoindexScraper`.
61 | You have to define an id (any string, though a unique id is recommended) and a destination folder for the screenshot files (if the folder does not exist, it will be created).
62 |
63 | ```python
64 | import asyncio
65 | from pprint import pprint
66 | from uuid import uuid1
67 |
68 | from ecoindex.models import ScreenShot
69 | from ecoindex.scraper import EcoindexScraper
70 |
71 | pprint(
72 | asyncio.run(
73 | EcoindexScraper(
74 | url="http://www.ecoindex.fr/",
75 | screenshot=ScreenShot(id=str(uuid1()), folder="./screenshots"),
76 | )
77 | .get_page_analysis()
78 | )
79 | )
80 | ```
81 |
82 | ## Async analysis
83 |
84 | You can also run several analyses concurrently:
85 |
86 | ```python
87 | import asyncio
88 | from concurrent.futures import ThreadPoolExecutor, as_completed
89 |
90 | from ecoindex.scraper import EcoindexScraper
91 |
92 | def run_page_analysis(url):
93 | return asyncio.run(
94 | EcoindexScraper(url=url)
95 | .get_page_analysis()
96 | )
97 |
98 |
99 | with ThreadPoolExecutor(max_workers=8) as executor:
100 | future_to_analysis = {}
101 |
102 | url = "https://www.ecoindex.fr"
103 |
104 |     for _ in range(10):
105 | future_to_analysis[
106 | executor.submit(
107 | run_page_analysis,
108 | url,
109 | )
110 |         ] = url
111 |
112 | for future in as_completed(future_to_analysis):
113 | try:
114 | print(future.result())
115 | except Exception as e:
116 | print(e)
117 | ```
118 | ## Get request details from an analysis
119 | 
120 | You can get the details of all requests made by the page with `get_all_requests()`, and an aggregation of those requests by category with `get_requests_by_category()`:
121 |
122 | ```python
123 | import asyncio
124 | from pprint import pprint
125 |
126 | from ecoindex.scraper import EcoindexScraper
127 |
128 | scraper = EcoindexScraper(url="http://www.ecoindex.fr")
129 |
130 | result = asyncio.run(scraper.get_page_analysis())
131 | all_requests = asyncio.run(scraper.get_all_requests())
132 | requests_by_category = asyncio.run(scraper.get_requests_by_category())
133 |
134 | pprint([request.model_dump() for request in all_requests])
135 | # [{'category': 'html',
136 | # 'mime_type': 'text/html; charset=iso-8859-1',
137 | # 'size': 475.0,
138 | # 'status': 301,
139 | # 'url': 'http://www.ecoindex.fr/'},
140 | # {'category': 'html',
141 | # 'mime_type': 'text/html',
142 | # 'size': 7772.0,
143 | # 'status': 200,
144 | # 'url': 'https://www.ecoindex.fr/'},
145 | # {'category': 'css',
146 | # 'mime_type': 'text/css',
147 | # 'size': 9631.0,
148 | # 'status': 200,
149 | # 'url': 'https://www.ecoindex.fr/css/bundle.min.d38033feecefa0352173204171412aec01f58eee728df0ac5c917a396ca0bc14.css'},
150 | # {'category': 'javascript',
151 | # 'mime_type': 'application/javascript',
152 | # 'size': 9823.0,
153 | # 'status': 200,
154 | # 'url': 'https://www.ecoindex.fr/fr/js/bundle.8781a9ae8d87b4ebaa689167fc17b7d71193cf514eb8bb40aac9bf4548e14533.js'},
155 | # {'category': 'other',
156 | # 'mime_type': 'x-unknown',
157 | # 'size': 892.0,
158 | # 'status': 200,
159 | # 'url': 'https://www.ecoindex.fr/images/logo-neutral-it.webp'},
160 | # {'category': 'image',
161 | # 'mime_type': 'image/svg+xml',
162 | # 'size': 3298.0,
163 | # 'status': 200,
164 | # 'url': 'https://www.ecoindex.fr/images/logo-greenit.svg'}]
165 |
166 | pprint(requests_by_category.model_dump())
167 | # {'css': {'total_count': 1, 'total_size': 9631.0},
168 | # 'font': {'total_count': 0, 'total_size': 0.0},
169 | # 'html': {'total_count': 2, 'total_size': 8247.0},
170 | # 'image': {'total_count': 1, 'total_size': 3298.0},
171 | # 'javascript': {'total_count': 1, 'total_size': 9823.0},
172 | # 'other': {'total_count': 1, 'total_size': 892.0},
173 | # 'video': {'total_count': 0, 'total_size': 0.0}}
174 | ```
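175 | 
176 | ## Customize the analysis parameters
177 | 
178 | All of the defaults listed above can be overridden when instantiating `EcoindexScraper`. A minimal sketch, assuming the constructor parameters exercised by the project's test suite (`window_size`, `wait_before_scroll`, `wait_after_scroll`, `page_load_timeout`):
179 | 
180 | ```python
181 | import asyncio
182 | 
183 | from ecoindex.models import WindowSize
184 | from ecoindex.scraper import EcoindexScraper
185 | 
186 | scraper = EcoindexScraper(
187 |     url="https://www.ecoindex.fr",
188 |     window_size=WindowSize(width=800, height=600),  # simulated viewport
189 |     wait_before_scroll=2,  # seconds to wait once the page has loaded
190 |     wait_after_scroll=2,  # seconds to wait after scrolling to the bottom
191 |     page_load_timeout=30,  # seconds before the page load times out (default is 20)
192 | )
193 | 
194 | print(asyncio.run(scraper.get_page_analysis()))
195 | ```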
--------------------------------------------------------------------------------
/projects/ecoindex_scraper/Taskfile.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | includes:
4 | poetry: ../../tasks/PoetryTaskfile.yml
5 | pypi: ../../tasks/PypiTaskFile.yml
6 |
7 | vars:
8 | PROJECT_NAME: scraper
9 |
10 | tasks:
11 | bump:
12 |     desc: "Bump the scraper version with rules: major, minor, patch, premajor, preminor, prepatch or prerelease."
13 | cmds:
14 | - task: poetry:bump
15 | vars:
16 | VERSION_FILE_PATH: "../../components/ecoindex/scraper/VERSION"
17 | VERSION_RULE: "{{.CLI_ARGS}}"
18 | silent: true
--------------------------------------------------------------------------------
/projects/ecoindex_scraper/dockerfile:
--------------------------------------------------------------------------------
1 |
2 | FROM python:3.12-slim
3 |
4 | WORKDIR /code
5 | ENV PYTHONPATH="/code"
6 |
7 | RUN pip install poetry
8 | COPY ./ ./
9 | RUN poetry install --only scraper
10 |
11 | RUN poetry run playwright install chromium --with-deps
12 |
13 | RUN rm -rf /tmp/dist /var/lib/apt /var/lib/dpkg /var/lib/cache /var/lib/log
--------------------------------------------------------------------------------
/projects/ecoindex_scraper/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "ecoindex_scraper"
3 | version = "3.15.0"
4 | readme = "README.md"
5 | description = "Ecoindex_scraper module provides a way to scrape data from a given website while simulating a real web browser"
6 | authors = ['Vincent Vatelot ']
7 | license = "Creative Commons BY-NC-ND"
8 | homepage = "http://www.ecoindex.fr"
9 | repository = "https://github.com/cnumr/ecoindex_scrap_python"
10 | include = ["LICENSE"]
11 | packages = [
12 | { include = "ecoindex/compute", from = "../../components" },
13 | { include = "ecoindex/data", from = "../../components" },
14 | { include = "ecoindex/exceptions", from = "../../components" },
15 | { include = "ecoindex/models", from = "../../components" },
16 | { include = "ecoindex/scraper", from = "../../components" },
17 | { include = "ecoindex/utils", from = "../../components" },
18 | ]
19 |
20 | [tool.poetry.dependencies]
21 | playwright = "^1.39.0"
22 | playwright-stealth = "^1.0.6"
23 | pydantic = "^2.4.2"
24 | python = "^3.10"
25 | typing-extensions = "^4.8.0"
26 | pyyaml = "^6.0.1"
27 | pillow = "^10.1.0"
28 | setuptools = ">=69.5.1,<71.0.0"
29 | ua-generator = "^2.0.5"
30 |
31 | [build-system]
32 | requires = ["poetry-core>=1.0.0"]
33 | build-backend = "poetry.core.masonry.api"
34 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "ecoindex-monorepo"
3 | version = "0.1.0"
4 | description = ""
5 | authors = ["Vincent Vatelot "]
6 | readme = "README.md"
7 | packages = [
8 | { include = "development" },
9 | { include = "ecoindex/backend", from = "bases" },
10 | { include = "ecoindex/cli", from = "bases" },
11 | { include = "ecoindex/compute", from = "components" },
12 | { include = "ecoindex/config", from = "components" },
13 | { include = "ecoindex/data", from = "components" },
14 | { include = "ecoindex/database", from = "components" },
15 | { include = "ecoindex/exceptions", from = "components" },
16 | { include = "ecoindex/models", from = "components" },
17 | { include = "ecoindex/scraper", from = "components" },
18 | { include = "ecoindex/scripts", from = "components" },
19 | { include = "ecoindex/utils", from = "components" },
20 | { include = "ecoindex/worker_component", from = "components" },
21 | { include = "ecoindex/worker", from = "bases" },
22 | ]
23 |
24 | [tool.poetry.dependencies]
25 | aiofile = "^3.8.8"
26 | loguru = "^0.7.2"
27 | pydantic = "^2.4.2"
28 | pydantic-settings = "^2.0.3"
29 | python = ">=3.10,<3.13"
30 | redis = { extras = ["hiredis"], version = "^5.0.1" }
31 | requests = "^2.32.3"
32 | tomli = "^2.0.1"
33 | haralyzer = "^2.4.0"
34 | python-slugify = "^8.0.4"
35 | setuptools = "^74.0.0"
36 | cryptography = "^44.0.2"
37 |
38 | [tool.poetry.group.scraper.dependencies]
39 | pillow = "^10.3.0"
40 | playwright = "^1.39.0"
41 | playwright-stealth = "^1.0.6"
42 |
43 | [tool.poetry.group.cli.dependencies]
44 | click-spinner = "^0.1.10"
45 | jinja2 = "^3.1.3"
46 | matplotlib = "^3.8.0"
47 | pandas = "^2.1.2"
48 | pyyaml = "^6.0.1"
49 | rich = "^13.6.0"
50 | scrapy = "^2.11.1"
51 | typer = "^0.9.0"
52 |
53 | [tool.poetry.group.api.dependencies]
54 | aiosqlite = "^0.19.0"
55 | alembic = "^1.12.1"
56 | celery = "^5.3.4"
57 | redis = "^5.0.1"
58 | sqlmodel = "^0.0.14"
59 | sentry-sdk = "^2.8.0"
60 | ua-generator = "^2.0.3"
61 |
62 | [tool.poetry.group.api-backend.dependencies]
63 | fastapi = "^0.109.1"
64 | uvicorn = "^0.23.2"
65 |
66 | [tool.poetry.group.dev.dependencies]
67 | black = "^24.3.0"
68 | mypy = "^1.7.0"
69 | pytest = "^7.4.2"
70 | pytest-asyncio = "^0.21.1"
71 | pytest-cov = "^4.1.0"
72 | ruff = "^0.1.1"
73 | types-click-spinner = "^0.1.13.6"
74 | types-pyyaml = "^6.0.12.12"
75 | types-requests = "^2.31.0.10"
76 | watchdog = "^6.0.0"
77 | pytest-picked = "^0.5.0"
78 |
79 | [build-system]
80 | requires = ["poetry-core"]
81 | build-backend = "poetry.core.masonry.api"
82 |
83 | [tool.poetry.scripts]
84 | update-values = "ecoindex.scripts:update_values"
85 |
86 | [tool.pytest.ini_options]
87 | filterwarnings = ["ignore::DeprecationWarning"]
88 |
89 | [tool.mypy]
90 | mypy_path = ["bases", "components"]
91 | namespace_packages = true
92 | explicit_package_bases = true
93 | ignore_missing_imports = true
94 | disallow_untyped_defs = false
95 | exclude = ["test", "dist", "__pycache__"]
96 |
97 | [tool.coverage.run]
98 | omit = ["test/*"]
99 |
100 | [tool.coverage.report]
101 | skip_empty = true
102 |
103 | [tool.pyright]
104 | extraPaths = ["bases", "components"]
105 |
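106 | # Usage note: the `update-values` script defined above runs with `poetry run update-values`.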
--------------------------------------------------------------------------------
/tasks/DockerTaskfile.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | tasks:
4 | build:
5 | internal: true
6 | cmds:
7 |       - echo "docker build -t vvatelot/ecoindex-{{.NAME}}:{{.VERSION}} {{.OPTIONS}} ."
8 | - docker build -t vvatelot/ecoindex-{{.NAME}}:{{.VERSION}} -t vvatelot/ecoindex-{{.NAME}}:latest {{.OPTIONS}} .
9 | silent: true
10 |
11 | push:
12 | internal: true
13 | cmds:
14 | - echo "docker push vvatelot/ecoindex-{{.NAME}}:{{.VERSION}}"
15 | - docker push vvatelot/ecoindex-{{.NAME}}:{{.VERSION}}
16 | - echo "docker push vvatelot/ecoindex-{{.NAME}}:latest"
17 | - docker push vvatelot/ecoindex-{{.NAME}}:latest
18 | silent: true
19 |
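20 | # Note: both tasks are internal and meant to be wrapped by the project taskfiles;
21 | # `build` tags vvatelot/ecoindex-<NAME>:<VERSION> and :latest, which `push` then uploads.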
--------------------------------------------------------------------------------
/tasks/PoetryTaskfile.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 |
3 | tasks:
4 | default:
5 | desc: Run poetry
6 | cmds:
7 | - poetry {{.CLI_ARGS}}
8 | silent: true
9 |
10 | install:
11 | desc: Install the poetry project dependencies of {{.PROJECT_NAME}}
12 | cmds:
13 | - poetry install
14 | silent: true
15 |
16 | add:
17 | desc: Add a new dependency to {{.PROJECT_NAME}}
18 | cmds:
19 | - poetry add {{.CLI_ARGS}}
20 | silent: true
21 |
22 | remove:
23 | desc: Remove a dependency from {{.PROJECT_NAME}}
24 | cmds:
25 | - poetry remove {{.CLI_ARGS}}
26 | silent: true
27 |
28 | update:
29 | desc: Update dependencies of {{.PROJECT_NAME}}
30 | cmds:
31 | - poetry update
32 | silent: true
33 |
34 | build:
35 |     desc: Lock the poetry project {{.PROJECT_NAME}} dependencies, then build the project distribution
36 | cmds:
37 | - poetry lock
38 | - poetry build-project
39 | silent: true
40 |
41 | bump:
42 | internal: true
43 | preconditions:
44 | - sh: '[[ "{{.CLI_ARGS}}" =~ ^(major|minor|patch|premajor|preminor|prepatch|prerelease)$ ]]'
45 | msg: "Invalid version rule: `{{.CLI_ARGS}}` must be major, minor, patch, premajor, preminor, prepatch or prerelease."
46 | cmds:
47 | - poetry version -s > /tmp/version-current-output-{{.PROJECT_NAME}}
48 | - poetry version {{.VERSION_RULE}} -s > /tmp/version-bump-output-{{.PROJECT_NAME}}
49 | - echo "Current version:$(cat /tmp/version-current-output-{{.PROJECT_NAME}})"
50 | - echo "Bumped version:$(cat /tmp/version-bump-output-{{.PROJECT_NAME}})"
51 | - echo "Update {{.VERSION_FILE_PATH}}"
52 | - echo "$(cat /tmp/version-bump-output-{{.PROJECT_NAME}})" > {{.VERSION_FILE_PATH}}
53 | silent: true
54 |
55 | version-short:
56 | desc: Show the current version of {{.PROJECT_NAME}}
57 | cmds:
58 | - poetry version -s
59 | silent: true
60 |
61 | install-playwright:
62 | desc: Install playwright
63 | cmds:
64 | - poetry run playwright install chromium --with-deps
65 | silent: true
66 | interactive: true
67 |
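68 | # Usage sketch for the internal `bump` task (assuming go-task's standard CLI_ARGS
69 | # forwarding): project taskfiles wrap it and pass the rule after `--`, e.g. from a
70 | # project directory: `task bump -- minor`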
--------------------------------------------------------------------------------
/tasks/PypiTaskFile.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | tasks:
4 | publish:
5 | desc: Publish the project {{.PROJECT_NAME}} to pypi
6 | cmds:
7 | - poetry publish
8 | silent: true
--------------------------------------------------------------------------------
/tasks/QualityTaskFile.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | tasks:
4 | tests:
5 | desc: Run tests
6 | cmds:
7 | - poetry run pytest
8 |
9 | tests-coverage:
10 | desc: Run tests with coverage
11 | cmds:
12 | - poetry run pytest --cov-report "xml:coverage.xml" --cov=. test
13 |
14 | tests-coverage-branch:
15 | desc: Run tests with coverage for current branch
16 | cmds:
17 | - poetry run pytest --cov-report "xml:coverage.xml" --cov=. --picked --mode=branch test
18 |
19 | mypy:
20 | desc: Run mypy
21 | cmds:
22 | - poetry run mypy .
23 |
24 | ruff:
25 | desc: Run ruff
26 | cmds:
27 | - poetry run ruff .
28 |
29 | quality-branch:
30 | desc: Run quality checks
31 | deps: [tests-coverage-branch, mypy, ruff]
32 | cmds:
33 | - echo "Quality checks passed"
34 |
35 | default:
36 | desc: Run quality checks
37 | deps: [tests-coverage, mypy, ruff]
38 | cmds:
39 | - echo "Quality checks passed"
--------------------------------------------------------------------------------
/test/bases/ecoindex/backend/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/bases/ecoindex/backend/__init__.py
--------------------------------------------------------------------------------
/test/bases/ecoindex/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/bases/ecoindex/cli/__init__.py
--------------------------------------------------------------------------------
/test/bases/ecoindex/cli/test_app.py:
--------------------------------------------------------------------------------
1 | from os import remove
2 |
3 | from ecoindex.cli.app import app
4 | from typer.testing import CliRunner
5 |
6 | runner = CliRunner()
7 |
8 |
9 | def test_analyze_no_args() -> None:
10 | result = runner.invoke(app=app, args=["analyze"])
11 | assert result.exit_code == 1
12 | assert "🔥 You must provide an url..." in result.stdout
13 |
14 |
15 | def test_analyze_not_valid_url() -> None:
16 | invalid_url = "url"
17 | result = runner.invoke(app=app, args=["analyze", "--url", invalid_url])
18 | assert result.exit_code == 1
19 | assert (
20 | "Input should be a valid URL, relative URL without a base [type=url_parsing, input_value='url', input_type=str]"
21 | in result.stdout
22 | )
23 |
24 |
25 | def test_analyze_one_invalid_url() -> None:
26 | valid_url = "https://www.test.com"
27 | invalid_url = "dummy"
28 | result = runner.invoke(
29 | app=app, args=["analyze", "--url", valid_url, "--url", invalid_url], input="n\n"
30 | )
31 | assert result.exit_code == 1
32 | assert (
33 | "Input should be a valid URL, relative URL without a base [type=url_parsing, input_value='dummy', input_type=str]"
34 | in result.stdout
35 | )
36 |
37 |
38 | def test_analyze_one_valid_url() -> None:
39 | domain = "www.test.com"
40 | valid_url = f"https://{domain}"
41 | result = runner.invoke(app=app, args=["analyze", "--url", valid_url], input="n\n")
42 | assert "There are 1 url(s), do you want to process?" in result.stdout
43 | assert result.exit_code == 1
44 | assert "Aborted" in result.stdout
45 |     assert f"📁️ Urls recorded in file `input/{domain}.csv`" in result.stdout
46 | remove(f"/tmp/ecoindex-cli/input/{domain}.csv")
47 |
48 |
49 | def test_analyze_string_window_size() -> None:
50 | invalid_window_size = "window"
51 | result = runner.invoke(
52 | app=app, args=["analyze", "--window-size", invalid_window_size]
53 | )
54 | assert result.exit_code == 1
55 | assert (
56 | f"🔥 `{invalid_window_size}` is not a valid window size. Must be of type `1920,1080`"
57 | in result.stdout
58 | )
59 |
60 |
61 | def test_analyze_one_invalid_window_size() -> None:
62 | valid_window_size = "1920,1080"
63 | invalid_window_size = "1920,height"
64 | result = runner.invoke(
65 | app=app,
66 | args=[
67 | "analyze",
68 | "--window-size",
69 | valid_window_size,
70 | "--window-size",
71 | invalid_window_size,
72 | ],
73 | )
74 | assert result.exit_code == 1
75 | assert (
76 | f"🔥 `{invalid_window_size}` is not a valid window size. Must be of type `1920,1080`"
77 | in result.stdout
78 | )
79 |
80 |
81 | def test_analyze_abort_recursive() -> None:
82 | result = runner.invoke(app=app, args=["analyze", "--recursive"], input="n\n")
83 | assert (
84 | "You are about to perform a recursive website scraping. This can take a long time. Are you sure to want to proceed?"
85 | in result.stdout
86 | )
87 | assert "Aborted" in result.stdout
88 | assert result.exit_code == 1
89 |
90 |
91 | def test_analyze_abort_sitemap() -> None:
92 | domain = "www.test.com"
93 | valid_url = f"https://{domain}/sitemap.xml"
94 | result = runner.invoke(
95 | app=app, args=["analyze", "--sitemap", valid_url], input="n\n"
96 | )
97 | assert (
98 | "You are about to read urls from a website sitemap. This can take a long time. Are you sure to want to proceed?"
99 | in result.stdout
100 | )
101 | assert "Aborted" in result.stdout
102 | assert result.exit_code == 1
103 |
104 |
105 | def test_invalid_sitemap() -> None:
106 | domain = "www.test.com"
107 | invalid_sitemap = f"https://{domain}"
108 | result = runner.invoke(
109 | app=app, args=["analyze", "--sitemap", invalid_sitemap], input="y\n"
110 | )
111 | assert (
112 | "You are about to read urls from a website sitemap. This can take a long time. Are you sure to want to proceed?"
113 | in result.stdout
114 | )
115 | assert "Aborted" not in result.stdout
116 | assert "The provided url is not a valid sitemap url" in result.stdout
117 | assert result.exit_code == 1
118 |
119 |
120 | def test_no_interaction() -> None:
121 | result = runner.invoke(app=app, args=["analyze", "--recursive", "--no-interaction"])
122 | assert "[Y/n]" not in result.stdout
123 | assert result.exit_code == 1
124 |
125 |
126 | def test_unauthorized_export_format() -> None:
127 | result = runner.invoke(app=app, args=["analyze", "--export-format", "txt"])
128 | assert result.exit_code == 2
129 | assert "'txt' is not one of 'csv', 'json'." in result.stdout
130 |
--------------------------------------------------------------------------------
/test/bases/ecoindex/cli/test_arguments_handler.py:
--------------------------------------------------------------------------------
1 | from ecoindex.cli.arguments_handler import (
2 | get_file_prefix_input_file_logger_file,
3 | get_url_from_args,
4 | get_urls_from_file,
5 | get_window_sizes_from_args,
6 | )
7 | from ecoindex.models import WindowSize
8 | from pydantic import ValidationError
9 | from pytest import raises
10 |
11 |
12 | def test_urls_all_valid_from_args():
13 | urls = (
14 | "http://test.com/",
15 | "https://test.com/",
16 | "https://www.dummy.com/page/",
17 | "https://www.dummy.com/another_page",
18 | "http://localhost:8000/",
19 | )
20 | valid_urls = get_url_from_args(urls_arg=urls)
21 | assert len(valid_urls) == 5
22 | for url in valid_urls:
23 | assert str(url) in urls
24 |
25 |
26 | def test_urls_invalid_from_args():
27 | urls = "test.com"
28 | with raises(ValidationError):
29 | get_url_from_args(urls)
30 |
31 |
32 | def test_validate_valid_window_size():
33 | assert get_window_sizes_from_args(["1024,768"]) == [
34 | WindowSize(width=1024, height=768)
35 | ]
36 |
37 |
38 | def test_validate_invalid_window_size():
39 | with raises(ValueError):
40 | get_window_sizes_from_args(("800x600",))
41 |
42 | with raises(ValueError):
43 | get_window_sizes_from_args(("width,600",))
44 |
45 | with raises(ValueError):
46 | get_window_sizes_from_args(("600",))
47 |
48 |
49 | def test_get_file_prefix_input_file_logger_file():
50 | urls = ("http://test.com", "https://test.com", "https://www.dummy.com/page/")
51 | assert get_file_prefix_input_file_logger_file(urls=urls) == (
52 | "test.com",
53 | "/tmp/ecoindex-cli/input/test.com.csv",
54 | "/tmp/ecoindex-cli/logs/test.com.log",
55 | )
56 |
57 | assert get_file_prefix_input_file_logger_file(
58 | urls=urls, urls_file="/home/user/my_urls.csv"
59 | ) == (
60 | "my_urls.csv",
61 | "/home/user/my_urls.csv",
62 | "/tmp/ecoindex-cli/logs/my_urls.csv.log",
63 | )
64 |
65 |
66 | def test_read_file_with_empty_lines():
67 | urls = ("http://test.com", "https://test.com", "https://www.dummy.com/page/", "")
68 | with open(file="/tmp/ecoindex-cli/input/test.com.csv", mode="w") as f:
69 | f.write("\n".join(urls))
70 |
71 | validated_urls = get_urls_from_file(
72 | urls_file="/tmp/ecoindex-cli/input/test.com.csv"
73 | )
74 | assert len(validated_urls) == 3
75 | assert "" not in validated_urls
76 |
--------------------------------------------------------------------------------
/test/bases/ecoindex/cli/test_helper.py:
--------------------------------------------------------------------------------
1 | import os
2 | from urllib.parse import urlparse
3 | from ecoindex.cli.helper import replace_localhost_with_hostdocker
4 | from ecoindex.models.cli import CliHost
5 |
6 |
7 | def test_replace_localhost_with_hostdocker():
8 | assert replace_localhost_with_hostdocker(
9 | urlparse("https://test.com/page/").netloc
10 | ) == CliHost(domain="test.com", netloc="test.com")
11 |
12 | assert replace_localhost_with_hostdocker(
13 | urlparse("https://localhost:8000/page/").netloc
14 | ) == CliHost(domain="localhost", netloc="localhost:8000")
15 |
16 | os.environ["DOCKER_CONTAINER"] = "true"
17 | assert replace_localhost_with_hostdocker(
18 | urlparse("https://localhost:8000/page/").netloc
19 | ) == CliHost(domain="host.docker.internal", netloc="host.docker.internal:8000")
--------------------------------------------------------------------------------
/test/bases/ecoindex/worker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/bases/ecoindex/worker/__init__.py
--------------------------------------------------------------------------------
/test/components/ecoindex/compute/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/compute/__init__.py
--------------------------------------------------------------------------------
/test/components/ecoindex/compute/test_ecoindex.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ecoindex.compute import (
3 | get_ecoindex,
4 | get_grade,
5 | get_greenhouse_gases_emmission,
6 | get_quantile,
7 | get_score,
8 | get_water_consumption,
9 | )
10 | from ecoindex.data import quantiles_dom, quantiles_req, quantiles_size
11 | from ecoindex.models import Ecoindex
12 |
13 |
14 | @pytest.mark.asyncio
15 | class TestAsyncGroup:
16 | async def test_get_quantiles(self):
17 | assert await get_quantile(quantiles_size, 2500) == 14.086372025739513
18 | assert await get_quantile(quantiles_dom, 150) == 2.892857142857143
19 | assert await get_quantile(quantiles_req, 23) == 2.8
20 | assert await get_quantile(quantiles_size, 310182.902) == 20
21 |
22 | async def test_get_score(self):
23 | assert await get_score(dom=100, requests=100, size=100) == 72
24 | assert await get_score(dom=100, requests=100, size=1000) == 67
25 | assert await get_score(dom=100, requests=100, size=10000) == 58
26 | assert await get_score(dom=200, requests=200, size=10000) == 46
27 | assert await get_score(dom=2355, requests=267, size=2493) == 10
28 | assert await get_score(dom=240, requests=20, size=331) == 83
29 |
30 | async def test_get_ecoindex(self):
31 | assert await get_ecoindex(dom=100, requests=100, size=100) == Ecoindex(
32 | score=72,
33 | grade="B",
34 | ges=1.56,
35 | water=2.34,
36 | )
37 |
38 | async def test_get_grade(self):
39 | assert await get_grade(2) == "G"
40 | assert await get_grade(25) == "F"
41 | assert await get_grade(10) == "G"
42 | assert await get_grade(50.2) == "D"
43 | assert await get_grade(100) == "A"
44 |
45 | async def test_get_greenhouse_gases_emission(self):
46 | assert await get_greenhouse_gases_emmission(2) == 2.96
47 | assert await get_greenhouse_gases_emmission(10) == 2.8
48 | assert await get_greenhouse_gases_emmission(50) == 2
49 | assert await get_greenhouse_gases_emmission(70) == 1.6
50 |
51 | async def test_get_water_consumption(self):
52 | assert await get_water_consumption(2) == 4.44
53 | assert await get_water_consumption(10) == 4.2
54 | assert await get_water_consumption(50) == 3
55 | assert await get_water_consumption(70) == 2.4
56 |
57 | async def test_get_ecoindex_out_of_range(self):
58 | assert await get_ecoindex(dom=2240, requests=100, size=310182.902) == Ecoindex(
59 | score=16,
60 | grade="F",
61 | ges=2.68,
62 | water=4.02,
63 | )
64 |
--------------------------------------------------------------------------------
/test/components/ecoindex/compute/test_models.py:
--------------------------------------------------------------------------------
1 | from os import rmdir
2 | from os.path import isdir
3 |
4 | from ecoindex.models import Ecoindex, Result, ScreenShot, WebPage
5 | from pydantic import ValidationError
6 | from pytest import raises
7 |
8 |
9 | def test_model_webpage_no_url() -> None:
10 | with raises(ValidationError) as error:
11 | WebPage()
12 |
13 | assert (
14 | "1 validation error for WebPage\n"
15 | "url\n "
16 | "Field required [type=missing, input_value={}, input_type=dict]\n"
17 | ) in str(error.value)
18 |
19 |
20 | def test_model_webpage_invalid_url() -> None:
21 | with raises(ValidationError) as error:
22 | WebPage(url="toto")
23 |
24 | assert (
25 | "1 validation error for WebPage\n"
26 | "url\n "
27 | "Input should be a valid URL, relative URL without a base "
28 | "[type=url_parsing, input_value='toto', input_type=str]\n"
29 | ) in str(error.value)
30 |
31 | with raises(ValidationError):
32 | WebPage(url="about:config")
33 |
34 |
35 | def test_model_webpage_wrong_size() -> None:
36 | with raises(ValidationError) as error:
37 | WebPage(url="https://www.google.fr", width=0, height=0)
38 |
39 | assert (
40 | "2 validation errors for WebPage\nwidth\n "
41 | "Input should be greater than or equal to 100 [type=greater_than_equal, input_value=0, input_type=int]"
42 | ) in str(error.value)
43 | assert (
44 | "height\n "
45 | "Input should be greater than or equal to 50 [type=greater_than_equal, input_value=0, input_type=int]"
46 | ) in str(error.value)
47 |
48 |
49 | def test_model_webpage_default_size() -> None:
50 | webpage = WebPage(url="https://www.google.fr")
51 | assert webpage.height == 1080
52 | assert webpage.width == 1920
53 |
54 |
55 | def test_model_valid():
56 | valid_ecoindex = Ecoindex(grade="A", score=99.9, ges=0.6, water=0.1)
57 | assert valid_ecoindex.grade == "A"
58 | assert valid_ecoindex.score == 99.9
59 | assert valid_ecoindex.ges == 0.6
60 | assert valid_ecoindex.water == 0.1
61 | assert valid_ecoindex.ecoindex_version not in [None, ""]
62 |
63 |
64 | def test_model_invalid():
65 | with raises(ValidationError) as error:
66 | Ecoindex(grade="dummy", score="dummy")
67 |
68 | assert "2 validation errors for Ecoindex" in str(error.value)
69 |
70 |
71 | def test_ecoindex_model_empty():
72 | ecoindex = Ecoindex()
73 | assert ecoindex.ges is None
74 | assert ecoindex.grade is None
75 | assert ecoindex.score is None
76 | assert ecoindex.water is None
77 |
78 |
79 | def test_result_model():
80 | result = Result(
81 | size=119,
82 | nodes=45,
83 | requests=8,
84 | url="http://www.myurl.com",
85 | width=1920,
86 | height=1080,
87 | grade="A",
88 | score=89,
89 | ges=1.22,
90 | water=1.89,
91 | )
92 | assert result.page_type is None
93 | assert result.size == 119
94 | assert result.nodes == 45
95 | assert result.requests == 8
96 | assert result.width == 1920
97 | assert result.height == 1080
98 | assert result.grade == "A"
99 | assert result.score == 89
100 | assert result.ges == 1.22
101 | assert result.water == 1.89
102 | assert result.ecoindex_version is not None
103 |
104 |
105 | def test_screenshot_model():
106 | id = "screenshot_test_id"
107 | folder = "./screenshot_test"
108 |
109 | screenshot = ScreenShot(id=id, folder=folder)
110 |
111 | assert isdir(folder) is True
112 | assert screenshot.id == id
113 | assert screenshot.folder == folder
114 | assert screenshot.get_png() == f"{folder}/{id}.png"
115 | assert screenshot.get_webp() == f"{folder}/{id}.webp"
116 |
117 | rmdir(folder)
118 | assert isdir(folder) is False
119 |
120 |
121 | if __name__ == "__main__":
122 | test_model_webpage_no_url()
123 | test_model_webpage_invalid_url()
124 | test_model_webpage_wrong_size()
125 | test_model_webpage_default_size()
126 | test_model_valid()
127 | test_model_invalid()
128 | test_ecoindex_model_empty()
129 | test_result_model()
130 | test_screenshot_model()
131 |
--------------------------------------------------------------------------------
/test/components/ecoindex/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/data/__init__.py
--------------------------------------------------------------------------------
/test/components/ecoindex/exceptions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/exceptions/__init__.py
--------------------------------------------------------------------------------
/test/components/ecoindex/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/models/__init__.py
--------------------------------------------------------------------------------
/test/components/ecoindex/models/test_scraper.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ecoindex.models.scraper import MimetypeAggregation
3 |
4 |
5 | @pytest.mark.asyncio
6 | async def test_get_category_of_resource_video() -> None:
7 | mime_type = "video/mp4"
8 | assert await MimetypeAggregation.get_category_of_resource(mime_type) == "video"
9 |
10 |
11 | @pytest.mark.asyncio
12 | async def test_get_category_of_resource_image() -> None:
13 | mime_type = "image/png"
14 | assert await MimetypeAggregation.get_category_of_resource(mime_type) == "image"
15 |
16 |
17 | @pytest.mark.asyncio
18 | async def test_get_category_of_resource_font() -> None:
19 | mime_type = "font/woff2"
20 | assert await MimetypeAggregation.get_category_of_resource(mime_type) == "font"
21 |
22 |
23 | @pytest.mark.asyncio
24 | async def test_get_category_of_resource_css() -> None:
25 | mime_type = "text/css"
26 | assert await MimetypeAggregation.get_category_of_resource(mime_type) == "css"
27 |
28 |
29 | @pytest.mark.asyncio
30 | async def test_get_category_of_resource_javascript() -> None:
31 | mime_type = "application/javascript"
32 | assert await MimetypeAggregation.get_category_of_resource(mime_type) == "javascript"
33 |
34 |
35 | @pytest.mark.asyncio
36 | async def test_get_category_of_resource_other() -> None:
37 | mime_type = "application/pdf"
38 | assert await MimetypeAggregation.get_category_of_resource(mime_type) == "other"
39 |
40 |
41 | if __name__ == "__main__":
42 | pytest.main()
43 |
--------------------------------------------------------------------------------
/test/components/ecoindex/scraper/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/scraper/__init__.py
--------------------------------------------------------------------------------
/test/components/ecoindex/scraper/test_scraper.py:
--------------------------------------------------------------------------------
1 | import json
2 | import pytest
3 | from ecoindex.exceptions.scraper import EcoindexScraperStatusException
4 | from ecoindex.models import ScreenShot, WindowSize
5 | from ecoindex.scraper import EcoindexScraper
6 |
7 |
8 | def test_scraper_init():
9 | url = "https://www.example.com"
10 | scraper = EcoindexScraper(url=url) # type: ignore
11 | assert scraper.url == url
12 | assert scraper.window_size == WindowSize(width=1920, height=1080)
13 | assert scraper.wait_before_scroll == 1
14 | assert scraper.wait_after_scroll == 1
15 | assert scraper.screenshot is None
16 | assert scraper.screenshot_uid is None
17 | assert scraper.screenshot_gid is None
18 | assert scraper.page_load_timeout == 20
19 |
20 |
21 | def test_scraper_init_with_options():
22 | url = "https://www.example.com"
23 | window_size = WindowSize(width=800, height=600)
24 | wait_before_scroll = 2
25 | wait_after_scroll = 2
26 | screenshot_uid = 123
27 | screenshot_gid = 456
28 | page_load_timeout = 30
29 | screenshot_id = "123"
30 | screenshot_folder = "/tmp/screenshots"
31 |
32 | scraper = EcoindexScraper(
33 | url=url, # type: ignore
34 | window_size=window_size,
35 | wait_before_scroll=wait_before_scroll,
36 | wait_after_scroll=wait_after_scroll,
37 | screenshot=ScreenShot(id=screenshot_id, folder=screenshot_folder),
38 | screenshot_uid=screenshot_uid,
39 | screenshot_gid=screenshot_gid,
40 | page_load_timeout=page_load_timeout,
41 | )
42 |
43 | assert scraper.url == url
44 | assert scraper.window_size == window_size
45 | assert scraper.wait_before_scroll == wait_before_scroll
46 | assert scraper.wait_after_scroll == wait_after_scroll
47 | assert scraper.screenshot.get_png() == f"{screenshot_folder}/{screenshot_id}.png" # type: ignore
48 | assert scraper.screenshot.get_webp() == f"{screenshot_folder}/{screenshot_id}.webp" # type: ignore
49 | assert scraper.screenshot_gid == screenshot_gid
50 | assert scraper.page_load_timeout == page_load_timeout
51 |
52 |
53 | def test_get_request_size():
54 | mock_stripped_har_entry = (
55 | {
56 | "request": {
57 | "url": "https://www.ecoindex.fr/",
58 | },
59 | "response": {
60 | "status": 200,
61 | "headers": [
62 | {"name": "content-length", "value": "7347"},
63 | ],
64 | "content": {
65 | "mimeType": "text/html",
66 | },
67 | "_transferSize": 7772,
68 | },
69 | },
70 | {
71 | "request": {
72 | "url": "https://www.ecoindex.fr/",
73 | },
74 | "response": {
75 | "status": 200,
76 | "headers": [
77 | {"name": "content-length", "value": "7347"},
78 | ],
79 | "content": {
80 | "mimeType": "text/html",
81 | },
82 | "_transferSize": -1,
83 | },
84 | },
85 | {
86 | "request": {
87 | "url": "https://www.ecoindex.fr/",
88 | },
89 | "response": {
90 | "status": 206,
91 | "headers": [
92 | {"name": "Content-Length", "value": "7347"},
93 | ],
94 | "content": {
95 | "mimeType": "text/html",
96 | },
97 | "_transferSize": -1,
98 | },
99 | },
100 | )
101 | url = "https://www.example.com"
102 | window_size = WindowSize(width=800, height=600)
103 | wait_before_scroll = 2
104 | wait_after_scroll = 2
105 | screenshot_uid = 123
106 | screenshot_gid = 456
107 | page_load_timeout = 30
108 | screenshot_id = "123"
109 | screenshot_folder = "/tmp/screenshots"
110 |
111 | scraper = EcoindexScraper(
112 | url=url, # type: ignore
113 | window_size=window_size,
114 | wait_before_scroll=wait_before_scroll,
115 | wait_after_scroll=wait_after_scroll,
116 | screenshot=ScreenShot(id=screenshot_id, folder=screenshot_folder),
117 | screenshot_uid=screenshot_uid,
118 | screenshot_gid=screenshot_gid,
119 | page_load_timeout=page_load_timeout,
120 | )
121 | assert scraper.get_request_size(mock_stripped_har_entry[0]) == 7772
122 | assert scraper.get_request_size(mock_stripped_har_entry[1]) == len(
123 | json.dumps(mock_stripped_har_entry[1]["response"]).encode("utf-8")
124 | )
125 | assert scraper.get_request_size(mock_stripped_har_entry[2]) == len(
126 | json.dumps(mock_stripped_har_entry[2]["response"]).encode("utf-8")
127 | )
128 |
129 | @pytest.mark.asyncio
130 | async def test_check_page_response():
131 | mock_stripped_har_entry = (
132 | {
133 | "response": {
134 | "status": 200,
135 | "headers": {"content-type": "audio/mpeg"},
136 | }
137 | },
138 | {
139 | "response": {
140 | "status": 404,
141 | "headers": {"content-type": "text/html"},
142 | "status_text": "Not Found",
143 | }
144 | },
145 | {
146 | "response": {
147 | "status": 200,
148 | "headers": {"content-type": "text/html"},
149 | }
150 | },
151 | )
152 | url = "https://www.example.com"
153 | window_size = WindowSize(width=800, height=600)
154 | wait_before_scroll = 2
155 | wait_after_scroll = 2
156 | screenshot_uid = 123
157 | screenshot_gid = 456
158 | page_load_timeout = 30
159 | screenshot_id = "123"
160 | screenshot_folder = "/tmp/screenshots"
161 |
162 | scraper = EcoindexScraper(
163 | url=url, # type: ignore
164 | window_size=window_size,
165 | wait_before_scroll=wait_before_scroll,
166 | wait_after_scroll=wait_after_scroll,
167 | screenshot=ScreenShot(id=screenshot_id, folder=screenshot_folder),
168 | screenshot_uid=screenshot_uid,
169 | screenshot_gid=screenshot_gid,
170 | page_load_timeout=page_load_timeout,
171 | )
172 | try:
173 | scraper.check_page_response(mock_stripped_har_entry[0])
174 | except TypeError as e:
175 |         assert str(e) == str({
176 | "mimetype": "audio/mpeg",
177 | "message": (
178 | "This resource is not " "a standard page with mimeType 'text/html'"
179 | ),
180 |         })
181 |
182 | try:
183 | scraper.check_page_response(mock_stripped_har_entry[1])
184 | except EcoindexScraperStatusException as e:
185 |         assert str(e) == str({
186 | "url": "https://www.example.com",
187 | "status": 404,
188 | "message": mock_stripped_har_entry[1]["response"]["status_text"],
189 |         })
190 |
191 | assert scraper.check_page_response(mock_stripped_har_entry[2]) is None
192 |
--------------------------------------------------------------------------------
/test/components/ecoindex/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/scripts/__init__.py
--------------------------------------------------------------------------------
/test/components/ecoindex/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/utils/__init__.py
--------------------------------------------------------------------------------
/test/components/ecoindex/worker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnumr/ecoindex_python_fullstack/44ec88f7de8fcae30400c26774620a98b973c647/test/components/ecoindex/worker/__init__.py
--------------------------------------------------------------------------------
/workspace.toml:
--------------------------------------------------------------------------------
1 | [tool.polylith]
2 | namespace = "ecoindex"
3 | git_tag_pattern = "stable-*"
4 |
5 | [tool.polylith.structure]
6 | theme = "loose"
7 |
8 | [tool.polylith.resources]
9 | brick_docs_enabled = false
10 |
11 | [tool.polylith.test]
12 | enabled = true
13 |
--------------------------------------------------------------------------------