├── database
│   └── .gitkeep
├── analytics
│   ├── __init__.py
│   └── notebooks
│       └── __init__.py
├── dbt_
│   ├── analyses
│   │   └── .gitkeep
│   ├── macros
│   │   └── .gitkeep
│   ├── seeds
│   │   ├── .gitkeep
│   │   └── schema.yml
│   ├── snapshots
│   │   └── .gitkeep
│   ├── tests
│   │   ├── .gitkeep
│   │   ├── test_valeur_saniraire_2.sql
│   │   ├── unique_datetimeprel_per_cdreseau_referenceprel.sql
│   │   ├── test__coverage_20pfas_4pfas_98pct.sql
│   │   ├── test_sub_active_results.sql
│   │   ├── test_tous_results.sql
│   │   ├── test_cvm_results.sql
│   │   ├── test_nitrates_results.sql
│   │   └── test_pfas_results.sql
│   ├── .gitignore
│   ├── packages.yml
│   ├── profiles.yml
│   ├── models
│   │   ├── staging
│   │   │   ├── communes
│   │   │   │   ├── stg_communes__opendatasoft.sql
│   │   │   │   ├── stg_communes__cog.sql
│   │   │   │   └── _communes_models.yml
│   │   │   ├── atlasante
│   │   │   │   ├── stg_atlasante_udi_corse.sql
│   │   │   │   ├── stg_atlasante_udi_2023.sql
│   │   │   │   ├── stg_atlasante_udi_2024.sql
│   │   │   │   └── _atlasante_models.yml
│   │   │   └── edc
│   │   │       ├── stg_edc__communes.sql
│   │   │       ├── stg_edc__resultats.sql
│   │   │       ├── stg_edc__prevelevements.sql
│   │   │       └── val_traduite__docs.md
│   │   ├── intermediate
│   │   │   ├── int__udi.sql
│   │   │   ├── int__commune_geom.sql
│   │   │   ├── tous
│   │   │   │   ├── int__resultats_tous_udi_annuel.sql
│   │   │   │   ├── int__resultats_tous_commune_annuel.sql
│   │   │   │   ├── int__resultats_tous_udi_dernier.sql
│   │   │   │   └── int__resultats_tous_commune_dernier.sql
│   │   │   ├── int__udi_geom.sql
│   │   │   ├── int__lien_commune_cdreseau.sql
│   │   │   ├── int__valeurs_de_reference.sql
│   │   │   ├── int__lien_cdreseau_refreneceprel.sql
│   │   │   ├── int__prelevements_uniques.sql
│   │   │   ├── int__parametres_non_references.sql
│   │   │   ├── nitrate
│   │   │   │   ├── int__resultats_nitrate_udi_annuel.sql
│   │   │   │   ├── int__resultats_nitrate_commune_annuel.sql
│   │   │   │   ├── int__resultats_nitrate_udi_dernier.sql
│   │   │   │   └── int__resultats_nitrate_commune_dernier.sql
│   │   │   ├── pesticide
│   │   │   │   ├── sub_active
│   │   │   │   │   ├── int__resultats_sub_active_udi_annuel.sql
│   │   │   │   │   ├── int__resultats_sub_active_commune_annuel.sql
│   │   │   │   │   ├── int__resultats_sub_active_udi_dernier.sql
│   │   │   │   │   └── int__resultats_sub_active_commune_dernier.sql
│   │   │   │   └── metabolite
│   │   │   │       ├── int__resultats_metabolite_udi_dernier.sql
│   │   │   │       └── int__resultats_metabolite_commune_dernier.sql
│   │   │   ├── cvm
│   │   │   │   ├── int__resultats_cvm_udi_annuel.sql
│   │   │   │   ├── int__resultats_cvm_commune_annuel.sql
│   │   │   │   ├── int__resultats_cvm_udi_dernier.sql
│   │   │   │   └── int__resultats_cvm_commune_dernier.sql
│   │   │   ├── sub_indus
│   │   │   │   ├── int__resultats_sub_indus_udi_annuel.sql
│   │   │   │   ├── int__resultats_sub_indus_commune_annuel.sql
│   │   │   │   ├── int__resultats_sub_indus_udi_dernier.sql
│   │   │   │   └── int__resultats_sub_indus_commune_dernier.sql
│   │   │   ├── metaux_lourds
│   │   │   │   ├── int__resultats_metaux_lourds_udi_annuel.sql
│   │   │   │   ├── int__resultats_metaux_lourds_commune_annuel.sql
│   │   │   │   ├── int__resultats_metaux_lourds_udi_dernier.sql
│   │   │   │   └── int__resultats_metaux_lourds_commune_dernier.sql
│   │   │   ├── int__resultats_udi_communes.sql
│   │   │   └── pfas
│   │   │       ├── int__resultats_pfas_udi_annuel.sql
│   │   │       └── int__resultats_pfas_commune_annuel.sql
│   │   ├── sources
│   │   │   └── __sources.yml
│   │   └── website
│   │       ├── web__stats_udi.sql
│   │       ├── web__resultats_udi.sql
│   │       └── web__resultats_communes.sql
│   └── dbt_project.yml
├── .python-version
├── webapp
│   ├── .prettierrc.json
│   ├── app
│   │   ├── duckdb-example
│   │   │   ├── loading.tsx
│   │   │   └── page.tsx
│   │   ├── page.tsx
│   │   ├── api
│   │   │   ├── udi
│   │   │   │   ├── dummy
│   │   │   │   │   └── route.ts
│   │   │   │   └── find
│   │   │   │       └── route.tsx
│   │   │   └── db-example
│   │   │       └── route.ts
│   │   ├── embed
│   │   │   └── page.tsx
│   │   ├── embed-external
│   │   │   └── page.tsx
│   │   ├── lib
│   │   │   └── duckdb.ts
│   │   ├── layout.tsx
│   │   └── config.ts
│   ├── public
│   │   ├── images
│   │   │   └── dfg.png
│   │   └── fonts
│   │       ├── raleway-v37-latin-300.woff2
│   │       ├── raleway-v37-latin-600.woff2
│   │       ├── raleway-v37-latin-700.woff2
│   │       └── raleway-v37-latin-regular.woff2
│   ├── postcss.config.mjs
│   ├── lib
│   │   ├── utils.ts
│   │   ├── iframe-scroll.ts
│   │   ├── mapLocale.ts
│   │   └── property.ts
│   ├── .env
│   ├── eslint.config.mjs
│   ├── components.json
│   ├── tsconfig.json
│   ├── .gitignore
│   ├── components
│   │   ├── ui
│   │   │   ├── input.tsx
│   │   │   ├── switch.tsx
│   │   │   ├── hover-card.tsx
│   │   │   ├── tooltip.tsx
│   │   │   ├── popover.tsx
│   │   │   ├── scroll-area.tsx
│   │   │   ├── button.tsx
│   │   │   └── card.tsx
│   │   └── EmbedBanner.tsx
│   ├── package.json
│   ├── next.config.ts
│   └── tailwind.config.ts
├── .sqlfluff
├── pipelines
│   ├── config
│   │   ├── .env.example
│   │   ├── __init__.py
│   │   └── config.py
│   ├── tasks
│   │   ├── __init__.py
│   │   ├── config
│   │   │   ├── config_insee.py
│   │   │   ├── config_pmtiles.py
│   │   │   ├── config_geojson.py
│   │   │   ├── common.py
│   │   │   ├── config_uploaded_geojson.py
│   │   │   └── config_edc.py
│   │   ├── client
│   │   │   ├── core
│   │   │   │   ├── https_to_duck_client.py
│   │   │   │   └── https_client.py
│   │   │   ├── commune_client.py
│   │   │   ├── opendatasoft_client.py
│   │   │   ├── pmtiles_processor.py
│   │   │   └── uploaded_geojson_client.py
│   │   ├── upload_database.py
│   │   ├── generate_pmtiles.py
│   │   ├── generate_pmtiles_legacy.py
│   │   └── download_pmtiles.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── logger.py
│   │   └── utils.py
│   ├── __init__.py
│   ├── test_pipelines.py
│   └── notebooks
│       ├── test_geojson_from_db.ipynb
│       └── test_atlasante_udi.ipynb
├── .dockerignore
├── .vscode
│   ├── extensions.json
│   ├── tasks.json
│   └── settings.json
├── Dockerfile.clevercloud
├── .github
│   └── workflows
│       ├── pre-commit.yaml
│       ├── lint_nextjs.yml
│       ├── test_dbt.yaml
│       └── test_pipelines.yaml
├── .gitignore
├── LICENSE
├── pyproject.toml
├── .pre-commit-config.yaml
└── Dockerfile.unified
/database/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/analytics/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dbt_/analyses/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dbt_/macros/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dbt_/seeds/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dbt_/snapshots/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dbt_/tests/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.12
2 |
--------------------------------------------------------------------------------
/analytics/notebooks/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/webapp/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {}
2 |
--------------------------------------------------------------------------------
/.sqlfluff:
--------------------------------------------------------------------------------
1 | [sqlfluff]
2 | dialect = duckdb
3 | max_line_length = 100
4 |
--------------------------------------------------------------------------------
/pipelines/config/.env.example:
--------------------------------------------------------------------------------
1 | SCW_ACCESS_KEY=MyKey
2 | SCW_SECRET_KEY=MySecret
--------------------------------------------------------------------------------
/pipelines/config/__init__.py:
--------------------------------------------------------------------------------
1 | # config/__init__.py
2 |
3 | # Initialize the config package
4 |
--------------------------------------------------------------------------------
/pipelines/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | # tasks/__init__.py
2 |
3 | # Initialize the tasks package
4 |
--------------------------------------------------------------------------------
/pipelines/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # utils/__init__.py
2 |
3 | # Initialize the utils package
4 |
--------------------------------------------------------------------------------
/dbt_/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | target/
3 | dbt_packages/
4 | logs/
5 | package-lock.yml
6 | .user.yml
7 | !seeds/*.csv
--------------------------------------------------------------------------------
/webapp/app/duckdb-example/loading.tsx:
--------------------------------------------------------------------------------
1 | export default function Loading() {
2 | return "Loading...";
3 | }
4 |
--------------------------------------------------------------------------------
/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | # init the pipelines package
2 | from .utils.logger import setup_logger
3 |
4 | setup_logger()
5 |
--------------------------------------------------------------------------------
/webapp/public/images/dfg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/images/dfg.png
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | **/node_modules
2 | .git
3 | .github
4 | webapp/.next
5 | .vscode
6 | .env*
7 | **/.env*
8 | npm-debug.log
9 | .DS_Store
10 |
--------------------------------------------------------------------------------
/webapp/app/page.tsx:
--------------------------------------------------------------------------------
1 | import { redirect } from "next/navigation";
2 |
3 | export default async function Home() {
4 | redirect("/embed");
5 | }
6 |
--------------------------------------------------------------------------------
/dbt_/packages.yml:
--------------------------------------------------------------------------------
1 | packages:
2 | - package: calogica/dbt_expectations
3 | version: 0.10.4
4 |
5 | - package: dbt-labs/dbt_utils
6 | version: 1.3.0
--------------------------------------------------------------------------------
/webapp/public/fonts/raleway-v37-latin-300.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/fonts/raleway-v37-latin-300.woff2
--------------------------------------------------------------------------------
/webapp/public/fonts/raleway-v37-latin-600.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/fonts/raleway-v37-latin-600.woff2
--------------------------------------------------------------------------------
/webapp/public/fonts/raleway-v37-latin-700.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/fonts/raleway-v37-latin-700.woff2
--------------------------------------------------------------------------------
/webapp/public/fonts/raleway-v37-latin-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/fonts/raleway-v37-latin-regular.woff2
--------------------------------------------------------------------------------
/webapp/postcss.config.mjs:
--------------------------------------------------------------------------------
1 | /** @type {import('postcss-load-config').Config} */
2 | const config = {
3 | plugins: {
4 | tailwindcss: {},
5 | },
6 | };
7 |
8 | export default config;
9 |
--------------------------------------------------------------------------------
/dbt_/profiles.yml:
--------------------------------------------------------------------------------
1 | dbt_:
2 | outputs:
3 | dev:
4 | type: duckdb
5 | path: ../database/data.duckdb
6 | threads: 1
7 | extensions:
8 | - spatial
9 | target: dev
10 |
--------------------------------------------------------------------------------
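The dev profile above points dbt at a local DuckDB file, ../database/data.duckdb relative to dbt_/ (i.e. database/data.duckdb at the repository root), with the spatial extension enabled. A minimal sketch, assuming the database has already been built or downloaded, of inspecting that file from Python with the duckdb package pinned in pyproject.toml:

    import duckdb

    # Same file the dbt profile targets; read-only so a concurrent dbt run can still write to it.
    con = duckdb.connect("database/data.duckdb", read_only=True)
    con.install_extension("spatial")  # models such as int__commune_geom use spatial functions
    con.load_extension("spatial")

    print(con.sql("SHOW TABLES").fetchall())
    con.close()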
/webapp/lib/utils.ts:
--------------------------------------------------------------------------------
1 | import { clsx, type ClassValue } from "clsx";
2 | import { twMerge } from "tailwind-merge";
3 |
4 | export function cn(...inputs: ClassValue[]) {
5 | return twMerge(clsx(inputs));
6 | }
7 |
--------------------------------------------------------------------------------
/dbt_/models/staging/communes/stg_communes__opendatasoft.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | com_code[1]::VARCHAR AS com_code,
3 | com_name[1]::VARCHAR AS com_name,
4 | geom::GEOMETRY AS geom
5 | FROM {{ source('communes', 'opendatasoft_communes') }}
6 |
--------------------------------------------------------------------------------
/webapp/app/api/udi/dummy/route.ts:
--------------------------------------------------------------------------------
1 | import { NextResponse } from "next/server";
2 | import { mockData } from "@/app/lib/mock-data";
3 |
4 | export async function GET() {
5 | return NextResponse.json(mockData["UDI12345"], { status: 200 });
6 | }
7 |
--------------------------------------------------------------------------------
/dbt_/models/staging/atlasante/stg_atlasante_udi_corse.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | gid::INTEGER AS gid,
3 | cn_udi::VARCHAR AS cn_udi,
4 | nom_udi::VARCHAR AS nom_udi,
5 | geom::GEOMETRY AS geom,
6 | ingestion_date::DATE AS ingestion_date
7 | FROM {{ source('atlasante', 'atlasante_udi_corse') }}
8 |
--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | "recommendations": [
3 | "ms-python.python",
4 | "ms-toolsai.jupyter",
5 | "charliermarsh.ruff",
6 | "github.vscode-pull-request-github",
7 | "actboy168.tasks",
8 | "dbaeumer.vscode-eslint",
9 | "esbenp.prettier-vscode"
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/Dockerfile.clevercloud:
--------------------------------------------------------------------------------
1 | # Dockerfile for Clever Cloud - pulls pre-built unified image
2 |
3 | ARG IMAGE_TAG=latest
4 | FROM ghcr.io/dataforgoodfr/13_pollution_eau/pollution-eau-unified:${IMAGE_TAG}
5 |
6 | # Expose the port
7 | EXPOSE 8080
8 |
9 | # Use the same entrypoint as the unified image
10 | CMD ["node", "server.js"]
--------------------------------------------------------------------------------
/pipelines/utils/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 |
4 | def setup_logger(
5 | level=logging.INFO,
6 | log_format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
7 | ):
8 | "config log"
9 | logging.basicConfig(level=level, format=log_format)
10 |
11 |
12 | def get_logger(name):
13 | return logging.getLogger(name)
14 |
--------------------------------------------------------------------------------
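The rest of the pipelines package relies on this module as its single logging entry point: pipelines/__init__.py calls setup_logger() at import time, and task modules fetch named loggers, as upload_database.py does further down. A minimal usage sketch:

    from pipelines.utils.logger import get_logger, setup_logger

    setup_logger()                 # normally done once, in pipelines/__init__.py
    logger = get_logger(__name__)  # same pattern as pipelines/tasks/upload_database.py
    logger.info("pipeline step started")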
/dbt_/models/staging/atlasante/stg_atlasante_udi_2023.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | gid::INTEGER AS gid,
3 | code_udi::VARCHAR AS code_udi,
4 | ins_nom::VARCHAR AS ins_nom,
5 | uge_nom::VARCHAR AS uge_nom,
6 | udi_pop::VARCHAR AS udi_pop,
7 | geom::GEOMETRY AS geom,
8 | ingestion_date::DATE AS ingestion_date
9 | FROM {{ source('atlasante', 'atlasante_udi_2023') }}
10 |
--------------------------------------------------------------------------------
/dbt_/models/staging/atlasante/stg_atlasante_udi_2024.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | gid::INTEGER AS gid,
3 | code_udi::VARCHAR AS code_udi,
4 | ins_nom::VARCHAR AS ins_nom,
5 | uge_nom::VARCHAR AS uge_nom,
6 | udi_pop::VARCHAR AS udi_pop,
7 | geom::GEOMETRY AS geom,
8 | ingestion_date::DATE AS ingestion_date
9 | FROM {{ source('atlasante', 'atlasante_udi_2024') }}
10 |
--------------------------------------------------------------------------------
/webapp/.env:
--------------------------------------------------------------------------------
1 | # Le fichier .env est utilisé pour ajouter des variables non secrètes
2 | # voir https://nextjs.org/docs/pages/building-your-application/configuring/environment-variables
3 |
4 | # variables disponibles sur le navigateur et le serveur
5 | NEXT_PUBLIC_PROTOMAPS_API_KEY=707d8bc70b393fc0
6 |
7 | # variables disponibles uniquement sur le côté serveur
8 | DUCKDB_PATH=../database/data.duckdb
9 |
--------------------------------------------------------------------------------
/dbt_/tests/test_valeur_saniraire_2.sql:
--------------------------------------------------------------------------------
1 | -- we make sure that valeur_sanitaire_2 is > valeur_sanitaire_1
2 | -- when they are not null
3 | -- cf. int__resultats_sub_indus_udi_dernier.sql for why this is required
4 |
5 |
6 | select *
7 | from {{ ref('int__valeurs_de_reference') }}
8 | where
9 | valeur_sanitaire_1 is not null
10 | and valeur_sanitaire_2 is not null
11 | and valeur_sanitaire_1 >= valeur_sanitaire_2
12 |
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "2.0.0",
3 | "tasks": [
4 | {
5 | "label": "Download database",
6 | "type": "shell",
7 | "command": "uv run pipelines/run.py run download_database",
8 | "group": "none",
9 | "icon": {
10 | "id": "cloud-download"
11 | },
12 | "presentation": {
13 | "reveal": "always",
14 | "panel": "new"
15 | }
16 | }
17 | ]
18 | }
--------------------------------------------------------------------------------
/webapp/lib/iframe-scroll.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Utility function to request the parent window to scroll the iframe into view
3 | * This should be called when user interacts with map components
4 | */
5 | export function scrollIframeToFullscreen() {
6 | // Check if we're in an iframe
7 | if (window.self !== window.top) {
8 | // Send message to parent window to scroll this iframe into view
9 | window.parent.postMessage({ type: "scrollToIframe" }, "*");
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__udi.sql:
--------------------------------------------------------------------------------
1 | select
2 | cdreseau,
3 | string_agg(distinct inseecommune) as inseecommunes,
4 | string_agg(distinct quartiers) as quartiers,
5 | string_agg(distinct nomreseaux) as nomreseaux
6 |
7 |
8 | from {{ ref('int__lien_commune_cdreseau') }}
9 | group by cdreseau
10 |
11 | -- TODO: on pourrait garder une partition avec "de_partition".
12 | -- A noter néanmoins que la seule dépendance à ce modèle (web__resultats_udi)
13 | -- ne le requiert pas.
14 |
--------------------------------------------------------------------------------
/pipelines/tasks/config/config_insee.py:
--------------------------------------------------------------------------------
1 | def get_insee_config() -> dict:
2 | """Configuration for La Poste dataset"""
3 | return {
4 | "source": {
5 | "base_url": "https://www.insee.fr/fr/statistiques/fichier/7766585/",
6 | "id": "v_commune_2024.csv",
7 | "datetime": "20240220",
8 | },
9 | "file": {
10 | "file_name": "insee_communes_2024.csv",
11 | "table_name": "cog_communes",
12 | },
13 | }
14 |
--------------------------------------------------------------------------------
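This dictionary is what CommuneClient (shown further down) consumes: base_url plus id form the download URL, while file_name and table_name drive the cache path and the DuckDB table name. A small sketch of that wiring; the CACHE_FOLDER value is an assumption standing in for the constant imported from pipelines.tasks.config.common, which is not reproduced in this dump:

    from pathlib import Path

    from pipelines.tasks.config.config_insee import get_insee_config

    CACHE_FOLDER = Path("cache")  # assumption: stands in for pipelines.tasks.config.common.CACHE_FOLDER

    cfg = get_insee_config()
    url = cfg["source"]["base_url"] + cfg["source"]["id"]
    filepath = CACHE_FOLDER / cfg["file"]["file_name"]
    print(url)       # https://www.insee.fr/fr/statistiques/fichier/7766585/v_commune_2024.csv
    print(filepath)  # cache/insee_communes_2024.csv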
/webapp/eslint.config.mjs:
--------------------------------------------------------------------------------
1 | import { dirname } from "path";
2 | import { fileURLToPath } from "url";
3 | import { FlatCompat } from "@eslint/eslintrc";
4 |
5 | const __filename = fileURLToPath(import.meta.url);
6 | const __dirname = dirname(__filename);
7 |
8 | const compat = new FlatCompat({
9 | baseDirectory: __dirname,
10 | });
11 |
12 | const eslintConfig = [
13 | ...compat.extends("next/core-web-vitals", "next/typescript", "prettier"),
14 | ];
15 |
16 | export default eslintConfig;
17 |
--------------------------------------------------------------------------------
/dbt_/models/staging/edc/stg_edc__communes.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | inseecommune::VARCHAR(5) AS inseecommune,
3 | nomcommune::VARCHAR AS nomcommune,
4 | quartier::VARCHAR AS quartier,
5 | cdreseau::VARCHAR(9) AS cdreseau,
6 | nomreseau::VARCHAR AS nomreseau,
7 | debutalim::VARCHAR AS debutalim,
8 | de_partition::SMALLINT AS de_partition,
9 | de_ingestion_date::DATE AS de_ingestion_date,
10 | de_dataset_datetime::VARCHAR AS de_dataset_datetime
11 | FROM {{ source('edc', 'edc_communes') }}
12 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__commune_geom.sql:
--------------------------------------------------------------------------------
1 | WITH ranked_communes AS (
2 | SELECT
3 | com_code,
4 | com_name,
5 | geom,
6 | ROW_NUMBER() OVER (
7 | PARTITION BY com_code
8 | ORDER BY com_code
9 | ) AS row_num
10 | FROM {{ ref('stg_communes__opendatasoft') }}
11 | WHERE com_code IS NOT NULL AND com_code != ''
12 | )
13 |
14 | SELECT
15 | com_code,
16 | com_name,
17 | ST_ASGEOJSON(geom) AS geom
18 | FROM ranked_communes
19 | WHERE row_num = 1
20 |
--------------------------------------------------------------------------------
/dbt_/models/staging/communes/stg_communes__cog.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | TYPECOM::VARCHAR AS TYPECOM,
3 | COM::VARCHAR AS COM,
4 | REG::SMALLINT AS REG,
5 | DEP::VARCHAR AS DEP,
6 | CTCD::VARCHAR AS CTCD,
7 | ARR::VARCHAR AS ARR,
8 | TNCC::SMALLINT AS TNCC,
9 | NCC::VARCHAR AS NCC,
10 | NCCENR::VARCHAR AS NCCENR,
11 | LIBELLE::VARCHAR AS LIBELLE,
12 | CAN::VARCHAR AS CAN,
13 | COMPARENT::VARCHAR AS COMPARENT,
14 | DE_PARTITION::SMALLINT AS DE_PARTITION
15 | FROM {{ source('communes', 'cog_communes') }}
16 |
--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yaml:
--------------------------------------------------------------------------------
1 | name: pre-commit
2 |
3 | on:
4 | pull_request:
5 | push:
6 | branches: [main]
7 |
8 | jobs:
9 | pre-commit:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: Checkout code
13 | uses: actions/checkout@v3
14 |
15 | - name: Install a specific version of uv
16 | uses: astral-sh/setup-uv@v5
17 | with:
18 | version: ">=0.4.0"
19 | - name: Install dependencies
20 | run: uv sync
21 | - name: Run pre-commit
22 | run: uv run pre-commit run --all-files
--------------------------------------------------------------------------------
/webapp/components.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://ui.shadcn.com/schema.json",
3 | "style": "new-york",
4 | "rsc": true,
5 | "tsx": true,
6 | "tailwind": {
7 | "config": "tailwind.config.ts",
8 | "css": "app/globals.css",
9 | "baseColor": "neutral",
10 | "cssVariables": true,
11 | "prefix": ""
12 | },
13 | "aliases": {
14 | "components": "@/components",
15 | "utils": "@/lib/utils",
16 | "ui": "@/components/ui",
17 | "lib": "@/lib",
18 | "hooks": "@/hooks"
19 | },
20 | "iconLibrary": "lucide"
21 | }
22 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "[python]": {
3 | "editor.formatOnSave": true,
4 | "editor.defaultFormatter": "charliermarsh.ruff",
5 | "editor.codeActionsOnSave": {
6 | "source.organizeImports": "explicit"
7 | }
8 | },
9 | "notebook.formatOnSave.enabled": true,
10 | "python.testing.pytestArgs": ["-sv"],
11 | "python.testing.unittestEnabled": false,
12 | "python.testing.pytestEnabled": true,
13 | // Config for dorzey.vscode-sqlfluff extension
14 | "sqlfluff.executablePath": "${workspaceFolder}/.venv/bin/sqlfluff",
15 | "sqlfluff.linter.run": "onSave"
16 | }
17 |
--------------------------------------------------------------------------------
/dbt_/models/sources/__sources.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | sources:
4 | - name: edc
5 | database: data
6 | schema: main
7 | tables:
8 | - name: edc_communes
9 | - name: edc_prelevements
10 | - name: edc_resultats
11 | - name: communes
12 | database: data
13 | schema: main
14 | tables:
15 | - name: cog_communes
16 | - name: opendatasoft_communes
17 | - name: atlasante
18 | database: data
19 | schema: main
20 | tables:
21 | - name: atlasante_udi_2023
22 | - name: atlasante_udi_corse
23 | - name: atlasante_udi_2024
24 |
--------------------------------------------------------------------------------
/webapp/app/api/db-example/route.ts:
--------------------------------------------------------------------------------
1 | import { fetchExample } from "@/app/lib/data";
2 |
3 | // an api route fetching data
4 | export async function GET() {
5 | try {
6 | const reader = await fetchExample();
7 | return Response.json({
8 | status: "OK",
9 | rows: reader.getRowObjectsJson(),
10 | columnNames: reader.columnNames(),
11 | columnTypes: reader.columnTypes(),
12 | count: reader.columnCount,
13 | });
14 | } catch (error) {
15 | console.error("Error while retrieving data:", error);
16 | return Response.json({ error }, { status: 500 });
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
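With the Next.js dev server running (assumed here to be on the default localhost:3000), this route can be exercised from Python; the JSON payload carries the status, rows, columnNames, columnTypes and count fields built above:

    import requests

    # Assumption: `npm run dev` is serving the webapp on port 3000.
    resp = requests.get("http://localhost:3000/api/db-example", timeout=10)
    resp.raise_for_status()
    payload = resp.json()
    print(payload["columnNames"], len(payload["rows"]))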
/webapp/lib/mapLocale.ts:
--------------------------------------------------------------------------------
1 | import { defaultLocale } from "maplibre-gl/src/ui/default_locale";
2 |
3 | // French locale for MapLibre, inheriting from default locale
4 | // Only overriding the CooperativeGesturesHandler messages
5 | export const frenchLocale = {
6 | ...defaultLocale,
7 | // French overrides for CooperativeGesturesHandler
8 | "CooperativeGesturesHandler.WindowsHelpText":
9 | "Utilisez Ctrl + molette pour zoomer sur la carte",
10 | "CooperativeGesturesHandler.MacHelpText":
11 | "Utilisez ⌘ + molette pour zoomer sur la carte",
12 | "CooperativeGesturesHandler.MobileHelpText":
13 | "Utilisez deux doigts pour déplacer la carte",
14 | };
15 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Python
2 | __pycache__/
3 | *.py[cod]
4 | .pytest_cache/
5 | .mypy_cache/
6 | .ipynb_checkpoints/
7 | .venv/
8 | .idea
9 | .ruff_cache/
10 |
11 |
12 | # JavaScript/Node
13 | node_modules/
14 | npm-debug.log
15 | yarn-debug.log*
16 | yarn-error.log*
17 | build/
18 | dist/
19 | .next/
20 |
21 | # Database & Data
22 | *.sqlite3
23 | *.db
24 | *.duckdb
25 | *.duckdb.*
26 | *.csv
27 | *.parquet
28 | *.xlsx
29 | *.xls
30 | *.pmtiles
31 | logs/
32 |
33 | !database/.gitkeep
34 | cache/
35 |
36 | # OS
37 | .DS_Store
38 | Thumbs.db
39 | *.tmp
40 |
41 | # Environment & Secrets
42 | .env
43 | .env.*
44 | !.env.example
45 | *.pem
46 | secrets.yaml
47 | config.local.yaml
--------------------------------------------------------------------------------
/dbt_/tests/unique_datetimeprel_per_cdreseau_referenceprel.sql:
--------------------------------------------------------------------------------
1 | -- Nous vérifions que pour chaque couple cdreseau, referenceprel,
2 | -- il n'y a qu'une seule date datetimeprel.
3 | -- En effet, pour trouver tous les paramètres analysés lors du
4 | -- prélèvement le plus récent, on se base sur la date datetimeprel.
5 | -- Si on a plusieurs dates pour un même prélèvement, on ne peut pas
6 | -- savoir quel est le bon.
7 | --
8 | -- cf _int__resultats_metabolite_divers_udi_dernier.sql
9 |
10 | SELECT
11 | cdreseau,
12 | referenceprel,
13 | count(DISTINCT datetimeprel) AS count_datetimeprel
14 | FROM
15 | {{ ref('int__resultats_udi_communes') }}
16 | GROUP BY 1, 2
17 | HAVING count(DISTINCT datetimeprel) > 1
18 |
--------------------------------------------------------------------------------
/webapp/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2017",
4 | "lib": ["dom", "dom.iterable", "esnext"],
5 | "allowJs": true,
6 | "skipLibCheck": true,
7 | "strict": true,
8 | "noEmit": true,
9 | "esModuleInterop": true,
10 | "module": "esnext",
11 | "moduleResolution": "bundler",
12 | "resolveJsonModule": true,
13 | "isolatedModules": true,
14 | "jsx": "preserve",
15 | "incremental": true,
16 | "plugins": [
17 | {
18 | "name": "next"
19 | }
20 | ],
21 | "paths": {
22 | "@/*": ["./*"]
23 | }
24 | },
25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
26 | "exclude": ["node_modules"]
27 | }
28 |
--------------------------------------------------------------------------------
/webapp/app/embed/page.tsx:
--------------------------------------------------------------------------------
1 | import PollutionMap from "@/components/PollutionMap";
2 | import { fetchPollutionStats, fetchParameterValues } from "../lib/data";
3 |
4 | // Mise en cache de la page pour 24 heures
5 | export const revalidate = 86400;
6 |
7 | export default async function Embed() {
8 | const stats = await fetchPollutionStats();
9 | const parameterValues = await fetchParameterValues();
10 |
11 | return (
12 |
21 | );
22 | }
23 |
--------------------------------------------------------------------------------
/.github/workflows/lint_nextjs.yml:
--------------------------------------------------------------------------------
1 | name: Lint Next.js
2 |
3 | on:
4 | pull_request:
5 | branches: [main]
6 | paths:
7 | - "webapp/**"
8 |
9 | jobs:
10 | lint:
11 | name: Next.js Lint
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - name: Checkout repository
16 | uses: actions/checkout@v4
17 |
18 | - name: Setup Node.js
19 | uses: actions/setup-node@v4
20 | with:
21 | node-version: "20"
22 | cache: "npm"
23 | cache-dependency-path: ./webapp/package.json
24 |
25 | - name: Install dependencies
26 | run: |
27 | cd webapp
28 | npm ci
29 |
30 | - name: Run Next.js lint
31 | run: |
32 | cd webapp
33 | npm run lint
34 |
--------------------------------------------------------------------------------
/webapp/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 |
3 | # dependencies
4 | /node_modules
5 | /.pnp
6 | .pnp.*
7 | .yarn/*
8 | !.yarn/patches
9 | !.yarn/plugins
10 | !.yarn/releases
11 | !.yarn/versions
12 |
13 | # testing
14 | /coverage
15 |
16 | # next.js
17 | /.next/
18 | /out/
19 |
20 | # production
21 | /build
22 |
23 | # misc
24 | .DS_Store
25 | *.pem
26 |
27 | # debug
28 | npm-debug.log*
29 | yarn-debug.log*
30 | yarn-error.log*
31 | .pnpm-debug.log*
32 |
33 | # next.js lets you commit env variables: https://nextjs.org/docs/pages/building-your-application/configuring/environment-variables#loading-environment-variables
34 | !.env*
35 |
36 | # vercel
37 | .vercel
38 |
39 | # typescript
40 | *.tsbuildinfo
41 | next-env.d.ts
42 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/tous/int__resultats_tous_udi_annuel.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | cdreseau,
3 | 'tous' AS categorie,
4 | periode,
5 | sum(round(ratio * nb_prelevements))::float / sum(nb_prelevements)::float AS ratio,
6 | sum(nb_prelevements) AS nb_prelevements,
7 | sum(nb_sup_valeur_sanitaire) AS nb_sup_valeur_sanitaire
8 |
9 | FROM {{ ref('int__union_resultats_udi') }}
10 | WHERE
11 | periode LIKE 'bilan_annuel%'
12 | AND
13 | categorie NOT IN (
14 | 'sub_active',
15 | 'metabolite',
16 | 'metabolite_esa_metolachlore',
17 | 'metabolite_chlorothalonil_r471811',
18 | 'metabolite_chloridazone_desphenyl',
19 | 'metabolite_chloridazone_methyl_desphenyl',
20 | 'metabolite_atrazine_desethyl'
21 | )
22 | GROUP BY cdreseau, periode
23 |
--------------------------------------------------------------------------------
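The rollup above rebuilds an overall detection ratio from per-category ratios by re-weighting each category with its own number of samples; the commune-level model that follows applies the same formula. A quick check of the arithmetic with made-up numbers:

    # Hypothetical per-category rows for one cdreseau and periode (made-up values).
    rows = [
        {"ratio": 0.25, "nb_prelevements": 8},  # e.g. cvm
        {"ratio": 0.50, "nb_prelevements": 4},  # e.g. nitrate
    ]

    # Same formula as the model: sum(round(ratio * n)) / sum(n)
    num = sum(round(r["ratio"] * r["nb_prelevements"]) for r in rows)
    den = sum(r["nb_prelevements"] for r in rows)
    print(num / den)  # (2 + 2) / 12 = 0.333...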
/dbt_/models/intermediate/tous/int__resultats_tous_commune_annuel.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | inseecommune,
3 | 'tous' AS categorie,
4 | periode,
5 | sum(round(ratio * nb_prelevements))::float / sum(nb_prelevements)::float AS ratio,
6 | sum(nb_prelevements) AS nb_prelevements,
7 | sum(nb_sup_valeur_sanitaire) AS nb_sup_valeur_sanitaire
8 |
9 | FROM {{ ref('int__union_resultats_commune') }}
10 | WHERE
11 | periode LIKE 'bilan_annuel%'
12 | AND
13 | categorie NOT IN (
14 | 'sub_active',
15 | 'metabolite',
16 | 'metabolite_esa_metolachlore',
17 | 'metabolite_chlorothalonil_r471811',
18 | 'metabolite_chloridazone_desphenyl',
19 | 'metabolite_chloridazone_methyl_desphenyl',
20 | 'metabolite_atrazine_desethyl'
21 | )
22 | GROUP BY inseecommune, periode
23 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__udi_geom.sql:
--------------------------------------------------------------------------------
1 | WITH combined_data AS (
2 | -- SELECT
3 | -- code_udi,
4 | -- geom
5 | -- FROM {{ ref("stg_atlasante_udi_2023") }}
6 | -- UNION ALL
7 | -- SELECT
8 | -- cn_udi AS code_udi,
9 | -- geom
10 | -- FROM {{ ref("stg_atlasante_udi_corse") }}
11 |
12 | SELECT
13 | code_udi,
14 | geom
15 | FROM {{ ref("stg_atlasante_udi_2024") }}
16 | ),
17 |
18 | ranked_data AS (
19 | SELECT
20 | code_udi,
21 | geom,
22 | ROW_NUMBER() OVER (
23 | PARTITION BY code_udi
24 | ORDER BY code_udi
25 | ) AS row_num
26 | FROM combined_data
27 | WHERE code_udi IS NOT null AND code_udi != ''
28 | )
29 |
30 | SELECT
31 | code_udi,
32 | ST_ASGEOJSON(geom) AS geom
33 | FROM ranked_data
34 | WHERE row_num = 1
35 |
--------------------------------------------------------------------------------
/webapp/components/ui/input.tsx:
--------------------------------------------------------------------------------
1 | import * as React from "react";
2 |
3 | import { cn } from "@/lib/utils";
4 |
5 | const Input = React.forwardRef>(
6 | ({ className, type, ...props }, ref) => {
7 | return (
8 |
17 | );
18 | },
19 | );
20 | Input.displayName = "Input";
21 |
22 | export { Input };
23 |
--------------------------------------------------------------------------------
/webapp/app/embed-external/page.tsx:
--------------------------------------------------------------------------------
1 | import PollutionMap from "@/components/PollutionMap";
2 | import { fetchPollutionStats, fetchParameterValues } from "../lib/data";
3 |
4 | // Mise en cache de la page pour 24 heures
5 | export const revalidate = 86400;
6 |
7 | export default async function EmbedExternal({
8 | searchParams,
9 | }: {
10 | searchParams: Promise<{ category?: string }>;
11 | }) {
12 | const stats = await fetchPollutionStats();
13 | const parameterValues = await fetchParameterValues();
14 | const { category } = await searchParams;
15 |
16 | return (
17 |
27 | );
28 | }
29 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__lien_commune_cdreseau.sql:
--------------------------------------------------------------------------------
1 | {{
2 | config(
3 | materialized='table'
4 | )
5 | }}
6 |
7 | SELECT
8 | inseecommune,
9 | cdreseau,
10 | de_partition,
11 | -- Prenons toujours le même nom de commune pour une inseecommune donnée
12 | MIN(nomcommune) AS nomcommune,
13 | -- Agréger les différentes valeurs de quartier en une liste sans doublons
14 | STRING_AGG(DISTINCT quartier) FILTER (
15 | WHERE quartier IS NOT NULL AND quartier != '' AND quartier != '-'
16 | ) AS quartiers,
17 | -- Agréger les différentes valeurs de nomreseau en une liste sans doublons
18 | STRING_AGG(DISTINCT nomreseau) FILTER (
19 | WHERE nomreseau IS NOT NULL AND nomreseau != ''
20 | ) AS nomreseaux,
21 | -- Prendre la première date de début d'alimentation
22 | MIN(debutalim) AS debutalim
23 | FROM
24 | {{ ref('stg_edc__communes') }}
25 | GROUP BY
26 | inseecommune,
27 | cdreseau,
28 | de_partition
29 |
--------------------------------------------------------------------------------
/pipelines/test_pipelines.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 | import pytest
4 |
5 |
6 | @pytest.mark.parametrize(
7 | "task",
8 | ["build_database", "upload_database", "download_database"],
9 | )
10 | def test_pipeline_task(task):
11 | """
12 | Test the specified pipeline task.
13 |
14 | This function tests the execution of the specified pipeline task from the
15 | pipelines/run.py script. It ensures that the task runs without raising any exceptions.
16 |
17 | Args:
18 | task (str): The name of the pipeline task to test.
19 | """
20 | commands_list = ["uv", "run", "pipelines/run.py", "run", task]
21 |
22 | # add options
23 | if task == "build_database":
24 | commands_list.extend(["--refresh-type", "last"])
25 | elif task in ("download_database", "upload_database"):
26 | commands_list.extend(["--env", "dev"])
27 |
28 | process = subprocess.run(commands_list)
29 |
30 | assert process.returncode == 0, f"{task} script failed"
31 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__valeurs_de_reference.sql:
--------------------------------------------------------------------------------
1 | {{
2 | config(
3 | materialized='table'
4 | )
5 | }}
6 |
7 | SELECT
8 | cdparametresiseeaux,
9 | MAX(categorie_1) AS categorie_1,
10 | MAX(categorie_2) AS categorie_2,
11 | MAX(categorie_3) AS categorie_3,
12 | MAX(limite_qualite) AS limite_qualite,
13 | MAX(limite_qualite_unite) AS limite_qualite_unite,
14 | MAX(limite_indicative) AS limite_indicative,
15 | MAX(limite_indicative_unite) AS limite_indicative_unite,
16 | MAX(valeur_sanitaire_1) AS valeur_sanitaire_1,
17 | MAX(valeur_sanitaire_1_unite) AS valeur_sanitaire_1_unite,
18 | MAX(valeur_sanitaire_2) AS valeur_sanitaire_2,
19 | MAX(valeur_sanitaire_2_unite) AS valeur_sanitaire_2_unite,
20 | MAX(web_label) AS web_label,
21 | COUNT(*) AS nb_rows -- we enforce this to be 1 in a dbt test
22 | FROM
23 | {{ ref('references_generations_futures') }}
24 | WHERE
25 | cdparametresiseeaux IS NOT NULL
26 | AND cdparametresiseeaux != ''
27 | GROUP BY
28 | cdparametresiseeaux
29 |
--------------------------------------------------------------------------------
/dbt_/models/staging/edc/stg_edc__resultats.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | cddept::VARCHAR(3) AS cddept,
3 | referenceprel::VARCHAR(11) AS referenceprel,
4 | cdparametresiseeaux::VARCHAR(10) AS cdparametresiseeaux,
5 | cdparametre::INT AS cdparametre,
6 | libmajparametre::VARCHAR AS libmajparametre,
7 | libminparametre::VARCHAR AS libminparametre,
8 | libwebparametre::VARCHAR AS libwebparametre,
9 | qualitparam::VARCHAR(1) AS qualitparam,
10 | insituana::VARCHAR(1) AS insituana,
11 | rqana::VARCHAR(8) AS rqana,
12 | cdunitereferencesiseeaux::VARCHAR(7) AS cdunitereferencesiseeaux,
13 | cdunitereference::VARCHAR AS cdunitereference,
14 | limitequal::VARCHAR AS limitequal,
15 | refqual::VARCHAR AS refqual,
16 | valtraduite::NUMERIC AS valtraduite,
17 | casparam::VARCHAR AS casparam,
18 | referenceanl::VARCHAR AS referenceanl,
19 | de_partition::SMALLINT AS de_partition,
20 | de_ingestion_date::DATE AS de_ingestion_date,
21 | de_dataset_datetime::VARCHAR AS de_dataset_datetime
22 | FROM {{ source('edc', 'edc_resultats') }}
23 |
--------------------------------------------------------------------------------
/webapp/app/lib/duckdb.ts:
--------------------------------------------------------------------------------
1 | import { DuckDBInstance } from "@duckdb/node-api";
2 | import fs from "fs";
3 | import path from "path";
4 |
5 | // Get database path from environment variable or use default
6 | const envDbPath = process.env.DUCKDB_PATH;
7 | const defaultDbPath = path.join(process.cwd(), "../database/data.duckdb");
8 | const dbFilePath = envDbPath || defaultDbPath;
9 |
10 | console.log(`Using database path: ${dbFilePath}`);
11 |
12 | // Check if the file exists
13 | if (!fs.existsSync(dbFilePath)) {
14 | throw new Error(
15 | `Database file not found at ${dbFilePath}. Please check that your DUCKDB_PATH environment variable is correctly set or that the default database exists.`,
16 | );
17 | }
18 |
19 | // Create DB instance
20 | const db = await DuckDBInstance.create(dbFilePath, {
21 | access_mode: "READ_ONLY",
22 | max_memory: "1GB",
23 | threads: "4",
24 | });
25 |
26 | // Load the geospatial extension
27 | const connection = await db.connect();
28 | await connection.run("INSTALL spatial;");
29 | await connection.run("LOAD spatial;");
30 |
31 | export default db;
32 |
--------------------------------------------------------------------------------
/.github/workflows/test_dbt.yaml:
--------------------------------------------------------------------------------
1 | name: 🧪 Run DBT Tests
2 |
3 | on:
4 | pull_request:
5 | branches: [main]
6 | paths:
7 | - 'dbt_/**'
8 |
9 | jobs:
10 | test:
11 | runs-on: ubuntu-latest
12 |
13 | steps:
14 | - name: Checkout code
15 | uses: actions/checkout@v3
16 |
17 | - name: Install a specific version of uv
18 | uses: astral-sh/setup-uv@v5
19 | with:
20 | version: ">=0.4.0"
21 |
22 | - name: Install dependencies
23 | run: uv sync
24 |
25 | - name: Download production database from Storage
26 | run: |
27 | uv run pipelines/run.py run download_database
28 |
29 | - name: Install dbt dependencies
30 | run: |
31 | cd dbt_
32 | uv run dbt deps || { echo "dbt deps failed"; exit 1; }
33 |
34 | - name: Run dbt build
35 | run: |
36 | cd dbt_
37 | uv run dbt build || { echo "dbt build failed"; exit 1; }
38 |
39 | - name: Run dbt docs generate
40 | run: |
41 | cd dbt_
42 | uv run dbt docs generate || { echo "dbt docs generate failed"; exit 1; }
43 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__lien_cdreseau_refreneceprel.sql:
--------------------------------------------------------------------------------
1 | with ranked as (
2 | select
3 | cdreseau,
4 | referenceprel,
5 | dateprel,
6 | heureprel,
7 | de_partition,
8 | -- Quand heureprel est null ou invalide, on choisit arbitrairement 09:00
9 | -- Examples:
10 | -- referenceprel = '07700233713'
11 | -- referenceprel = '02800116863'
12 | COALESCE(
13 | TRY_STRPTIME(
14 | dateprel || ' ' || REPLACE(heureprel, 'h', ':'),
15 | '%Y-%m-%d %H:%M'
16 | ),
17 | TRY_STRPTIME(
18 | dateprel || ' 09:00',
19 | '%Y-%m-%d %H:%M'
20 | )
21 | ) as datetimeprel,
22 | ROW_NUMBER() over (
23 | partition by cdreseau, referenceprel
24 | order by
25 | dateprel,
26 | heureprel
27 | ) as row_num
28 | from
29 | {{ ref('stg_edc__prevelevements') }}
30 |
31 | )
32 |
33 | select * exclude (row_num)
34 | from
35 | ranked
36 | where
37 | row_num = 1
38 |
--------------------------------------------------------------------------------
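The COALESCE / TRY_STRPTIME pair above implements the rule described in the comment: parse dateprel plus heureprel when the hour is usable (after turning "10h30" into "10:30"), otherwise fall back to an arbitrary 09:00. A small check of that behaviour with DuckDB from Python, on made-up values:

    import duckdb

    rows = duckdb.sql("""
        SELECT
            dateprel,
            heureprel,
            COALESCE(
                TRY_STRPTIME(dateprel || ' ' || REPLACE(heureprel, 'h', ':'), '%Y-%m-%d %H:%M'),
                TRY_STRPTIME(dateprel || ' 09:00', '%Y-%m-%d %H:%M')
            ) AS datetimeprel
        FROM (VALUES ('2024-01-15', '10h30'), ('2024-01-15', NULL)) AS t (dateprel, heureprel)
    """).fetchall()
    print(rows)  # the second row falls back to 2024-01-15 09:00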
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2015-2024 Data4Good
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/pipelines/tasks/client/core/https_to_duck_client.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from pipelines.tasks.client.core.duckdb_client import DuckDBClient
4 | from pipelines.tasks.client.core.https_client import HTTPSClient
5 | from pipelines.tasks.config.common import (
6 | logger,
7 | )
8 |
9 |
10 | class HTTPSToDuckDBClient(HTTPSClient, ABC):
11 | def __init__(self, config, duckdb_client: DuckDBClient):
12 | super().__init__(config["source"]["base_url"])
13 | self.config = config
14 | self.duckdb_client = duckdb_client
15 |
16 | def process_datasets(self):
17 | logger.info(f"Processing {self.__class__.__name__} data")
18 | self._download_data()
19 | self._ingest_to_duckdb()
20 | logger.info(f"Finishing processing {self.__class__.__name__} data")
21 |
22 | @abstractmethod
23 | def _download_data(self):
24 | """Download data - to be implemented by subclasses"""
25 | pass
26 |
27 | @abstractmethod
28 | def _ingest_to_duckdb(self):
29 | """Ingest data to DuckDB - to be implemented by subclasses"""
30 | pass
31 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__prelevements_uniques.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | prelevements_cdfirstreseauamont AS (
3 | SELECT DISTINCT
4 | referenceprel,
5 | dateprel,
6 | heureprel,
7 | conclusionprel,
8 | plvconformitebacterio,
9 | plvconformitechimique,
10 | plvconformitereferencebact,
11 | plvconformitereferencechim,
12 | (CASE
13 | WHEN cdreseauamont IS NULL THEN cdreseau
14 | WHEN cdreseauamont IS NOT NULL THEN cdreseauamont
15 | END) AS cdfirstreseauamont,
16 | TRY_STRPTIME(
17 | dateprel || ' ' || REPLACE(heureprel, 'h', ':'), '%Y-%m-%d %H:%M'
18 | ) AS datetimeprel
19 | FROM
20 | {{ ref('stg_edc__prevelevements') }}
21 | ),
22 |
23 | ranked AS (
24 | SELECT
25 | *,
26 | ROW_NUMBER() OVER (
27 | PARTITION BY referenceprel
28 | ORDER BY
29 | dateprel,
30 | heureprel
31 | ) AS row_num
32 | FROM
33 | prelevements_cdfirstreseauamont
34 | )
35 |
36 | SELECT * EXCLUDE (row_num)
37 | FROM
38 | ranked
39 | WHERE
40 | row_num = 1
41 |
--------------------------------------------------------------------------------
/dbt_/models/staging/edc/stg_edc__prevelevements.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | cddept::VARCHAR(3) AS cddept,
3 | cdreseau::VARCHAR(9) AS cdreseau,
4 | inseecommuneprinc::VARCHAR(5) AS inseecommuneprinc,
5 | nomcommuneprinc::VARCHAR AS nomcommuneprinc,
6 | cdreseauamont::VARCHAR(9) AS cdreseauamont,
7 | nomreseauamont::VARCHAR AS nomreseauamont,
8 | REPLACE(pourcentdebit, ' %', '')::TINYINT AS pourcentdebit,
9 | referenceprel::VARCHAR(11) AS referenceprel,
10 | dateprel::DATE AS dateprel,
11 | heureprel::VARCHAR AS heureprel,
12 | conclusionprel::VARCHAR AS conclusionprel,
13 | ugelib::VARCHAR AS ugelib,
14 | distrlib::VARCHAR AS distrlib,
15 | moalib::VARCHAR AS moalib,
16 | plvconformitebacterio::VARCHAR(1) AS plvconformitebacterio,
17 | plvconformitechimique::VARCHAR(1) AS plvconformitechimique,
18 | plvconformitereferencebact::VARCHAR(1) AS plvconformitereferencebact,
19 | plvconformitereferencechim::VARCHAR(1) AS plvconformitereferencechim,
20 | de_partition::SMALLINT AS de_partition,
21 | de_ingestion_date::DATE AS de_ingestion_date,
22 | de_dataset_datetime::VARCHAR AS de_dataset_datetime
23 | FROM {{ source('edc', 'edc_prelevements') }}
24 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__parametres_non_references.sql:
--------------------------------------------------------------------------------
1 | -- Analyse des paramètres présents dans stg_edc__resultats mais absents des
2 | -- références Générations Futures
3 |
4 | WITH parametres_resultats AS (
5 | SELECT
6 | cdparametresiseeaux,
7 | STRING_AGG(DISTINCT cdparametre, ', ') AS cdparametre,
8 | STRING_AGG(DISTINCT libmajparametre, ', ') AS libmajparametre,
9 | STRING_AGG(DISTINCT libminparametre, ', ') AS libminparametre,
10 | STRING_AGG(DISTINCT casparam, ', ') AS casparam,
11 | COUNT(*) AS nb
12 | FROM {{ ref('stg_edc__resultats') }}
13 | WHERE cdparametresiseeaux IS NOT NULL
14 | GROUP BY
15 | cdparametresiseeaux
16 | ),
17 |
18 | parametres_non_references AS (
19 | SELECT pr.*
20 | FROM parametres_resultats AS pr
21 | LEFT JOIN {{ ref('int__valeurs_de_reference') }} AS vr
22 | ON pr.cdparametresiseeaux = vr.cdparametresiseeaux
23 | WHERE vr.cdparametresiseeaux IS NULL
24 | )
25 |
26 | SELECT
27 | cdparametresiseeaux,
28 | cdparametre,
29 | libmajparametre,
30 | libminparametre,
31 | casparam,
32 | nb
33 | FROM parametres_non_references
34 | ORDER BY nb DESC, cdparametresiseeaux ASC
35 |
--------------------------------------------------------------------------------
/pipelines/tasks/upload_database.py:
--------------------------------------------------------------------------------
1 | """
2 | Upload database to S3 storage.
3 |
4 | Args:
5 | - env (str): Environment to upload to ("dev" or "prod")
6 |
7 | Examples:
8 | - upload_database --env dev : Upload database to development environment
9 | - upload_database --env prod : Upload database to production environment
10 | """
11 |
12 | from pipelines.config.config import get_s3_path
13 | from pipelines.tasks.config.common import DUCKDB_FILE
14 | from pipelines.utils.logger import get_logger
15 | from pipelines.utils.storage_client import ObjectStorageClient
16 |
17 | logger = get_logger(__name__)
18 |
19 |
20 | def upload_database_to_storage(env):
21 | """
22 | Upload the database built locally to Storage Object depending on the environment
23 | This requires setting the correct environment variables for the Scaleway credentials
24 | """
25 | s3 = ObjectStorageClient()
26 |
27 | db_path = DUCKDB_FILE # Fichier local
28 | s3_path = get_s3_path(env) # Destination sur S3
29 |
30 | s3.upload_object(local_path=db_path, file_key=s3_path, public_read=True)
31 | logger.info(f"✅ Base uploadée sur s3://{s3.bucket_name}/{s3_path}")
32 |
33 |
34 | def execute(env):
35 | upload_database_to_storage(env)
36 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "pollution_eau"
3 | version = "0.1.0"
4 | description = "Data pipelines, dbt models and web app for the Data For Good pollution_eau project"
5 | readme = "README.md"
6 | requires-python = ">=3.12"
7 | dependencies = [
8 | "click>=8.1.8,<9",
9 | "duckdb==1.2.0",
10 | "duckdb-engine==0.15.0",
11 | "folium>=0.19.4",
12 | "ipykernel>=6.29.5,<7",
13 | "ipython>=8.31.0,<9",
14 | "ipython-sql>=0.5.0,<1",
15 | "jupysql>=0.10.17,<1",
16 | "matplotlib>=3.10.0,<4",
17 | "pandas>=2.2.3,<3",
18 | "requests>=2.32.3,<3",
19 | "boto3==1.34.11,<2",
20 | "python-dotenv>=1.0.1,<2",
21 | "ibis-framework[duckdb]==10.1.0",
22 | "dbt-core>=1.9.2,<2",
23 | "dbt-duckdb>=1.9.1,<2",
24 | "tqdm>=4.67.1,<5",
25 | "pre-commit>=4.1.0",
26 | "ruff>=0.9.10",
27 | "sqlfluff>=3.3.1,<4",
28 | ]
29 |
30 | [dependency-groups]
31 | dev = [
32 | "jupyter>=1.1.0,<2",
33 | "pre-commit>=4.1.0,<5",
34 | "pytest>=8.3.4,<9",
35 | "pytest-cov>=6.0.0,<7"
36 | ]
37 |
38 | [project.optional-dependencies]
39 | pmtiles = [
40 | "tippecanoe>=2.72.0,<3"
41 | ]
42 |
43 | [build-system]
44 | requires = ["hatchling"]
45 | build-backend = "hatchling.build"
46 |
47 | [tool.hatch.build.targets.wheel]
48 | packages = ["pipelines"]
49 |
--------------------------------------------------------------------------------
/pipelines/tasks/client/core/https_client.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Union
3 |
4 | import requests
5 |
6 | from pipelines.tasks.config.common import download_file_from_https, logger
7 |
8 |
9 | class HTTPSClient:
10 | def __init__(self, base_url: str):
11 | self.base_url = base_url
12 |
13 | def download_file_from_https(self, path: str, filepath: Union[str, Path]):
14 | """
15 | Downloads a file from a https link to a local file.
16 | :param path: The url path to download the file.
17 | :param filepath: The path to the local file.
18 | :return: Downloaded file filename.
19 | """
20 | url = self.base_url + path
21 | return download_file_from_https(url, filepath)
22 |
23 | @staticmethod
24 | def get_url_headers(url: str) -> dict:
25 | """
26 | Get url HTTP headers
27 | :param url: static dataset url
28 | :return: HTTP headers
29 | """
30 | try:
31 | response = requests.head(url, timeout=5)
32 | response.raise_for_status()
33 | return dict(response.headers)
34 | except requests.exceptions.RequestException as ex:
35 | logger.error(f"Exception raised: {ex}")
36 | return {}
37 |
--------------------------------------------------------------------------------
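A short usage sketch of the client above, probing the HTTP headers of the INSEE file referenced in config_insee.py before downloading it; get_url_headers is a static method, so no instance is needed:

    from pipelines.tasks.client.core.https_client import HTTPSClient

    # URL pieces taken from get_insee_config() earlier in this dump.
    url = "https://www.insee.fr/fr/statistiques/fichier/7766585/" + "v_commune_2024.csv"
    headers = HTTPSClient.get_url_headers(url)
    print(headers.get("Content-Type"), headers.get("Content-Length"))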
/webapp/components/ui/switch.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import * as React from "react";
4 | import * as SwitchPrimitives from "@radix-ui/react-switch";
5 |
6 | import { cn } from "@/lib/utils";
7 |
8 | const Switch = React.forwardRef<
9 | React.ElementRef,
10 | React.ComponentPropsWithoutRef
11 | >(({ className, ...props }, ref) => (
12 |
20 |
25 |
26 | ));
27 | Switch.displayName = SwitchPrimitives.Root.displayName;
28 |
29 | export { Switch };
30 |
--------------------------------------------------------------------------------
/webapp/components/ui/hover-card.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import * as React from "react";
4 | import * as HoverCardPrimitive from "@radix-ui/react-hover-card";
5 |
6 | import { cn } from "@/lib/utils";
7 |
8 | const HoverCard = HoverCardPrimitive.Root;
9 |
10 | const HoverCardTrigger = HoverCardPrimitive.Trigger;
11 |
12 | const HoverCardContent = React.forwardRef<
13 | React.ElementRef,
14 | React.ComponentPropsWithoutRef
15 | >(({ className, align = "center", sideOffset = 4, ...props }, ref) => (
16 |
26 | ));
27 | HoverCardContent.displayName = HoverCardPrimitive.Content.displayName;
28 |
29 | export { HoverCard, HoverCardTrigger, HoverCardContent };
30 |
--------------------------------------------------------------------------------
/pipelines/tasks/config/config_pmtiles.py:
--------------------------------------------------------------------------------
1 | """Configuration for DuckDB-based PMTiles generation."""
2 |
3 | # Value columns to pivot for both data types
4 | value_columns = [
5 | "resultat",
6 | "ratio",
7 | "date_dernier_prel",
8 | "nb_parametres",
9 | "nb_prelevements",
10 | "nb_sup_valeur_sanitaire",
11 | "parametres_detectes",
12 | ]
13 |
14 | # Configuration for both commune and UDI data processing
15 | config_pmtiles: dict[str, dict[str, str | list[str] | None]] = {
16 | "communes": {
17 | "result_table": "web__resultats_communes",
18 | "geom_table": "int__commune_geom",
19 | "id_columns": ["commune_code_insee", "commune_nom"],
20 | "result_id_column": "commune_code_insee",
21 | "geom_id_column": "com_code",
22 | "geom_name_column": "com_name",
23 | "output_filename": "commune_data",
24 | "layer_name": "data_communes",
25 | },
26 | "udi": {
27 | "result_table": "web__resultats_udi",
28 | "geom_table": "int__udi_geom",
29 | "id_columns": ["cdreseau", "nomreseaux"],
30 | "result_id_column": "cdreseau",
31 | "geom_id_column": "code_udi",
32 | "geom_name_column": None, # UDI geom table doesn't have a name column
33 | "output_filename": "udi_data",
34 | "layer_name": "data_udi",
35 | },
36 | }
37 |
--------------------------------------------------------------------------------
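Each entry above describes how a dbt output table joins onto a geometry table before tiling: result_id_column matches geom_id_column, and value_columns are the metrics that get pivoted per category. A sketch, not the actual generate_pmtiles.py (which is not reproduced in this dump), of the join the communes entry implies, assuming the database has already been built:

    import duckdb

    from pipelines.tasks.config.config_pmtiles import config_pmtiles

    cfg = config_pmtiles["communes"]
    con = duckdb.connect("database/data.duckdb", read_only=True)

    # Count result rows attached to each commune geometry, using only columns named in the config.
    query = f"""
        SELECT g.{cfg['geom_id_column']}, count(r.{cfg['result_id_column']}) AS nb_resultats
        FROM {cfg['geom_table']} AS g
        LEFT JOIN {cfg['result_table']} AS r
            ON g.{cfg['geom_id_column']} = r.{cfg['result_id_column']}
        GROUP BY 1
    """
    print(con.sql(query).limit(5).fetchall())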
/pipelines/tasks/client/commune_client.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 |
4 | from pipelines.tasks.client.core.https_to_duck_client import HTTPSToDuckDBClient
5 | from pipelines.tasks.config.common import (
6 | CACHE_FOLDER,
7 | logger,
8 | )
9 |
10 |
11 | class CommuneClient(HTTPSToDuckDBClient):
12 | def __init__(self, config, duckdb_client):
13 | super().__init__(config, duckdb_client)
14 |
15 | def _download_data(self):
16 | """Download the COG communes dataset."""
17 | logger.info("Launching processing of Insee communes")
18 |
19 | os.makedirs(CACHE_FOLDER, exist_ok=True)
20 | self.download_file_from_https(
21 | path=self.config["source"]["id"],
22 | filepath=Path(CACHE_FOLDER, self.config["file"]["file_name"]),
23 | )
24 |
25 | def _ingest_to_duckdb(self):
26 | """Implement INSEE specific ingestion logic"""
27 | self.duckdb_client.drop_tables([self.config["file"]["table_name"]])
28 | self.duckdb_client.ingest_from_csv(
29 | ingest_type="CREATE",
30 | table_name=self.config["file"]["table_name"],
31 | de_partition=self.config["source"]["datetime"][:4],
32 | dataset_datetime=self.config["source"]["datetime"],
33 | filepath=Path(CACHE_FOLDER, self.config["file"]["file_name"]),
34 | )
35 |
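A minimal usage sketch, assuming a config dict shaped like the one read above (the source and file values are hypothetical placeholders, not the project's real COG settings):

    from pipelines.tasks.client.core.duckdb_client import DuckDBClient
    from pipelines.tasks.client.commune_client import CommuneClient

    # Hypothetical config; only the keys read by CommuneClient are shown
    config = {
        "source": {"id": "cog/v_commune_2024.csv", "datetime": "20240101-000000"},
        "file": {"file_name": "v_commune_2024.csv", "table_name": "cog_communes"},
    }

    client = CommuneClient(config, DuckDBClient())
    client._download_data()     # fetches the CSV into CACHE_FOLDER
    client._ingest_to_duckdb()  # drops and recreates the table from the cached CSV

In practice the base HTTPSToDuckDBClient presumably orchestrates these two steps; they are called directly here only to show their order.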
--------------------------------------------------------------------------------
/dbt_/models/staging/edc/val_traduite__docs.md:
--------------------------------------------------------------------------------
1 | {% docs val_traduite_docs %}
2 | Numeric translation of the textual result (Rqana) of a measurement, obtained by automatically applying predefined rules.
3 |
4 | Translation principle:
5 |
6 | | Result | Translated value | Comment |
7 | |:--------:|:---------------:|:--------------------------------------------------:|
8 | | XXX | XXX | |
9 | | XXX | -XXX | |
10 | | XXX | XXX | |
12 | | TRACES | 0 | Between the quantification and detection thresholds |
13 | | INCOMPT. | 1,11 | Value too high in microbiology. Prefer >XXX |
14 | | PRESENCE | 1 | Presence detected but not quantified |
15 | | N.D | 0 | < detection threshold |
16 | | ILLISIBL | NULL | Not interpretable in bacteriology |
17 | | ,
16 | React.ComponentPropsWithoutRef
17 | >(({ className, sideOffset = 4, ...props }, ref) => (
18 |
19 |
28 |
29 | ));
30 | TooltipContent.displayName = TooltipPrimitive.Content.displayName;
31 |
32 | export { Tooltip, TooltipTrigger, TooltipContent, TooltipProvider };
33 |
--------------------------------------------------------------------------------
/dbt_/models/staging/atlasante/_atlasante_models.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | models:
4 | - name: stg_atlasante_udi_2023
5 | description: "Distribution units (UDI) in metropolitan France"
6 | columns:
7 | - name: gid
8 | type: INTEGER
9 | - name: code_udi
10 | type: VARCHAR
11 | - name: ins_nom
12 | type: VARCHAR
13 | - name: uge_nom
14 | type: VARCHAR
15 | - name: udi_pop
16 | type: VARCHAR
17 | - name: geom
18 | type: GEOMETRY
19 | - name: ingestion_date
20 | type: DATE
21 | - name: stg_atlasante_udi_corse
22 | description: "Distribution units (UDI) in Corsica"
23 | columns:
24 | - name: gid
25 | type: INTEGER
26 | - name: cn_udi
27 | type: VARCHAR
28 | - name: nom_udi
29 | type: VARCHAR
30 | - name: geom
31 | type: GEOMETRY
32 | - name: ingestion_date
33 | type: DATE
34 | - name: stg_atlasante_udi_2024
35 | description: "Distribution units (UDI) in metropolitan France"
36 | columns:
37 | - name: gid
38 | type: INTEGER
39 | - name: code_udi
40 | type: VARCHAR
41 | - name: ins_nom
42 | type: VARCHAR
43 | - name: uge_nom
44 | type: VARCHAR
45 | - name: udi_pop
46 | type: VARCHAR
47 | - name: geom
48 | type: GEOMETRY
49 | - name: ingestion_date
50 | type: DATE
51 |
--------------------------------------------------------------------------------
/webapp/lib/property.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Parse a property name into its components
3 | * @param propertyName The full property name in format period_category_variable
4 | * @returns An object with the period, category, and variable
5 | */
6 | export function parsePropertyName(propertyName: string): {
7 | period: string;
8 | category: string;
9 | variable: string;
10 | } | null {
11 | // Handle null or empty values
12 | if (!propertyName) {
13 | return null;
14 | }
15 |
16 | // Uses regex to match the pattern
17 | const pattern = /^(bilan_annuel_\d{4}|dernier_prel)_([^_]+)_(.+)$/;
18 | const match = propertyName.match(pattern);
19 |
20 | // If the property name doesn't match our expected format
21 | if (!match) {
22 | return null;
23 | }
24 |
25 | // Extract components from regex match
26 | const [, period, category, variable] = match;
27 |
28 | return {
29 | period,
30 | category,
31 | variable,
32 | };
33 | }
34 |
35 | /**
36 | * Get the full property name from components
37 | * @param period The period (e.g., "bilan_annuel_2022", "dernier_prel")
38 | * @param category The category (e.g., "pfas", "cvm")
39 | * @param variable The variable (e.g., "resultat", "parametres_detectes")
40 | * @returns The full property name
41 | */
42 | export function getPropertyName(
43 | period: string,
44 | category: string,
45 | variable: string,
46 | ): string {
47 | return `${period}_${category}_${variable}`;
48 | }
49 |
--------------------------------------------------------------------------------
/dbt_/dbt_project.yml:
--------------------------------------------------------------------------------
1 | # Name your project! Project names should contain only lowercase characters
2 | # and underscores. A good package name should reflect your organization's
3 | # name or the intended use of these models
4 | name: "dbt_"
5 | version: "1.0.0"
6 |
7 | # This setting configures which "profile" dbt uses for this project.
8 | profile: "dbt_"
9 |
10 | # These configurations specify where dbt should look for different types of files.
11 | # The `model-paths` config, for example, states that models in this project can be
12 | # found in the "models/" directory. You probably won't need to change these!
13 | model-paths: ["models"]
14 | analysis-paths: ["analyses"]
15 | test-paths: ["tests"]
16 | seed-paths: ["seeds"]
17 | macro-paths: ["macros"]
18 | snapshot-paths: ["snapshots"]
19 |
20 | clean-targets: # directories to be removed by `dbt clean`
21 | - "target"
22 | - "dbt_packages"
23 |
24 | # Configuring models
25 | # Full documentation: https://docs.getdbt.com/docs/configuring-models
26 |
27 | # Here, dbt is configured to build staging and intermediate models as views
28 | # and website models as tables. These settings can be overridden in the individual model
29 | # files using the `{{ config(...) }}` macro.
30 | models:
31 | dbt_:
32 | # Config indicated by + applies to all files under the matching models/ subdirectory
33 | staging:
34 | +materialized: view
35 | intermediate:
36 | +materialized: view
37 | website:
38 | +materialized: table
39 |
--------------------------------------------------------------------------------
/webapp/components/ui/popover.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import * as React from "react";
4 | import * as PopoverPrimitive from "@radix-ui/react-popover";
5 |
6 | import { cn } from "@/lib/utils";
7 |
8 | const Popover = PopoverPrimitive.Root;
9 |
10 | const PopoverTrigger = PopoverPrimitive.Trigger;
11 |
12 | const PopoverAnchor = PopoverPrimitive.Anchor;
13 |
14 | const PopoverContent = React.forwardRef<
15 | React.ElementRef,
16 | React.ComponentPropsWithoutRef
17 | >(({ className, align = "center", sideOffset = 4, ...props }, ref) => (
18 |
19 |
29 |
30 | ));
31 | PopoverContent.displayName = PopoverPrimitive.Content.displayName;
32 |
33 | export { Popover, PopoverTrigger, PopoverContent, PopoverAnchor };
34 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/sqlfluff/sqlfluff
3 | rev: 3.3.1 # Check GitHub for the latest version
4 | hooks:
5 | - id: sqlfluff-lint
6 | args: [ "--config", ".sqlfluff" ] # Adapt to the SQL dialect you use (bigquery, snowflake, etc.)
7 | files: dbt_/
8 |
9 | - id: sqlfluff-fix # Optional, automatically fixes lint errors
10 | args: [ "--config", ".sqlfluff" ]
11 | files: dbt_/
12 | - repo: https://github.com/astral-sh/ruff-pre-commit
13 | # Ruff version.
14 | rev: v0.9.3
15 | hooks:
16 | # Run the linter.
17 | - id: ruff
18 | args: [ --fix ]
19 | # Run the formatter.
20 | - id: ruff-format
21 | - repo: https://github.com/pre-commit/pre-commit-hooks
22 | rev: v5.0.0
23 | hooks:
24 | - id: check-merge-conflict
25 | - id: mixed-line-ending
26 | #- repo: https://github.com/pycqa/bandit
27 | # rev: 1.7.4
28 | # hooks:
29 | # - id: bandit
30 | # exclude: tests/
31 |
32 | # This pre-commit hook only works with poetry, so it is commented out since we use uv
33 | # - repo: https://github.com/Lucas-C/pre-commit-hooks-safety
34 | # rev: v1.3.1
35 | # hooks:
36 | # - id: python-safety-dependencies-check
37 |
38 | - repo: https://github.com/rbubley/mirrors-prettier
39 | rev: v3.5.1
40 | hooks:
41 | - id: prettier
42 | types_or: [markdown, javascript, jsx, ts, tsx, json]
43 | files: 'webapp'
44 |
--------------------------------------------------------------------------------
/pipelines/utils/utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from pathlib import Path
3 | from urllib.parse import urlparse
4 |
5 | import requests
6 |
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | def get_project_root() -> Path:
12 | """
13 | Returns project root folder when called from anywhere in the project
14 | This is useful for specifying paths that are relative to the project root
15 | e.g. `local_db_path = Path(get_project_root(), "database/data.duckdb")`
16 | """
17 | return Path(__file__).parent.parent.parent
18 |
19 |
20 | def get_url_headers(url: str) -> dict:
21 | """
22 | Get url HTTP headers
23 | :param url: static dataset url
24 | :return: HTTP headers
25 | """
26 | try:
27 | response = requests.head(url, timeout=5)
28 | response.raise_for_status()
29 | return dict(response.headers)
30 | except requests.exceptions.RequestException as ex:
31 | logger.error(f"Exception raised: {ex}")
32 | return {}
33 |
34 |
35 | def extract_dataset_datetime(url: str) -> str:
36 | """
37 | Extract the dataset datetime from dataset location url
38 | which can be found in the static dataset url headers
39 | :param url: static dataset url
40 | :return: dataset datetime under format "YYYYMMDD-HHMMSS"
41 | """
42 | metadata = get_url_headers(url)
43 | parsed_url = urlparse(metadata.get("location"))
44 | path_parts = parsed_url.path.strip("/").split("/")
45 | return path_parts[-2]
46 |
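To make the behaviour of extract_dataset_datetime concrete, here is the same parsing applied to a hypothetical value of the "location" response header (the real value depends on the upstream server):

    from urllib.parse import urlparse

    # Hypothetical redirect target returned in the "location" header
    location = "https://static.example.org/datasets/dis/20240220-103000/dis-2024.zip"

    path_parts = urlparse(location).path.strip("/").split("/")
    print(path_parts[-2])  # -> "20240220-103000", the "YYYYMMDD-HHMMSS" dataset datetime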
--------------------------------------------------------------------------------
/pipelines/config/config.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from dotenv import load_dotenv
4 |
5 | from pipelines.utils.logger import get_logger
6 |
7 | logger = get_logger(__name__)
8 |
9 | current_dir = os.path.dirname(os.path.abspath(__file__))
10 | # Construct the path to the .env file
11 | dotenv_path = os.path.join(current_dir, ".env")
12 |
13 |
14 | def load_env_variables():
15 | load_dotenv(dotenv_path)
16 |
17 |
18 | def get_environment(default="prod"):
19 | env = os.getenv("ENV", default)
20 | logger.info(f"Running on env {env}")
21 | if env not in ["dev", "prod"]:
22 | raise ValueError(f"Invalid environment: {env}. Must be 'dev' or 'prod'.")
23 | return env
24 |
25 |
26 | def get_s3_path(env, filename="data.duckdb"):
27 | return f"{env}/database/{filename}"
28 |
29 |
30 | def get_s3_udi_path(env, filename):
31 | return f"{env}/UDI/{filename}"
32 |
33 |
34 | def get_s3_path_geojson(env, filename="new-georef-france-commune-prelevement.geojson"):
35 | """Get the S3 path for GeoJSON file based on environment.
36 |
37 | Args:
38 | env (str): Environment ("dev" or "prod")
39 |
40 | Returns:
41 | str: S3 path for the GeoJSON file
42 | """
43 | if env not in ["dev", "prod"]:
44 | raise ValueError("Environment must be 'dev' or 'prod'")
45 | return f"{env}/geojson/{filename}"
46 |
47 |
48 | def get_s3_path_pmtiles(env, filename="georef-france-commune-prelevement.pmtiles"):
49 | if env not in ["dev", "prod"]:
50 | raise ValueError("Environment must be 'dev' or 'prod'")
51 | return f"{env}/pmtiles/{filename}"
52 |
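As a quick illustration of how these helpers compose S3 keys, the default file names above give:

    from pipelines.config.config import get_s3_path, get_s3_path_geojson, get_s3_path_pmtiles

    print(get_s3_path("prod"))         # prod/database/data.duckdb
    print(get_s3_path_geojson("dev"))  # dev/geojson/new-georef-france-commune-prelevement.geojson
    print(get_s3_path_pmtiles("dev"))  # dev/pmtiles/georef-france-commune-prelevement.pmtiles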
--------------------------------------------------------------------------------
/dbt_/models/intermediate/nitrate/int__resultats_nitrate_udi_annuel.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | nitrate_prels AS (
3 | -- Some samples have several analyses for the same substance
4 | -- SELECT DISTINCT does not deduplicate those cases
5 | -- So referenceprel is not unique in this query
6 | SELECT DISTINCT
7 | de_partition AS annee,
8 | cdreseau,
9 | referenceprel,
10 | datetimeprel,
11 | valeur_sanitaire_1,
12 | valtraduite
13 | FROM
14 | {{ ref('int__resultats_udi_communes') }}
15 | WHERE
16 | categorie = 'nitrate'
17 | AND cdparametresiseeaux = 'NO3'
18 | )
19 |
20 | SELECT
21 | cdreseau,
22 | annee,
23 | 'nitrate' AS categorie,
24 | 'bilan_annuel_' || annee AS periode,
25 | count(
26 | DISTINCT
27 | CASE
28 | WHEN
29 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
30 | THEN referenceprel
31 | END
32 | ) AS nb_depassements,
33 | count(DISTINCT referenceprel) AS nb_prelevements,
34 | (
35 | count(
36 | DISTINCT
37 | CASE
38 | WHEN
39 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
40 | THEN referenceprel
41 | END
42 | )::float
43 | /
44 | count(DISTINCT referenceprel)::float
45 | ) AS ratio,
46 | to_json({
47 | 'NO3': max(valtraduite)
48 | }) AS parametres_detectes,
49 | max(datetimeprel) AS date_dernier_prel
50 |
51 | FROM nitrate_prels
52 |
53 | GROUP BY cdreseau, annee
54 |
--------------------------------------------------------------------------------
/.github/workflows/test_pipelines.yaml:
--------------------------------------------------------------------------------
1 | name: 🧪 Run Pipelines Tests
2 |
3 | on:
4 | pull_request:
5 | branches: [main]
6 | paths:
7 | - 'pipelines/**'
8 |
9 | env:
10 | SCW_ACCESS_KEY: ${{ secrets.SCW_ACCESS_KEY }}
11 | SCW_SECRET_KEY: ${{ secrets.SCW_SECRET_KEY }}
12 |
13 | jobs:
14 | test:
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - name: Checkout code
19 | uses: actions/checkout@v3
20 |
21 | - name: Check if SCW_ACCESS_KEY and SCW_SECRET_KEY are set
22 | run: |
23 | if [ -z "$SCW_ACCESS_KEY" ]; then
24 | echo "SCW_ACCESS_KEY is not set, cannot run tests without access key"
25 | exit 1
26 | else
27 | echo "SCW_ACCESS_KEY is properly set."
28 | fi
29 |
30 | if [ -z "$SCW_SECRET_KEY" ]; then
31 | echo "SCW_SECRET_KEY is not set, cannot run tests without secret key"
32 | exit 1
33 | else
34 | echo "SCW_SECRET_KEY is properly set."
35 | fi
36 |
37 | - name: Install a specific version of uv
38 | uses: astral-sh/setup-uv@v5
39 | with:
40 | version: ">=0.4.0"
41 |
42 | - name: Install dependencies
43 | run: |
44 | uv sync
45 |
46 | - name: Run tests with coverage
47 | run: uv run pytest -s --cov=. --cov-report=term-missing
48 |
49 | - name: test dbt
50 | run: |
51 | cd dbt_
52 | uv run dbt deps
53 | uv run dbt seed
54 | uv run dbt run
55 |
56 | - name: test pmtiles generation
57 | run: |
58 | uv pip install .[pmtiles]
59 | uv run pipelines/run.py run generate_pmtiles --env dev
60 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/pesticide/sub_active/int__resultats_sub_active_udi_annuel.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | sub_active_prels AS (
3 | SELECT DISTINCT
4 | de_partition AS annee,
5 | cdreseau,
6 | referenceprel,
7 | datetimeprel,
8 | cdparametresiseeaux,
9 | valtraduite,
10 | limite_qualite,
11 | valeur_sanitaire_1
12 | FROM
13 | {{ ref('int__resultats_udi_communes') }}
14 | WHERE
15 | categorie = 'pesticide'
16 | AND
17 | categorie_2 = 'sub_active'
18 | )
19 |
20 | SELECT
21 | cdreseau,
22 | annee,
23 | 'sub_active' AS categorie,
24 | 'bilan_annuel_' || annee AS periode,
25 | COUNT(
26 | DISTINCT
27 | CASE
28 | WHEN
29 | valtraduite IS NOT NULL AND valtraduite > limite_qualite
30 | THEN referenceprel
31 | END
32 | ) AS nb_depassements,
33 | COUNT(
34 | DISTINCT
35 | CASE
36 | WHEN
37 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
38 | THEN referenceprel
39 | END
40 | ) AS nb_sup_valeur_sanitaire,
41 | COUNT(DISTINCT referenceprel) AS nb_prelevements,
42 | (
43 | COUNT(
44 | DISTINCT
45 | CASE
46 | WHEN
47 | valtraduite IS NOT NULL AND valtraduite > limite_qualite
48 | THEN referenceprel
49 | END
50 | )::float
51 | /
52 | COUNT(DISTINCT referenceprel)::float
53 | ) AS ratio_limite_qualite
54 |
55 | FROM sub_active_prels
56 |
57 | GROUP BY cdreseau, annee
58 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/cvm/int__resultats_cvm_udi_annuel.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | cvm_prels AS (
3 | -- Some samples have several analyses for the same substance
4 | -- This is very rare for CVM (around ten cases)
5 | -- SELECT DISTINCT does not deduplicate those cases
6 | -- So referenceprel is not unique in this query
7 | SELECT DISTINCT
8 | de_partition AS annee,
9 | cdreseau,
10 | referenceprel,
11 | datetimeprel,
12 | limite_qualite,
13 | valtraduite
14 | FROM
15 | {{ ref('int__resultats_udi_communes') }}
16 | WHERE
17 | categorie = 'cvm'
18 | )
19 |
20 | SELECT
21 | cdreseau,
22 | annee,
23 | 'cvm' AS categorie,
24 | 'bilan_annuel_' || annee AS periode,
25 | count(
26 | DISTINCT
27 | CASE
28 | WHEN
29 | valtraduite IS NOT NULL AND valtraduite > limite_qualite
30 | THEN referenceprel
31 | END
32 | ) AS nb_depassements,
33 | count(DISTINCT referenceprel) AS nb_prelevements,
34 | (
35 | count(
36 | DISTINCT
37 | CASE
38 | WHEN
39 | valtraduite IS NOT NULL AND valtraduite > limite_qualite
40 | THEN referenceprel
41 | END
42 | )::float
43 | /
44 | count(DISTINCT referenceprel)::float
45 | ) AS ratio_limite_qualite,
46 | to_json({
47 | 'CLVYL': max(valtraduite)
48 | }) AS parametres_detectes,
49 | max(datetimeprel) AS date_dernier_prel
50 |
51 | FROM cvm_prels
52 |
53 | GROUP BY cdreseau, annee
54 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/nitrate/int__resultats_nitrate_commune_annuel.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | nitrate_prels AS (
3 | -- Some samples have several analyses for the same substance
4 | -- SELECT DISTINCT does not deduplicate those cases
5 | -- So referenceprel is not unique in this query
6 | SELECT DISTINCT
7 | de_partition AS annee,
8 | inseecommune,
9 | referenceprel,
10 | datetimeprel,
11 | valeur_sanitaire_1,
12 | valtraduite
13 | FROM
14 | {{ ref('int__resultats_udi_communes') }}
15 | WHERE
16 | categorie = 'nitrate'
17 | AND cdparametresiseeaux = 'NO3'
18 | )
19 |
20 | SELECT
21 | inseecommune,
22 | annee,
23 | 'nitrate' AS categorie,
24 | 'bilan_annuel_' || annee AS periode,
25 | count(
26 | DISTINCT
27 | CASE
28 | WHEN
29 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
30 | THEN referenceprel
31 | END
32 | ) AS nb_depassements,
33 | count(DISTINCT referenceprel) AS nb_prelevements,
34 | (
35 | count(
36 | DISTINCT
37 | CASE
38 | WHEN
39 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
40 | THEN referenceprel
41 | END
42 | )::float
43 | /
44 | count(DISTINCT referenceprel)::float
45 | ) AS ratio,
46 | to_json({
47 | 'NO3': max(valtraduite)
48 | }) AS parametres_detectes,
49 | max(datetimeprel) AS date_dernier_prel
50 |
51 | FROM nitrate_prels
52 |
53 | GROUP BY inseecommune, annee
54 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/pesticide/sub_active/int__resultats_sub_active_commune_annuel.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | sub_active_prels AS (
3 | SELECT DISTINCT
4 | de_partition AS annee,
5 | inseecommune,
6 | referenceprel,
7 | datetimeprel,
8 | cdparametresiseeaux,
9 | valtraduite,
10 | limite_qualite,
11 | valeur_sanitaire_1
12 | FROM
13 | {{ ref('int__resultats_udi_communes') }}
14 | WHERE
15 | categorie = 'pesticide'
16 | AND
17 | categorie_2 = 'sub_active'
18 | )
19 |
20 | SELECT
21 | inseecommune,
22 | annee,
23 | 'sub_active' AS categorie,
24 | 'bilan_annuel_' || annee AS periode,
25 | COUNT(
26 | DISTINCT
27 | CASE
28 | WHEN
29 | valtraduite IS NOT NULL AND valtraduite > limite_qualite
30 | THEN referenceprel
31 | END
32 | ) AS nb_depassements,
33 | COUNT(
34 | DISTINCT
35 | CASE
36 | WHEN
37 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
38 | THEN referenceprel
39 | END
40 | ) AS nb_sup_valeur_sanitaire,
41 | COUNT(DISTINCT referenceprel) AS nb_prelevements,
42 | (
43 | COUNT(
44 | DISTINCT
45 | CASE
46 | WHEN
47 | valtraduite IS NOT NULL AND valtraduite > limite_qualite
48 | THEN referenceprel
49 | END
50 | )::float
51 | /
52 | COUNT(DISTINCT referenceprel)::float
53 | ) AS ratio_limite_qualite
54 |
55 | FROM sub_active_prels
56 |
57 | GROUP BY inseecommune, annee
58 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/cvm/int__resultats_cvm_commune_annuel.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | cvm_prels AS (
3 | -- Some samples have several analyses for the same substance
4 | -- This is very rare for CVM (around ten cases)
5 | -- SELECT DISTINCT does not deduplicate those cases
6 | -- So referenceprel is not unique in this query
7 | SELECT DISTINCT
8 | de_partition AS annee,
9 | inseecommune,
10 | referenceprel,
11 | datetimeprel,
12 | limite_qualite,
13 | valtraduite
14 | FROM
15 | {{ ref('int__resultats_udi_communes') }}
16 | WHERE
17 | categorie = 'cvm'
18 | )
19 |
20 | SELECT
21 | inseecommune,
22 | annee,
23 | 'cvm' AS categorie,
24 | 'bilan_annuel_' || annee AS periode,
25 | count(
26 | DISTINCT
27 | CASE
28 | WHEN
29 | valtraduite IS NOT NULL AND valtraduite > limite_qualite
30 | THEN referenceprel
31 | END
32 | ) AS nb_depassements,
33 | count(DISTINCT referenceprel) AS nb_prelevements,
34 | (
35 | count(
36 | DISTINCT
37 | CASE
38 | WHEN
39 | valtraduite IS NOT NULL AND valtraduite > limite_qualite
40 | THEN referenceprel
41 | END
42 | )::float
43 | /
44 | count(DISTINCT referenceprel)::float
45 | ) AS ratio_limite_qualite,
46 | to_json({
47 | 'CLVYL': max(valtraduite)
48 | }) AS parametres_detectes,
49 | max(datetimeprel) AS date_dernier_prel
50 |
51 | FROM cvm_prels
52 |
53 | GROUP BY inseecommune, annee
54 |
--------------------------------------------------------------------------------
/webapp/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "13_pollution_eau",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "dev": "next dev --turbopack",
7 | "build": "next build",
8 | "start": "next start",
9 | "lint": "next lint"
10 | },
11 | "dependencies": {
12 | "@duckdb/node-api": "^1.2.0-alpha.14",
13 | "@radix-ui/react-dialog": "^1.1.15",
14 | "@radix-ui/react-hover-card": "^1.1.15",
15 | "@radix-ui/react-popover": "^1.1.15",
16 | "@radix-ui/react-scroll-area": "^1.2.10",
17 | "@radix-ui/react-select": "^2.2.6",
18 | "@radix-ui/react-slot": "^1.2.3",
19 | "@radix-ui/react-switch": "^1.2.6",
20 | "@radix-ui/react-tooltip": "^1.2.8",
21 | "class-variance-authority": "^0.7.1",
22 | "clsx": "^2.1.1",
23 | "cmdk": "^1.0.4",
24 | "lucide-react": "^0.475.0",
25 | "maplibre-gl": "^5.1.0",
26 | "next": "^15.2.3",
27 | "pmtiles": "^4.2.1",
28 | "protomaps-themes-base": "^4.4.0",
29 | "react": "^19.0.0",
30 | "react-dom": "^19.0.0",
31 | "react-map-gl": "^8.0.0",
32 | "tailwind-merge": "^3.0.1",
33 | "tailwindcss-animate": "^1.0.7"
34 | },
35 | "devDependencies": {
36 | "@eslint/eslintrc": "^3",
37 | "@types/node": "^20",
38 | "@types/react": "^19",
39 | "@types/react-dom": "^19",
40 | "eslint": "^9",
41 | "eslint-config-next": "15.1.7",
42 | "eslint-config-prettier": "^10.0.1",
43 | "postcss": "^8",
44 | "prettier": "3.5.1",
45 | "tailwindcss": "^3.4",
46 | "typescript": "^5"
47 | },
48 | "resolutions": {
49 | "react": "^19.0.0",
50 | "react-dom": "^19.0.0",
51 | "@types/react": "^19",
52 | "@types/react-dom": "^19"
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/pipelines/tasks/config/config_geojson.py:
--------------------------------------------------------------------------------
1 | def get_opendatasoft_config() -> dict:
2 | """Get OpenDataSoft configuration parameters.
3 |
4 | Returns:
5 | dict: Configuration parameters for OpenDataSoft client
6 | """
7 |
8 | return {
9 | "source": {
10 | "base_url": "https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/",
11 | "id": "georef-france-commune/exports/geojson",
12 | "datetime": "20240220",
13 | },
14 | "file": {
15 | "file_name": "georef-france-commune.geojson",
16 | "table_name": "opendatasoft_communes",
17 | },
18 | }
19 |
20 |
21 | config_merge_geo = {
22 | "communes": {
23 | "result_table": "web__resultats_communes",
24 | "geom_table": "stg_communes__opendatasoft_json",
25 | "groupby_columns": ["commune_code_insee", "commune_nom"],
26 | "result_join_column": "commune_code_insee",
27 | "geom_join_column": "com_code",
28 | "upload_file_name": "georef-france-communes-prelevement.geojson",
29 | },
30 | "udi": {
31 | "result_table": "web__resultats_udi",
32 | "geom_table": "stg_udi_json",
33 | "groupby_columns": ["cdreseau", "nomreseaux"],
34 | "result_join_column": "cdreseau",
35 | "geom_join_column": "code_udi",
36 | "upload_file_name": "georef-france-udi-prelevement.geojson",
37 | },
38 | }
39 |
40 | col_input = ["periode", "categorie"]
41 |
42 | list_column_result = [
43 | "resultat",
44 | "ratio",
45 | "date_dernier_prel",
46 | "nb_parametres",
47 | "nb_prelevements",
48 | "nb_sup_valeur_sanitaire",
49 | "parametres_detectes",
50 | ]
51 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/tous/int__resultats_tous_udi_dernier.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | cdreseau,
3 | 'tous' AS categorie,
4 | 'dernier_prel' AS periode,
5 | MAX(date_dernier_prel) AS date_dernier_prel,
6 | SUM(nb_parametres) AS nb_parametres,
7 | CASE
8 | WHEN BOOL_OR(resultat IN (
9 | 'sup_valeur_sanitaire',
10 | 'sup_valeur_sanitaire_2'
11 | )) THEN 'sup_limite_sanitaire'
12 |
13 | WHEN BOOL_OR(resultat IN (
14 | 'cvm_sup_0_5',
15 | 'somme_20pfas_sup_0_1',
16 | 'sup_limite_qualite'
17 | )) THEN 'sup_limite_qualite'
18 |
19 | WHEN BOOL_OR(resultat IN (
20 | 'inf_valeur_sanitaire',
21 | 'inf_limite_qualite',
22 | -- 'inf_limites_sup_0_1',
23 | --'sup_limite_indicative',
24 | 'inf_limites',
25 | 'somme_20pfas_inf_0_1_et_4pfas_sup_0_02',
26 | 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02',
27 | 'sup_limite_qualite_2036',
28 | 'no3_inf_25',
29 | 'no3_inf_40'
30 |
31 | )) THEN 'quantifie'
32 |
33 | WHEN BOOL_AND(resultat IN (
34 | 'non_quantifie'
35 | )) THEN 'non_quantifie'
36 |
37 | ELSE 'erreur'
38 | END AS resultat
39 |
40 | FROM {{ ref('int__union_resultats_udi') }}
41 | WHERE
42 | periode = 'dernier_prel'
43 | AND
44 | categorie NOT IN (
45 | 'sub_active',
46 | 'metabolite',
47 | 'metabolite_esa_metolachlore',
48 | 'metabolite_chlorothalonil_r471811',
49 | 'metabolite_chloridazone_desphenyl',
50 | 'metabolite_chloridazone_methyl_desphenyl',
51 | 'metabolite_atrazine_desethyl'
52 | )
53 | GROUP BY cdreseau
54 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/tous/int__resultats_tous_commune_dernier.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | inseecommune,
3 | 'tous' AS categorie,
4 | 'dernier_prel' AS periode,
5 | MAX(date_dernier_prel) AS date_dernier_prel,
6 | SUM(nb_parametres) AS nb_parametres,
7 | CASE
8 | WHEN BOOL_OR(resultat IN (
9 | 'sup_valeur_sanitaire',
10 | 'sup_valeur_sanitaire_2'
11 | )) THEN 'sup_limite_sanitaire'
12 |
13 | WHEN BOOL_OR(resultat IN (
14 | 'cvm_sup_0_5',
15 | 'somme_20pfas_sup_0_1',
16 | 'sup_limite_qualite'
17 | )) THEN 'sup_limite_qualite'
18 |
19 | WHEN BOOL_OR(resultat IN (
20 | 'inf_valeur_sanitaire',
21 | 'inf_limite_qualite',
22 | -- 'inf_limites_sup_0_1',
23 | --'sup_limite_indicative',
24 | 'inf_limites',
25 | 'somme_20pfas_inf_0_1_et_4pfas_sup_0_02',
26 | 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02',
27 | 'sup_limite_qualite_2036',
28 | 'no3_inf_25',
29 | 'no3_inf_40'
30 |
31 | )) THEN 'quantifie'
32 |
33 | WHEN BOOL_AND(resultat IN (
34 | 'non_quantifie'
35 | )) THEN 'non_quantifie'
36 |
37 | ELSE 'erreur'
38 | END AS resultat
39 |
40 | FROM {{ ref('int__union_resultats_commune') }}
41 | WHERE
42 | periode = 'dernier_prel'
43 | AND
44 | categorie NOT IN (
45 | 'sub_active',
46 | 'metabolite',
47 | 'metabolite_esa_metolachlore',
48 | 'metabolite_chlorothalonil_r471811',
49 | 'metabolite_chloridazone_desphenyl',
50 | 'metabolite_chloridazone_methyl_desphenyl',
51 | 'metabolite_atrazine_desethyl'
52 | )
53 | GROUP BY inseecommune
54 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/cvm/int__resultats_cvm_udi_dernier.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | last_pvl AS (
3 | SELECT
4 | cdreseau,
5 | categorie,
6 | cdparametresiseeaux,
7 | datetimeprel,
8 | limite_qualite,
9 | valtraduite,
10 | ROW_NUMBER()
11 | OVER (
12 | PARTITION BY cdreseau, cdparametresiseeaux
13 | ORDER BY datetimeprel DESC
14 | )
15 | AS row_number
16 | FROM
17 | {{ ref('int__resultats_udi_communes') }}
18 | WHERE
19 | categorie = 'cvm'
20 | AND
21 | -- Keep samples taken less than one year before the most recent sample
22 | datetimeprel >= DATE_TRUNC('day', (
23 | SELECT MAX(sub.datetimeprel)
24 | FROM {{ ref('int__resultats_udi_communes') }} AS sub
25 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
26 | )
27 |
28 | SELECT
29 | last_pvl.cdreseau,
30 | last_pvl.categorie,
31 | 'dernier_prel' AS periode,
32 | last_pvl.datetimeprel AS date_dernier_prel,
33 | 1 AS nb_parametres,
34 | CASE
35 | WHEN
36 | last_pvl.valtraduite = 0
37 | OR last_pvl.valtraduite IS NULL
38 | THEN 'non_quantifie'
39 | WHEN
40 | last_pvl.valtraduite > last_pvl.limite_qualite
41 | THEN 'cvm_sup_0_5'
42 | WHEN
43 | last_pvl.valtraduite <= last_pvl.limite_qualite
44 | THEN 'inf_limites'
45 | ELSE 'erreur'
46 | END AS resultat,
47 | CASE
48 | WHEN
49 | last_pvl.valtraduite > 0
50 | THEN TO_JSON(MAP([last_pvl.cdparametresiseeaux], [last_pvl.valtraduite]))
51 | ELSE TO_JSON(MAP([], []))
52 | END AS parametres_detectes
53 | FROM
54 | last_pvl
55 | WHERE
56 | last_pvl.row_number = 1
57 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/cvm/int__resultats_cvm_commune_dernier.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | last_pvl AS (
3 | SELECT
4 | inseecommune,
5 | categorie,
6 | cdparametresiseeaux,
7 | datetimeprel,
8 | valtraduite,
9 | limite_qualite,
10 | ROW_NUMBER()
11 | OVER (
12 | PARTITION BY inseecommune, cdparametresiseeaux
13 | ORDER BY datetimeprel DESC
14 | )
15 | AS row_number
16 | FROM
17 | {{ ref('int__resultats_udi_communes') }}
18 | WHERE
19 | categorie = 'cvm'
20 | AND
21 | -- Keep samples taken less than one year before the most recent sample
22 | datetimeprel >= DATE_TRUNC('day', (
23 | SELECT MAX(sub.datetimeprel)
24 | FROM {{ ref('int__resultats_udi_communes') }} AS sub
25 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
26 | )
27 |
28 | SELECT
29 | last_pvl.inseecommune,
30 | last_pvl.categorie,
31 | 'dernier_prel' AS periode,
32 | last_pvl.datetimeprel AS date_dernier_prel,
33 | 1 AS nb_parametres,
34 | CASE
35 | WHEN
36 | last_pvl.valtraduite = 0
37 | OR last_pvl.valtraduite IS NULL
38 | THEN 'non_quantifie'
39 | WHEN
40 | last_pvl.valtraduite > last_pvl.limite_qualite
41 | THEN 'cvm_sup_0_5'
42 | WHEN
43 | last_pvl.valtraduite <= last_pvl.limite_qualite
44 | THEN 'inf_limites'
45 | ELSE 'erreur'
46 | END AS resultat,
47 | CASE
48 | WHEN
49 | last_pvl.valtraduite > 0
50 | THEN TO_JSON(MAP([last_pvl.cdparametresiseeaux], [last_pvl.valtraduite]))
51 | ELSE TO_JSON(MAP([], []))
52 | END AS parametres_detectes
53 | FROM
54 | last_pvl
55 | WHERE
56 | last_pvl.row_number = 1
57 |
--------------------------------------------------------------------------------
/webapp/components/ui/scroll-area.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import * as React from "react";
4 | import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area";
5 |
6 | import { cn } from "@/lib/utils";
7 |
8 | const ScrollArea = React.forwardRef<
9 | React.ElementRef,
10 | React.ComponentPropsWithoutRef
11 | >(({ className, children, ...props }, ref) => (
12 |
17 |
18 | {children}
19 |
20 |
21 |
22 |
23 | ));
24 | ScrollArea.displayName = ScrollAreaPrimitive.Root.displayName;
25 |
26 | const ScrollBar = React.forwardRef<
27 | React.ElementRef,
28 | React.ComponentPropsWithoutRef
29 | >(({ className, orientation = "vertical", ...props }, ref) => (
30 |
43 |
44 |
45 | ));
46 | ScrollBar.displayName = ScrollAreaPrimitive.ScrollAreaScrollbar.displayName;
47 |
48 | export { ScrollArea, ScrollBar };
49 |
--------------------------------------------------------------------------------
/dbt_/tests/test__coverage_20pfas_4pfas_98pct.sql:
--------------------------------------------------------------------------------
1 | -- When computing the PFAS results for the latest samples
2 | -- (int__resultats_pfas_udi_dernier.sql), we assume that most of the time
3 | -- both the sum of the 20 PFAS (SPFAS) and the sum of the 4 PFAS (PFOA, PFOS, PFNA,
4 | -- PFHXS) are present. This test checks that this is the case for at least 98%
5 | -- of cdreseau/referenceprel pairs.
6 |
7 | WITH yearly_pfas_results AS (
8 | SELECT
9 | cdreseau,
10 | referenceprel,
11 | -- Check whether the sum of the 20 PFAS is available
12 | COUNT(
13 | DISTINCT CASE
14 | WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux
15 | END
16 | ) AS has_sum_20_pfas,
17 | -- Check whether all 4 specific PFAS are available
18 | COUNT(
19 | DISTINCT CASE
20 | WHEN
21 | cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')
22 | THEN cdparametresiseeaux
23 | END
24 | ) AS count_4_pfas
25 | FROM {{ ref('int__resultats_udi_communes') }}
26 | WHERE
27 | categorie = 'pfas'
28 | AND CURRENT_DATE - datetimeprel < INTERVAL 1 YEAR
29 | GROUP BY cdreseau, referenceprel
30 | )
31 |
32 | SELECT
33 |
34 | COUNT(*) AS total_aggregations,
35 | -- Percentage of aggregations where the sum of the 20 PFAS is present
36 | ROUND(
37 | (
38 | SUM(CASE WHEN has_sum_20_pfas = 1 THEN 1 ELSE 0 END)
39 | * 100.0
40 | / COUNT(*)
41 | ),
42 | 2
43 | ) AS pct_with_sum_20_pfas,
44 | -- Percentage of aggregations where all 4 specific PFAS are present
45 | ROUND(
46 | (SUM(CASE WHEN count_4_pfas = 4 THEN 1 ELSE 0 END) * 100.0 / COUNT(*)),
47 | 2
48 | ) AS pct_with_all_4_pfas
49 | FROM yearly_pfas_results
50 |
51 | HAVING pct_with_sum_20_pfas < 98 OR pct_with_all_4_pfas < 98
52 |
--------------------------------------------------------------------------------
/webapp/next.config.ts:
--------------------------------------------------------------------------------
1 | import type { NextConfig } from "next";
2 |
3 | const nextConfig: NextConfig = {
4 | serverExternalPackages: ["@duckdb/node-api"],
5 | eslint: {
6 | // Warning: This allows production builds to successfully complete even if
7 | // your project has ESLint errors.
8 | ignoreDuringBuilds: true,
9 | },
10 | output: "standalone",
11 | headers: async () => {
12 | return [
13 | {
14 | source: "/pmtiles/:path*.pmtiles",
15 | headers: [
16 | {
17 | key: "Cache-Control",
18 | value: "public, max-age=120, s-maxage=60",
19 | },
20 | {
21 | key: "Accept-Ranges",
22 | value: "bytes",
23 | },
24 | ],
25 | },
26 | // {
27 | // source: "/_next/static/:path*",
28 | // headers: [
29 | // {
30 | // key: "Cache-Control",
31 | // value: "public, max-age=31536000, immutable",
32 | // },
33 | // ],
34 | // },
35 | {
36 | source: "/embed",
37 | headers: [
38 | {
39 | key: "Cache-Control",
40 | value: "public, max-age=120, s-maxage=60",
41 | },
42 | {
43 | key: "Content-Security-Policy",
44 | value:
45 | "frame-ancestors 'self' https://dansmoneau.fr https://*.dansmoneau.fr",
46 | },
47 | ],
48 | },
49 | {
50 | source: "/embed-external",
51 | headers: [
52 | {
53 | key: "Cache-Control",
54 | value: "public, max-age=120, s-maxage=60",
55 | },
56 | ],
57 | },
58 | ];
59 | },
60 | rewrites: async () => {
61 | return [
62 | {
63 | source: "/s3/:path*",
64 | destination: "https://s3.fr-par.scw.cloud/pollution-eau-s3/:path*",
65 | },
66 | ];
67 | },
68 | };
69 |
70 | export default nextConfig;
71 |
--------------------------------------------------------------------------------
/pipelines/notebooks/test_geojson_from_db.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pipelines.tasks.client.core.duckdb_client import DuckDBClient\n",
10 | "\n",
11 | "duckdb_client = DuckDBClient()"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "from pipelines.tasks.client.geojson_processor import GeoJSONProcessor\n",
21 | "\n",
22 | "geojson_processor = GeoJSONProcessor(\"communes\", duckdb_client)\n",
23 | "geojson_communes = geojson_processor.generate_geojson()"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "from pipelines.tasks.client.geojson_processor import GeoJSONProcessor\n",
33 | "\n",
34 | "geojson_processor = GeoJSONProcessor(\"udi\", duckdb_client)\n",
35 | "geojson_udi = geojson_processor.generate_geojson()"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "geojson_udi"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": []
53 | }
54 | ],
55 | "metadata": {
56 | "kernelspec": {
57 | "display_name": ".venv",
58 | "language": "python",
59 | "name": "python3"
60 | },
61 | "language_info": {
62 | "codemirror_mode": {
63 | "name": "ipython",
64 | "version": 3
65 | },
66 | "file_extension": ".py",
67 | "mimetype": "text/x-python",
68 | "name": "python",
69 | "nbconvert_exporter": "python",
70 | "pygments_lexer": "ipython3",
71 | "version": "3.12.7"
72 | }
73 | },
74 | "nbformat": 4,
75 | "nbformat_minor": 2
76 | }
77 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/sub_indus/int__resultats_sub_indus_udi_annuel.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | prels AS (
3 | -- Some samples have several analyses for the same substance
4 | -- SELECT DISTINCT does not deduplicate those cases
5 | -- So referenceprel is not unique in this query
6 | SELECT DISTINCT
7 | de_partition AS annee,
8 | cdreseau,
9 | cdparametresiseeaux,
10 | valeur_sanitaire_1,
11 | referenceprel,
12 | datetimeprel,
13 | valtraduite
14 | FROM
15 | {{ ref('int__resultats_udi_communes') }}
16 | WHERE
17 | cdparametresiseeaux IN (
18 | -- Results for 1,4-dioxane are ignored for now
19 | --'14DAN',
20 | 'PCLAT'
21 | )
22 | )
23 |
24 | SELECT
25 | cdreseau,
26 | annee,
27 | CASE
28 | WHEN cdparametresiseeaux = '14DAN' THEN 'sub_indus_14dioxane'
29 | WHEN cdparametresiseeaux = 'PCLAT' THEN 'sub_indus_perchlorate'
30 | END AS categorie,
31 | 'bilan_annuel_' || annee AS periode,
32 | count(
33 | DISTINCT
34 | CASE
35 | WHEN
36 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
37 | THEN referenceprel
38 | END
39 | ) AS nb_depassements,
40 | count(DISTINCT referenceprel) AS nb_prelevements,
41 | (
42 | count(
43 | DISTINCT
44 | CASE
45 | WHEN
46 | valtraduite IS NOT NULL
47 | AND valtraduite > valeur_sanitaire_1
48 | THEN referenceprel
49 | END
50 | )::float
51 | /
52 | count(DISTINCT referenceprel)::float
53 | ) AS ratio_limite_sanitaire,
54 | json_object(
55 | max(cdparametresiseeaux), max(valtraduite)
56 | ) AS parametres_detectes,
57 | date_trunc('day', max(datetimeprel)) AS date_dernier_prel
58 |
59 | FROM prels
60 |
61 | GROUP BY cdreseau, annee, categorie
62 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/nitrate/int__resultats_nitrate_udi_dernier.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | last_pvl AS (
3 | SELECT
4 | cdreseau,
5 | categorie,
6 | cdparametresiseeaux,
7 | valeur_sanitaire_1,
8 | datetimeprel,
9 | valtraduite,
10 | ROW_NUMBER()
11 | OVER (
12 | PARTITION BY cdreseau, cdparametresiseeaux
13 | ORDER BY datetimeprel DESC
14 | )
15 | AS row_number
16 | FROM
17 | {{ ref('int__resultats_udi_communes') }}
18 | WHERE
19 | categorie = 'nitrate'
20 | AND
21 | cdparametresiseeaux = 'NO3'
22 | AND
23 | -- Keep samples taken less than one year before the most recent sample
24 | datetimeprel >= DATE_TRUNC('day', (
25 | SELECT MAX(sub.datetimeprel)
26 | FROM {{ ref('int__resultats_udi_communes') }} AS sub
27 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
28 | AND
29 | -- See the case cdreseau IN ('034005906'), referenceprel = 03400327764
30 | valtraduite IS NOT NULL
31 | )
32 |
33 | SELECT
34 | cdreseau,
35 | 'dernier_prel' AS periode,
36 | datetimeprel AS date_dernier_prel,
37 | 1 AS nb_parametres,
38 | categorie,
39 | CASE
40 | WHEN
41 | valtraduite > valeur_sanitaire_1
42 | THEN 'sup_valeur_sanitaire'
43 | WHEN
44 | valtraduite <= 10
45 | THEN 'non_quantifie'
46 | WHEN
47 | valtraduite <= 25
48 | THEN 'no3_inf_25'
49 | WHEN
50 | valtraduite > 25 AND valtraduite <= 40
51 | THEN 'no3_inf_40'
52 | WHEN
53 | valtraduite > 40 AND valtraduite <= valeur_sanitaire_1
54 | THEN 'inf_valeur_sanitaire'
55 | ELSE 'error'
56 | END AS resultat,
57 | JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite)
58 | AS parametres_detectes
59 | FROM
60 | last_pvl
61 | WHERE
62 | row_number = 1
63 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/sub_indus/int__resultats_sub_indus_commune_annuel.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | prels AS (
3 | -- Some samples have several analyses for the same substance
4 | -- SELECT DISTINCT does not deduplicate those cases
5 | -- So referenceprel is not unique in this query
6 | SELECT DISTINCT
7 | de_partition AS annee,
8 | inseecommune,
9 | cdparametresiseeaux,
10 | valeur_sanitaire_1,
11 | referenceprel,
12 | datetimeprel,
13 | valtraduite
14 | FROM
15 | {{ ref('int__resultats_udi_communes') }}
16 | WHERE
17 | cdparametresiseeaux IN (
18 | -- Results for 1,4-dioxane are ignored for now
19 | --'14DAN',
20 | 'PCLAT'
21 | )
22 | )
23 |
24 | SELECT
25 | inseecommune,
26 | annee,
27 | CASE
28 | WHEN cdparametresiseeaux = '14DAN' THEN 'sub_indus_14dioxane'
29 | WHEN cdparametresiseeaux = 'PCLAT' THEN 'sub_indus_perchlorate'
30 | END AS categorie,
31 | 'bilan_annuel_' || annee AS periode,
32 | count(
33 | DISTINCT
34 | CASE
35 | WHEN
36 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
37 | THEN referenceprel
38 | END
39 | ) AS nb_depassements,
40 | count(DISTINCT referenceprel) AS nb_prelevements,
41 | (
42 | count(
43 | DISTINCT
44 | CASE
45 | WHEN
46 | valtraduite IS NOT NULL
47 | AND valtraduite > valeur_sanitaire_1
48 | THEN referenceprel
49 | END
50 | )::float
51 | /
52 | count(DISTINCT referenceprel)::float
53 | ) AS ratio_limite_sanitaire,
54 | json_object(
55 | max(cdparametresiseeaux), max(valtraduite)
56 | ) AS parametres_detectes,
57 | date_trunc('day', max(datetimeprel)) AS date_dernier_prel
58 |
59 | FROM prels
60 |
61 | GROUP BY inseecommune, annee, categorie
62 |
--------------------------------------------------------------------------------
/webapp/app/layout.tsx:
--------------------------------------------------------------------------------
1 | import type { Metadata, Viewport } from "next";
2 | import "./globals.css";
3 | import Script from "next/script";
4 |
5 | export const metadata: Metadata = {
6 | title: "Pollution de l'Eau Potable en France",
7 | description: "",
8 | robots: {
9 | index: false,
10 | follow: false,
11 | },
12 | icons: {
13 | icon: "/images/dfg.png",
14 | shortcut: "/images/dfg.png",
15 | apple: "/images/dfg.png",
16 | },
17 | };
18 |
19 | export const viewport: Viewport = {
20 | width: "device-width",
21 | initialScale: 1,
22 | maximumScale: 1,
23 | userScalable: false,
24 | };
25 |
26 | export default function RootLayout({
27 | children,
28 | }: Readonly<{
29 | children: React.ReactNode;
30 | }>) {
31 | return (
32 |
33 |
34 | {/* Hard fix to prevent iframe scrolling: When the PollutionMapSearchBox Popover appears, it causes unwanted scrolling in the parent window containing the iframe. Despite attempts to find a more elegant solution, this override is the only reliable way to prevent this behavior when the app is embedded in an iframe. */}
35 |
55 | {children}
56 |
57 |
58 | );
59 | }
60 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/nitrate/int__resultats_nitrate_commune_dernier.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | last_pvl AS (
3 | SELECT
4 | inseecommune,
5 | categorie,
6 | cdparametresiseeaux,
7 | valeur_sanitaire_1,
8 | datetimeprel,
9 | valtraduite,
10 | ROW_NUMBER()
11 | OVER (
12 | PARTITION BY inseecommune, cdparametresiseeaux
13 | ORDER BY datetimeprel DESC
14 | )
15 | AS row_number
16 | FROM
17 | {{ ref('int__resultats_udi_communes') }}
18 | WHERE
19 | categorie = 'nitrate'
20 | AND
21 | cdparametresiseeaux = 'NO3'
22 | AND
23 | -- Keep samples taken less than one year before the most recent sample
24 | datetimeprel >= DATE_TRUNC('day', (
25 | SELECT MAX(sub.datetimeprel)
26 | FROM {{ ref('int__resultats_udi_communes') }} AS sub
27 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
28 | AND
29 | -- See the case cdreseau IN ('034005906'), referenceprel = 03400327764
30 | valtraduite IS NOT NULL
31 | )
32 |
33 | SELECT
34 | inseecommune,
35 | 'dernier_prel' AS periode,
36 | datetimeprel AS date_dernier_prel,
37 | 1 AS nb_parametres,
38 | categorie,
39 | CASE
40 | WHEN
41 | valtraduite > valeur_sanitaire_1
42 | THEN 'sup_valeur_sanitaire'
43 | WHEN
44 | valtraduite <= 10
45 | THEN 'non_quantifie'
46 | WHEN
47 | valtraduite <= 25
48 | THEN 'no3_inf_25'
49 | WHEN
50 | valtraduite > 25 AND valtraduite <= 40
51 | THEN 'no3_inf_40'
52 | WHEN
53 | valtraduite > 40 AND valtraduite <= valeur_sanitaire_1
54 | THEN 'inf_valeur_sanitaire'
55 | ELSE 'error'
56 | END AS resultat,
57 | JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite)
58 | AS parametres_detectes
59 | FROM
60 | last_pvl
61 | WHERE
62 | row_number = 1
63 |
--------------------------------------------------------------------------------
/webapp/components/EmbedBanner.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import { ExternalLink, InfoIcon } from "lucide-react";
4 |
5 | export default function EmbedBanner() {
6 | return (
7 |
11 |
12 |
13 |
14 |
15 |
16 | Générations Futures et Data For Good ont créé Dans Mon Eau.
17 |
18 |
19 | Un outil pour connaître la qualité de votre eau du robinet.
20 |
21 |
22 |
23 |
34 |
35 |
51 |
52 | );
53 | }
54 |
--------------------------------------------------------------------------------
/dbt_/tests/test_sub_active_results.sql:
--------------------------------------------------------------------------------
1 | -- latest sample, UDI level
2 | SELECT
3 | 'dernier_prel' AS periode,
4 | cdreseau,
5 | resultat,
6 | 0 AS ratio_limite_qualite,
7 | 0 AS nb_sup_valeur_sanitaire
8 | FROM
9 | {{ ref('int__resultats_sub_active_udi_dernier') }}
10 | WHERE
11 | (
12 | cdreseau = '051000769'
13 | AND date_dernier_prel = TIMESTAMP '2025-03-31 13:58:00'
14 | AND resultat != 'non_quantifie'
15 | )
16 | OR
17 | (
18 | cdreseau = '030000509'
19 | AND date_dernier_prel = TIMESTAMP '2025-03-31 11:56:00'
20 | AND resultat != 'inf_limite_qualite'
21 | )
22 | OR
23 | (
24 | cdreseau = '029000947'
25 | AND date_dernier_prel = TIMESTAMP '2025-03-31 11:00:00'
26 | AND resultat != 'sup_limite_qualite'
27 | )
28 | OR
29 | (
30 | cdreseau = '060001302'
31 | AND date_dernier_prel = TIMESTAMP '2024-12-19 08:29:00'
32 | AND resultat != 'sup_valeur_sanitaire'
33 | )
34 | -- annual, UDI level
35 | UNION ALL
36 | SELECT
37 | 'bilan_annuel' AS periode,
38 | cdreseau,
39 | '' AS resultat,
40 | ratio_limite_qualite,
41 | nb_sup_valeur_sanitaire
42 | FROM
43 | {{ ref('int__resultats_sub_active_udi_annuel') }}
44 | WHERE
45 | (
46 | cdreseau = '051000769'
47 | AND annee = 2024
48 | AND (
49 | nb_prelevements != 6
50 | OR nb_depassements != 3
51 | OR nb_sup_valeur_sanitaire != 0
52 | OR ratio_limite_qualite != 0.5
53 | )
54 | )
55 | OR
56 | (
57 | cdreseau = '030000509'
58 | AND annee = 2024
59 | AND (
60 | nb_prelevements != 7
61 | OR nb_depassements != 0
62 | OR nb_sup_valeur_sanitaire != 0
63 | OR ratio_limite_qualite != 0
64 | )
65 | )
66 | OR
67 | (
68 | cdreseau = '060001302'
69 | AND annee = 2024
70 | AND (
71 | nb_prelevements != 4
72 | OR nb_depassements != 1
73 | OR nb_sup_valeur_sanitaire != 1
74 | OR ratio_limite_qualite != 0.25
75 | )
76 | )
77 |
--------------------------------------------------------------------------------
/webapp/components/ui/button.tsx:
--------------------------------------------------------------------------------
1 | import * as React from "react";
2 | import { Slot } from "@radix-ui/react-slot";
3 | import { cva, type VariantProps } from "class-variance-authority";
4 |
5 | import { cn } from "@/lib/utils";
6 |
7 | const buttonVariants = cva(
8 | "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0",
9 | {
10 | variants: {
11 | variant: {
12 | default:
13 | "bg-primary text-primary-foreground shadow hover:bg-primary/90",
14 | destructive:
15 | "bg-destructive text-destructive-foreground shadow-sm hover:bg-destructive/90",
16 | outline:
17 | "border border-input bg-background shadow-sm hover:bg-accent hover:text-accent-foreground",
18 | secondary:
19 | "bg-secondary text-secondary-foreground shadow-sm hover:bg-secondary/80",
20 | ghost: "hover:bg-accent hover:text-accent-foreground",
21 | link: "text-primary underline-offset-4 hover:underline",
22 | },
23 | size: {
24 | default: "h-9 px-4 py-2",
25 | sm: "h-8 rounded-md px-3 text-xs",
26 | lg: "h-10 rounded-md px-8",
27 | icon: "h-9 w-9",
28 | },
29 | },
30 | defaultVariants: {
31 | variant: "default",
32 | size: "default",
33 | },
34 | },
35 | );
36 |
37 | export interface ButtonProps
38 | extends React.ButtonHTMLAttributes,
39 | VariantProps {
40 | asChild?: boolean;
41 | }
42 |
43 | const Button = React.forwardRef(
44 | ({ className, variant, size, asChild = false, ...props }, ref) => {
45 | const Comp = asChild ? Slot : "button";
46 | return (
47 |
52 | );
53 | },
54 | );
55 | Button.displayName = "Button";
56 |
57 | export { Button, buttonVariants };
58 |
--------------------------------------------------------------------------------
/dbt_/tests/test_tous_results.sql:
--------------------------------------------------------------------------------
1 | -- latest sample
2 | SELECT
3 | 'dernier_prel' AS periode,
4 | cdreseau,
5 | categorie,
6 | resultat,
7 | null AS ratio,
8 | null AS nb_prelevements,
9 | null AS nb_sup_valeur_sanitaire
10 | FROM
11 | {{ ref('int__resultats_tous_udi_dernier') }}
12 | WHERE
13 | (
14 | cdreseau = '001000598'
15 | AND date_dernier_prel = '2025-03-26 10:59:00'
16 | AND resultat != 'sup_limite_qualite'
17 | )
18 | OR
19 | (
20 | cdreseau = '049000506'
21 | AND date_dernier_prel = '2025-07-08 08:30:00'
22 | AND resultat != 'quantifie'
23 | )
24 | OR
25 | (
26 | cdreseau = '033000400'
27 | AND date_dernier_prel = '2025-07-17 09:50:00'
28 | AND resultat != 'non_quantifie'
29 | )
30 | OR
31 | (
32 | cdreseau = '088002246'
33 | AND date_dernier_prel = '2025-04-22 08:11:00'
34 | AND resultat != 'sup_limite_sanitaire'
35 | )
36 | UNION ALL
37 | -- annual
38 | SELECT
39 | periode,
40 | cdreseau,
41 | categorie,
42 | null AS resultat,
43 | ratio,
44 | nb_prelevements,
45 | nb_sup_valeur_sanitaire
46 | FROM
47 | {{ ref('int__resultats_tous_udi_annuel') }}
48 | WHERE
49 | (
50 | cdreseau = '054000780'
51 | AND periode = 'bilan_annuel_2024'
52 | AND (
53 | nb_prelevements != 7
54 | -- cvm : 1
55 | -- metaux_lourds_as : 1
56 | -- nitrate : 5
57 | -- pesticide : 1
58 | OR
59 | ratio != 0
60 | OR
61 | nb_sup_valeur_sanitaire != 0
62 | )
63 | )
64 | OR
65 | (
66 | cdreseau = '061000423'
67 | AND periode = 'bilan_annuel_2024'
68 | AND (
69 | nb_prelevements != 27
70 | -- cvm: 1
71 | -- metaux_lourds_as: 1
72 | -- nitrate: 14
73 | -- pesticide: 12
74 | OR
75 | ratio <= 0.4
76 | OR
77 | ratio >= 0.5
78 | -- ratio = 12/28 ~= 0.42
79 | OR
80 | nb_sup_valeur_sanitaire != 0
81 | )
82 | )
83 |
--------------------------------------------------------------------------------
/webapp/components/ui/card.tsx:
--------------------------------------------------------------------------------
1 | import * as React from "react";
2 |
3 | import { cn } from "@/lib/utils";
4 |
5 | const Card = React.forwardRef<
6 | HTMLDivElement,
7 | React.HTMLAttributes
8 | >(({ className, ...props }, ref) => (
9 |
17 | ));
18 | Card.displayName = "Card";
19 |
20 | const CardHeader = React.forwardRef<
21 | HTMLDivElement,
22 | React.HTMLAttributes
23 | >(({ className, ...props }, ref) => (
24 |
29 | ));
30 | CardHeader.displayName = "CardHeader";
31 |
32 | const CardTitle = React.forwardRef<
33 | HTMLDivElement,
34 | React.HTMLAttributes
35 | >(({ className, ...props }, ref) => (
36 |
41 | ));
42 | CardTitle.displayName = "CardTitle";
43 |
44 | const CardDescription = React.forwardRef<
45 | HTMLDivElement,
46 | React.HTMLAttributes
47 | >(({ className, ...props }, ref) => (
48 |
53 | ));
54 | CardDescription.displayName = "CardDescription";
55 |
56 | const CardContent = React.forwardRef<
57 | HTMLDivElement,
58 | React.HTMLAttributes
59 | >(({ className, ...props }, ref) => (
60 |
61 | ));
62 | CardContent.displayName = "CardContent";
63 |
64 | const CardFooter = React.forwardRef<
65 | HTMLDivElement,
66 | React.HTMLAttributes
67 | >(({ className, ...props }, ref) => (
68 |
73 | ));
74 | CardFooter.displayName = "CardFooter";
75 |
76 | export {
77 | Card,
78 | CardHeader,
79 | CardFooter,
80 | CardTitle,
81 | CardDescription,
82 | CardContent,
83 | };
84 |
--------------------------------------------------------------------------------
/webapp/app/config.ts:
--------------------------------------------------------------------------------
1 | import layers from "protomaps-themes-base";
2 |
3 | export const MAPLIBRE_MAP = {
4 | protomaps: {
5 | // https://protomaps.com/api
6 | api_key: process.env.NEXT_PUBLIC_PROTOMAPS_API_KEY || "",
7 | maxzoom: 15,
8 | theme: "white", // unsure between "white" and "light"
9 | language: "fr",
10 | },
11 | initialViewState: {
12 | longitude: 0.882755215151974,
13 | latitude: 46.489410422633256,
14 | zoom: 5.2,
15 | },
16 | countryBorderWidth: 2,
17 | countryBorderColor: "#bdb8b8",
18 | };
19 |
20 | // Default map style without layers (will be added dynamically)
21 | export const DEFAULT_MAP_STYLE: maplibregl.StyleSpecification = {
22 | version: 8,
23 | glyphs:
24 | "https://protomaps.github.io/basemaps-assets/fonts/{fontstack}/{range}.pbf",
25 | sprite: "https://protomaps.github.io/basemaps-assets/sprites/v4/light",
26 | sources: {
27 | protomaps: {
28 | type: "vector",
29 | maxzoom: MAPLIBRE_MAP.protomaps.maxzoom,
30 | url: `https://api.protomaps.com/tiles/v4.json?key=${MAPLIBRE_MAP.protomaps.api_key}`,
31 | attribution:
32 | '© OpenStreetMap',
33 | },
34 | communes: {
35 | type: "vector",
36 | url: "pmtiles:///pmtiles/commune_data.pmtiles",
37 | },
38 | udis: {
39 | type: "vector",
40 | url: "pmtiles:///pmtiles/udi_data.pmtiles",
41 | },
42 | },
43 | layers: [], // layers will be added dynamically in the Map component
44 | } satisfies maplibregl.StyleSpecification;
45 |
46 | // Helper function to get the default base layers
47 | export const getDefaultLayers = () => {
48 | return [
49 | ...layers(
50 | "protomaps",
51 | MAPLIBRE_MAP.protomaps.theme,
52 | MAPLIBRE_MAP.protomaps.language,
53 | ).filter((layer) => !["boundaries_country"].includes(layer.id)),
54 | {
55 | id: "boundaries_country",
56 | type: "line",
57 | source: "protomaps",
58 | "source-layer": "boundaries",
59 | filter: ["<=", "kind_detail", 2],
60 | paint: {
61 | "line-color": MAPLIBRE_MAP.countryBorderColor,
62 | "line-width": MAPLIBRE_MAP.countryBorderWidth,
63 | },
64 | },
65 | ];
66 | };
67 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/metaux_lourds/int__resultats_metaux_lourds_udi_annuel.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | metaux_lourds_prels AS (
3 | -- Some samples have several analyses for the same substance
4 | -- The SELECT DISTINCT does not deduplicate those cases
5 | -- So referenceprel is not unique in this query
6 | SELECT DISTINCT
7 | de_partition AS annee,
8 | cdreseau,
9 | cdparametresiseeaux,
10 | valeur_sanitaire_1,
11 | limite_qualite,
12 | referenceprel,
13 | datetimeprel,
14 | valtraduite
15 | FROM
16 | {{ ref('int__resultats_udi_communes') }}
17 | WHERE
18 | cdparametresiseeaux IN ('PB', 'AS')
19 | )
20 |
21 | SELECT
22 | cdreseau,
23 | annee,
24 | CASE
25 | WHEN cdparametresiseeaux = 'PB' THEN 'metaux_lourds_pb'
26 | WHEN cdparametresiseeaux = 'AS' THEN 'metaux_lourds_as'
27 | END AS categorie,
28 | 'bilan_annuel_' || annee AS periode,
29 | count(
30 | DISTINCT
31 | CASE
32 | WHEN
33 | cdparametresiseeaux = 'PB'
34 | AND valtraduite IS NOT NULL AND valtraduite >= limite_qualite
35 | THEN referenceprel
36 | WHEN
37 | cdparametresiseeaux = 'AS'
38 | AND valtraduite IS NOT NULL
39 | AND valtraduite >= valeur_sanitaire_1
40 | THEN referenceprel
41 | END
42 | ) AS nb_depassements,
43 | count(DISTINCT referenceprel) AS nb_prelevements,
44 | (
45 | count(
46 | DISTINCT
47 | CASE
48 | WHEN
49 | cdparametresiseeaux = 'PB'
50 | AND valtraduite IS NOT NULL
51 | AND valtraduite >= limite_qualite
52 | THEN referenceprel
53 | WHEN
54 | cdparametresiseeaux = 'AS'
55 | AND valtraduite IS NOT NULL
56 | AND valtraduite >= valeur_sanitaire_1
57 | THEN referenceprel
58 | END
59 | )::float
60 | /
61 | count(DISTINCT referenceprel)::float
62 | ) AS ratio
63 |
64 | FROM metaux_lourds_prels
65 |
66 | GROUP BY cdreseau, annee, categorie
67 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/metaux_lourds/int__resultats_metaux_lourds_commune_annuel.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | metaux_lourds_prels AS (
3 | -- Some samples have several analyses for the same substance
4 | -- The SELECT DISTINCT does not deduplicate those cases
5 | -- So referenceprel is not unique in this query
6 | SELECT DISTINCT
7 | de_partition AS annee,
8 | inseecommune,
9 | cdparametresiseeaux,
10 | valeur_sanitaire_1,
11 | limite_qualite,
12 | referenceprel,
13 | datetimeprel,
14 | valtraduite
15 | FROM
16 | {{ ref('int__resultats_udi_communes') }}
17 | WHERE
18 | cdparametresiseeaux IN ('PB', 'AS')
19 | )
20 |
21 | SELECT
22 | inseecommune,
23 | annee,
24 | CASE
25 | WHEN cdparametresiseeaux = 'PB' THEN 'metaux_lourds_pb'
26 | WHEN cdparametresiseeaux = 'AS' THEN 'metaux_lourds_as'
27 | END AS categorie,
28 | 'bilan_annuel_' || annee AS periode,
29 | count(
30 | DISTINCT
31 | CASE
32 | WHEN
33 | cdparametresiseeaux = 'PB'
34 | AND valtraduite IS NOT NULL AND valtraduite >= limite_qualite
35 | THEN referenceprel
36 | WHEN
37 | cdparametresiseeaux = 'AS'
38 | AND valtraduite IS NOT NULL
39 | AND valtraduite >= valeur_sanitaire_1
40 | THEN referenceprel
41 | END
42 | ) AS nb_depassements,
43 | count(DISTINCT referenceprel) AS nb_prelevements,
44 | (
45 | count(
46 | DISTINCT
47 | CASE
48 | WHEN
49 | cdparametresiseeaux = 'PB'
50 | AND valtraduite IS NOT NULL
51 | AND valtraduite >= limite_qualite
52 | THEN referenceprel
53 | WHEN
54 | cdparametresiseeaux = 'AS'
55 | AND valtraduite IS NOT NULL
56 | AND valtraduite >= valeur_sanitaire_1
57 | THEN referenceprel
58 | END
59 | )::float
60 | /
61 | count(DISTINCT referenceprel)::float
62 | ) AS ratio
63 |
64 | FROM metaux_lourds_prels
65 |
66 | GROUP BY inseecommune, annee, categorie
67 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/sub_indus/int__resultats_sub_indus_udi_dernier.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | last_pvl AS (
3 | SELECT
4 | cdreseau,
5 | categorie,
6 | cdparametresiseeaux,
7 | valeur_sanitaire_1,
8 | valeur_sanitaire_2,
9 | datetimeprel,
10 | valtraduite,
11 | ROW_NUMBER()
12 | OVER (
13 | PARTITION BY cdreseau, cdparametresiseeaux
14 | ORDER BY datetimeprel DESC
15 | )
16 | AS row_number
17 | FROM
18 | {{ ref('int__resultats_udi_communes') }}
19 | WHERE
20 | cdparametresiseeaux IN (
21 | -- Results for 1,4-dioxane are ignored for now
22 | --'14DAN',
23 | 'PCLAT'
24 | )
25 | AND
26 | -- Keep only samples taken within one year of the most recent sample
27 | datetimeprel >= DATE_TRUNC('day', (
28 | SELECT MAX(sub.datetimeprel)
29 | FROM {{ ref('int__resultats_udi_communes') }} AS sub
30 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
31 | )
32 |
33 | SELECT
34 | cdreseau,
35 | 'dernier_prel' AS periode,
36 | datetimeprel AS date_dernier_prel,
37 | 1 AS nb_parametres,
38 | CASE
39 | WHEN cdparametresiseeaux = '14DAN' THEN 'sub_indus_14dioxane'
40 | WHEN cdparametresiseeaux = 'PCLAT' THEN 'sub_indus_perchlorate'
41 | END AS categorie,
42 | CASE
43 | WHEN
44 | valtraduite = 0 OR valtraduite IS NULL
45 | THEN 'non_quantifie'
46 | WHEN
47 | valtraduite > valeur_sanitaire_2
48 | THEN 'sup_valeur_sanitaire_2'
49 | WHEN
50 | -- by construction, valeur_sanitaire_2 > valeur_sanitaire_1
51 | -- so here the result is actually:
52 | -- valeur_sanitaire_1 < valtraduite <= valeur_sanitaire_2
53 | valtraduite > valeur_sanitaire_1
54 | THEN 'sup_valeur_sanitaire'
55 | WHEN
56 | valtraduite <= valeur_sanitaire_1
57 | THEN 'inf_valeur_sanitaire'
58 | ELSE 'error'
59 | END AS resultat,
60 | JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite)
61 | AS parametres_detectes
62 | FROM
63 | last_pvl
64 | WHERE
65 | row_number = 1
66 |
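-- Illustration of the one-year window used in the WHERE clause above, on a
-- hypothetical latest-sample timestamp (the real bound comes from MAX(datetimeprel)):
-- SELECT
--     DATE_TRUNC('day', TIMESTAMP '2025-02-19 09:58:00' - INTERVAL 1 YEAR)
--     + INTERVAL 1 DAY AS cutoff
-- -- => 2024-02-20 00:00:00 : only samples taken on or after this cutoff are kept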
--------------------------------------------------------------------------------
/dbt_/models/intermediate/sub_indus/int__resultats_sub_indus_commune_dernier.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | last_pvl AS (
3 | SELECT
4 | inseecommune,
5 | categorie,
6 | cdparametresiseeaux,
7 | valeur_sanitaire_1,
8 | valeur_sanitaire_2,
9 | datetimeprel,
10 | valtraduite,
11 | ROW_NUMBER()
12 | OVER (
13 | PARTITION BY inseecommune, cdparametresiseeaux
14 | ORDER BY datetimeprel DESC
15 | )
16 | AS row_number
17 | FROM
18 | {{ ref('int__resultats_udi_communes') }}
19 | WHERE
20 | cdparametresiseeaux IN (
21 | -- Results for 1,4-dioxane are ignored for now
22 | --'14DAN',
23 | 'PCLAT'
24 | )
25 | AND
26 | -- Keep only samples taken within one year of the most recent sample
27 | datetimeprel >= DATE_TRUNC('day', (
28 | SELECT MAX(sub.datetimeprel)
29 | FROM {{ ref('int__resultats_udi_communes') }} AS sub
30 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
31 | )
32 |
33 | SELECT
34 | inseecommune,
35 | 'dernier_prel' AS periode,
36 | datetimeprel AS date_dernier_prel,
37 | 1 AS nb_parametres,
38 | CASE
39 | WHEN cdparametresiseeaux = '14DAN' THEN 'sub_indus_14dioxane'
40 | WHEN cdparametresiseeaux = 'PCLAT' THEN 'sub_indus_perchlorate'
41 | END AS categorie,
42 | CASE
43 | WHEN
44 | valtraduite = 0 OR valtraduite IS NULL
45 | THEN 'non_quantifie'
46 | WHEN
47 | valtraduite > valeur_sanitaire_2
48 | THEN 'sup_valeur_sanitaire_2'
49 | WHEN
50 | -- by construction, valeur_sanitaire_2 > valeur_sanitaire_1
51 | -- so here the result is actually:
52 | -- valeur_sanitaire_1 < valtraduite <= valeur_sanitaire_2
53 | valtraduite > valeur_sanitaire_1
54 | THEN 'sup_valeur_sanitaire'
55 | WHEN
56 | valtraduite <= valeur_sanitaire_1
57 | THEN 'inf_valeur_sanitaire'
58 | ELSE 'error'
59 | END AS resultat,
60 | JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite)
61 | AS parametres_detectes
62 | FROM
63 | last_pvl
64 | WHERE
65 | row_number = 1
66 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__resultats_udi_communes.sql:
--------------------------------------------------------------------------------
1 | WITH resultats AS (
2 | SELECT
3 | referenceprel,
4 | cdparametresiseeaux,
5 | de_partition,
6 |
7 | -- Correction of the valtraduite column, which contains the textual
8 | -- rqana values converted to numeric values.
9 | -- Some textual values such as "Changement anormal", "OUI",
10 | -- "PRESENCE" were converted to 1.
11 | -- These values are corrected to 0 because we want to treat them
12 | -- as non-quantified values.
13 | -- Purely numeric values remain unchanged.
14 | -- Examples after correction:
15 | -- 'Changement anormal' → 0
16 | -- 'OUI' → 0
17 | -- 'PRESENCE' → 0
18 | -- '1,0' → 1
19 | -- '>1' → 1
20 | CASE
21 | WHEN valtraduite = 1 AND REGEXP_MATCHES(rqana, '[a-zA-Z]') THEN 0
22 | ELSE valtraduite
23 | END AS valtraduite
24 |
25 | -- limitequal from the source data is no longer used,
26 | -- because we rely on values provided by Générations Futures
27 | --
28 | -- CAST(
29 | -- REGEXP_EXTRACT(
30 | -- REPLACE(limitequal, ',', '.'), '-?\d+(\.\d+)?'
31 | -- ) AS FLOAT
32 | -- ) AS limitequal_float,
33 | -- REGEXP_EXTRACT(limitequal, '[a-zA-Zµg]+/?[a-zA-Z/L]+$') AS unite
34 | FROM
35 | {{ ref("stg_edc__resultats") }}
36 | ),
37 |
38 | resultats_with_ref AS (
39 | SELECT
40 | resultats.*,
41 | r.categorie_1 AS categorie,
42 | r.categorie_2,
43 | r.categorie_3,
44 | r.limite_qualite,
45 | r.limite_indicative,
46 | r.valeur_sanitaire_1,
47 | r.valeur_sanitaire_2
48 | FROM
49 | resultats
50 | INNER JOIN
51 | {{ ref("int__valeurs_de_reference") }} AS r
52 | ON
53 | resultats.cdparametresiseeaux = r.cdparametresiseeaux
54 | )
55 |
56 |
57 | SELECT
58 | resultats_with_ref.*,
59 | udi.cdreseau,
60 | udi.inseecommune,
61 | plv.datetimeprel
62 | FROM
63 | resultats_with_ref
64 | INNER JOIN
65 | {{ ref("int__lien_cdreseau_refreneceprel") }} AS plv
66 | ON
67 | resultats_with_ref.referenceprel = plv.referenceprel
68 | AND
69 | resultats_with_ref.de_partition = plv.de_partition
70 |
71 | LEFT JOIN
72 | {{ ref("int__lien_commune_cdreseau") }} AS udi
73 | ON
74 | plv.cdreseau = udi.cdreseau
75 | AND plv.de_partition = udi.de_partition
76 |
--------------------------------------------------------------------------------
/webapp/app/duckdb-example/page.tsx:
--------------------------------------------------------------------------------
1 | import { fetchExample } from "../lib/data";
2 |
3 | export default async function Page() {
4 |   // using the API route
5 | // try {
6 |
7 | // const response = await fetch("http://localhost:3001/api/db-example", { cache: "no-store" })
8 | // const results = response.json();
9 | // } catch (err) {
10 | // console.error("Error fetching DB status:", err)
11 | // }
12 |
13 |   // using the data layer directly
14 | const reader = await fetchExample();
15 |
16 | return (
17 |
18 |
19 |
20 |
21 |
22 | | Row |
23 | {Array.from({ length: reader.columnCount }, (_, i) => (
24 |
25 | {reader.columnName(i)}
26 | |
27 | ))}
28 |
29 |
30 |
31 | {Object.entries(reader.getRows()).map(([key, value]) => (
32 |
33 | |
34 | {key}
35 | |
36 | {Array.from({ length: reader.columnCount }, (_, i) => (
37 |
38 | {/* Display by type - example using methods specific to certain types => no error, the typing looks correct */}
39 | {/* {value[i] != null &&
40 | ((reader.columnType(i).typeId === DuckDBTypeId.VARCHAR &&
41 | String(value[i]).slice(0, 3)) ||
42 | (reader.columnType(i).typeId === DuckDBTypeId.BIGINT &&
43 | (value[i] as bigint) * BigInt(100000)) ||
44 | (reader.columnType(i).typeId === DuckDBTypeId.DOUBLE &&
45 | (value[i] as number)?.toExponential()))} */}
46 | {/* Simple display */}
47 | {String(value[i])}
48 | |
49 | ))}
50 |
51 | ))}
52 |
53 |
54 |
55 |
56 | );
57 | }
58 |
--------------------------------------------------------------------------------
/pipelines/tasks/config/common.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | from pathlib import Path
4 | from typing import Union
5 | from zipfile import ZipFile
6 |
7 | import requests
8 | from tqdm import tqdm
9 |
10 | from pipelines.utils.logger import get_logger
11 |
12 | logger = get_logger(__name__)
13 |
14 | ROOT_FOLDER = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
15 | DATABASE_FOLDER = os.path.join(ROOT_FOLDER, "database")
16 | DUCKDB_FILE = os.path.join(DATABASE_FOLDER, "data.duckdb")
17 | CACHE_FOLDER = os.path.join(ROOT_FOLDER, "database", "cache")
18 |
19 | os.makedirs(CACHE_FOLDER, exist_ok=True)
20 | os.makedirs(DATABASE_FOLDER, exist_ok=True)
21 |
22 |
23 | # common style for the progress bar in the CLI
24 | tqdm_common = {
25 | "ncols": 100,
26 | "bar_format": "{l_bar}{bar}| {n_fmt}/{total_fmt}",
27 | "mininterval": 2.0, # Log progress every 2 second
28 | }
29 |
30 |
31 | def clear_cache(recreate_folder: bool = True):
32 | """Clear the cache folder."""
33 | shutil.rmtree(CACHE_FOLDER)
34 | if recreate_folder:
35 | os.makedirs(CACHE_FOLDER, exist_ok=True)
36 |
37 |
38 | def download_file_from_https(url: str, filepath: Union[str, Path]):
39 | """
40 | Downloads a file from a https link to a local file.
41 | :param url: The url where to download the file.
42 | :param filepath: The path to the local file.
43 | :return: Downloaded file filename.
44 | """
45 | logger.info(f"Downloading file from {url} to {filepath}")
46 | response = requests.get(
47 | url, stream=True, headers={"Accept-Encoding": "gzip, deflate"}
48 | )
49 | response.raise_for_status()
50 | response_size = int(response.headers.get("content-length", 0))
51 | filepath = Path(filepath)
52 | with open(filepath, "wb") as f:
53 | with tqdm(
54 | total=response_size,
55 | unit="B",
56 | unit_scale=True,
57 | desc=filepath.name,
58 | **tqdm_common,
59 | ) as pbar:
60 | for chunk in response.iter_content(chunk_size=8192):
61 | f.write(chunk)
62 | pbar.update(len(chunk))
63 |
64 | return filepath.name
65 |
66 |
67 | def extract_file(zip_file, extract_folder):
68 | with ZipFile(zip_file, "r") as zip_ref:
69 | file_list = zip_ref.namelist()
70 | with tqdm(
71 | total=len(file_list), unit="file", desc="Extracting", **tqdm_common
72 | ) as pbar:
73 | for file in file_list:
74 | zip_ref.extract(file, extract_folder) # Extract each file
75 | pbar.update(1)
76 | return True
77 |
--------------------------------------------------------------------------------
/pipelines/tasks/client/pmtiles_processor.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | from pathlib import Path
3 |
4 | from pipelines.config.config import get_s3_path_pmtiles
5 | from pipelines.tasks.config.config_geojson import (
6 | config_merge_geo,
7 | )
8 | from pipelines.utils.logger import get_logger
9 | from pipelines.utils.storage_client import ObjectStorageClient
10 |
11 | logger = get_logger(__name__)
12 |
13 | types = config_merge_geo.keys()
14 |
15 |
16 | class PmtilesProcessor:
17 | def __init__(self, type="communes"):
18 | if type not in types:
19 | raise Exception(f"type {type} must be in {types}")
20 | self.upload_file_path = f"georef-france-{type}-prelevement.pmtiles"
21 |
22 | def convert_geojson_to_pmtiles(
23 | self, geojson_file: str, pmtiles_file: str, layer="data_communes"
24 | ):
25 | """Convert a GeoJSON file to PMTiles format using Tippecanoe."""
26 | # try:
27 | # Construct the Tippecanoe command
28 | command = [
29 | "tippecanoe",
30 | "-zg", # Zoom levels
31 | "-o", # output
32 | pmtiles_file, # Output PMTiles file
33 | "--layer", # le nom de la couche dans les tuiles vectorielles
34 | layer,
35 | "--coalesce-densest-as-needed",
36 | "--extend-zooms-if-still-dropping",
37 | geojson_file, # Input GeoJSON file
38 | ]
39 | # if file already exists then remove it
40 | if Path(pmtiles_file).exists():
41 | Path(pmtiles_file).unlink()
42 | # Run the command
43 | subprocess.run(command, check=True)
44 | logger.info(f"Successfully converted '{geojson_file}' to '{pmtiles_file}'.")
45 |
46 | # except subprocess.CalledProcessError as e:
47 | # logger.error(f"Error during conversion: {e}")
48 | # except Exception as e:
49 | # logger.error(f"An error occurred: {e}")
50 |
51 | def upload_pmtils_to_storage(self, env: str, pmtils_path: str):
52 | """
53 | Upload the Pmtiles file to Storage Object depending on the environment
54 | This requires setting the correct environment variables for the Scaleway credentials
55 | """
56 | s3 = ObjectStorageClient()
57 | s3_path = get_s3_path_pmtiles(env, self.upload_file_path)
58 |
59 | s3.upload_object(local_path=pmtils_path, file_key=s3_path, public_read=True)
60 | logger.info(f"✅ pmtils uploaded to s3://{s3.bucket_name}/{s3_path}")
61 | url = (
62 | f"https://{s3.bucket_name}.{s3.endpoint_url.split('https://')[1]}/{s3_path}"
63 | )
64 | return url
65 |
--------------------------------------------------------------------------------
/pipelines/tasks/generate_pmtiles_legacy.py:
--------------------------------------------------------------------------------
1 | """Generate and upload merged new PMtiles file. LEGACY method.
2 | For both UDI and communes data:
3 | - Get geom data from duck db
4 | - Get sample results from duckdb, merge with geom, convert to pmtiles and uploads the new Pmtiles to S3.
5 |
6 | Args:
7 | - env (str): Environment to download from ("dev" or "prod")
8 | """
9 |
10 | import json
11 | import os
12 |
13 | from pipelines.tasks.config.common import CACHE_FOLDER
14 |
15 | from pipelines.tasks.client.core.duckdb_client import DuckDBClient
16 | from pipelines.tasks.client.geojson_processor import GeoJSONProcessor
17 | from pipelines.tasks.client.pmtiles_processor import PmtilesProcessor
18 | from pipelines.utils.logger import get_logger
19 |
20 | logger = get_logger(__name__)
21 |
22 |
23 | def execute(env: str):
24 | """
25 | Execute GeoJSON generation and upload process.
26 |
27 | Args:
28 | env: Environment to use ("dev" or "prod")
29 | """
30 | duckdb_client = DuckDBClient()
31 | generate_pmtiles(env, "communes", duckdb_client)
32 | generate_pmtiles(env, "udi", duckdb_client)
33 | duckdb_client.close()
34 |
35 |
36 | def generate_pmtiles(env, type, duckdb_client):
37 | logger.info(f"Starting {type} GeoJSON generation process")
38 |
39 | # Initialize clients
40 | geojson_processor = GeoJSONProcessor(type, duckdb_client)
41 | pmtiles_processor = PmtilesProcessor(type)
42 |
43 | # Process and merge data
44 | logger.info(f"Merging GeoJSON with {type} results")
45 | geojson_output_path = os.path.join(
46 | CACHE_FOLDER, f"new-georef-france-{type}-prelevement.geojson"
47 | )
48 | geojson = geojson_processor.generate_geojson()
49 |
50 | with open(geojson_output_path, "w", encoding="utf-8") as f:
51 | json.dump(geojson, f)
52 |
53 | logger.info(f"✅ GeoJSON processed and stored at: {geojson_output_path}")
54 |
55 | # logger.info("Uploading geojson to S3")
56 | # url = geojson_processor.upload_geojson_to_storage(
57 | # env, file_path=geojson_output_path
58 | # )
59 | # logger.info(f"geojson in s3 pubic Url: {url}")
60 |
61 | logger.info("Convert new-GeoJSON to pmtiles")
62 | pmtils_output_path = os.path.join(
63 | CACHE_FOLDER, f"georef-france-{type}-prelevement.pmtiles"
64 | )
65 | pmtiles_processor.convert_geojson_to_pmtiles(
66 | geojson_output_path, pmtils_output_path, f"data_{type}"
67 | )
68 |
69 | logger.info("Uploading pmtiles to S3")
70 | url = pmtiles_processor.upload_pmtils_to_storage(
71 | env, pmtils_path=pmtils_output_path
72 | )
73 | logger.info(f"pmtiles in s3 pubic Url: {url}")
74 |
--------------------------------------------------------------------------------
/dbt_/models/staging/communes/_communes_models.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | models:
4 | - name: stg_communes__cog
5 | description: "Liste des communes et leurs unités de distribution chargé depuis le site de l'insee https://www.insee.fr/fr/information/7766585"
6 | columns:
7 | - name: TYPECOM
8 | description: >
9 | Type de commune
10 | COM Commune
11 | COMA Commune associée
12 | COMD Commune déléguée
13 | ARM Arrondissement municipal
14 | - name: COM
15 | description: Code Commune
16 | - name: REG
17 | description: Code Region
18 | - name: DEP
19 | description: Code Departement
20 | - name: CTCD
21 | description: Code de la collectivité territoriale ayant les compétences départementales
22 | - name: ARR
23 | description: Code arrondissement
24 | - name: TNCC
25 | description: >
26 | Type de nom en clair
27 | 0 Pas d'article et le nom commence par une consonne sauf H muet charnière = DE
28 | 1 Pas d'article et le nom commence par une voyelle ou un H muet charnière = D'
29 | 2 Article = LE charnière = DU
30 | 3 Article = LA charnière = DE LA
31 | 4 Article = LES charnière = DES
32 | 5 Article = L' charnière = DE L'
33 | 6 Article = AUX charnière = DES
34 | 7 Article = LAS charnière = DE LAS
35 | 8 Article = LOS charnière = DE LOS
36 | - name: NCC
37 | description: Nom en clair (majuscules)
38 | - name: NCCENR
39 | description: Nom en clair (typographie riche)
40 | - name: LIBELLE
41 | description: Nom en clair (typographie riche) avec article
42 | - name: CAN
43 | description: Code canton. Pour les communes « multi-cantonales », code décliné de 99 à 90 (pseudo-canton) ou de 89 à 80 (communes nouvelles)
44 | - name: COMPARENT
45 | description: Code de la commune parente pour les arrondissements municipaux et les communes associées ou déléguées.
46 |
47 | - name: stg_communes__opendatasoft
48 | description: Tracé des communes chargé depuis https://public.opendatasoft.com/explore/dataset/georef-france-commune/information
49 | columns:
50 | - name: com_code
51 | description: "Code de la commune (extrait du champ com_code[1] de la source OpenDataSoft)"
52 | type: VARCHAR
53 | - name: com_name
54 | description: "Nom de la commune (extrait du champ com_name[1] de la source OpenDataSoft)"
55 | type: VARCHAR
56 | - name: geom
57 | description: "Géométrie de la commune"
58 | type: GEOMETRY
59 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/pesticide/sub_active/int__resultats_sub_active_udi_dernier.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | last_pvl AS (
3 | SELECT DISTINCT
4 | cdreseau,
5 | categorie,
6 | cdparametresiseeaux,
7 | valtraduite,
8 | limite_qualite,
9 | valeur_sanitaire_1,
10 | datetimeprel,
11 | DENSE_RANK()
12 | OVER (
13 | PARTITION BY cdreseau
14 | ORDER BY datetimeprel DESC
15 | )
16 | AS row_number
17 |
18 | FROM
19 | {{ ref('int__resultats_udi_communes') }}
20 | WHERE
21 | categorie = 'pesticide'
22 | AND
23 | categorie_2 = 'sub_active'
24 | AND
25 | -- Keep only samples taken within one year of the most recent sample
26 | datetimeprel >= DATE_TRUNC('day', (
27 | SELECT MAX(sub.datetimeprel)
28 | FROM {{ ref('int__resultats_udi_communes') }} AS sub
29 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
30 | ),
31 |
32 | aggregated AS (
33 | SELECT
34 | cdreseau,
35 | cdparametresiseeaux,
36 | MAX(valtraduite) AS valtraduite,
37 | MAX(limite_qualite) AS limite_qualite,
38 | MAX(valeur_sanitaire_1) AS valeur_sanitaire_1,
39 | MAX(datetimeprel) AS datetimeprel
40 | FROM last_pvl
41 | WHERE row_number = 1
42 | GROUP BY cdreseau, cdparametresiseeaux
43 | )
44 |
45 | SELECT
46 | cdreseau,
47 | 'sub_active' AS categorie,
48 | 'dernier_prel' AS periode,
49 | MAX(datetimeprel) AS date_dernier_prel,
50 | COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,
51 | CASE
52 | WHEN BOOL_AND(valtraduite IS NULL OR valtraduite = 0) THEN 'non_quantifie'
53 | WHEN
54 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1)
55 | THEN 'sup_valeur_sanitaire'
56 | WHEN
57 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_qualite)
58 | THEN 'sup_limite_qualite'
59 | WHEN
60 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite <= limite_qualite)
61 | THEN 'inf_limite_qualite'
62 | ELSE 'erreur'
63 | END AS resultat,
64 | TO_JSON(
65 | MAP(
66 | LIST(
67 | cdparametresiseeaux
68 | ORDER BY cdparametresiseeaux
69 | ) FILTER (WHERE valtraduite > 0
70 | ),
71 | LIST(
72 | valtraduite
73 | ORDER BY cdparametresiseeaux
74 | ) FILTER (WHERE valtraduite > 0
75 | )
76 | )
77 | ) AS parametres_detectes
78 |
79 | FROM aggregated
80 | GROUP BY cdreseau
81 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/pesticide/sub_active/int__resultats_sub_active_commune_dernier.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | last_pvl AS (
3 | SELECT DISTINCT
4 | inseecommune,
5 | categorie,
6 | cdparametresiseeaux,
7 | valtraduite,
8 | limite_qualite,
9 | valeur_sanitaire_1,
10 | datetimeprel,
11 | DENSE_RANK()
12 | OVER (
13 | PARTITION BY inseecommune
14 | ORDER BY datetimeprel DESC
15 | )
16 | AS row_number
17 |
18 | FROM
19 | {{ ref('int__resultats_udi_communes') }}
20 | WHERE
21 | categorie = 'pesticide'
22 | AND
23 | categorie_2 = 'sub_active'
24 | AND
25 | -- Keep only samples taken within one year of the most recent sample
26 | datetimeprel >= DATE_TRUNC('day', (
27 | SELECT MAX(sub.datetimeprel)
28 | FROM {{ ref('int__resultats_udi_communes') }} AS sub
29 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
30 | ),
31 |
32 | aggregated AS (
33 | SELECT
34 | inseecommune,
35 | cdparametresiseeaux,
36 | MAX(valtraduite) AS valtraduite,
37 | MAX(limite_qualite) AS limite_qualite,
38 | MAX(valeur_sanitaire_1) AS valeur_sanitaire_1,
39 | MAX(datetimeprel) AS datetimeprel
40 | FROM last_pvl
41 | WHERE row_number = 1
42 | GROUP BY inseecommune, cdparametresiseeaux
43 | )
44 |
45 | SELECT
46 | inseecommune,
47 | 'sub_active' AS categorie,
48 | 'dernier_prel' AS periode,
49 | MAX(datetimeprel) AS date_dernier_prel,
50 | COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,
51 | CASE
52 | WHEN BOOL_AND(valtraduite IS NULL OR valtraduite = 0) THEN 'non_quantifie'
53 | WHEN
54 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1)
55 | THEN 'sup_valeur_sanitaire'
56 | WHEN
57 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_qualite)
58 | THEN 'sup_limite_qualite'
59 | WHEN
60 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite <= limite_qualite)
61 | THEN 'inf_limite_qualite'
62 | ELSE 'erreur'
63 | END AS resultat,
64 | TO_JSON(
65 | MAP(
66 | LIST(
67 | cdparametresiseeaux
68 | ORDER BY cdparametresiseeaux
69 | ) FILTER (WHERE valtraduite > 0
70 | ),
71 | LIST(
72 | valtraduite
73 | ORDER BY cdparametresiseeaux
74 | ) FILTER (WHERE valtraduite > 0
75 | )
76 | )
77 | ) AS parametres_detectes
78 |
79 | FROM aggregated
80 | GROUP BY inseecommune
81 |
--------------------------------------------------------------------------------
/webapp/app/api/udi/find/route.tsx:
--------------------------------------------------------------------------------
1 | // an api route fetching data
2 |
3 | import db from "@/app/lib/duckdb";
4 | import { NextRequest, NextResponse } from "next/server";
5 |
6 | export async function GET(req: NextRequest) {
7 | // Set CORS headers to allow requests from any origin
8 | const corsHeaders = {
9 | "Access-Control-Allow-Origin": "*",
10 | "Access-Control-Allow-Methods": "GET, OPTIONS",
11 | "Access-Control-Allow-Headers": "Content-Type, Authorization",
12 | };
13 |
14 | // Handle OPTIONS request for CORS preflight
15 | if (req.method === "OPTIONS") {
16 | return NextResponse.json({}, { headers: corsHeaders });
17 | }
18 |
19 | const { searchParams } = new URL(req.url);
20 | const lonParam = searchParams.get("lon");
21 | const latParam = searchParams.get("lat");
22 |
23 | if (lonParam == null || latParam == null) {
24 | return NextResponse.json(
25 | { message: "Paramètres manquants: lon et lat sont requis" },
26 | { status: 400, headers: corsHeaders },
27 | );
28 | }
29 | const lon = parseFloat(lonParam);
30 | const lat = parseFloat(latParam);
31 |
32 | if (
33 | isNaN(lon) ||
34 | isNaN(lat) ||
35 | lon < -180 ||
36 | lon > 180 ||
37 | lat < -90 ||
38 | lat > 90
39 | ) {
40 | return NextResponse.json(
41 | { message: "Paramètres invalides" },
42 | { status: 400, headers: corsHeaders },
43 | );
44 | }
45 |
46 | const connection = await db.connect();
47 | try {
48 | await connection.run("LOAD spatial;");
49 |
50 | const prepared = await connection.prepare(`
51 | SELECT code_udi
52 | FROM atlasante_udi
53 | WHERE ST_Contains(geom, ST_GeomFromText($1::VARCHAR))
54 | ORDER BY udi_pop DESC
55 | LIMIT 1
56 | `);
57 |
58 | const point = `POINT(${lon} ${lat})`;
59 | prepared.bindVarchar(1, point);
60 |
61 | const result = await prepared.runAndReadAll();
62 |
63 | if (result.currentRowCount > 0) {
64 | return NextResponse.json(
65 | { id: result.getRowObjectsJson()[0]["code_udi"] },
66 | { status: 200, headers: corsHeaders },
67 | );
68 | } else {
69 | return NextResponse.json(
70 | { message: "Aucune UDI ne correspond à ces coordonnées" },
71 | { status: 404, headers: corsHeaders },
72 | );
73 | }
74 | } catch (error) {
75 | console.error("Erreur de base de données:", error);
76 | return NextResponse.json(
77 | {
78 | message:
79 | "Une erreur interne s'est produite. Veuillez réessayer ultérieurement.",
80 | },
81 | { status: 500, headers: corsHeaders },
82 | );
83 | } finally {
84 | await connection.close();
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/webapp/tailwind.config.ts:
--------------------------------------------------------------------------------
1 | import type { Config } from "tailwindcss";
2 | import TailwindAnimate from "tailwindcss-animate";
3 |
4 | export default {
5 | darkMode: ["class"],
6 | content: [
7 | "./pages/**/*.{js,ts,jsx,tsx,mdx}",
8 | "./components/**/*.{js,ts,jsx,tsx,mdx}",
9 | "./app/**/*.{js,ts,jsx,tsx,mdx}",
10 | ],
11 | theme: {
12 | extend: {
13 | colors: {
14 | background: "hsl(var(--background))",
15 | foreground: "hsl(var(--foreground))",
16 | card: {
17 | DEFAULT: "hsl(var(--card))",
18 | foreground: "hsl(var(--card-foreground))",
19 | },
20 | popover: {
21 | DEFAULT: "hsl(var(--popover))",
22 | foreground: "hsl(var(--popover-foreground))",
23 | },
24 | primary: {
25 | DEFAULT: "hsl(var(--primary))",
26 | foreground: "hsl(var(--primary-foreground))",
27 | },
28 | secondary: {
29 | DEFAULT: "hsl(var(--secondary))",
30 | foreground: "hsl(var(--secondary-foreground))",
31 | },
32 | muted: {
33 | DEFAULT: "hsl(var(--muted))",
34 | foreground: "hsl(var(--muted-foreground))",
35 | },
36 | accent: {
37 | DEFAULT: "hsl(var(--accent))",
38 | foreground: "hsl(var(--accent-foreground))",
39 | },
40 | destructive: {
41 | DEFAULT: "hsl(var(--destructive))",
42 | foreground: "hsl(var(--destructive-foreground))",
43 | },
44 | border: "hsl(var(--border))",
45 | input: "hsl(var(--input))",
46 | ring: "hsl(var(--ring))",
47 | chart: {
48 | "1": "hsl(var(--chart-1))",
49 | "2": "hsl(var(--chart-2))",
50 | "3": "hsl(var(--chart-3))",
51 | "4": "hsl(var(--chart-4))",
52 | "5": "hsl(var(--chart-5))",
53 | },
54 | "custom-drom": "#22394e",
55 | },
56 | borderRadius: {
57 | lg: "var(--radius)",
58 | md: "calc(var(--radius) - 2px)",
59 | sm: "calc(var(--radius) - 4px)",
60 | },
61 | fontFamily: {
62 | sans: [
63 | "Raleway",
64 | "ui-sans-serif",
65 | "system-ui",
66 | "-apple-system",
67 | "BlinkMacSystemFont",
68 | "Segoe UI",
69 | "Roboto",
70 | "Helvetica Neue",
71 | "Arial",
72 | "Noto Sans",
73 | "sans-serif",
74 | ],
75 | numbers: [
76 | "ui-sans-serif",
77 | "system-ui",
78 | "-apple-system",
79 | "BlinkMacSystemFont",
80 | "Segoe UI",
81 | "Roboto",
82 | "Helvetica Neue",
83 | "Arial",
84 | "Noto Sans",
85 | "sans-serif",
86 | ],
87 | },
88 | },
89 | },
90 | plugins: [TailwindAnimate],
91 | } satisfies Config;
92 |
--------------------------------------------------------------------------------
/dbt_/tests/test_cvm_results.sql:
--------------------------------------------------------------------------------
1 | -- dernier relevé
2 | SELECT
3 | 'dernier relevé' AS periode,
4 | cdreseau,
5 | categorie,
6 | resultat,
7 | 0 AS nb_depassements,
8 | 0 AS nb_prelevements,
9 | 0 AS ratio_limite_qualite
10 | FROM
11 | {{ ref('int__resultats_cvm_udi_dernier') }}
12 | WHERE
13 | (
14 | cdreseau = '976003489'
15 | AND categorie = 'cvm'
16 | AND date_dernier_prel = '2024-07-16 08:30:00'
17 | AND resultat != 'non_quantifie'
18 | )
19 | OR
20 | (
21 | cdreseau = '001000241'
22 | AND categorie = 'cvm'
23 | AND date_dernier_prel = '2024-12-31 14:00:00'
24 | AND resultat != 'non_quantifie'
25 | )
26 | OR
27 | (
28 | cdreseau = '087003637'
29 | AND categorie = 'cvm'
30 | AND date_dernier_prel = '2024-07-04 10:50:00'
31 | AND resultat != 'cvm_sup_0_5'
32 | )
33 | OR
34 | (
35 | cdreseau = '095004048'
36 | AND categorie = 'cvm'
37 | AND date_dernier_prel = '2024-07-23 08:26:00'
38 | AND resultat != 'inf_limites'
39 | )
40 | UNION ALL
41 | -- annuel
42 | SELECT
43 | 'annuel' AS periode,
44 | cdreseau,
45 | categorie,
46 | '' AS resultat,
47 | nb_depassements,
48 | nb_prelevements,
49 | ratio_limite_qualite
50 | FROM
51 | {{ ref('int__resultats_cvm_udi_annuel') }}
52 | WHERE
53 | (
54 | cdreseau = '001001073'
55 | AND categorie = 'cvm'
56 | AND annee = '2024'
57 | AND nb_depassements != 0
58 | )
59 | OR
60 | (
61 | cdreseau = '001001073'
62 | AND categorie = 'cvm'
63 | AND annee = '2024'
64 | AND ratio_limite_qualite != 0
65 | )
66 | OR
67 | (
68 | cdreseau = '001001073'
69 | AND categorie = 'cvm'
70 | AND annee = '2023'
71 | AND nb_depassements != 0
72 | )
73 | OR
74 | (
75 | cdreseau = '001001073'
76 | AND categorie = 'cvm'
77 | AND annee = '2022'
78 | AND nb_depassements != 0
79 | )
80 | OR
81 | (
82 | cdreseau = '007000088'
83 | AND categorie = 'cvm'
84 | AND annee IN ('2022', '2023', '2024')
85 | AND nb_depassements != 0
86 | )
87 | OR
88 | (
89 | cdreseau = '095004048'
90 | AND categorie = 'cvm'
91 | AND annee = '2024'
92 | AND nb_prelevements != 21
93 | )
94 | OR
95 | (
96 | cdreseau = '005001358'
97 | AND categorie = 'cvm'
98 | AND annee = '2022'
99 | AND nb_depassements != 2
100 | )
101 | OR
102 | (
103 | cdreseau = '032000209'
104 | AND categorie = 'cvm'
105 | AND annee = '2024'
106 | AND (
107 | ratio_limite_qualite != 0.25
108 | OR
109 | nb_prelevements != 4
110 | )
111 | )
112 |
--------------------------------------------------------------------------------
/pipelines/tasks/client/uploaded_geojson_client.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 |
4 | from pipelines.tasks.client.core.duckdb_client import DuckDBClient
5 | from pipelines.tasks.config.common import (
6 | CACHE_FOLDER,
7 | logger,
8 | )
9 | from pipelines.utils.storage_client import ObjectStorageClient
10 |
11 |
12 | class UploadedGeoJSONClient:
13 | """Client pour télécharger et ingérer plusieurs fichiers GeoJSON uploadés préalablement manuellement sur S3"""
14 |
15 | def __init__(self, config, duckdb_client: DuckDBClient):
16 | self.config = config
17 | self.duckdb_client = duckdb_client
18 | self.storage_client = ObjectStorageClient()
19 |
20 | if "files" not in self.config:
21 | raise ValueError(
22 | "Configuration must contain a 'files' list with the GeoJSON files to process"
23 | )
24 |
25 | self.files_config = self.config["files"]
26 | logger.info(
27 | f"UploadedGeoJSONClient initialized with {len(self.files_config)} file(s)"
28 | )
29 |
30 | def process_datasets(self):
31 | logger.info(f"Processing {self.__class__.__name__} data")
32 | self._download_data()
33 | self._ingest_to_duckdb()
34 | logger.info(f"Finishing processing {self.__class__.__name__} data")
35 |
36 | def _download_data(self):
37 | os.makedirs(CACHE_FOLDER, exist_ok=True)
38 |
39 | for file_config in self.files_config:
40 | s3_key = (
41 | f"{self.config['source'].get('prefix', 'upload')}/{file_config['path']}"
42 | )
43 | local_path = Path(CACHE_FOLDER, file_config["local_file_name"])
44 | logger.info(f"Downloading {s3_key} to {local_path}")
45 | self.storage_client.download_object(
46 | file_key=s3_key, local_path=str(local_path)
47 | )
48 |
49 | def _ingest_to_duckdb(self):
50 | logger.info(
51 | f"Ingesting {len(self.files_config)} uploaded GeoJSON file(s) into DuckDB"
52 | )
53 |
54 | # Collect all table names for dropping
55 | table_names = [file_config["table_name"] for file_config in self.files_config]
56 | self.duckdb_client.drop_tables(table_names=table_names)
57 |
58 | # Ingest each file
59 | for file_config in self.files_config:
60 | logger.info(
61 | f"Ingesting {file_config['local_file_name']} into table {file_config['table_name']}"
62 | )
63 | self.duckdb_client.ingest_from_geojson(
64 | table_name=file_config["table_name"],
65 | filepath=Path(CACHE_FOLDER, file_config["local_file_name"]),
66 | )
67 | logger.info(
68 | f"✅ {file_config['local_file_name']} has been ingested into table {file_config['table_name']}"
69 | )
70 |
71 | logger.info("✅ All uploaded GeoJSON files have been ingested in DB")
72 |
--------------------------------------------------------------------------------
/dbt_/tests/test_nitrates_results.sql:
--------------------------------------------------------------------------------
1 | -- dernier relevé
2 | SELECT
3 | 'dernier relevé' AS periode,
4 | cdreseau,
5 | resultat,
6 | 0 AS nb_depassements,
7 | 0 AS nb_prelevements,
8 | 0 AS ratio_depassements
9 | FROM
10 | {{ ref('int__resultats_nitrate_udi_dernier') }}
11 | WHERE
12 | (
13 | cdreseau = '001000003'
14 | AND date_dernier_prel = '2025-05-23 09:06:00'
15 | AND resultat != 'no3_inf_25'
16 | )
17 | OR
18 | (
19 | cdreseau = '037000175'
20 | AND date_dernier_prel = '2025-06-17 10:02:00'
21 | AND resultat != 'no3_inf_40'
22 | )
23 | OR
24 | (
25 | cdreseau = '002000060'
26 | AND date_dernier_prel = '2025-04-10 09:22:00'
27 | AND resultat != 'sup_valeur_sanitaire'
28 | )
29 | OR
30 | (
31 | cdreseau = '060001271'
32 | AND date_dernier_prel = '2025-04-09 13:44:00'
33 | AND resultat != 'inf_valeur_sanitaire'
34 | )
35 | OR
36 | (
37 | cdreseau = '973000028'
38 | AND date_dernier_prel = '2025-05-20 10:44:00'
39 | AND resultat != 'non_quantifie'
40 | )
41 | UNION ALL
42 | -- annuel
43 | SELECT
44 | 'annuel' AS periode,
45 | cdreseau,
46 | '' AS resultat,
47 | nb_depassements,
48 | nb_prelevements,
49 | ratio
50 | FROM
51 | {{ ref('int__resultats_nitrate_udi_annuel') }}
52 | WHERE
53 | (
54 | cdreseau = '092003070'
55 | AND annee = '2024'
56 | AND (
57 | nb_prelevements != 806
58 | OR nb_depassements != 0
59 | OR ratio != 0
60 | )
61 | )
62 | OR
63 | (
64 | cdreseau = '071001155'
65 | AND annee = '2023'
66 | AND (
67 | nb_prelevements != 1
68 | OR nb_depassements != 0
69 | OR ratio != 0
70 | )
71 | )
72 | OR
73 | (
74 | cdreseau = '036000670'
75 | AND annee = '2024'
76 | AND (
77 | nb_prelevements != 27
78 | OR nb_depassements != 25
79 | -- there are 2 samples exactly equal to 50 (= valeur_sanitaire_1)
80 | -- since the query uses a strict >, we get 25 rather than 27
81 | OR ratio < 0.92
82 | )
83 | )
84 | OR
85 | (
86 | cdreseau = '089003503'
87 | AND annee = '2020'
88 | AND (
89 | nb_prelevements != 12
90 | OR nb_depassements != 3
91 | OR ratio != 0.25
92 | )
93 | )
94 | OR
95 | (
96 | cdreseau = '055000713'
97 | AND annee = '2023'
98 | AND (
99 | nb_prelevements != 4
100 | OR nb_depassements != 0
101 | OR ratio != 0
102 | )
103 | )
104 | OR
105 | (
106 | cdreseau = '027000943'
107 | AND annee = '2021'
108 | AND (
109 | nb_prelevements != 63
110 | OR nb_depassements != 1
111 | -- there is 1 sample exactly equal to 50 (= valeur_sanitaire_1)
112 | )
113 | )
114 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/metaux_lourds/int__resultats_metaux_lourds_udi_dernier.sql:
--------------------------------------------------------------------------------
1 | -- Here we keep only the latest sample
2 | -- for each UDI within the last year
3 | WITH metaux_lourds_dernier_prel AS (
4 | SELECT
5 | cdreseau,
6 | categorie,
7 | cdparametresiseeaux,
8 | limite_qualite,
9 | valeur_sanitaire_1,
10 | valeur_sanitaire_2,
11 | datetimeprel,
12 | valtraduite,
13 | ROW_NUMBER()
14 | OVER (
15 | PARTITION BY cdreseau, cdparametresiseeaux
16 | ORDER BY datetimeprel DESC
17 | )
18 | AS row_number
19 | FROM
20 | {{ ref('int__resultats_udi_communes') }}
21 | WHERE
22 | cdparametresiseeaux IN ('PB', 'AS')
23 | AND
24 | -- Keep only samples taken within one year of the most recent sample
25 | datetimeprel >= DATE_TRUNC('day', (
26 | SELECT MAX(sub.datetimeprel)
27 | FROM {{ ref('int__resultats_udi_communes') }} AS sub
28 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
29 | )
30 |
31 | -- Here we keep only the most recent sample (row_number = 1)
32 | -- for each heavy-metal parameter
33 | SELECT
34 | cdreseau,
35 | datetimeprel AS date_dernier_prel,
36 | 'dernier_prel' AS periode,
37 | 1 AS nb_parametres,
38 | CASE
39 | WHEN
40 | cdparametresiseeaux = 'PB'
41 | THEN 'metaux_lourds_pb'
42 | WHEN
43 | cdparametresiseeaux = 'AS'
44 | THEN 'metaux_lourds_as'
45 | END AS categorie,
46 | CASE
47 | WHEN
48 | -- No PB/AS distinction because the result is the same
49 | valtraduite IS NULL
50 | OR valtraduite = 0
51 | THEN 'non_quantifie'
52 | WHEN
53 | cdparametresiseeaux = 'PB'
54 | AND valtraduite >= limite_qualite
55 | THEN 'sup_limite_qualite'
56 | WHEN
57 | -- 5 is the future quality limit that will apply
58 | -- from 2036 onwards
59 | cdparametresiseeaux = 'PB'
60 | AND valtraduite >= 5
61 | AND valtraduite < limite_qualite
62 | THEN 'sup_limite_qualite_2036'
63 | WHEN
64 | cdparametresiseeaux = 'PB'
65 | AND valtraduite < 5
66 | THEN 'inf_limite_qualite'
67 | WHEN
68 | cdparametresiseeaux = 'AS'
69 | AND valtraduite >= valeur_sanitaire_1
70 | THEN 'sup_valeur_sanitaire'
71 | WHEN
72 | cdparametresiseeaux = 'AS'
73 | AND valtraduite >= limite_qualite
74 | AND valtraduite < valeur_sanitaire_1
75 | THEN 'sup_limite_qualite'
76 | WHEN
77 | cdparametresiseeaux = 'AS'
78 | AND valtraduite < limite_qualite
79 | THEN 'inf_limite_qualite'
80 | ELSE 'erreur'
81 | END AS resultat,
82 | JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite)
83 | AS parametres_detectes
84 | FROM
85 | metaux_lourds_dernier_prel
86 | WHERE
87 | row_number = 1
88 |
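-- Worked example of the PB branch of the CASE above, assuming limite_qualite = 10
-- (the actual threshold comes from int__valeurs_de_reference):
--   valtraduite NULL or 0 -> 'non_quantifie'
--   valtraduite = 3       -> 'inf_limite_qualite'       (< 5)
--   valtraduite = 7       -> 'sup_limite_qualite_2036'  (>= 5 and < limite_qualite)
--   valtraduite = 12      -> 'sup_limite_qualite'       (>= limite_qualite)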
--------------------------------------------------------------------------------
/dbt_/models/intermediate/pesticide/metabolite/int__resultats_metabolite_udi_dernier.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | last_pvl AS (
3 | SELECT DISTINCT
4 | cdreseau,
5 | categorie,
6 | cdparametresiseeaux,
7 | valtraduite,
8 | limite_qualite,
9 | limite_indicative,
10 | valeur_sanitaire_1,
11 | datetimeprel,
12 | DENSE_RANK()
13 | OVER (
14 | PARTITION BY cdreseau
15 | ORDER BY datetimeprel DESC
16 | )
17 | AS row_number
18 |
19 | FROM
20 | {{ ref('int__resultats_udi_communes') }}
21 | WHERE
22 | categorie = 'pesticide'
23 | AND
24 | categorie_2 = 'metabolite'
25 | AND
26 | -- Keep only samples taken within one year of the most recent sample
27 | datetimeprel >= DATE_TRUNC('day', (
28 | SELECT MAX(sub.datetimeprel)
29 | FROM {{ ref('int__resultats_udi_communes') }} AS sub
30 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
31 | ),
32 |
33 | aggregated AS (
34 | SELECT
35 | cdreseau,
36 | cdparametresiseeaux,
37 | MAX(valtraduite) AS valtraduite,
38 | MAX(limite_qualite) AS limite_qualite,
39 | MAX(limite_indicative) AS limite_indicative,
40 | MAX(valeur_sanitaire_1) AS valeur_sanitaire_1,
41 | MAX(datetimeprel) AS datetimeprel
42 | FROM last_pvl
43 | WHERE row_number = 1
44 | GROUP BY cdreseau, cdparametresiseeaux
45 | )
46 |
47 | SELECT
48 | cdreseau,
49 | 'metabolite' AS categorie,
50 | 'dernier_prel' AS periode,
51 | MAX(datetimeprel) AS date_dernier_prel,
52 | COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,
53 | CASE
54 | WHEN BOOL_AND(valtraduite IS NULL OR valtraduite = 0) THEN 'non_quantifie'
55 | WHEN
56 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1)
57 | THEN 'sup_valeur_sanitaire'
58 | WHEN
59 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_qualite)
60 | THEN 'sup_limite_qualite'
61 | WHEN
62 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_indicative)
63 | THEN 'sup_limite_indicative'
64 | WHEN
65 | BOOL_OR(
66 | valtraduite IS NOT NULL
67 | AND (limite_qualite IS NULL OR valtraduite <= limite_qualite)
68 | AND (limite_indicative IS NULL OR valtraduite <= limite_indicative)
69 | AND (limite_qualite IS NOT NULL OR limite_indicative IS NOT NULL)
70 | )
71 | THEN 'inf_limites'
72 | ELSE 'erreur'
73 | END AS resultat,
74 | TO_JSON(
75 | MAP(
76 | LIST(
77 | cdparametresiseeaux
78 | ORDER BY cdparametresiseeaux
79 | ) FILTER (WHERE valtraduite > 0
80 | ),
81 | LIST(
82 | valtraduite
83 | ORDER BY cdparametresiseeaux
84 | ) FILTER (WHERE valtraduite > 0
85 | )
86 | )
87 | ) AS parametres_detectes
88 |
89 | FROM aggregated
90 | GROUP BY cdreseau
91 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/metaux_lourds/int__resultats_metaux_lourds_commune_dernier.sql:
--------------------------------------------------------------------------------
1 | -- Here we keep only the latest sample
2 | -- for each commune within the last year
3 | WITH metaux_lourds_dernier_prel AS (
4 | SELECT
5 | inseecommune,
6 | categorie,
7 | cdparametresiseeaux,
8 | limite_qualite,
9 | valeur_sanitaire_1,
10 | valeur_sanitaire_2,
11 | datetimeprel,
12 | valtraduite,
13 | ROW_NUMBER()
14 | OVER (
15 | PARTITION BY inseecommune, cdparametresiseeaux
16 | ORDER BY datetimeprel DESC
17 | )
18 | AS row_number
19 | FROM
20 | {{ ref('int__resultats_udi_communes') }}
21 | WHERE
22 | cdparametresiseeaux IN ('PB', 'AS')
23 | AND
24 | -- Keep only samples taken within one year of the most recent sample
25 | datetimeprel >= DATE_TRUNC('day', (
26 | SELECT MAX(sub.datetimeprel)
27 | FROM {{ ref('int__resultats_udi_communes') }} AS sub
28 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
29 | )
30 |
31 | -- Here we keep only the most recent sample (row_number = 1)
32 | -- for each heavy-metal parameter
33 | SELECT
34 | inseecommune,
35 | datetimeprel AS date_dernier_prel,
36 | 'dernier_prel' AS periode,
37 | 1 AS nb_parametres,
38 | CASE
39 | WHEN
40 | cdparametresiseeaux = 'PB'
41 | THEN 'metaux_lourds_pb'
42 | WHEN
43 | cdparametresiseeaux = 'AS'
44 | THEN 'metaux_lourds_as'
45 | END AS categorie,
46 | CASE
47 | WHEN
48 | -- No PB/AS distinction because the result is the same
49 | valtraduite IS NULL
50 | OR valtraduite = 0
51 | THEN 'non_quantifie'
52 | WHEN
53 | cdparametresiseeaux = 'PB'
54 | AND valtraduite >= limite_qualite
55 | THEN 'sup_limite_qualite'
56 | WHEN
57 | -- 5 is the future quality limit that will apply
58 | -- from 2036 onwards
59 | cdparametresiseeaux = 'PB'
60 | AND valtraduite >= 5
61 | AND valtraduite < limite_qualite
62 | THEN 'sup_limite_qualite_2036'
63 | WHEN
64 | cdparametresiseeaux = 'PB'
65 | AND valtraduite < 5
66 | THEN 'inf_limite_qualite'
67 | WHEN
68 | cdparametresiseeaux = 'AS'
69 | AND valtraduite >= valeur_sanitaire_1
70 | THEN 'sup_valeur_sanitaire'
71 | WHEN
72 | cdparametresiseeaux = 'AS'
73 | AND valtraduite >= limite_qualite
74 | AND valtraduite < valeur_sanitaire_1
75 | THEN 'sup_limite_qualite'
76 | WHEN
77 | cdparametresiseeaux = 'AS'
78 | AND valtraduite < limite_qualite
79 | THEN 'inf_limite_qualite'
80 | ELSE 'erreur'
81 | END AS resultat,
82 | JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite)
83 | AS parametres_detectes
84 | FROM
85 | metaux_lourds_dernier_prel
86 | WHERE
87 | row_number = 1
88 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/pesticide/metabolite/int__resultats_metabolite_commune_dernier.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | last_pvl AS (
3 | SELECT DISTINCT
4 | inseecommune,
5 | categorie,
6 | cdparametresiseeaux,
7 | valtraduite,
8 | limite_qualite,
9 | limite_indicative,
10 | valeur_sanitaire_1,
11 | datetimeprel,
12 | DENSE_RANK()
13 | OVER (
14 | PARTITION BY inseecommune
15 | ORDER BY datetimeprel DESC
16 | )
17 | AS row_number
18 |
19 | FROM
20 | {{ ref('int__resultats_udi_communes') }}
21 | WHERE
22 | categorie = 'pesticide'
23 | AND
24 | categorie_2 = 'metabolite'
25 | AND
26 | -- Keep only samples taken within one year of the most recent sample
27 | datetimeprel >= DATE_TRUNC('day', (
28 | SELECT MAX(sub.datetimeprel)
29 | FROM {{ ref('int__resultats_udi_communes') }} AS sub
30 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
31 | ),
32 |
33 | aggregated AS (
34 | SELECT
35 | inseecommune,
36 | cdparametresiseeaux,
37 | MAX(valtraduite) AS valtraduite,
38 | MAX(limite_qualite) AS limite_qualite,
39 | MAX(limite_indicative) AS limite_indicative,
40 | MAX(valeur_sanitaire_1) AS valeur_sanitaire_1,
41 | MAX(datetimeprel) AS datetimeprel
42 | FROM last_pvl
43 | WHERE row_number = 1
44 | GROUP BY inseecommune, cdparametresiseeaux
45 | )
46 |
47 | SELECT
48 | inseecommune,
49 | 'metabolite' AS categorie,
50 | 'dernier_prel' AS periode,
51 | MAX(datetimeprel) AS date_dernier_prel,
52 | COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,
53 | CASE
54 | WHEN BOOL_AND(valtraduite IS NULL OR valtraduite = 0) THEN 'non_quantifie'
55 | WHEN
56 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1)
57 | THEN 'sup_valeur_sanitaire'
58 | WHEN
59 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_qualite)
60 | THEN 'sup_limite_qualite'
61 | WHEN
62 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_indicative)
63 | THEN 'sup_limite_indicative'
64 | WHEN
65 | BOOL_OR(
66 | valtraduite IS NOT NULL
67 | AND (limite_qualite IS NULL OR valtraduite <= limite_qualite)
68 | AND (limite_indicative IS NULL OR valtraduite <= limite_indicative)
69 | AND (limite_qualite IS NOT NULL OR limite_indicative IS NOT NULL)
70 | )
71 | THEN 'inf_limites'
72 | ELSE 'erreur'
73 | END AS resultat,
74 | TO_JSON(
75 | MAP(
76 | LIST(
77 | cdparametresiseeaux
78 | ORDER BY cdparametresiseeaux
79 | ) FILTER (WHERE valtraduite > 0
80 | ),
81 | LIST(
82 | valtraduite
83 | ORDER BY cdparametresiseeaux
84 | ) FILTER (WHERE valtraduite > 0
85 | )
86 | )
87 | ) AS parametres_detectes
88 |
89 | FROM aggregated
90 | GROUP BY inseecommune
91 |
--------------------------------------------------------------------------------
/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | pfas_prels AS (
3 | SELECT DISTINCT
4 | de_partition AS annee,
5 | cdreseau,
6 | referenceprel,
7 | datetimeprel,
8 | cdparametresiseeaux,
9 | limite_qualite,
10 | valeur_sanitaire_1,
11 | valtraduite
12 | FROM
13 | {{ ref('int__resultats_udi_communes') }}
14 | WHERE
15 | categorie = 'pfas'
16 | ),
17 |
18 | -- 1: Aggregate the results into a single row per sample / UDI / year
19 | pfas_results_udi_agg AS (
20 | SELECT
21 | referenceprel,
22 | cdreseau,
23 | annee,
24 | -- The sum of the 20 PFAS is available as its own parameter (SPFAS)
25 | MAX(
26 | CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END
27 | ) AS sum_20_pfas,
28 | COUNT(
29 | DISTINCT CASE
30 | WHEN cdparametresiseeaux = 'SPFAS' THEN referenceprel
31 | END
32 | ) AS count_20_pfas,
33 | -- We compute a sum of 4 PFAS, checked against a limit recommended by the
34 | -- Haut Conseil de la Santé Publique
35 | SUM(
36 | CASE
37 | WHEN
38 | cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')
39 | THEN valtraduite
40 | ELSE 0
41 | END
42 | ) AS sum_4_pfas,
43 | -- Check whether the sum of the 20 PFAS exceeds
44 | -- the regulatory limit
45 | MAX(
46 | CASE
47 | WHEN
48 | cdparametresiseeaux = 'SPFAS'
49 | AND limite_qualite IS NOT NULL
50 | AND valtraduite IS NOT NULL
51 | AND valtraduite > limite_qualite
52 | THEN 1
53 | ELSE 0
54 | END
55 | ) AS sum_20_pfas_above_limit,
56 | MAX(
57 | CASE
58 | WHEN
59 | valeur_sanitaire_1 IS NOT NULL
60 | AND valtraduite IS NOT NULL
61 | AND valtraduite > valeur_sanitaire_1
62 | THEN 1
63 | ELSE 0
64 | END
65 | ) AS has_pfas_above_vs,
66 | MAX(datetimeprel) AS max_datetimeprel
67 | FROM pfas_prels
68 | GROUP BY referenceprel, cdreseau, annee
69 | -- Drop the very rare cases where the sum of the 20 PFAS is missing
70 | HAVING count_20_pfas = 1
71 | )
72 |
73 | SELECT
74 | cdreseau,
75 | annee,
76 | 'pfas' AS categorie,
77 | 'bilan_annuel_' || annee AS periode,
78 | COUNT(DISTINCT referenceprel) AS nb_prelevements,
79 | ROUND((
80 | SUM(CASE WHEN sum_20_pfas_above_limit = 1 THEN 1 ELSE 0 END)
81 | /
82 | COUNT(DISTINCT referenceprel)
83 | ), 2) AS ratio_limite_qualite,
84 | SUM(has_pfas_above_vs) AS nb_sup_valeur_sanitaire,
85 | TO_JSON({
86 | 'SPFAS': MAX(sum_20_pfas),
87 | 'SUM_4_PFAS': MAX(sum_4_pfas)
88 | }) AS parametres_detectes,
89 | MAX(max_datetimeprel) AS date_dernier_prel
90 |
91 | FROM pfas_results_udi_agg
92 | GROUP BY cdreseau, annee
93 |
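-- A minimal sketch of the per-sample aggregation above, on hypothetical values
-- for a single sample P1 whose SPFAS sum exceeds a 0.1 limite_qualite:
-- WITH pfas_prels (referenceprel, cdparametresiseeaux, valtraduite, limite_qualite) AS (
--     VALUES
--     ('P1', 'SPFAS', 0.12, 0.1),
--     ('P1', 'PFOA', 0.03, NULL),
--     ('P1', 'PFOS', 0.02, NULL)
-- )
-- SELECT
--     referenceprel,
--     MAX(CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END)
--         AS sum_20_pfas,                 -- 0.12
--     SUM(CASE
--         WHEN cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS') THEN valtraduite
--         ELSE 0
--     END) AS sum_4_pfas,                 -- 0.05
--     MAX(CASE
--         WHEN cdparametresiseeaux = 'SPFAS' AND valtraduite > limite_qualite THEN 1
--         ELSE 0
--     END) AS sum_20_pfas_above_limit     -- 1
-- FROM pfas_prels
-- GROUP BY referenceprel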
--------------------------------------------------------------------------------
/dbt_/tests/test_pfas_results.sql:
--------------------------------------------------------------------------------
1 | -- dernier udi
2 | SELECT
3 | 'dernier_prel' AS periode,
4 | cdreseau,
5 | categorie,
6 | resultat,
7 | 0 AS ratio_limite_qualite,
8 | 0 AS nb_sup_valeur_sanitaire
9 | FROM
10 | {{ ref('int__resultats_pfas_udi_dernier') }}
11 | WHERE
12 | (
13 | -- test
14 | -- UDI 013001457 has a sample on 2025-02-19 09:58:00
15 | -- with a health-value (valeur sanitaire) exceedance for PFOS
16 | cdreseau = '013001457'
17 | AND date_dernier_prel = TIMESTAMP '2025-02-19 09:58:00'
18 | AND resultat != 'sup_valeur_sanitaire'
19 | )
20 | OR (
21 | cdreseau = '004001032'
22 | AND date_dernier_prel = TIMESTAMP '2025-02-28 12:33:00'
23 | AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02'
24 | )
25 | OR (
26 | cdreseau = '008000855'
27 | AND date_dernier_prel = TIMESTAMP '2025-02-27 09:24:00'
28 | AND resultat != 'sup_valeur_sanitaire'
29 | )
30 | OR
31 | (
32 | cdreseau = '00800107747'
33 | AND date_dernier_prel = '2025-02-27 09:24:00'
34 | AND resultat != 'sup_valeur_sanitaire'
35 | )
36 | OR
37 | (
38 | cdreseau = '011004114'
39 | AND date_dernier_prel = '2025-02-24 13:55:00'
40 | AND resultat != 'somme_20pfas_sup_0_1'
41 | )
42 | OR
43 | (
44 | cdreseau = '001000404'
45 | AND date_dernier_prel = '2024-11-29 08:08:00'
46 | AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_sup_0_02'
47 | )
48 | OR
49 | (
50 | cdreseau = '001000511'
51 | AND date_dernier_prel = '2024-11-28 09:58:00'
52 | AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02'
53 | )
54 | OR
55 | (
56 | cdreseau = '003000370'
57 | AND date_dernier_prel = TIMESTAMP '2025-02-18 08:45:00'
58 | AND resultat != 'non_quantifie'
59 | )
60 | UNION ALL
61 | -- annuel udi
62 | SELECT
63 | 'bilan_annuel' AS periode,
64 | cdreseau,
65 | categorie,
66 | '' AS resultat,
67 | ratio_limite_qualite,
68 | nb_sup_valeur_sanitaire
69 | FROM
70 | {{ ref('int__resultats_pfas_udi_annuel') }}
71 | WHERE
72 | (
73 | cdreseau = '001000356'
74 | AND annee = '2025'
75 | AND
76 | (
77 | ratio_limite_qualite != 0
78 | OR nb_sup_valeur_sanitaire != 0
79 | )
80 | )
81 | OR
82 | (
83 | cdreseau = '074000043'
84 | AND annee = '2022'
85 | AND (
86 | ratio_limite_qualite != 0.1
87 | OR nb_sup_valeur_sanitaire != 2
88 | )
89 | )
90 | OR
91 | (
92 | cdreseau = '030000200'
93 | AND annee = '2024'
94 | AND (
95 | nb_sup_valeur_sanitaire != 0
96 | OR ratio_limite_qualite != 0.25
97 | )
98 | )
99 | OR
100 | (
101 | cdreseau = '069000025'
102 | AND annee IN ('2022', '2023', '2024')
103 | AND (
104 | nb_sup_valeur_sanitaire != 0
105 | OR ratio_limite_qualite != 0
106 | )
107 | )
108 |
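dbt treats a singular test like this one as passing when the query returns zero rows, so every OR branch pins the expected classification of one known sample. Below is a minimal sketch of the same convention outside dbt, assuming the intermediate model has been built in the local DuckDB file (table name and path are assumptions).

import duckdb

# Re-run the "dernier prélèvement" assertion for one known UDI (sketch; table/path assumed).
con = duckdb.connect("database/data.duckdb", read_only=True)
offending = con.sql("""
    SELECT cdreseau, resultat, date_dernier_prel
    FROM int__resultats_pfas_udi_dernier
    WHERE cdreseau = '013001457'
      AND resultat != 'sup_valeur_sanitaire'
""").fetchall()
# Like the dbt test, an empty result set means the expectation holds.
assert not offending, f"unexpected PFAS classification: {offending}"
con.close()
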
--------------------------------------------------------------------------------
/dbt_/models/intermediate/pfas/int__resultats_pfas_commune_annuel.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | pfas_prels AS (
3 | SELECT DISTINCT
4 | de_partition AS annee,
5 | inseecommune,
6 | referenceprel,
7 | datetimeprel,
8 | cdparametresiseeaux,
9 | limite_qualite,
10 | valeur_sanitaire_1,
11 | valtraduite
12 | FROM
13 | {{ ref('int__resultats_udi_communes') }}
14 | WHERE
15 | categorie = 'pfas'
16 | ),
17 |
18 | -- 1: Aggregate the results into a single row per sample / commune / year
19 | pfas_results_udi_agg AS (
20 | SELECT
21 | referenceprel,
22 | inseecommune,
23 | annee,
24 |         -- The sum of the 20 PFAS is available as a parameter (SPFAS)
25 | MAX(
26 | CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END
27 | ) AS sum_20_pfas,
28 | COUNT(
29 | DISTINCT CASE
30 | WHEN cdparametresiseeaux = 'SPFAS' THEN referenceprel
31 | END
32 | ) AS count_20_pfas,
33 |         -- Compute a sum of 4 PFAS, for which a limit is recommended by the
34 |         -- Haut Conseil de la Santé Publique
35 | SUM(
36 | CASE
37 | WHEN
38 | cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')
39 | THEN valtraduite
40 | ELSE 0
41 | END
42 | ) AS sum_4_pfas,
43 |         -- Check whether the sum of the 20 PFAS exceeds
44 |         -- the regulatory limit
45 | MAX(
46 | CASE
47 | WHEN
48 | cdparametresiseeaux = 'SPFAS'
49 | AND limite_qualite IS NOT NULL
50 | AND valtraduite IS NOT NULL
51 | AND valtraduite > limite_qualite
52 | THEN 1
53 | ELSE 0
54 | END
55 | ) AS sum_20_pfas_above_limit,
56 | MAX(
57 | CASE
58 | WHEN
59 | valeur_sanitaire_1 IS NOT NULL
60 | AND valtraduite IS NOT NULL
61 | AND valtraduite > valeur_sanitaire_1
62 | THEN 1
63 | ELSE 0
64 | END
65 | ) AS has_pfas_above_vs,
66 | MAX(datetimeprel) AS max_datetimeprel
67 | FROM pfas_prels
68 | GROUP BY referenceprel, inseecommune, annee
69 |     -- Drop the very rare cases where the sum of the 20 PFAS is missing
70 | HAVING count_20_pfas = 1
71 | )
72 |
73 | SELECT
74 | inseecommune,
75 | annee,
76 | 'pfas' AS categorie,
77 | 'bilan_annuel_' || annee AS periode,
78 | COUNT(DISTINCT referenceprel) AS nb_prelevements,
79 | ROUND((
80 | SUM(CASE WHEN sum_20_pfas_above_limit = 1 THEN 1 ELSE 0 END)
81 | /
82 | COUNT(DISTINCT referenceprel)
83 | ), 2) AS ratio_limite_qualite,
84 | SUM(has_pfas_above_vs) AS nb_sup_valeur_sanitaire,
85 | TO_JSON({
86 | 'SPFAS': MAX(sum_20_pfas),
87 | 'SUM_4_PFAS': MAX(sum_4_pfas)
88 | }) AS parametres_detectes,
89 | MAX(max_datetimeprel) AS date_dernier_prel
90 |
91 | FROM pfas_results_udi_agg
92 | GROUP BY inseecommune, annee
93 |
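The HAVING count_20_pfas = 1 clause silently drops samples that have no SPFAS parameter. A rough, hedged way to measure how many PFAS samples are affected, querying the upstream table (name and path assumed):

import duckdb

# Count PFAS samples without a SPFAS parameter, i.e. those dropped by the HAVING clause (sketch).
con = duckdb.connect("database/data.duckdb", read_only=True)
dropped = con.sql("""
    SELECT COUNT(*) FROM (
        SELECT referenceprel
        FROM int__resultats_udi_communes
        WHERE categorie = 'pfas'
        GROUP BY referenceprel
        HAVING COUNT(DISTINCT CASE WHEN cdparametresiseeaux = 'SPFAS' THEN referenceprel END) = 0
    ) AS t
""").fetchone()[0]
print(f"PFAS samples without SPFAS (dropped by the model): {dropped}")
con.close()
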
--------------------------------------------------------------------------------
/pipelines/notebooks/test_atlasante_udi.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import duckdb\n",
10 | "from pipelines.tasks.config.common import DUCKDB_FILE\n",
11 | "\n",
12 | "con = duckdb.connect(database=DUCKDB_FILE, read_only=True)\n",
13 | "# show all tables in DB\n",
14 | "con.sql(\"SHOW TABLES;\").show()"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "# describe atlasante_udi table\n",
24 | "df = con.sql(\"DESCRIBE atlasante_udi;\").df()\n",
25 | "print(df)"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": null,
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 |     "# install spatial extension for spatial functions\n",
35 | "con.sql(\"INSTALL spatial;\")\n",
36 | "# Load spatial extension\n",
37 | "con.sql(\"LOAD spatial;\")"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 |     "# show Paris's UDIs (uge_nom contains 'EAU DE PARIS')\n",
47 |     "df = con.sql(\"SELECT * FROM atlasante_udi WHERE uge_nom LIKE '%EAU DE PARIS%'\").df()\n",
48 | "df.head()"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "# Paris North (Nord - near Saint-Denis):\n",
58 | "# latitudeN = 48.9358\n",
59 | "# longitudeN = 2.3538\n",
60 | "# Paris South\n",
61 | "# latitudeS = 48.8186\n",
62 | "# longitudeS = 2.3326\n",
63 | "# Paris West (Ouest - near Porte Maillot/Neuilly):\n",
64 | "# latitudeW = 48.8781\n",
65 | "# longitudeW = 2.2785\n",
66 | "# Central Paris (Centre - Notre-Dame):\n",
67 | "latitude = 48.8566\n",
68 | "longitude = 2.3522\n",
69 | "\n",
70 | "sql = f\"\"\"\n",
71 | "SELECT *\n",
72 | "FROM atlasante_udi\n",
73 | "WHERE ST_Contains(geom, ST_GeomFromText('POINT({longitude} {latitude})'));\n",
74 | "\"\"\"\n",
75 | "df = con.sql(sql).df()\n",
76 | "df.head()"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "con.close()"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": []
94 | }
95 | ],
96 | "metadata": {
97 | "kernelspec": {
98 | "display_name": ".venv",
99 | "language": "python",
100 | "name": "python3"
101 | },
102 | "language_info": {
103 | "codemirror_mode": {
104 | "name": "ipython",
105 | "version": 3
106 | },
107 | "file_extension": ".py",
108 | "mimetype": "text/x-python",
109 | "name": "python",
110 | "nbconvert_exporter": "python",
111 | "pygments_lexer": "ipython3",
112 | "version": "3.12.7"
113 | }
114 | },
115 | "nbformat": 4,
116 | "nbformat_minor": 2
117 | }
118 |
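The last query in this notebook is the core point-in-UDI lookup. Below is a minimal sketch wrapping it into a reusable, parameterized function; the atlasante_udi table name and DB path are assumptions, and ST_Point takes longitude first, matching the POINT(lon lat) text used above.

import duckdb

def find_udi(con: duckdb.DuckDBPyConnection, longitude: float, latitude: float):
    """Return the UDI polygons containing a WGS84 point (sketch; table name assumed)."""
    con.sql("INSTALL spatial;")
    con.sql("LOAD spatial;")
    return con.execute(
        "SELECT * FROM atlasante_udi WHERE ST_Contains(geom, ST_Point(?, ?))",
        [longitude, latitude],
    ).df()

con = duckdb.connect("database/data.duckdb", read_only=True)
print(find_udi(con, 2.3522, 48.8566).head())  # central Paris (Notre-Dame)
con.close()
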
--------------------------------------------------------------------------------
/dbt_/models/website/web__stats_udi.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | -- Latest update
3 | derniere_maj AS (
4 | SELECT
5 | 'derniere_mise_a_jour' AS stat_nom,
6 | NULL AS stat_chiffre,
7 | max(date_dernier_prel)::VARCHAR AS stat_texte
8 | FROM {{ ref('web__resultats_udi') }}
9 | WHERE periode = 'dernier_prel'
10 | ),
11 |
12 | -- Total UDIs
13 | total_udis AS (
14 | SELECT
15 | 'total_udis' AS stat_nom,
16 | NULL AS stat_texte,
17 | count(DISTINCT cdreseau) AS stat_chiffre
18 | FROM {{ ref('web__resultats_udi') }}
19 | WHERE periode = 'dernier_prel'
20 | ),
21 |
22 | -- Per-category statistics for the latest sample
23 | stats_dernier_prel AS (
24 | SELECT
25 | NULL AS stat_texte,
26 | 'dernier_prel_' || categorie || '_' || coalesce(resultat, 'non_recherche')
27 | AS stat_nom,
28 | count(*) AS stat_chiffre
29 | FROM {{ ref('web__resultats_udi') }}
30 | WHERE
31 | periode = 'dernier_prel'
32 |
33 | GROUP BY categorie, resultat
34 |
35 | ),
36 |
37 | -- Per-category and per-year statistics for the annual report - ratios bucketed by interval
38 | stats_bilan_annuel_ratio AS (
39 | SELECT
40 | NULL AS stat_texte,
41 | periode || '_' || categorie || '_ratio_'
42 | || CASE
43 | WHEN ratio = 0 THEN '0'
44 | WHEN ratio <= 0.25 THEN '0.25'
45 | WHEN ratio <= 0.5 THEN '0.5'
46 | WHEN ratio <= 0.75 THEN '0.75'
47 | WHEN ratio <= 1 THEN '1'
48 | ELSE 'erreur'
49 | END AS stat_nom,
50 | count(*) AS stat_chiffre
51 | FROM {{ ref('web__resultats_udi') }}
52 | WHERE
53 | periode LIKE 'bilan_annuel_%'
54 | AND ratio IS NOT NULL
55 | GROUP BY
56 | periode,
57 | categorie,
58 | CASE
59 | WHEN ratio = 0 THEN '0'
60 | WHEN ratio <= 0.25 THEN '0.25'
61 | WHEN ratio <= 0.5 THEN '0.5'
62 | WHEN ratio <= 0.75 THEN '0.75'
63 | WHEN ratio <= 1 THEN '1'
64 | ELSE 'erreur'
65 | END
66 | ),
67 |
68 | -- Per-category and per-year statistics for the annual report - not tested (null ratio)
69 | stats_bilan_annuel_non_recherche AS (
70 | SELECT
71 | NULL AS stat_texte,
72 | periode || '_' || categorie || '_non_recherche' AS stat_nom,
73 | count(*) AS stat_chiffre
74 | FROM {{ ref('web__resultats_udi') }}
75 | WHERE
76 | periode LIKE 'bilan_annuel_%'
77 | AND ratio IS NULL
78 | GROUP BY periode, categorie
79 | )
80 |
81 | -- Union of all the statistics
82 | SELECT
83 | stat_nom,
84 | stat_chiffre,
85 | stat_texte
86 | FROM derniere_maj
87 | UNION ALL
88 | SELECT
89 | stat_nom,
90 | stat_chiffre,
91 | stat_texte
92 | FROM total_udis
93 | UNION ALL
94 | SELECT
95 | stat_nom,
96 | stat_chiffre,
97 | stat_texte
98 | FROM stats_dernier_prel
99 | UNION ALL
100 | SELECT
101 | stat_nom,
102 | stat_chiffre,
103 | stat_texte
104 | FROM stats_bilan_annuel_ratio
105 | UNION ALL
106 | SELECT
107 | stat_nom,
108 | stat_chiffre,
109 | stat_texte
110 | FROM stats_bilan_annuel_non_recherche
111 |
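Each row of this model carries either stat_chiffre or stat_texte. A hedged sketch of how a consumer could collapse it into a single lookup dict (table name and DB path are assumptions about how the model is materialized):

import duckdb

# Collapse the stats table into one dict keyed by stat_nom (sketch; table/path assumed).
con = duckdb.connect("database/data.duckdb", read_only=True)
rows = con.sql("SELECT stat_nom, stat_chiffre, stat_texte FROM web__stats_udi").fetchall()
stats = {nom: chiffre if chiffre is not None else texte for nom, chiffre, texte in rows}
print(stats.get("derniere_mise_a_jour"), stats.get("total_udis"))
con.close()
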
--------------------------------------------------------------------------------
/dbt_/seeds/schema.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | seeds:
4 | - name: references_generations_futures
5 | config:
6 | quote_char: '"'
7 | delimiter: ","
8 | full_refresh: true
9 | description: "Table de référence fournie par Générations Futures"
10 | columns:
11 | - name: cdparametresiseeaux
12 | description: "Code SISE-Eaux (Système d'Information des services Santé-Environnement Eaux) du paramètre"
13 | - name: cdparametre
14 | description: "Code SANDRE (Service National d'Administration des Données et Référentiels sur l'Eau) du paramètre"
15 | - name: libmajparametre
16 | description: "Nom du paramètre en majuscule"
17 | tests:
18 | - dbt_expectations.expect_column_values_to_be_of_type:
19 | column_type: VARCHAR
20 | - name: libminparametre
21 | description: "Nom du paramètre en minuscule"
22 | tests:
23 | - dbt_expectations.expect_column_values_to_be_of_type:
24 | column_type: VARCHAR
25 | - name: casparam
26 | description: "Code CAS (Chemical Abstracts Service) de la substance chimique"
27 | - name: categorie_1
28 | description: "Catégorie du paramètre"
29 | tests:
30 | - not_null
31 | - accepted_values:
32 | values:
33 | - "pfas"
34 | - "cvm"
35 | - "nitrate"
36 | - "metaux_lourds"
37 | - "substances_indus"
38 | - "pesticide"
39 | - name: categorie_2
40 | description: "Sous-catégorie"
41 | - name: categorie_3
42 | description: "Détail de la sous-catégorie"
43 | - name: limite_qualite
44 | description: "Limite de qualité du paramètre"
45 | tests:
46 | - dbt_expectations.expect_column_values_to_be_of_type:
47 | column_type: double
48 | - name: limite_qualite_unite
49 | description: "Unité de la limite de qualité"
50 | - name: limite_qualite_commentaire
51 | description: "Commentaire sur la limite de qualité"
52 | - name: limite_indicative
53 | description: "Limite indicative du paramètre"
54 | tests:
55 | - dbt_expectations.expect_column_values_to_be_of_type:
56 | column_type: double
57 | - name: limite_indicative_unite
58 | description: "Unité de la limite indicative"
59 | - name: valeur_sanitaire_1
60 | description: "Valeur sanitaire"
61 | tests:
62 | - dbt_expectations.expect_column_values_to_be_of_type:
63 | column_type: double
64 | - name: valeur_sanitaire_1_unite
65 | description: "Unité de la valeur sanitaire 1"
66 | - name: valeur_sanitaire_1_commentaire
67 | description: "Commentaire sur la valeur sanitaire 1"
68 | - name: valeur_sanitaire_2
69 | description: "Deuxième valeur sanitaire"
70 | tests:
71 | - dbt_expectations.expect_column_values_to_be_of_type:
72 | column_type: integer
73 | - name: valeur_sanitaire_2_unite
74 | description: "Unité de la valeur sanitaire 2"
75 | - name: valeur_sanitaire_2_commentaire
76 | description: "Commentaire sur la valeur sanitaire 2"
77 | - name: web_label
78 | description: "Libellé utilisé dans le site web"
79 |
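The accepted_values test on categorie_1 can also be reproduced on the raw seed before dbt runs. A minimal sketch, assuming the seed CSV lives at dbt_/seeds/references_generations_futures.csv (path inferred from the seed name, not confirmed):

import duckdb

# Pre-check the categorie_1 accepted values on the raw seed CSV (sketch; path assumed).
ALLOWED = {"pfas", "cvm", "nitrate", "metaux_lourds", "substances_indus", "pesticide"}
rows = duckdb.sql(
    "SELECT DISTINCT categorie_1 FROM read_csv_auto('dbt_/seeds/references_generations_futures.csv')"
).fetchall()
unexpected = {r[0] for r in rows} - ALLOWED
assert not unexpected, f"unexpected categorie_1 values: {unexpected}"
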
--------------------------------------------------------------------------------
/pipelines/tasks/config/config_uploaded_geojson.py:
--------------------------------------------------------------------------------
1 | """Configuration for uploaded GeoJSON files.
2 |
3 | This configuration supports several GeoJSON files uploaded manually to S3.
4 | To add a new file, simply append a dictionary to the 'files' list.
5 | 
6 | Format of each file entry:
7 | - path: relative path of the file on S3 (combined with the prefix)
8 | - table_name: name of the table to create in DuckDB
9 | - local_file_name: name of the local file to download to
10 | """
11 |
12 | uploaded_geojson_config = {
13 | "source": {
14 |         "prefix": "upload",  # S3 prefix
15 | },
16 | "files": [
17 |         # {
18 |         #     # This first source contains the Atlasante GeoJSON of the UDIs from the "infofactures" for metropolitan France for the year 2023.
19 |         #     # To obtain it:
20 |         #     # - start from the following source: https://catalogue.atlasante.fr/geonetwork/srv/fre/catalog.search#/metadata/1d02cd8b-137d-4360-b566-f6082a47ee32
21 |         #     # - click on "accès à la carte" (you should land on this URL: https://carto.atlasante.fr/1/ars_metropole_udi_infofactures.map)
22 |         #     # - on the left of the map, click on the button showing the three "couches" (layers)
23 |         #     # - pick the layer "Réseaux (UDI) - 2023" in the list (under "Historique"), then click on the three dots on the right, then click on "Télécharger la donnée"
24 |         #     # - choose the "GeoJSON" format and the "WGS84 - GPS (EPSG 4326)" projection, then click on "Exécution directe" to download the file
25 |         #     # - extract the file "dgs_metropole_udi_infofactures_j.json" from the downloaded zip
26 |         #     # - rename the downloaded file and upload it to the appropriate folder (see `path` below)
27 |         #     #
28 |         #     "path": "atlasante/udi_infofactures_2023.json",
29 |         #     "table_name": "atlasante_udi_2023",
30 |         #     "local_file_name": "udi_infofactures_2023.json",
31 |         # },
32 |         # {
33 |         #     # UDIs of Corsica
34 |         #     # To obtain it:
35 |         #     # - start from the following source: https://catalogue.atlasante.fr/geonetwork/srv/fre/catalog.search#/metadata/67a6998e-15b2-4796-9584-c87af156f549
36 |         #     # - under "Accès au téléchargement des données", click on "Télécharger"
37 |         #     # - choose the "GeoJSON" format and the "WGS84 - GPS (EPSG 4326)" projection, then click on "Exécution directe" to download the file
38 |         #     # - extract the file "ars_r94_udi_2018_z.json" from the downloaded zip
39 |         #     # - rename the downloaded file and upload it to the appropriate folder (see `path` below)
40 |         #     #
41 |         #     "path": "atlasante/udi_corse.json",
42 |         #     "table_name": "atlasante_udi_corse",
43 |         #     "local_file_name": "udi_corse.json",
44 |         # },
45 | {
46 |             # Atlasante GeoJSON of the UDIs from the "infofactures" for the year 2024.
47 |             # Obtained the same way as for the year 2023 (see the comment in the first block).
48 |             # Geographic coverage: metropolitan France + Corsica.
49 | "path": "atlasante/udi_infofactures_2024.json",
50 | "table_name": "atlasante_udi_2024",
51 | "local_file_name": "udi_infofactures_2024.json",
52 | }
53 | ],
54 | }
55 |
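For context, a hedged sketch of how a task could consume this config: download each file from S3 and load it into DuckDB through the spatial extension. The helper classes and constants are reused from this repo, but the loading logic (S3 key layout, ST_Read, table creation) is an assumption, not the actual upload task.

import os
import duckdb
from pipelines.tasks.config.common import CACHE_FOLDER, DUCKDB_FILE
from pipelines.tasks.config.config_uploaded_geojson import uploaded_geojson_config
from pipelines.utils.storage_client import ObjectStorageClient

s3 = ObjectStorageClient()
con = duckdb.connect(DUCKDB_FILE)
con.sql("INSTALL spatial;")
con.sql("LOAD spatial;")
prefix = uploaded_geojson_config["source"]["prefix"]
for file_config in uploaded_geojson_config["files"]:
    local_path = os.path.join(CACHE_FOLDER, file_config["local_file_name"])
    # S3 key layout assumed: <prefix>/<path>
    s3.download_object(f"{prefix}/{file_config['path']}", local_path)
    # Loading via the spatial extension's ST_Read is an assumption about the real task
    con.sql(
        f"CREATE OR REPLACE TABLE {file_config['table_name']} AS "
        f"SELECT * FROM ST_Read('{local_path}')"
    )
con.close()
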
--------------------------------------------------------------------------------
/pipelines/tasks/download_pmtiles.py:
--------------------------------------------------------------------------------
1 | """
2 | Download PMtiles files.
3 |
4 | Args:
5 | - env (str): Environment to download from ("dev" or "prod")
6 | - use-boto3 (bool): Use boto3 library to download from S3 storage, instead of using public HTTPS URL (default: False)
7 |
8 | Examples:
9 | - download_pmtiles --env prod : Download PMtiles from production environment
10 | - download_pmtiles --env dev : Download PMtiles from development environment
11 | - download_pmtiles --use-boto3 : Download PMtiles from S3 storage
12 | """
13 |
14 | import os
15 | from abc import ABC, abstractmethod
16 |
17 | from pipelines.config.config import get_s3_path_pmtiles
18 | from pipelines.tasks.config.common import CACHE_FOLDER, download_file_from_https
19 | from pipelines.utils.logger import get_logger
20 | from pipelines.utils.storage_client import ObjectStorageClient
21 |
22 | logger = get_logger(__name__)
23 |
24 |
25 | class PMtilesDownloadStrategy(ABC):
26 |     """Interface for PMtiles download strategies."""
27 |
28 | def __init__(self):
29 | super().__init__()
30 | self.s3 = ObjectStorageClient()
31 |
32 | @abstractmethod
33 | def download(self, env: str, local_path: str):
34 | pass
35 |
36 |
37 | class Boto3DownloadStrategy(PMtilesDownloadStrategy):
38 | """Strategy for downloading PMtiles from S3 storage using boto3."""
39 |
40 | def download(self, env: str, local_path: str):
41 | logger.info(f"Downloading PMtiles from S3 in environment {env}")
42 | remote_s3_path = get_s3_path_pmtiles(env)
43 | self.s3.download_object(remote_s3_path, local_path)
44 | logger.info(
45 |             f"✅ PMtiles downloaded from s3://{self.s3.bucket_name}/{remote_s3_path}"
46 | )
47 |
48 |
49 | class HTTPSDownloadStrategy(PMtilesDownloadStrategy):
50 | """Strategy for downloading PMtiles via HTTPS."""
51 |
52 | def download(self, env: str, local_path: str):
53 | logger.info("Downloading PMtiles via HTTPS")
54 | remote_s3_path = get_s3_path_pmtiles(env)
55 | url = f"https://{self.s3.bucket_name}.{self.s3.endpoint_url.split('https://')[1]}/{remote_s3_path}"
56 | download_file_from_https(url=url, filepath=local_path)
57 |         logger.info(f"✅ PMtiles downloaded via HTTPS: {url} -> {local_path}")
58 |
59 |
60 | class PMtilesDownloader:
61 | """Manages the PMtiles download process."""
62 |
63 | def __init__(self, strategy: PMtilesDownloadStrategy, env: str):
64 | self.strategy = strategy
65 |         self.local_pmtiles_path = os.path.join(
66 | CACHE_FOLDER, "new-georef-france-commune-prelevement.pmtiles"
67 | )
68 | if env not in ("dev", "prod"):
69 | raise ValueError("'env' must be 'dev' or 'prod'")
70 | self.env = env
71 |
72 | def download(self):
73 |         self.strategy.download(self.env, self.local_pmtiles_path)
74 |
75 |
76 | def execute(env: str, use_boto3: bool = False):
77 | """
78 | Execute PMtiles download using the appropriate strategy.
79 |
80 | Args:
81 | env (str): Environment to download from ("dev" or "prod")
82 | use_boto3 (bool): Whether to use boto3 instead of HTTPS. Default is False.
83 | """
84 | strategy = Boto3DownloadStrategy() if use_boto3 else HTTPSDownloadStrategy()
85 | downloader = PMtilesDownloader(strategy, env)
86 | downloader.download()
87 |
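The flags documented in the module docstring map onto execute() roughly as in the illustrative wrapper below. The real entry point is pipelines/run.py; this standalone argparse sketch is only an assumption about how the arguments are wired.

import argparse
from pipelines.tasks.download_pmtiles import execute

if __name__ == "__main__":
    # Illustrative CLI wrapper (sketch); mirrors the documented --env and --use-boto3 flags.
    parser = argparse.ArgumentParser(description="Download PMtiles files")
    parser.add_argument("--env", choices=["dev", "prod"], default="prod")
    parser.add_argument("--use-boto3", action="store_true")
    args = parser.parse_args()
    execute(env=args.env, use_boto3=args.use_boto3)
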
--------------------------------------------------------------------------------
/Dockerfile.unified:
--------------------------------------------------------------------------------
1 | # Unified Dockerfile - Embeds database and pmtiles for atomic deployments
2 |
3 | # Builder stage for compiling the application
4 | # Note: debian bookworm is supported until 2028-06-30
5 | FROM debian:bookworm-slim AS builder
6 |
7 | # Define build argument for API key
8 | ARG NEXT_PUBLIC_PROTOMAPS_API_KEY
9 |
10 | # Install UV
11 | COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
12 | ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
13 | ENV UV_PYTHON_INSTALL_DIR=/python
14 | ENV UV_PYTHON_PREFERENCE=only-managed
15 | ENV UV_NO_CACHE=1
16 | RUN uv python install 3.12
17 |
18 | # Install Node.js and other required dependencies
19 | RUN apt-get update && apt-get install -y --no-install-recommends \
20 | nodejs \
21 | npm \
22 | ca-certificates \
23 | curl \
24 | && rm -rf /var/lib/apt/lists/*
25 |
26 | # Set up Node.js environment
27 | WORKDIR /app/webapp
28 | COPY webapp/package.json webapp/package-lock.json /app/webapp/
29 | RUN npm ci
30 |
31 | # Set up Python environment with UV
32 | WORKDIR /app
33 | COPY README.md pyproject.toml uv.lock /app/
34 | COPY pipelines /app/pipelines
35 | RUN uv sync
36 |
37 | # Copy pre-built database and pmtiles
38 | COPY database/data.duckdb /app/database/data.duckdb
39 | COPY database/cache/*.pmtiles /app/public/pmtiles/
40 |
41 | # Create trimmed database for website
42 | RUN uv run pipelines/run.py run trim_database_for_website --output-file=database/data_for_website.duckdb
43 |
44 | # Copy next.js app and build it
45 | WORKDIR /app/webapp
46 | COPY webapp /app/webapp
47 | ENV NEXT_TELEMETRY_DISABLED=1
48 | ENV NODE_ENV=production
49 | ENV NEXT_PUBLIC_PROTOMAPS_API_KEY=$NEXT_PUBLIC_PROTOMAPS_API_KEY
50 | ENV DUCKDB_PATH="/app/database/data_for_website.duckdb"
51 | RUN npm run build
52 |
53 |
54 |
55 | # Runner stage - only contains the necessary runtime files
56 | FROM debian:bookworm-slim AS runner
57 |
58 | # Define build argument for API key
59 | ARG NEXT_PUBLIC_PROTOMAPS_API_KEY
60 |
61 | # Install Node.js (minimal dependencies for runtime)
62 | RUN apt-get update && apt-get install -y --no-install-recommends \
63 | nodejs \
64 | ca-certificates \
65 | && rm -rf /var/lib/apt/lists/*
66 |
67 | # Create non-root user
68 | RUN addgroup --system --gid 1000 appgroup && \
69 | adduser --system --uid 1000 appuser
70 |
71 | WORKDIR /app
72 |
73 | # Create directories
74 | RUN mkdir -p /app/database /app/public/pmtiles
75 | RUN chown -R appuser:appgroup /app
76 |
77 | # Copy webapp files
78 | COPY --from=builder --chown=appuser:appgroup /app/webapp/.next/standalone /app
79 | COPY --from=builder --chown=appuser:appgroup /app/webapp/.next/static /app/.next/static
80 | COPY --from=builder --chown=appuser:appgroup /app/webapp/public /app/public
81 |
82 | # Copy database and pmtiles
83 | COPY --from=builder --chown=appuser:appgroup /app/database/data_for_website.duckdb /app/database/data_for_website.duckdb
84 | COPY --from=builder --chown=appuser:appgroup /app/public/pmtiles/ /app/public/pmtiles/
85 |
86 | # Set environment variables
87 | ENV NODE_ENV=production
88 | ENV NEXT_TELEMETRY_DISABLED=1
89 | ENV PORT=8080
90 | ENV HOSTNAME="0.0.0.0"
91 | ENV NEXT_PUBLIC_PROTOMAPS_API_KEY=$NEXT_PUBLIC_PROTOMAPS_API_KEY
92 | ENV DUCKDB_PATH="/app/database/data_for_website.duckdb"
93 | ENV HOME="/app"
94 |
95 | # Switch to non-root user
96 | USER appuser
97 |
98 | # Expose the port
99 | EXPOSE 8080
100 |
101 | # Start the application
102 | CMD ["node", "server.js"]
--------------------------------------------------------------------------------
/dbt_/models/website/web__resultats_udi.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | periodes AS (
3 | SELECT unnest(ARRAY[
4 | 'bilan_annuel_2020',
5 | 'bilan_annuel_2021',
6 | 'bilan_annuel_2022',
7 | 'bilan_annuel_2023',
8 | 'bilan_annuel_2024',
9 | 'bilan_annuel_2025',
10 | 'dernier_prel'
11 | ]) AS periode
12 | ),
13 |
14 | categories AS (
15 | SELECT unnest(ARRAY[
16 | 'cvm',
17 | 'pfas',
18 | 'sub_indus_perchlorate',
19 |         -- Results for 1,4-dioxane are ignored for now
20 | --'sub_indus_14dioxane',
21 | 'pesticide',
22 | 'sub_active',
23 | 'metabolite',
24 | 'metabolite_esa_metolachlore',
25 | 'metabolite_chlorothalonil_r471811',
26 | 'metabolite_chloridazone_desphenyl',
27 | 'metabolite_chloridazone_methyl_desphenyl',
28 | 'metabolite_atrazine_desethyl',
29 | --'metaux_lourds_as',
30 | --'metaux_lourds_pb',
31 | 'nitrate',
32 | 'tous'
33 | ]) AS categorie
34 | ),
35 |
36 | udi AS (
37 | SELECT
38 | cdreseau,
39 | nomreseaux
40 | FROM
41 | {{ ref('int__udi') }}
42 | ),
43 |
44 | -- Cross join to ensure all combinations exist
45 | udi_periodes_categories AS (
46 | SELECT
47 | u.cdreseau,
48 | u.nomreseaux,
49 | p.periode,
50 | categories.categorie
51 | FROM
52 | udi AS u
53 | CROSS JOIN
54 | periodes AS p
55 | CROSS JOIN
56 | categories
57 | ),
58 |
59 | -- Append results from 'tous' category (in another model to avoid circular dependency)
60 | results AS (
61 | SELECT
62 | cdreseau,
63 | periode,
64 | categorie,
65 | resultat,
66 | ratio,
67 | date_dernier_prel,
68 | nb_parametres,
69 | nb_prelevements,
70 | nb_sup_valeur_sanitaire,
71 | parametres_detectes
72 | FROM {{ ref('int__union_resultats_udi') }}
73 | UNION ALL
74 | SELECT
75 | cdreseau,
76 | periode,
77 | categorie,
78 | null AS resultat,
79 | ratio,
80 | null AS date_dernier_prel,
81 | null AS nb_parametres,
82 | nb_prelevements,
83 | nb_sup_valeur_sanitaire,
84 | null AS parametres_detectes
85 | FROM {{ ref('int__resultats_tous_udi_annuel') }}
86 | UNION ALL
87 | SELECT
88 | cdreseau,
89 | periode,
90 | categorie,
91 | resultat,
92 | null AS ratio,
93 | date_dernier_prel,
94 | nb_parametres,
95 | null AS nb_prelevements,
96 | null AS nb_sup_valeur_sanitaire,
97 | null AS parametres_detectes
98 | FROM {{ ref('int__resultats_tous_udi_dernier') }}
99 | )
100 |
101 | -- Final output with all UDI-periodes-categories combinations
102 | SELECT
103 | upc.cdreseau,
104 | upc.nomreseaux,
105 | upc.periode,
106 | upc.categorie,
107 | r.resultat,
108 | r.ratio,
109 | r.date_dernier_prel,
110 | r.nb_parametres,
111 | r.nb_prelevements,
112 | r.nb_sup_valeur_sanitaire,
113 | r.parametres_detectes
114 | FROM
115 | udi_periodes_categories AS upc
116 | LEFT JOIN
117 | results AS r
118 | ON
119 | upc.cdreseau = r.cdreseau
120 | AND upc.periode = r.periode
121 | AND upc.categorie = r.categorie
122 | ORDER BY
123 | upc.cdreseau,
124 | upc.periode,
125 | r.categorie
126 |
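The cross join guarantees that every UDI exposes all 7 periods × 13 active categories (91 combinations), even when no result joins. A hedged sketch of checking that invariant against the built table (name and path assumed):

import duckdb

# Verify the cross-join invariant: 91 distinct (periode, categorie) pairs per UDI (sketch).
con = duckdb.connect("database/data.duckdb", read_only=True)
bad = con.sql("""
    SELECT cdreseau
    FROM web__resultats_udi
    GROUP BY cdreseau
    HAVING COUNT(DISTINCT periode || '/' || categorie) != 7 * 13
""").fetchall()
assert not bad, f"UDIs with missing (periode, categorie) combinations: {bad[:5]}"
con.close()
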
--------------------------------------------------------------------------------
/dbt_/models/website/web__resultats_communes.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | periodes AS (
3 | SELECT unnest(ARRAY[
4 | 'bilan_annuel_2020',
5 | 'bilan_annuel_2021',
6 | 'bilan_annuel_2022',
7 | 'bilan_annuel_2023',
8 | 'bilan_annuel_2024',
9 | 'bilan_annuel_2025',
10 | 'dernier_prel'
11 | ]) AS periode
12 | ),
13 |
14 | categories AS (
15 | SELECT unnest(ARRAY[
16 | 'cvm',
17 | 'pfas',
18 | 'sub_indus_perchlorate',
19 |         -- Results for 1,4-dioxane are ignored for now
20 | --'sub_indus_14dioxane',
21 | 'pesticide',
22 | 'sub_active',
23 | 'metabolite',
24 | 'metabolite_esa_metolachlore',
25 | 'metabolite_chlorothalonil_r471811',
26 | 'metabolite_chloridazone_desphenyl',
27 | 'metabolite_chloridazone_methyl_desphenyl',
28 | 'metabolite_atrazine_desethyl',
29 | --'metaux_lourds_as',
30 | --'metaux_lourds_pb',
31 | 'nitrate',
32 | 'tous'
33 | ]) AS categorie
34 | ),
35 |
36 | cog_communes AS (
37 | SELECT
38 | com AS commune_code_insee,
39 | libelle AS commune_nom
40 | FROM {{ ref("stg_communes__cog") }}
41 | WHERE typecom = 'COM'
42 | ),
43 |
44 | -- Cross join to ensure all combinations exist
45 | communes_periodes_categories AS (
46 | SELECT
47 | cog.commune_code_insee,
48 | cog.commune_nom,
49 | p.periode,
50 | categories.categorie
51 | FROM
52 | cog_communes AS cog
53 | CROSS JOIN
54 | periodes AS p
55 | CROSS JOIN
56 | categories
57 | ),
58 |
59 | -- Append results from 'tous' category (in another model to avoid circular dependency)
60 | results AS (
61 | SELECT
62 | inseecommune,
63 | periode,
64 | categorie,
65 | resultat,
66 | ratio,
67 | date_dernier_prel,
68 | nb_parametres,
69 | nb_prelevements,
70 | nb_sup_valeur_sanitaire,
71 | parametres_detectes
72 | FROM {{ ref('int__union_resultats_commune') }}
73 | UNION ALL
74 | SELECT
75 | inseecommune,
76 | periode,
77 | categorie,
78 | null AS resultat,
79 | ratio,
80 | null AS date_dernier_prel,
81 | null AS nb_parametres,
82 | nb_prelevements,
83 | nb_sup_valeur_sanitaire,
84 | null AS parametres_detectes
85 | FROM {{ ref('int__resultats_tous_commune_annuel') }}
86 | UNION ALL
87 | SELECT
88 | inseecommune,
89 | periode,
90 | categorie,
91 | resultat,
92 | null AS ratio,
93 | date_dernier_prel,
94 | nb_parametres,
95 | null AS nb_prelevements,
96 | null AS nb_sup_valeur_sanitaire,
97 | null AS parametres_detectes
98 | FROM {{ ref('int__resultats_tous_commune_dernier') }}
99 | )
100 |
101 | -- Final output with all inseecommune-periodes-categories combinations
102 | SELECT
103 | cpc.commune_code_insee,
104 | cpc.commune_nom,
105 | cpc.periode,
106 | cpc.categorie,
107 | r.resultat,
108 | r.ratio,
109 | r.date_dernier_prel,
110 | r.nb_parametres,
111 | r.nb_prelevements,
112 | r.nb_sup_valeur_sanitaire,
113 | r.parametres_detectes
114 | FROM
115 | communes_periodes_categories AS cpc
116 | LEFT JOIN
117 | results AS r
118 | ON
119 | cpc.commune_code_insee = r.inseecommune
120 | AND cpc.periode = r.periode
121 | AND cpc.categorie = r.categorie
122 | ORDER BY
123 | cpc.commune_code_insee,
124 | cpc.periode,
125 | r.categorie
126 |
--------------------------------------------------------------------------------
/pipelines/tasks/config/config_edc.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 |
3 |
4 | def get_edc_config() -> Dict:
5 | """
6 |     Returns the configuration used for processing the EDC (Eau distribuée par commune) datasets.
7 |     The data comes from https://www.data.gouv.fr/fr/datasets/resultats-du-controle-sanitaire-de-leau-distribuee-commune-par-commune/
8 |     For each year, a dataset can be downloaded from a URL like this one (e.g. 2024):
9 |     https://www.data.gouv.fr/fr/datasets/r/84a67a3b-08a7-4001-98e6-231c74a98139
10 |     :return: A dict with the config used for processing.
11 |         The "source" part relates to the data.gouv.fr datasource.
12 |         The "files" part relates to the extracted files information and SQL table names.
13 | """
14 |
15 | edc_config = {
16 | "source": {
17 | "base_url": "https://www.data.gouv.fr/fr/datasets/r/",
18 | "available_years": [
19 | # "2016",
20 | # "2017",
21 | # "2018",
22 |                 # "2019",  # it was decided to start from the 2020 dataset
23 | "2020",
24 | "2021",
25 | "2022",
26 | "2023",
27 | "2024",
28 | "2025",
29 | ],
30 | "yearly_files_infos": {
31 | "2025": {
32 | "id": "7e38c236-dd3c-455e-a728-f0ecb84b1a7c",
33 | "zipfile": "dis-2025.zip",
34 | },
35 | "2024": {
36 | "id": "a631e486-c790-42d0-8368-6a42b1a3dc1d",
37 | "zipfile": "dis-2024.zip",
38 | },
39 | "2023": {
40 | "id": "c89dec4a-d985-447c-a102-75ba814c398e",
41 | "zipfile": "dis-2023.zip",
42 | },
43 | "2022": {
44 | "id": "a97b6074-c4dd-4ef2-8922-b0cf04dbff9a",
45 | "zipfile": "dis-2022.zip",
46 | },
47 | "2021": {
48 | "id": "d2b432cc-3761-44d3-8e66-48bc15300bb5",
49 | "zipfile": "dis-2021.zip",
50 | },
51 | "2020": {
52 | "id": "a6cb4fea-ef8c-47a5-acb3-14e49ccad801",
53 | "zipfile": "dis-2020.zip",
54 | },
55 | "2019": {
56 | "id": "861f2a7d-024c-4bf0-968b-9e3069d9de07",
57 | "zipfile": "dis-2019.zip",
58 | },
59 | "2018": {
60 | "id": "0513b3c0-dc18-468d-a969-b3508f079792",
61 | "zipfile": "dis-2018.zip",
62 | },
63 | "2017": {
64 | "id": "5785427b-3167-49fa-a581-aef835f0fb04",
65 | "zipfile": "dis-2017.zip",
66 | },
67 | "2016": {
68 | "id": "483c84dd-7912-483b-b96f-4fa5e1d8651f",
69 | "zipfile": "dis-2016.zip",
70 | },
71 | },
72 | },
73 | "files": {
74 | "communes": {
75 | "file_name_prefix": "DIS_COM_UDI_",
76 | "file_extension": ".txt",
77 | "table_name": "edc_communes",
78 | },
79 | "prelevements": {
80 | "file_name_prefix": "DIS_PLV_",
81 | "file_extension": ".txt",
82 | "table_name": "edc_prelevements",
83 | },
84 | "resultats": {
85 | "file_name_prefix": "DIS_RESULT_",
86 | "file_extension": ".txt",
87 | "table_name": "edc_resultats",
88 | },
89 | },
90 | }
91 |
92 | return edc_config
93 |
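A hedged sketch of how this config translates into download URLs and expected extracted file names; the <file_name_prefix><year><file_extension> naming of the files inside each zip is an assumption based on the prefixes above.

from pipelines.tasks.config.config_edc import get_edc_config

config = get_edc_config()
base_url = config["source"]["base_url"]
for year in config["source"]["available_years"]:
    infos = config["source"]["yearly_files_infos"][year]
    url = f"{base_url}{infos['id']}"  # e.g. https://www.data.gouv.fr/fr/datasets/r/<id>
    # Assumed naming of the extracted files: <prefix><year><extension>
    expected_files = [
        f"{f['file_name_prefix']}{year}{f['file_extension']}"
        for f in config["files"].values()
    ]
    print(year, url, infos["zipfile"], expected_files)
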
--------------------------------------------------------------------------------