├── database
│   └── .gitkeep
├── analytics
│   ├── __init__.py
│   └── notebooks
│       └── __init__.py
├── dbt_
│   ├── analyses
│   │   └── .gitkeep
│   ├── macros
│   │   └── .gitkeep
│   ├── seeds
│   │   ├── .gitkeep
│   │   └── schema.yml
│   ├── snapshots
│   │   └── .gitkeep
│   ├── tests
│   │   ├── .gitkeep
│   │   ├── test_valeur_saniraire_2.sql
│   │   ├── unique_datetimeprel_per_cdreseau_referenceprel.sql
│   │   ├── test__coverage_20pfas_4pfas_98pct.sql
│   │   ├── test_sub_active_results.sql
│   │   ├── test_tous_results.sql
│   │   ├── test_cvm_results.sql
│   │   ├── test_nitrates_results.sql
│   │   └── test_pfas_results.sql
│   ├── .gitignore
│   ├── packages.yml
│   ├── profiles.yml
│   ├── models
│   │   ├── staging
│   │   │   ├── communes
│   │   │   │   ├── stg_communes__opendatasoft.sql
│   │   │   │   ├── stg_communes__cog.sql
│   │   │   │   └── _communes_models.yml
│   │   │   ├── atlasante
│   │   │   │   ├── stg_atlasante_udi_corse.sql
│   │   │   │   ├── stg_atlasante_udi_2023.sql
│   │   │   │   ├── stg_atlasante_udi_2024.sql
│   │   │   │   └── _atlasante_models.yml
│   │   │   └── edc
│   │   │       ├── stg_edc__communes.sql
│   │   │       ├── stg_edc__resultats.sql
│   │   │       ├── stg_edc__prevelevements.sql
│   │   │       └── val_traduite__docs.md
│   │   ├── intermediate
│   │   │   ├── int__udi.sql
│   │   │   ├── int__commune_geom.sql
│   │   │   ├── tous
│   │   │   │   ├── int__resultats_tous_udi_annuel.sql
│   │   │   │   ├── int__resultats_tous_commune_annuel.sql
│   │   │   │   ├── int__resultats_tous_udi_dernier.sql
│   │   │   │   └── int__resultats_tous_commune_dernier.sql
│   │   │   ├── int__udi_geom.sql
│   │   │   ├── int__lien_commune_cdreseau.sql
│   │   │   ├── int__valeurs_de_reference.sql
│   │   │   ├── int__lien_cdreseau_refreneceprel.sql
│   │   │   ├── int__prelevements_uniques.sql
│   │   │   ├── int__parametres_non_references.sql
│   │   │   ├── nitrate
│   │   │   │   ├── int__resultats_nitrate_udi_annuel.sql
│   │   │   │   ├── int__resultats_nitrate_commune_annuel.sql
│   │   │   │   ├── int__resultats_nitrate_udi_dernier.sql
│   │   │   │   └── int__resultats_nitrate_commune_dernier.sql
│   │   │   ├── pesticide
│   │   │   │   ├── sub_active
│   │   │   │   │   ├── int__resultats_sub_active_udi_annuel.sql
│   │   │   │   │   ├── int__resultats_sub_active_commune_annuel.sql
│   │   │   │   │   ├── int__resultats_sub_active_udi_dernier.sql
│   │   │   │   │   └── int__resultats_sub_active_commune_dernier.sql
│   │   │   │   └── metabolite
│   │   │   │       ├── int__resultats_metabolite_udi_dernier.sql
│   │   │   │       └── int__resultats_metabolite_commune_dernier.sql
│   │   │   ├── cvm
│   │   │   │   ├── int__resultats_cvm_udi_annuel.sql
│   │   │   │   ├── int__resultats_cvm_commune_annuel.sql
│   │   │   │   ├── int__resultats_cvm_udi_dernier.sql
│   │   │   │   └── int__resultats_cvm_commune_dernier.sql
│   │   │   ├── sub_indus
│   │   │   │   ├── int__resultats_sub_indus_udi_annuel.sql
│   │   │   │   ├── int__resultats_sub_indus_commune_annuel.sql
│   │   │   │   ├── int__resultats_sub_indus_udi_dernier.sql
│   │   │   │   └── int__resultats_sub_indus_commune_dernier.sql
│   │   │   ├── metaux_lourds
│   │   │   │   ├── int__resultats_metaux_lourds_udi_annuel.sql
│   │   │   │   ├── int__resultats_metaux_lourds_commune_annuel.sql
│   │   │   │   ├── int__resultats_metaux_lourds_udi_dernier.sql
│   │   │   │   └── int__resultats_metaux_lourds_commune_dernier.sql
│   │   │   ├── int__resultats_udi_communes.sql
│   │   │   └── pfas
│   │   │       ├── int__resultats_pfas_udi_annuel.sql
│   │   │       └── int__resultats_pfas_commune_annuel.sql
│   │   ├── sources
│   │   │   └── __sources.yml
│   │   └── website
│   │       ├── web__stats_udi.sql
│   │       ├── web__resultats_udi.sql
│   │       └── web__resultats_communes.sql
│   └── dbt_project.yml
├── .python-version
├── webapp
│   ├── .prettierrc.json
│   ├── app
│   │   ├── duckdb-example
│   │   │   ├── loading.tsx
│   │   │   └── page.tsx
│   │   ├── page.tsx
│   │   ├── api
│   │   │   ├── udi
│   │   │   │   ├── dummy
│   │   │   │   │   └── route.ts
│   │   │   │   └── find
│   │   │   │       └── route.tsx
│   │   │   └── db-example
│   │   │       └── route.ts
│   │   ├── embed
│   │   │   └── page.tsx
│   │   ├── embed-external
│   │   │   └── page.tsx
│   │   ├── lib
│   │   │   └── duckdb.ts
│   │   ├── layout.tsx
│   │   └── config.ts
│   ├── public
│   │   ├── images
│   │   │   └── dfg.png
│   │   └── fonts
│   │       ├── raleway-v37-latin-300.woff2
│   │       ├── raleway-v37-latin-600.woff2
│   │       ├── raleway-v37-latin-700.woff2
│   │       └── raleway-v37-latin-regular.woff2
│   ├── postcss.config.mjs
│   ├── lib
│   │   ├── utils.ts
│   │   ├── iframe-scroll.ts
│   │   ├── mapLocale.ts
│   │   └── property.ts
│   ├── .env
│   ├── eslint.config.mjs
│   ├── components.json
│   ├── tsconfig.json
│   ├── .gitignore
│   ├── components
│   │   ├── ui
│   │   │   ├── input.tsx
│   │   │   ├── switch.tsx
│   │   │   ├── hover-card.tsx
│   │   │   ├── tooltip.tsx
│   │   │   ├── popover.tsx
│   │   │   ├── scroll-area.tsx
│   │   │   ├── button.tsx
│   │   │   └── card.tsx
│   │   └── EmbedBanner.tsx
│   ├── package.json
│   ├── next.config.ts
│   └── tailwind.config.ts
├── .sqlfluff
├── pipelines
│   ├── config
│   │   ├── .env.example
│   │   ├── __init__.py
│   │   └── config.py
│   ├── tasks
│   │   ├── __init__.py
│   │   ├── config
│   │   │   ├── config_insee.py
│   │   │   ├── config_pmtiles.py
│   │   │   ├── config_geojson.py
│   │   │   ├── common.py
│   │   │   ├── config_uploaded_geojson.py
│   │   │   └── config_edc.py
│   │   ├── client
│   │   │   ├── core
│   │   │   │   ├── https_to_duck_client.py
│   │   │   │   └── https_client.py
│   │   │   ├── commune_client.py
│   │   │   ├── opendatasoft_client.py
│   │   │   ├── pmtiles_processor.py
│   │   │   └── uploaded_geojson_client.py
│   │   ├── upload_database.py
│   │   ├── generate_pmtiles.py
│   │   ├── generate_pmtiles_legacy.py
│   │   └── download_pmtiles.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── logger.py
│   │   └── utils.py
│   ├── __init__.py
│   ├── test_pipelines.py
│   └── notebooks
│       ├── test_geojson_from_db.ipynb
│       └── test_atlasante_udi.ipynb
├── .dockerignore
├── .vscode
│   ├── extensions.json
│   ├── tasks.json
│   └── settings.json
├── Dockerfile.clevercloud
├── .github
│   └── workflows
│       ├── pre-commit.yaml
│       ├── lint_nextjs.yml
│       ├── test_dbt.yaml
│       └── test_pipelines.yaml
├── .gitignore
├── LICENSE
├── pyproject.toml
├── .pre-commit-config.yaml
└── Dockerfile.unified

/database/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/analytics/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dbt_/analyses/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dbt_/macros/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dbt_/seeds/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dbt_/snapshots/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dbt_/tests/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.12
2 |
--------------------------------------------------------------------------------
/analytics/notebooks/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/webapp/.prettierrc.json:
--------------------------------------------------------------------------------
1 
| {} 2 | -------------------------------------------------------------------------------- /.sqlfluff: -------------------------------------------------------------------------------- 1 | [sqlfluff] 2 | dialect = duckdb 3 | max_line_length = 100 4 | -------------------------------------------------------------------------------- /pipelines/config/.env.example: -------------------------------------------------------------------------------- 1 | SCW_ACCESS_KEY=MyKey 2 | SCW_SECRET_KEY=MySecret -------------------------------------------------------------------------------- /pipelines/config/__init__.py: -------------------------------------------------------------------------------- 1 | # tasks/__init__.py 2 | 3 | # Initialize the tasks package 4 | -------------------------------------------------------------------------------- /pipelines/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | # tasks/__init__.py 2 | 3 | # Initialize the tasks package 4 | -------------------------------------------------------------------------------- /pipelines/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # tasks/__init__.py 2 | 3 | # Initialize the tasks package 4 | -------------------------------------------------------------------------------- /dbt_/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | package-lock.yml 6 | .user.yml 7 | !seeds/*.csv -------------------------------------------------------------------------------- /webapp/app/duckdb-example/loading.tsx: -------------------------------------------------------------------------------- 1 | export default function Loading() { 2 | return "Loading..."; 3 | } 4 | -------------------------------------------------------------------------------- /pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # init the pipelines package 2 | from .utils.logger import setup_logger 3 | 4 | setup_logger() 5 | -------------------------------------------------------------------------------- /webapp/public/images/dfg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/images/dfg.png -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | **/node_modules 2 | .git 3 | .github 4 | webapp/.next 5 | .vscode 6 | .env* 7 | **/.env* 8 | npm-debug.log 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /webapp/app/page.tsx: -------------------------------------------------------------------------------- 1 | import { redirect } from "next/navigation"; 2 | 3 | export default async function Home() { 4 | redirect("/embed"); 5 | } 6 | -------------------------------------------------------------------------------- /dbt_/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: calogica/dbt_expectations 3 | version: 0.10.4 4 | 5 | - package: dbt-labs/dbt_utils 6 | version: 1.3.0 -------------------------------------------------------------------------------- /webapp/public/fonts/raleway-v37-latin-300.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/fonts/raleway-v37-latin-300.woff2 -------------------------------------------------------------------------------- /webapp/public/fonts/raleway-v37-latin-600.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/fonts/raleway-v37-latin-600.woff2 -------------------------------------------------------------------------------- /webapp/public/fonts/raleway-v37-latin-700.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/fonts/raleway-v37-latin-700.woff2 -------------------------------------------------------------------------------- /webapp/public/fonts/raleway-v37-latin-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/fonts/raleway-v37-latin-regular.woff2 -------------------------------------------------------------------------------- /webapp/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('postcss-load-config').Config} */ 2 | const config = { 3 | plugins: { 4 | tailwindcss: {}, 5 | }, 6 | }; 7 | 8 | export default config; 9 | -------------------------------------------------------------------------------- /dbt_/profiles.yml: -------------------------------------------------------------------------------- 1 | dbt_: 2 | outputs: 3 | dev: 4 | type: duckdb 5 | path: ../database/data.duckdb 6 | threads: 1 7 | extensions: 8 | - spatial 9 | target: dev 10 | -------------------------------------------------------------------------------- /webapp/lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { clsx, type ClassValue } from "clsx"; 2 | import { twMerge } from "tailwind-merge"; 3 | 4 | export function cn(...inputs: ClassValue[]) { 5 | return twMerge(clsx(inputs)); 6 | } 7 | -------------------------------------------------------------------------------- /dbt_/models/staging/communes/stg_communes__opendatasoft.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | com_code[1]::VARCHAR AS com_code, 3 | com_name[1]::VARCHAR AS com_name, 4 | geom::GEOMETRY AS geom 5 | FROM {{ source('communes', 'opendatasoft_communes') }} 6 | -------------------------------------------------------------------------------- /webapp/app/api/udi/dummy/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from "next/server"; 2 | import { mockData } from "@/app/lib/mock-data"; 3 | 4 | export async function GET() { 5 | return NextResponse.json(mockData["UDI12345"], { status: 200 }); 6 | } 7 | -------------------------------------------------------------------------------- /dbt_/models/staging/atlasante/stg_atlasante_udi_corse.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | gid::INTEGER AS gid, 3 | cn_udi::VARCHAR AS cn_udi, 4 | nom_udi::VARCHAR AS nom_udi, 5 | geom::GEOMETRY AS geom, 6 | ingestion_date::DATE AS ingestion_date 7 | FROM {{ source('atlasante', 'atlasante_udi_corse') }} 8 | -------------------------------------------------------------------------------- /.vscode/extensions.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "ms-python.python", 4 | "ms-toolsai.jupyter", 5 | "charliermarsh.ruff", 6 | "github.vscode-pull-request-github", 7 | "actboy168.tasks", 8 | "dbaeumer.vscode-eslint", 9 | "esbenp.prettier-vscode" 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /Dockerfile.clevercloud: -------------------------------------------------------------------------------- 1 | # Dockerfile for Clever Cloud - pulls pre-built unified image 2 | 3 | ARG IMAGE_TAG=latest 4 | FROM ghcr.io/dataforgoodfr/13_pollution_eau/pollution-eau-unified:${IMAGE_TAG} 5 | 6 | # Expose the port 7 | EXPOSE 8080 8 | 9 | # Use the same entrypoint as the unified image 10 | CMD ["node", "server.js"] -------------------------------------------------------------------------------- /pipelines/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def setup_logger( 5 | level=logging.INFO, 6 | log_format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 7 | ): 8 | "config log" 9 | logging.basicConfig(level=level, format=log_format) 10 | 11 | 12 | def get_logger(name): 13 | return logging.getLogger(name) 14 | -------------------------------------------------------------------------------- /dbt_/models/staging/atlasante/stg_atlasante_udi_2023.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | gid::INTEGER AS gid, 3 | code_udi::VARCHAR AS code_udi, 4 | ins_nom::VARCHAR AS ins_nom, 5 | uge_nom::VARCHAR AS uge_nom, 6 | udi_pop::VARCHAR AS udi_pop, 7 | geom::GEOMETRY AS geom, 8 | ingestion_date::DATE AS ingestion_date 9 | FROM {{ source('atlasante', 'atlasante_udi_2023') }} 10 | -------------------------------------------------------------------------------- /dbt_/models/staging/atlasante/stg_atlasante_udi_2024.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | gid::INTEGER AS gid, 3 | code_udi::VARCHAR AS code_udi, 4 | ins_nom::VARCHAR AS ins_nom, 5 | uge_nom::VARCHAR AS uge_nom, 6 | udi_pop::VARCHAR AS udi_pop, 7 | geom::GEOMETRY AS geom, 8 | ingestion_date::DATE AS ingestion_date 9 | FROM {{ source('atlasante', 'atlasante_udi_2024') }} 10 | -------------------------------------------------------------------------------- /webapp/.env: -------------------------------------------------------------------------------- 1 | # Utiliser le fichier .env est pour ajouter des variables non-secrètes 2 | # voir https://nextjs.org/docs/pages/building-your-application/configuring/environment-variables 3 | 4 | # variables disponible sur le navigateur et le serveur 5 | NEXT_PUBLIC_PROTOMAPS_API_KEY=707d8bc70b393fc0 6 | 7 | # variables disponibles uniquement sur le côté serveur 8 | DUCKDB_PATH=../database/data.duckdb 9 | -------------------------------------------------------------------------------- /dbt_/tests/test_valeur_saniraire_2.sql: -------------------------------------------------------------------------------- 1 | -- we make sure that valeur_sanitaire_2 is > valeur_sanitaire_1 2 | -- when they are not null 3 | -- cf int__resultats_sub_indus_udi_dernier.sql why it is required 4 | 5 | 6 | select * 7 | from {{ ref('int__valeurs_de_reference') }} 8 | where 9 | valeur_sanitaire_1 is not null 10 | and valeur_sanitaire_2 is not null 11 | and valeur_sanitaire_1 >= valeur_sanitaire_2 12 | 
-------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "label": "Download database", 6 | "type": "shell", 7 | "command": "uv run pipelines/run.py run download_database", 8 | "group": "none", 9 | "icon": { 10 | "id": "cloud-download" 11 | }, 12 | "presentation": { 13 | "reveal": "always", 14 | "panel": "new" 15 | } 16 | } 17 | ], 18 | } -------------------------------------------------------------------------------- /webapp/lib/iframe-scroll.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Utility function to request the parent window to scroll the iframe into view 3 | * This should be called when user interacts with map components 4 | */ 5 | export function scrollIframeToFullscreen() { 6 | // Check if we're in an iframe 7 | if (window.self !== window.top) { 8 | // Send message to parent window to scroll this iframe into view 9 | window.parent.postMessage({ type: "scrollToIframe" }, "*"); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/int__udi.sql: -------------------------------------------------------------------------------- 1 | select 2 | cdreseau, 3 | string_agg(distinct inseecommune) as inseecommunes, 4 | string_agg(distinct quartiers) as quartiers, 5 | string_agg(distinct nomreseaux) as nomreseaux 6 | 7 | 8 | from {{ ref('int__lien_commune_cdreseau') }} 9 | group by cdreseau 10 | 11 | -- TODO: on pourrait garder une partition avec "de_partition". 12 | -- A noter néanmoins que la seule dépendance à ce modèle (web__resultats_udi) 13 | -- ne le requiert pas. 
14 | -------------------------------------------------------------------------------- /pipelines/tasks/config/config_insee.py: -------------------------------------------------------------------------------- 1 | def get_insee_config() -> dict: 2 | """Configuration for La Poste dataset""" 3 | return { 4 | "source": { 5 | "base_url": "https://www.insee.fr/fr/statistiques/fichier/7766585/", 6 | "id": "v_commune_2024.csv", 7 | "datetime": "20240220", 8 | }, 9 | "file": { 10 | "file_name": "insee_communes_2024.csv", 11 | "table_name": "cog_communes", 12 | }, 13 | } 14 | -------------------------------------------------------------------------------- /webapp/eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { dirname } from "path"; 2 | import { fileURLToPath } from "url"; 3 | import { FlatCompat } from "@eslint/eslintrc"; 4 | 5 | const __filename = fileURLToPath(import.meta.url); 6 | const __dirname = dirname(__filename); 7 | 8 | const compat = new FlatCompat({ 9 | baseDirectory: __dirname, 10 | }); 11 | 12 | const eslintConfig = [ 13 | ...compat.extends("next/core-web-vitals", "next/typescript", "prettier"), 14 | ]; 15 | 16 | export default eslintConfig; 17 | -------------------------------------------------------------------------------- /dbt_/models/staging/edc/stg_edc__communes.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | inseecommune::VARCHAR(5) AS inseecommune, 3 | nomcommune::VARCHAR AS nomcommune, 4 | quartier::VARCHAR AS quartier, 5 | cdreseau::VARCHAR(9) AS cdreseau, 6 | nomreseau::VARCHAR AS nomreseau, 7 | debutalim::VARCHAR AS debutalim, 8 | de_partition::SMALLINT AS de_partition, 9 | de_ingestion_date::DATE AS de_ingestion_date, 10 | de_dataset_datetime::VARCHAR AS de_dataset_datetime 11 | FROM {{ source('edc', 'edc_communes') }} 12 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/int__commune_geom.sql: -------------------------------------------------------------------------------- 1 | WITH ranked_communes AS ( 2 | SELECT 3 | com_code, 4 | com_name, 5 | geom, 6 | ROW_NUMBER() OVER ( 7 | PARTITION BY com_code 8 | ORDER BY com_code 9 | ) AS row_num 10 | FROM {{ ref('stg_communes__opendatasoft') }} 11 | WHERE com_code IS NOT NULL AND com_code != '' 12 | ) 13 | 14 | SELECT 15 | com_code, 16 | com_name, 17 | ST_ASGEOJSON(geom) AS geom 18 | FROM ranked_communes 19 | WHERE row_num = 1 20 | -------------------------------------------------------------------------------- /dbt_/models/staging/communes/stg_communes__cog.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | TYPECOM::VARCHAR AS TYPECOM, 3 | COM::VARCHAR AS COM, 4 | REG::SMALLINT AS REG, 5 | DEP::VARCHAR AS DEP, 6 | CTCD::VARCHAR AS CTCD, 7 | ARR::VARCHAR AS ARR, 8 | TNCC::SMALLINT AS TNCC, 9 | NCC::VARCHAR AS NCC, 10 | NCCENR::VARCHAR AS NCCENR, 11 | LIBELLE::VARCHAR AS LIBELLE, 12 | CAN::VARCHAR AS CAN, 13 | COMPARENT::VARCHAR AS COMPARENT, 14 | DE_PARTITION::SMALLINT AS DE_PARTITION 15 | FROM {{ source('communes', 'cog_communes') }} 16 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | jobs: 9 | pre-commit: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 
code 13 | uses: actions/checkout@v3 14 | 15 | - name: Install a specific version of uv 16 | uses: astral-sh/setup-uv@v5 17 | with: 18 | version: ">=0.4.0" 19 | - name: Install dependencies 20 | run: uv sync 21 | - name: Run pre-commit 22 | run: uv run pre-commit run --all-files -------------------------------------------------------------------------------- /webapp/components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "new-york", 4 | "rsc": true, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "tailwind.config.ts", 8 | "css": "app/globals.css", 9 | "baseColor": "neutral", 10 | "cssVariables": true, 11 | "prefix": "" 12 | }, 13 | "aliases": { 14 | "components": "@/components", 15 | "utils": "@/lib/utils", 16 | "ui": "@/components/ui", 17 | "lib": "@/lib", 18 | "hooks": "@/hooks" 19 | }, 20 | "iconLibrary": "lucide" 21 | } 22 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python]": { 3 | "editor.formatOnSave": true, 4 | "editor.defaultFormatter": "charliermarsh.ruff", 5 | "editor.codeActionsOnSave": { 6 | "source.organizeImports": "explicit" 7 | } 8 | }, 9 | "notebook.formatOnSave.enabled": true, 10 | "python.testing.pytestArgs": ["-sv"], 11 | "python.testing.unittestEnabled": false, 12 | "python.testing.pytestEnabled": true, 13 | // Config for dorzey.vscode-sqlfluff extension 14 | "sqlfluff.executablePath": "${workspaceFolder}/.venv/bin/sqlfluff", 15 | "sqlfluff.linter.run": "onSave" 16 | } 17 | -------------------------------------------------------------------------------- /dbt_/models/sources/__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: edc 5 | database: data 6 | schema: main 7 | tables: 8 | - name: edc_communes 9 | - name: edc_prelevements 10 | - name: edc_resultats 11 | - name: communes 12 | database: data 13 | schema: main 14 | tables: 15 | - name: cog_communes 16 | - name: opendatasoft_communes 17 | - name: atlasante 18 | database: data 19 | schema: main 20 | tables: 21 | - name: atlasante_udi_2023 22 | - name: atlasante_udi_corse 23 | - name: atlasante_udi_2024 24 | -------------------------------------------------------------------------------- /webapp/app/api/db-example/route.ts: -------------------------------------------------------------------------------- 1 | import { fetchExample } from "@/app/lib/data"; 2 | 3 | // an api route fetching data 4 | export async function GET() { 5 | try { 6 | const reader = await fetchExample(); 7 | return Response.json({ 8 | status: "OK", 9 | rows: reader.getRowObjectsJson(), 10 | columnNames: reader.columnNames(), 11 | columnTypes: reader.columnTypes(), 12 | count: reader.columnCount, 13 | }); 14 | } catch (error) { 15 | console.error("Error while retrieving data:", error); 16 | return Response.json({ error }, { status: 500 }); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /webapp/lib/mapLocale.ts: -------------------------------------------------------------------------------- 1 | import { defaultLocale } from "maplibre-gl/src/ui/default_locale"; 2 | 3 | // French locale for MapLibre, inheriting from default locale 4 | // Only overriding the CooperativeGesturesHandler messages 5 | export const frenchLocale = { 6 | ...defaultLocale, 7 
| // French overrides for CooperativeGesturesHandler 8 | "CooperativeGesturesHandler.WindowsHelpText": 9 | "Utilisez Ctrl + molette pour zoomer sur la carte", 10 | "CooperativeGesturesHandler.MacHelpText": 11 | "Utilisez ⌘ + molette pour zoomer sur la carte", 12 | "CooperativeGesturesHandler.MobileHelpText": 13 | "Utilisez deux doigts pour déplacer la carte", 14 | }; 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | .pytest_cache/ 5 | .mypy_cache/ 6 | .ipynb_checkpoints/ 7 | .venv/ 8 | .idea 9 | .ruff_cache/ 10 | 11 | 12 | # JavaScript/Node 13 | node_modules/ 14 | npm-debug.log 15 | yarn-debug.log* 16 | yarn-error.log* 17 | build/ 18 | dist/ 19 | .next/ 20 | 21 | # Database & Data 22 | *.sqlite3 23 | *.db 24 | *.duckdb 25 | *.duckdb.* 26 | *.csv 27 | *.parquet 28 | *.xlsx 29 | *.xls 30 | *.pmtiles 31 | logs/ 32 | 33 | !database/.gitkeep 34 | cache/ 35 | 36 | # OS 37 | .DS_Store 38 | Thumbs.db 39 | *.tmp 40 | 41 | # Environment & Secrets 42 | .env 43 | .env.* 44 | !.env.example 45 | *.pem 46 | secrets.yaml 47 | config.local.yaml -------------------------------------------------------------------------------- /dbt_/tests/unique_datetimeprel_per_cdreseau_referenceprel.sql: -------------------------------------------------------------------------------- 1 | -- Nous vérifiions que pour chaque couple cdreseau, referenceprel, 2 | -- il n'y a qu'une seule date datetimeprel. 3 | -- En effet, pour trouver tous les paramètres analysés lors du 4 | -- prélèvement le plus récent, on se base sur la date datetimeprel. 5 | -- Si on a plusieurs dates pour un même prélèvement, on ne peut pas 6 | -- savoir quel est le bon. 7 | -- 8 | -- cf _int__resultats_metabolite_divers_udi_dernier.sql 9 | 10 | SELECT 11 | cdreseau, 12 | referenceprel, 13 | count(DISTINCT datetimeprel) AS count_datetimeprel 14 | FROM 15 | {{ ref('int__resultats_udi_communes') }} 16 | GROUP BY 1, 2 17 | HAVING count(DISTINCT datetimeprel) > 1 18 | -------------------------------------------------------------------------------- /webapp/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | "module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "preserve", 15 | "incremental": true, 16 | "plugins": [ 17 | { 18 | "name": "next" 19 | } 20 | ], 21 | "paths": { 22 | "@/*": ["./*"] 23 | } 24 | }, 25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 26 | "exclude": ["node_modules"] 27 | } 28 | -------------------------------------------------------------------------------- /webapp/app/embed/page.tsx: -------------------------------------------------------------------------------- 1 | import PollutionMap from "@/components/PollutionMap"; 2 | import { fetchPollutionStats, fetchParameterValues } from "../lib/data"; 3 | 4 | // Mise en cache de la page pour 24 heures 5 | export const revalidate = 86400; 6 | 7 | export default async function Embed() { 8 | const stats = await fetchPollutionStats(); 9 | const parameterValues = await fetchParameterValues(); 10 | 11 | return ( 12 |
13 |
14 | 19 |
20 |
21 | ); 22 | } 23 | -------------------------------------------------------------------------------- /.github/workflows/lint_nextjs.yml: -------------------------------------------------------------------------------- 1 | name: Lint Next.js 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | paths: 7 | - "webapp/**" 8 | 9 | jobs: 10 | lint: 11 | name: Next.js Lint 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v4 17 | 18 | - name: Setup Node.js 19 | uses: actions/setup-node@v4 20 | with: 21 | node-version: "20" 22 | cache: "npm" 23 | cache-dependency-path: ./webapp/package.json 24 | 25 | - name: Install dependencies 26 | run: | 27 | cd webapp 28 | npm ci 29 | 30 | - name: Run Next.js lint 31 | run: | 32 | cd webapp 33 | npm run lint 34 | -------------------------------------------------------------------------------- /webapp/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # next.js lets you commit env variables: https://nextjs.org/docs/pages/building-your-application/configuring/environment-variables#loading-environment-variables 34 | !.env* 35 | 36 | # vercel 37 | .vercel 38 | 39 | # typescript 40 | *.tsbuildinfo 41 | next-env.d.ts 42 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/tous/int__resultats_tous_udi_annuel.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cdreseau, 3 | 'tous' AS categorie, 4 | periode, 5 | sum(round(ratio * nb_prelevements))::float / sum(nb_prelevements)::float AS ratio, 6 | sum(nb_prelevements) AS nb_prelevements, 7 | sum(nb_sup_valeur_sanitaire) AS nb_sup_valeur_sanitaire 8 | 9 | FROM {{ ref('int__union_resultats_udi') }} 10 | WHERE 11 | periode LIKE 'bilan_annuel%' 12 | AND 13 | categorie NOT IN ( 14 | 'sub_active', 15 | 'metabolite', 16 | 'metabolite_esa_metolachlore', 17 | 'metabolite_chlorothalonil_r471811', 18 | 'metabolite_chloridazone_desphenyl', 19 | 'metabolite_chloridazone_methyl_desphenyl', 20 | 'metabolite_atrazine_desethyl' 21 | ) 22 | GROUP BY cdreseau, periode 23 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/tous/int__resultats_tous_commune_annuel.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | inseecommune, 3 | 'tous' AS categorie, 4 | periode, 5 | sum(round(ratio * nb_prelevements))::float / sum(nb_prelevements)::float AS ratio, 6 | sum(nb_prelevements) AS nb_prelevements, 7 | sum(nb_sup_valeur_sanitaire) AS nb_sup_valeur_sanitaire 8 | 9 | FROM {{ ref('int__union_resultats_commune') }} 10 | WHERE 11 | periode LIKE 'bilan_annuel%' 12 | AND 13 | categorie NOT IN ( 14 | 'sub_active', 15 | 'metabolite', 16 | 'metabolite_esa_metolachlore', 17 | 'metabolite_chlorothalonil_r471811', 18 | 'metabolite_chloridazone_desphenyl', 19 | 'metabolite_chloridazone_methyl_desphenyl', 20 | 'metabolite_atrazine_desethyl' 21 | ) 22 
| GROUP BY inseecommune, periode 23 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/int__udi_geom.sql: -------------------------------------------------------------------------------- 1 | WITH combined_data AS ( 2 | -- SELECT 3 | -- code_udi, 4 | -- geom 5 | -- FROM {{ ref("stg_atlasante_udi_2023") }} 6 | -- UNION ALL 7 | -- SELECT 8 | -- cn_udi AS code_udi, 9 | -- geom 10 | -- FROM {{ ref("stg_atlasante_udi_corse") }} 11 | 12 | SELECT 13 | code_udi, 14 | geom 15 | FROM {{ ref("stg_atlasante_udi_2024") }} 16 | ), 17 | 18 | ranked_data AS ( 19 | SELECT 20 | code_udi, 21 | geom, 22 | ROW_NUMBER() OVER ( 23 | PARTITION BY code_udi 24 | ORDER BY code_udi 25 | ) AS row_num 26 | FROM combined_data 27 | WHERE code_udi IS NOT null AND code_udi != '' 28 | ) 29 | 30 | SELECT 31 | code_udi, 32 | ST_ASGEOJSON(geom) AS geom 33 | FROM ranked_data 34 | WHERE row_num = 1 35 | -------------------------------------------------------------------------------- /webapp/components/ui/input.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react"; 2 | 3 | import { cn } from "@/lib/utils"; 4 | 5 | const Input = React.forwardRef>( 6 | ({ className, type, ...props }, ref) => { 7 | return ( 8 | 17 | ); 18 | }, 19 | ); 20 | Input.displayName = "Input"; 21 | 22 | export { Input }; 23 | -------------------------------------------------------------------------------- /webapp/app/embed-external/page.tsx: -------------------------------------------------------------------------------- 1 | import PollutionMap from "@/components/PollutionMap"; 2 | import { fetchPollutionStats, fetchParameterValues } from "../lib/data"; 3 | 4 | // Mise en cache de la page pour 24 heures 5 | export const revalidate = 86400; 6 | 7 | export default async function EmbedExternal({ 8 | searchParams, 9 | }: { 10 | searchParams: Promise<{ category?: string }>; 11 | }) { 12 | const stats = await fetchPollutionStats(); 13 | const parameterValues = await fetchParameterValues(); 14 | const { category } = await searchParams; 15 | 16 | return ( 17 |
18 |
19 | 25 |
26 |
27 | ); 28 | } 29 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/int__lien_commune_cdreseau.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='table' 4 | ) 5 | }} 6 | 7 | SELECT 8 | inseecommune, 9 | cdreseau, 10 | de_partition, 11 | -- Prenons toujours le même nom de commune pour une inseecommune donnée 12 | MIN(nomcommune) AS nomcommune, 13 | -- Agréger les différentes valeurs de quartier en une liste sans doublons 14 | STRING_AGG(DISTINCT quartier) FILTER ( 15 | WHERE quartier IS NOT NULL AND quartier != '' AND quartier != '-' 16 | ) AS quartiers, 17 | -- Agréger les différentes valeurs de nomreseau en une liste sans doublons 18 | STRING_AGG(DISTINCT nomreseau) FILTER ( 19 | WHERE nomreseau IS NOT NULL AND nomreseau != '' 20 | ) AS nomreseaux, 21 | -- Prendre la première date de début d'alimentation 22 | MIN(debutalim) AS debutalim 23 | FROM 24 | {{ ref('stg_edc__communes') }} 25 | GROUP BY 26 | inseecommune, 27 | cdreseau, 28 | de_partition 29 | -------------------------------------------------------------------------------- /pipelines/test_pipelines.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | import pytest 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "task", 8 | ["build_database", "upload_database", "download_database"], 9 | ) 10 | def test_pipeline_task(task): 11 | """ 12 | Test the specified pipeline task. 13 | 14 | This function tests the execution of the specified pipeline task from the 15 | pipelines/run.py script. It ensures that the task runs without raising any exceptions. 16 | 17 | Args: 18 | task (str): The name of the pipeline task to test. 
19 | """ 20 | commands_list = ["uv", "run", "pipelines/run.py", "run", task] 21 | 22 | # add options 23 | if task == "build_database": 24 | commands_list.extend(["--refresh-type", "last"]) 25 | elif task in ("download_database", "upload_database"): 26 | commands_list.extend(["--env", "dev"]) 27 | 28 | process = subprocess.run(commands_list) 29 | 30 | assert process.returncode == 0, f"{task} script failed" 31 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/int__valeurs_de_reference.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='table' 4 | ) 5 | }} 6 | 7 | SELECT 8 | cdparametresiseeaux, 9 | MAX(categorie_1) AS categorie_1, 10 | MAX(categorie_2) AS categorie_2, 11 | MAX(categorie_3) AS categorie_3, 12 | MAX(limite_qualite) AS limite_qualite, 13 | MAX(limite_qualite_unite) AS limite_qualite_unite, 14 | MAX(limite_indicative) AS limite_indicative, 15 | MAX(limite_indicative_unite) AS limite_indicative_unite, 16 | MAX(valeur_sanitaire_1) AS valeur_sanitaire_1, 17 | MAX(valeur_sanitaire_1_unite) AS valeur_sanitaire_1_unite, 18 | MAX(valeur_sanitaire_2) AS valeur_sanitaire_2, 19 | MAX(valeur_sanitaire_2_unite) AS valeur_sanitaire_2_unite, 20 | MAX(web_label) AS web_label, 21 | COUNT(*) AS nb_rows -- we enforce this to be 1 in a dbt test 22 | FROM 23 | {{ ref('references_generations_futures') }} 24 | WHERE 25 | cdparametresiseeaux IS NOT NULL 26 | AND cdparametresiseeaux != '' 27 | GROUP BY 28 | cdparametresiseeaux 29 | -------------------------------------------------------------------------------- /dbt_/models/staging/edc/stg_edc__resultats.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cddept::VARCHAR(3) AS cddept, 3 | referenceprel::VARCHAR(11) AS referenceprel, 4 | cdparametresiseeaux::VARCHAR(10) AS cdparametresiseeaux, 5 | cdparametre::INT AS cdparametre, 6 | libmajparametre::VARCHAR AS libmajparametre, 7 | libminparametre::VARCHAR AS libminparametre, 8 | libwebparametre::VARCHAR AS libwebparametre, 9 | qualitparam::VARCHAR(1) AS qualitparam, 10 | insituana::VARCHAR(1) AS insituana, 11 | rqana::VARCHAR(8) AS rqana, 12 | cdunitereferencesiseeaux::VARCHAR(7) AS cdunitereferencesiseeaux, 13 | cdunitereference::VARCHAR AS cdunitereference, 14 | limitequal::VARCHAR AS limitequal, 15 | refqual::VARCHAR AS refqual, 16 | valtraduite::NUMERIC AS valtraduite, 17 | casparam::VARCHAR AS casparam, 18 | referenceanl::VARCHAR AS referenceanl, 19 | de_partition::SMALLINT AS de_partition, 20 | de_ingestion_date::DATE AS de_ingestion_date, 21 | de_dataset_datetime::VARCHAR AS de_dataset_datetime 22 | FROM {{ source('edc', 'edc_resultats') }} 23 | -------------------------------------------------------------------------------- /webapp/app/lib/duckdb.ts: -------------------------------------------------------------------------------- 1 | import { DuckDBInstance } from "@duckdb/node-api"; 2 | import fs from "fs"; 3 | import path from "path"; 4 | 5 | // Get database path from environment variable or use default 6 | const envDbPath = process.env.DUCKDB_PATH; 7 | const defaultDbPath = path.join(process.cwd(), "../database/data.duckdb"); 8 | const dbFilePath = envDbPath || defaultDbPath; 9 | 10 | console.log(`Using database path: ${dbFilePath}`); 11 | 12 | // Check if the file exists 13 | if (!fs.existsSync(dbFilePath)) { 14 | throw new Error( 15 | `Database file not found at ${dbFilePath}. 
Please check that your DUCKDB_PATH environment variable is correctly set or that the default database exists.`, 16 | ); 17 | } 18 | 19 | // Create DB instance 20 | const db = await DuckDBInstance.create(dbFilePath, { 21 | access_mode: "READ_ONLY", 22 | max_memory: "1GB", 23 | threads: "4", 24 | }); 25 | 26 | // Load the geospatial extension 27 | const connection = await db.connect(); 28 | await connection.run("INSTALL spatial;"); 29 | await connection.run("LOAD spatial;"); 30 | 31 | export default db; 32 | -------------------------------------------------------------------------------- /.github/workflows/test_dbt.yaml: -------------------------------------------------------------------------------- 1 | name: 🧪 Run DBT Tests 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | paths: 7 | - 'dbt_/**' 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@v3 16 | 17 | - name: Install a specific version of uv 18 | uses: astral-sh/setup-uv@v5 19 | with: 20 | version: ">=0.4.0" 21 | 22 | - name: Install dependencies 23 | run: uv sync 24 | 25 | - name: Download production database from Storage 26 | run: | 27 | uv run pipelines/run.py run download_database 28 | 29 | - name: Install dbt dependencies 30 | run: | 31 | cd dbt_ 32 | uv run dbt deps || { echo "dbt deps failed"; exit 1; } 33 | 34 | - name: Run dbt build 35 | run: | 36 | cd dbt_ 37 | uv run dbt build || { echo "dbt build failed"; exit 1; } 38 | 39 | - name: Run dbt docs generate 40 | run: | 41 | cd dbt_ 42 | uv run dbt docs generate || { echo "dbt docs generate failed"; exit 1; } 43 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/int__lien_cdreseau_refreneceprel.sql: -------------------------------------------------------------------------------- 1 | with ranked as ( 2 | select 3 | cdreseau, 4 | referenceprel, 5 | dateprel, 6 | heureprel, 7 | de_partition, 8 | -- Quand heureprel est null ou invalide, on choisit arbitrairement 09:00 9 | -- Examples: 10 | -- referenceprel = '07700233713' 11 | -- referenceprel = '02800116863' 12 | COALESCE( 13 | TRY_STRPTIME( 14 | dateprel || ' ' || REPLACE(heureprel, 'h', ':'), 15 | '%Y-%m-%d %H:%M' 16 | ), 17 | TRY_STRPTIME( 18 | dateprel || ' 09:00', 19 | '%Y-%m-%d %H:%M' 20 | ) 21 | ) as datetimeprel, 22 | ROW_NUMBER() over ( 23 | partition by cdreseau, referenceprel 24 | order by 25 | dateprel, 26 | heureprel 27 | ) as row_num 28 | from 29 | {{ ref('stg_edc__prevelevements') }} 30 | 31 | ) 32 | 33 | select * exclude (row_num) 34 | from 35 | ranked 36 | where 37 | row_num = 1 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015-2024 Data4Good 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /pipelines/tasks/client/core/https_to_duck_client.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from pipelines.tasks.client.core.duckdb_client import DuckDBClient 4 | from pipelines.tasks.client.core.https_client import HTTPSClient 5 | from pipelines.tasks.config.common import ( 6 | logger, 7 | ) 8 | 9 | 10 | class HTTPSToDuckDBClient(HTTPSClient, ABC): 11 | def __init__(self, config, duckdb_client: DuckDBClient): 12 | super().__init__(config["source"]["base_url"]) 13 | self.config = config 14 | self.duckdb_client = duckdb_client 15 | 16 | def process_datasets(self): 17 | logger.info(f"Processing {self.__class__.__name__} data") 18 | self._download_data() 19 | self._ingest_to_duckdb() 20 | logger.info(f"Finishing processing {self.__class__.__name__} data") 21 | 22 | @abstractmethod 23 | def _download_data(self): 24 | """Download data - to be implemented by subclasses""" 25 | pass 26 | 27 | @abstractmethod 28 | def _ingest_to_duckdb(self): 29 | """Ingest data to DuckDB - to be implemented by subclasses""" 30 | pass 31 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/int__prelevements_uniques.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | prelevements_cdfirstreseauamont AS ( 3 | SELECT DISTINCT 4 | referenceprel, 5 | dateprel, 6 | heureprel, 7 | conclusionprel, 8 | plvconformitebacterio, 9 | plvconformitechimique, 10 | plvconformitereferencebact, 11 | plvconformitereferencechim, 12 | (CASE 13 | WHEN cdreseauamont IS NULL THEN cdreseau 14 | WHEN cdreseauamont IS NOT NULL THEN cdreseauamont 15 | END) AS cdfirstreseauamont, 16 | TRY_STRPTIME( 17 | dateprel || ' ' || REPLACE(heureprel, 'h', ':'), '%Y-%m-%d %H:%M' 18 | ) AS datetimeprel 19 | FROM 20 | {{ ref('stg_edc__prevelevements') }} 21 | ), 22 | 23 | ranked AS ( 24 | SELECT 25 | *, 26 | ROW_NUMBER() OVER ( 27 | PARTITION BY referenceprel 28 | ORDER BY 29 | dateprel, 30 | heureprel 31 | ) AS row_num 32 | FROM 33 | prelevements_cdfirstreseauamont 34 | ) 35 | 36 | SELECT * EXCLUDE (row_num) 37 | FROM 38 | ranked 39 | WHERE 40 | row_num = 1 41 | -------------------------------------------------------------------------------- /dbt_/models/staging/edc/stg_edc__prevelevements.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cddept::VARCHAR(3) AS cddept, 3 | cdreseau::VARCHAR(9) AS cdreseau, 4 | inseecommuneprinc::VARCHAR(5) AS inseecommuneprinc, 5 | nomcommuneprinc::VARCHAR AS nomcommuneprinc, 6 | cdreseauamont::VARCHAR(9) AS cdreseauamont, 7 | nomreseauamont::VARCHAR AS nomreseauamont, 8 | REPLACE(pourcentdebit, ' %', '')::TINYINT AS pourcentdebit, 9 | referenceprel::VARCHAR(11) AS referenceprel, 10 | dateprel::DATE AS dateprel, 11 | heureprel::VARCHAR AS heureprel, 12 | conclusionprel::VARCHAR AS conclusionprel, 13 | 
ugelib::VARCHAR AS ugelib, 14 | distrlib::VARCHAR AS distrlib, 15 | moalib::VARCHAR AS moalib, 16 | plvconformitebacterio::VARCHAR(1) AS plvconformitebacterio, 17 | plvconformitechimique::VARCHAR(1) AS plvconformitechimique, 18 | plvconformitereferencebact::VARCHAR(1) AS plvconformitereferencebact, 19 | plvconformitereferencechim::VARCHAR(1) AS plvconformitereferencechim, 20 | de_partition::SMALLINT AS de_partition, 21 | de_ingestion_date::DATE AS de_ingestion_date, 22 | de_dataset_datetime::VARCHAR AS de_dataset_datetime 23 | FROM {{ source('edc', 'edc_prelevements') }} 24 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/int__parametres_non_references.sql: -------------------------------------------------------------------------------- 1 | -- Analyse des paramètres présents dans stg_edc__resultats mais absents des 2 | -- références Générations Futures 3 | 4 | WITH parametres_resultats AS ( 5 | SELECT 6 | cdparametresiseeaux, 7 | STRING_AGG(DISTINCT cdparametre, ', ') AS cdparametre, 8 | STRING_AGG(DISTINCT libmajparametre, ', ') AS libmajparametre, 9 | STRING_AGG(DISTINCT libminparametre, ', ') AS libminparametre, 10 | STRING_AGG(DISTINCT casparam, ', ') AS casparam, 11 | COUNT(*) AS nb 12 | FROM {{ ref('stg_edc__resultats') }} 13 | WHERE cdparametresiseeaux IS NOT NULL 14 | GROUP BY 15 | cdparametresiseeaux 16 | ), 17 | 18 | parametres_non_references AS ( 19 | SELECT pr.* 20 | FROM parametres_resultats AS pr 21 | LEFT JOIN {{ ref('int__valeurs_de_reference') }} AS vr 22 | ON pr.cdparametresiseeaux = vr.cdparametresiseeaux 23 | WHERE vr.cdparametresiseeaux IS NULL 24 | ) 25 | 26 | SELECT 27 | cdparametresiseeaux, 28 | cdparametre, 29 | libmajparametre, 30 | libminparametre, 31 | casparam, 32 | nb 33 | FROM parametres_non_references 34 | ORDER BY nb DESC, cdparametresiseeaux ASC 35 | -------------------------------------------------------------------------------- /pipelines/tasks/upload_database.py: -------------------------------------------------------------------------------- 1 | """ 2 | Upload database to S3 storage. 
3 | 4 | Args: 5 | - env (str): Environment to upload to ("dev" or "prod") 6 | 7 | Examples: 8 | - upload_database --env dev : Upload database to development environment 9 | - upload_database --env prod : Upload database to production environment 10 | """ 11 | 12 | from pipelines.config.config import get_s3_path 13 | from pipelines.tasks.config.common import DUCKDB_FILE 14 | from pipelines.utils.logger import get_logger 15 | from pipelines.utils.storage_client import ObjectStorageClient 16 | 17 | logger = get_logger(__name__) 18 | 19 | 20 | def upload_database_to_storage(env): 21 | """ 22 | Upload the database built locally to Storage Object depending on the environment 23 | This requires setting the correct environment variables for the Scaleway credentials 24 | """ 25 | s3 = ObjectStorageClient() 26 | 27 | db_path = DUCKDB_FILE # Fichier local 28 | s3_path = get_s3_path(env) # Destination sur S3 29 | 30 | s3.upload_object(local_path=db_path, file_key=s3_path, public_read=True) 31 | logger.info(f"✅ Base uploadée sur s3://{s3.bucket_name}/{s3_path}") 32 | 33 | 34 | def execute(env): 35 | upload_database_to_storage(env) 36 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pollution_eau" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = ">=3.12" 7 | dependencies = [ 8 | "click>=8.1.8,<9", 9 | "duckdb==1.2.0", 10 | "duckdb-engine==0.15.0", 11 | "folium>=0.19.4", 12 | "ipykernel>=6.29.5,<7", 13 | "ipython>=8.31.0,<9", 14 | "ipython-sql>=0.5.0,<1", 15 | "jupysql>=0.10.17,<1", 16 | "matplotlib>=3.10.0,<4", 17 | "pandas>=2.2.3,<3", 18 | "requests>=2.32.3,<3", 19 | "boto3==1.34.11,<2", 20 | "python-dotenv>=1.0.1,<2", 21 | "ibis-framework[duckdb]==10.1.0", 22 | "dbt-core>=1.9.2,<2", 23 | "dbt-duckdb>=1.9.1,<2", 24 | "tqdm>=4.67.1,<5", 25 | "pre-commit>=4.1.0", 26 | "ruff>=0.9.10", 27 | "sqlfluff>=3.3.1,<4", 28 | ] 29 | 30 | [dependency-groups] 31 | dev = [ 32 | "jupyter>=1.1.0,<2", 33 | "pre-commit>=4.1.0,<5", 34 | "pytest>=8.3.4,<9", 35 | "pytest-cov>=6.0.0,<7" 36 | ] 37 | 38 | [project.optional-dependencies] 39 | pmtiles = [ 40 | "tippecanoe>=2.72.0,<3" 41 | ] 42 | 43 | [build-system] 44 | requires = ["hatchling"] 45 | build-backend = "hatchling.build" 46 | 47 | [tool.hatch.build.targets.wheel] 48 | packages = ["pipelines"] 49 | -------------------------------------------------------------------------------- /pipelines/tasks/client/core/https_client.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Union 3 | 4 | import requests 5 | 6 | from pipelines.tasks.config.common import download_file_from_https, logger 7 | 8 | 9 | class HTTPSClient: 10 | def __init__(self, base_url: str): 11 | self.base_url = base_url 12 | 13 | def download_file_from_https(self, path: str, filepath: Union[str, Path]): 14 | """ 15 | Downloads a file from a https link to a local file. 16 | :param path: The url path to download the file. 17 | :param filepath: The path to the local file. 18 | :return: Downloaded file filename. 
19 | """ 20 | url = self.base_url + path 21 | return download_file_from_https(url, filepath) 22 | 23 | @staticmethod 24 | def get_url_headers(url: str) -> dict: 25 | """ 26 | Get url HTTP headers 27 | :param url: static dataset url 28 | :return: HTTP headers 29 | """ 30 | try: 31 | response = requests.head(url, timeout=5) 32 | response.raise_for_status() 33 | return dict(response.headers) 34 | except requests.exceptions.RequestException as ex: 35 | logger.error(f"Exception raised: {ex}") 36 | return {} 37 | -------------------------------------------------------------------------------- /webapp/components/ui/switch.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import * as React from "react"; 4 | import * as SwitchPrimitives from "@radix-ui/react-switch"; 5 | 6 | import { cn } from "@/lib/utils"; 7 | 8 | const Switch = React.forwardRef< 9 | React.ElementRef, 10 | React.ComponentPropsWithoutRef 11 | >(({ className, ...props }, ref) => ( 12 | 20 | 25 | 26 | )); 27 | Switch.displayName = SwitchPrimitives.Root.displayName; 28 | 29 | export { Switch }; 30 | -------------------------------------------------------------------------------- /webapp/components/ui/hover-card.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import * as React from "react"; 4 | import * as HoverCardPrimitive from "@radix-ui/react-hover-card"; 5 | 6 | import { cn } from "@/lib/utils"; 7 | 8 | const HoverCard = HoverCardPrimitive.Root; 9 | 10 | const HoverCardTrigger = HoverCardPrimitive.Trigger; 11 | 12 | const HoverCardContent = React.forwardRef< 13 | React.ElementRef, 14 | React.ComponentPropsWithoutRef 15 | >(({ className, align = "center", sideOffset = 4, ...props }, ref) => ( 16 | 26 | )); 27 | HoverCardContent.displayName = HoverCardPrimitive.Content.displayName; 28 | 29 | export { HoverCard, HoverCardTrigger, HoverCardContent }; 30 | -------------------------------------------------------------------------------- /pipelines/tasks/config/config_pmtiles.py: -------------------------------------------------------------------------------- 1 | """Configuration for DuckDB-based PMTiles generation.""" 2 | 3 | # Value columns to pivot for both data types 4 | value_columns = [ 5 | "resultat", 6 | "ratio", 7 | "date_dernier_prel", 8 | "nb_parametres", 9 | "nb_prelevements", 10 | "nb_sup_valeur_sanitaire", 11 | "parametres_detectes", 12 | ] 13 | 14 | # Configuration for both commune and UDI data processing 15 | config_pmtiles: dict[str, dict[str, str | list[str] | None]] = { 16 | "communes": { 17 | "result_table": "web__resultats_communes", 18 | "geom_table": "int__commune_geom", 19 | "id_columns": ["commune_code_insee", "commune_nom"], 20 | "result_id_column": "commune_code_insee", 21 | "geom_id_column": "com_code", 22 | "geom_name_column": "com_name", 23 | "output_filename": "commune_data", 24 | "layer_name": "data_communes", 25 | }, 26 | "udi": { 27 | "result_table": "web__resultats_udi", 28 | "geom_table": "int__udi_geom", 29 | "id_columns": ["cdreseau", "nomreseaux"], 30 | "result_id_column": "cdreseau", 31 | "geom_id_column": "code_udi", 32 | "geom_name_column": None, # UDI geom table doesn't have a name column 33 | "output_filename": "udi_data", 34 | "layer_name": "data_udi", 35 | }, 36 | } 37 | -------------------------------------------------------------------------------- /pipelines/tasks/client/commune_client.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from pipelines.tasks.client.core.https_to_duck_client import HTTPSToDuckDBClient 5 | from pipelines.tasks.config.common import ( 6 | CACHE_FOLDER, 7 | logger, 8 | ) 9 | 10 | 11 | class CommuneClient(HTTPSToDuckDBClient): 12 | def __init__(self, config, duckdb_client): 13 | super().__init__(config, duckdb_client) 14 | 15 | def _download_data(self): 16 | """Process the COG datasets""" 17 | logger.info("Launching processing of Insee communes") 18 | 19 | os.makedirs(CACHE_FOLDER, exist_ok=True) 20 | self.download_file_from_https( 21 | path=self.config["source"]["id"], 22 | filepath=Path(CACHE_FOLDER, self.config["file"]["file_name"]), 23 | ) 24 | 25 | def _ingest_to_duckdb(self): 26 | """Implement INSEE specific ingestion logic""" 27 | self.duckdb_client.drop_tables([self.config["file"]["table_name"]]) 28 | self.duckdb_client.ingest_from_csv( 29 | ingest_type="CREATE", 30 | table_name=self.config["file"]["table_name"], 31 | de_partition=self.config["source"]["datetime"][:4], 32 | dataset_datetime=self.config["source"]["datetime"], 33 | filepath=Path(CACHE_FOLDER, self.config["file"]["file_name"]), 34 | ) 35 | -------------------------------------------------------------------------------- /dbt_/models/staging/edc/val_traduite__docs.md: -------------------------------------------------------------------------------- 1 | {% docs val_traduite_docs %} 2 | Traduction au format numérique du résultat textuel d’une mesure Rqana par application automatisée de règles prédéfinies. 3 | 4 | Principe de traduction : 5 | 6 | | Résultat | Valeur traduite | Commentaire | 7 | |:--------:|:---------------:|:--------------------------------------------------:| 8 | | XXX | XXX | | 9 | | XXX | -XXX | | 10 | | XXX | XXX | | 12 | | TRACES | 0 | Entre seuil de quantification et de détection | 13 | | INCOMPT. | 1,11 | Valeur trop élevée en microbiologie. 
Préférer >XXX | 14 | | PRESENCE | 1 | Présence non quantifiée | 15 | | N.D | 0 | < seuil de détection | 16 | | ILLISIBL | NULL | Non interprétable en bactériologie | 17 | | , 16 | React.ComponentPropsWithoutRef 17 | >(({ className, sideOffset = 4, ...props }, ref) => ( 18 | 19 | 28 | 29 | )); 30 | TooltipContent.displayName = TooltipPrimitive.Content.displayName; 31 | 32 | export { Tooltip, TooltipTrigger, TooltipContent, TooltipProvider }; 33 | -------------------------------------------------------------------------------- /dbt_/models/staging/atlasante/_atlasante_models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_atlasante_udi_2023 5 | description: "Unités de distribution (UDI) de la France métropolitaine" 6 | columns: 7 | - name: gid 8 | type: INTEGER 9 | - name: code_udi 10 | type: VARCHAR 11 | - name: ins_nom 12 | type: VARCHAR 13 | - name: uge_nom 14 | type: VARCHAR 15 | - name: udi_pop 16 | type: VARCHAR 17 | - name: geom 18 | type: GEOMETRY 19 | - name: ingestion_date 20 | type: DATE 21 | - name: stg_atlasante_udi_corse 22 | description: "Unités de distribution (UDI) de la Corse" 23 | columns: 24 | - name: gid 25 | type: INTEGER 26 | - name: cn_udi 27 | type: VARCHAR 28 | - name: nom_udi 29 | type: VARCHAR 30 | - name: geom 31 | type: GEOMETRY 32 | - name: ingestion_date 33 | type: DATE 34 | - name: stg_atlasante_udi_2024 35 | description: "Unités de distribution (UDI) de la France métropolitaine" 36 | columns: 37 | - name: gid 38 | type: INTEGER 39 | - name: code_udi 40 | type: VARCHAR 41 | - name: ins_nom 42 | type: VARCHAR 43 | - name: uge_nom 44 | type: VARCHAR 45 | - name: udi_pop 46 | type: VARCHAR 47 | - name: geom 48 | type: GEOMETRY 49 | - name: ingestion_date 50 | type: DATE 51 | -------------------------------------------------------------------------------- /webapp/lib/property.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Parse a property name into its components 3 | * @param propertyName The full property name in format period_category_variable 4 | * @returns An object with the period, category, and variable 5 | */ 6 | export function parsePropertyName(propertyName: string): { 7 | period: string; 8 | category: string; 9 | variable: string; 10 | } | null { 11 | // Handle null or empty values 12 | if (!propertyName) { 13 | return null; 14 | } 15 | 16 | // Uses regex to match the pattern 17 | const pattern = /^(bilan_annuel_\d{4}|dernier_prel)_([^_]+)_(.+)$/; 18 | const match = propertyName.match(pattern); 19 | 20 | // If the property name doesn't match our expected format 21 | if (!match) { 22 | return null; 23 | } 24 | 25 | // Extract components from regex match 26 | const [, period, category, variable] = match; 27 | 28 | return { 29 | period, 30 | category, 31 | variable, 32 | }; 33 | } 34 | 35 | /** 36 | * Get the full property name from components 37 | * @param period The period (e.g., "bilan_annuel_2022", "dernier_prel") 38 | * @param category The category (e.g., "pfas", "cvm") 39 | * @param variable The variable (e.g., "resultat", "parametres_detectes") 40 | * @returns The full property name 41 | */ 42 | export function getPropertyName( 43 | period: string, 44 | category: string, 45 | variable: string, 46 | ): string { 47 | return `${period}_${category}_${variable}`; 48 | } 49 | -------------------------------------------------------------------------------- /dbt_/dbt_project.yml: 
-------------------------------------------------------------------------------- 1 | # Name your project! Project names should contain only lowercase characters 2 | # and underscores. A good package name should reflect your organization's 3 | # name or the intended use of these models 4 | name: "dbt_" 5 | version: "1.0.0" 6 | 7 | # This setting configures which "profile" dbt uses for this project. 8 | profile: "dbt_" 9 | 10 | # These configurations specify where dbt should look for different types of files. 11 | # The `model-paths` config, for example, states that models in this project can be 12 | # found in the "models/" directory. You probably won't need to change these! 13 | model-paths: ["models"] 14 | analysis-paths: ["analyses"] 15 | test-paths: ["tests"] 16 | seed-paths: ["seeds"] 17 | macro-paths: ["macros"] 18 | snapshot-paths: ["snapshots"] 19 | 20 | clean-targets: # directories to be removed by `dbt clean` 21 | - "target" 22 | - "dbt_packages" 23 | 24 | # Configuring models 25 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 26 | 27 | # In this example config, we tell dbt to build all models in the example/ 28 | # directory as views. These settings can be overridden in the individual model 29 | # files using the `{{ config(...) }}` macro. 30 | models: 31 | dbt_: 32 | # Config indicated by + and applies to all files under models/example/ 33 | staging: 34 | +materialized: view 35 | intermediate: 36 | +materialized: view 37 | website: 38 | +materialized: table 39 | -------------------------------------------------------------------------------- /webapp/components/ui/popover.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import * as React from "react"; 4 | import * as PopoverPrimitive from "@radix-ui/react-popover"; 5 | 6 | import { cn } from "@/lib/utils"; 7 | 8 | const Popover = PopoverPrimitive.Root; 9 | 10 | const PopoverTrigger = PopoverPrimitive.Trigger; 11 | 12 | const PopoverAnchor = PopoverPrimitive.Anchor; 13 | 14 | const PopoverContent = React.forwardRef< 15 | React.ElementRef, 16 | React.ComponentPropsWithoutRef 17 | >(({ className, align = "center", sideOffset = 4, ...props }, ref) => ( 18 | 19 | 29 | 30 | )); 31 | PopoverContent.displayName = PopoverPrimitive.Content.displayName; 32 | 33 | export { Popover, PopoverTrigger, PopoverContent, PopoverAnchor }; 34 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/sqlfluff/sqlfluff 3 | rev: 3.3.1 # Vérifie la dernière version sur GitHub 4 | hooks: 5 | - id: sqlfluff-lint 6 | args: [ "--config", ".sqlfluff" ] # Adapte au dialecte que tu utilises (bigquery, snowflake, etc.) 7 | files: dbt_/ 8 | 9 | - id: sqlfluff-fix # Optionnel, pour corriger automatiquement les erreurs 10 | args: [ "--config", ".sqlfluff" ] 11 | files: dbt_/ 12 | - repo: https://github.com/astral-sh/ruff-pre-commit 13 | # Ruff version. 14 | rev: v0.9.3 15 | hooks: 16 | # Run the linter. 17 | - id: ruff 18 | args: [ --fix ] 19 | # Run the formatter. 
20 | - id: ruff-format 21 | - repo: https://github.com/pre-commit/pre-commit-hooks 22 | rev: v5.0.0 23 | hooks: 24 | - id: check-merge-conflict 25 | - id: mixed-line-ending 26 | #- repo: https://github.com/pycqa/bandit 27 | # rev: 1.7.4 28 | # hooks: 29 | # - id: bandit 30 | # exclude: tests/ 31 | 32 | # This pre commit only work with poetry so we commented it as we work with uv 33 | # - repo: https://github.com/Lucas-C/pre-commit-hooks-safety 34 | # rev: v1.3.1 35 | # hooks: 36 | # - id: python-safety-dependencies-check 37 | 38 | - repo: https://github.com/rbubley/mirrors-prettier 39 | rev: v3.5.1 40 | hooks: 41 | - id: prettier 42 | types_or: [markdown, javascript, jsx, ts, tsx, json] 43 | files: 'webapp' 44 | -------------------------------------------------------------------------------- /pipelines/utils/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from urllib.parse import urlparse 4 | 5 | import requests 6 | 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def get_project_root() -> Path: 12 | """ 13 | Returns project root folder when called from anywhere in the project 14 | This is useful for specifying paths that are relative to the project root 15 | e.g. `local_db_path = Path(get_project_root(), "database/data.duckdb")` 16 | """ 17 | return Path(__file__).parent.parent.parent 18 | 19 | 20 | def get_url_headers(url: str) -> dict: 21 | """ 22 | Get url HTTP headers 23 | :param url: static dataset url 24 | :return: HTTP headers 25 | """ 26 | try: 27 | response = requests.head(url, timeout=5) 28 | response.raise_for_status() 29 | return response.headers 30 | except requests.exceptions.RequestException as ex: 31 | logger.error(f"Exception raised: {ex}") 32 | return {} 33 | 34 | 35 | def extract_dataset_datetime(url: str) -> str: 36 | """ 37 | Extract the dataset datetime from dataset location url 38 | which can be found in the static dataset url headers 39 | :param url: static dataset url 40 | :return: dataset datetime under format "YYYYMMDD-HHMMSS" 41 | """ 42 | metadata = get_url_headers(url) 43 | parsed_url = urlparse(metadata.get("location")) 44 | path_parts = parsed_url.path.strip("/").split("/") 45 | return path_parts[-2] 46 | -------------------------------------------------------------------------------- /pipelines/config/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from pipelines.utils.logger import get_logger 6 | 7 | logger = get_logger(__name__) 8 | 9 | current_dir = os.path.dirname(os.path.abspath(__file__)) 10 | # Construct the path to the .env file 11 | dotenv_path = os.path.join(current_dir, ".env") 12 | 13 | 14 | def load_env_variables(): 15 | load_dotenv(dotenv_path) 16 | 17 | 18 | def get_environment(default="prod"): 19 | env = os.getenv("ENV", default) 20 | logger.info(f"Running on env {env}") 21 | if env not in ["dev", "prod"]: 22 | raise ValueError(f"Invalid environment: {env}. Must be 'dev' or 'prod'.") 23 | return env 24 | 25 | 26 | def get_s3_path(env, filename="data.duckdb"): 27 | return f"{env}/database/{filename}" 28 | 29 | 30 | def get_s3_udi_path(env, filename): 31 | return f"{env}/UDI/{filename}" 32 | 33 | 34 | def get_s3_path_geojson(env, filename="new-georef-france-commune-prelevement.geojson"): 35 | """Get the S3 path for GeoJSON file based on environment. 
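    For instance, with the default filename, get_s3_path_geojson("dev") returns
    "dev/geojson/new-georef-france-commune-prelevement.geojson".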
36 | 37 | Args: 38 | env (str): Environment ("dev" or "prod") 39 | 40 | Returns: 41 | str: S3 path for the GeoJSON file 42 | """ 43 | if env not in ["dev", "prod"]: 44 | raise ValueError("Environment must be 'dev' or 'prod'") 45 | return f"{env}/geojson/{filename}" 46 | 47 | 48 | def get_s3_path_pmtiles(env, filename="georef-france-commune-prelevement.pmtiles"): 49 | if env not in ["dev", "prod"]: 50 | raise ValueError("Environment must be 'dev' or 'prod'") 51 | return f"{env}/pmtiles/{filename}" 52 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/nitrate/int__resultats_nitrate_udi_annuel.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | nitrate_prels AS ( 3 | -- Certains prélèvements ont plusieurs analyses pour la même substance 4 | -- Le SELECT DISTINCT ne dédoublonne pas ces cas là 5 | -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête 6 | SELECT DISTINCT 7 | de_partition AS annee, 8 | cdreseau, 9 | referenceprel, 10 | datetimeprel, 11 | valeur_sanitaire_1, 12 | valtraduite 13 | FROM 14 | {{ ref('int__resultats_udi_communes') }} 15 | WHERE 16 | categorie = 'nitrate' 17 | AND cdparametresiseeaux = 'NO3' 18 | ) 19 | 20 | SELECT 21 | cdreseau, 22 | annee, 23 | 'nitrate' AS categorie, 24 | 'bilan_annuel_' || annee AS periode, 25 | count( 26 | DISTINCT 27 | CASE 28 | WHEN 29 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1 30 | THEN referenceprel 31 | END 32 | ) AS nb_depassements, 33 | count(DISTINCT referenceprel) AS nb_prelevements, 34 | ( 35 | count( 36 | DISTINCT 37 | CASE 38 | WHEN 39 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1 40 | THEN referenceprel 41 | END 42 | )::float 43 | / 44 | count(DISTINCT referenceprel)::float 45 | ) AS ratio, 46 | to_json({ 47 | 'NO3': max(valtraduite) 48 | }) AS parametres_detectes, 49 | max(datetimeprel) AS date_dernier_prel 50 | 51 | FROM nitrate_prels 52 | 53 | GROUP BY cdreseau, annee 54 | -------------------------------------------------------------------------------- /.github/workflows/test_pipelines.yaml: -------------------------------------------------------------------------------- 1 | name: 🧪 Run Pipelines Tests 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | paths: 7 | - 'pipelines/**' 8 | 9 | env: 10 | SCW_ACCESS_KEY: ${{ secrets.SCW_ACCESS_KEY }} 11 | SCW_SECRET_KEY: ${{ secrets.SCW_SECRET_KEY }} 12 | 13 | jobs: 14 | test: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout code 19 | uses: actions/checkout@v3 20 | 21 | - name: Check if SCW_ACCESS_KEY and SCW_SECRET_KEY are set 22 | run: | 23 | if [ -z "$SCW_ACCESS_KEY" ]; then 24 | echo "SCW_ACCESS_KEY is not set, cannot run tests without access key" 25 | exit 1 26 | else 27 | echo "SCW_ACCESS_KEY is properly set." 28 | fi 29 | 30 | if [ -z "$SCW_SECRET_KEY" ]; then 31 | echo "SCW_SECRET_KEY is not set, cannot run tests without secret key" 32 | exit 1 33 | else 34 | echo "SCW_SECRET_KEY is properly set." 35 | fi 36 | 37 | - name: Install a specific version of uv 38 | uses: astral-sh/setup-uv@v5 39 | with: 40 | version: ">=0.4.0" 41 | 42 | - name: Install dependencies 43 | run: | 44 | uv sync 45 | 46 | - name: Run tests with coverage 47 | run: uv run pytest -s --cov=. 
--cov-report=term-missing 48 | 49 | - name: test dbt 50 | run: | 51 | cd dbt_ 52 | uv run dbt deps 53 | uv run dbt seed 54 | uv run dbt run 55 | 56 | - name: test pmtiles generation 57 | run: | 58 | uv pip install .[pmtiles] 59 | uv run pipelines/run.py run generate_pmtiles --env dev 60 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/pesticide/sub_active/int__resultats_sub_active_udi_annuel.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | sub_active_prels AS ( 3 | SELECT DISTINCT 4 | de_partition AS annee, 5 | cdreseau, 6 | referenceprel, 7 | datetimeprel, 8 | cdparametresiseeaux, 9 | valtraduite, 10 | limite_qualite, 11 | valeur_sanitaire_1 12 | FROM 13 | {{ ref('int__resultats_udi_communes') }} 14 | WHERE 15 | categorie = 'pesticide' 16 | AND 17 | categorie_2 = 'sub_active' 18 | ) 19 | 20 | SELECT 21 | cdreseau, 22 | annee, 23 | 'sub_active' AS categorie, 24 | 'bilan_annuel_' || annee AS periode, 25 | COUNT( 26 | DISTINCT 27 | CASE 28 | WHEN 29 | valtraduite IS NOT NULL AND valtraduite > limite_qualite 30 | THEN referenceprel 31 | END 32 | ) AS nb_depassements, 33 | COUNT( 34 | DISTINCT 35 | CASE 36 | WHEN 37 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1 38 | THEN referenceprel 39 | END 40 | ) AS nb_sup_valeur_sanitaire, 41 | COUNT(DISTINCT referenceprel) AS nb_prelevements, 42 | ( 43 | COUNT( 44 | DISTINCT 45 | CASE 46 | WHEN 47 | valtraduite IS NOT NULL AND valtraduite > limite_qualite 48 | THEN referenceprel 49 | END 50 | )::float 51 | / 52 | COUNT(DISTINCT referenceprel)::float 53 | ) AS ratio_limite_qualite 54 | 55 | FROM sub_active_prels 56 | 57 | GROUP BY cdreseau, annee 58 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/cvm/int__resultats_cvm_udi_annuel.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | cvm_prels AS ( 3 | -- Certains prélèvements ont plusieurs analyses pour la même substance 4 | -- C'est très rare pour les CVM (de l'ordre d'une dizaine de cas) 5 | -- Le SELECT DISTINCT ne dédoublonne pas ces cas là 6 | -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête 7 | SELECT DISTINCT 8 | de_partition AS annee, 9 | cdreseau, 10 | referenceprel, 11 | datetimeprel, 12 | limite_qualite, 13 | valtraduite 14 | FROM 15 | {{ ref('int__resultats_udi_communes') }} 16 | WHERE 17 | categorie = 'cvm' 18 | ) 19 | 20 | SELECT 21 | cdreseau, 22 | annee, 23 | 'cvm' AS categorie, 24 | 'bilan_annuel_' || annee AS periode, 25 | count( 26 | DISTINCT 27 | CASE 28 | WHEN 29 | valtraduite IS NOT NULL AND valtraduite > limite_qualite 30 | THEN referenceprel 31 | END 32 | ) AS nb_depassements, 33 | count(DISTINCT referenceprel) AS nb_prelevements, 34 | ( 35 | count( 36 | DISTINCT 37 | CASE 38 | WHEN 39 | valtraduite IS NOT NULL AND valtraduite > limite_qualite 40 | THEN referenceprel 41 | END 42 | )::float 43 | / 44 | count(DISTINCT referenceprel)::float 45 | ) AS ratio_limite_qualite, 46 | to_json({ 47 | 'CLVYL': max(valtraduite) 48 | }) AS parametres_detectes, 49 | max(datetimeprel) AS date_dernier_prel 50 | 51 | FROM cvm_prels 52 | 53 | GROUP BY cdreseau, annee 54 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/nitrate/int__resultats_nitrate_commune_annuel.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | nitrate_prels AS ( 3 | -- Certains 
prélèvements ont plusieurs analyses pour la même substance 4 | -- Le SELECT DISTINCT ne dédoublonne pas ces cas là 5 | -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête 6 | SELECT DISTINCT 7 | de_partition AS annee, 8 | inseecommune, 9 | referenceprel, 10 | datetimeprel, 11 | valeur_sanitaire_1, 12 | valtraduite 13 | FROM 14 | {{ ref('int__resultats_udi_communes') }} 15 | WHERE 16 | categorie = 'nitrate' 17 | AND cdparametresiseeaux = 'NO3' 18 | ) 19 | 20 | SELECT 21 | inseecommune, 22 | annee, 23 | 'nitrate' AS categorie, 24 | 'bilan_annuel_' || annee AS periode, 25 | count( 26 | DISTINCT 27 | CASE 28 | WHEN 29 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1 30 | THEN referenceprel 31 | END 32 | ) AS nb_depassements, 33 | count(DISTINCT referenceprel) AS nb_prelevements, 34 | ( 35 | count( 36 | DISTINCT 37 | CASE 38 | WHEN 39 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1 40 | THEN referenceprel 41 | END 42 | )::float 43 | / 44 | count(DISTINCT referenceprel)::float 45 | ) AS ratio, 46 | to_json({ 47 | 'NO3': max(valtraduite) 48 | }) AS parametres_detectes, 49 | max(datetimeprel) AS date_dernier_prel 50 | 51 | FROM nitrate_prels 52 | 53 | GROUP BY inseecommune, annee 54 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/pesticide/sub_active/int__resultats_sub_active_commune_annuel.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | sub_active_prels AS ( 3 | SELECT DISTINCT 4 | de_partition AS annee, 5 | inseecommune, 6 | referenceprel, 7 | datetimeprel, 8 | cdparametresiseeaux, 9 | valtraduite, 10 | limite_qualite, 11 | valeur_sanitaire_1 12 | FROM 13 | {{ ref('int__resultats_udi_communes') }} 14 | WHERE 15 | categorie = 'pesticide' 16 | AND 17 | categorie_2 = 'sub_active' 18 | ) 19 | 20 | SELECT 21 | inseecommune, 22 | annee, 23 | 'sub_active' AS categorie, 24 | 'bilan_annuel_' || annee AS periode, 25 | COUNT( 26 | DISTINCT 27 | CASE 28 | WHEN 29 | valtraduite IS NOT NULL AND valtraduite > limite_qualite 30 | THEN referenceprel 31 | END 32 | ) AS nb_depassements, 33 | COUNT( 34 | DISTINCT 35 | CASE 36 | WHEN 37 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1 38 | THEN referenceprel 39 | END 40 | ) AS nb_sup_valeur_sanitaire, 41 | COUNT(DISTINCT referenceprel) AS nb_prelevements, 42 | ( 43 | COUNT( 44 | DISTINCT 45 | CASE 46 | WHEN 47 | valtraduite IS NOT NULL AND valtraduite > limite_qualite 48 | THEN referenceprel 49 | END 50 | )::float 51 | / 52 | COUNT(DISTINCT referenceprel)::float 53 | ) AS ratio_limite_qualite 54 | 55 | FROM sub_active_prels 56 | 57 | GROUP BY inseecommune, annee 58 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/cvm/int__resultats_cvm_commune_annuel.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | cvm_prels AS ( 3 | -- Certains prélèvements ont plusieurs analyses pour la même substance 4 | -- C'est très rare pour les CVM (de l'ordre d'une dizaine de cas) 5 | -- Le SELECT DISTINCT ne dédoublonne pas ces cas là 6 | -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête 7 | SELECT DISTINCT 8 | de_partition AS annee, 9 | inseecommune, 10 | referenceprel, 11 | datetimeprel, 12 | limite_qualite, 13 | valtraduite 14 | FROM 15 | {{ ref('int__resultats_udi_communes') }} 16 | WHERE 17 | categorie = 'cvm' 18 | ) 19 | 20 | SELECT 21 | inseecommune, 22 | annee, 23 | 'cvm' AS categorie, 24 
| 'bilan_annuel_' || annee AS periode, 25 | count( 26 | DISTINCT 27 | CASE 28 | WHEN 29 | valtraduite IS NOT NULL AND valtraduite > limite_qualite 30 | THEN referenceprel 31 | END 32 | ) AS nb_depassements, 33 | count(DISTINCT referenceprel) AS nb_prelevements, 34 | ( 35 | count( 36 | DISTINCT 37 | CASE 38 | WHEN 39 | valtraduite IS NOT NULL AND valtraduite > limite_qualite 40 | THEN referenceprel 41 | END 42 | )::float 43 | / 44 | count(DISTINCT referenceprel)::float 45 | ) AS ratio_limite_qualite, 46 | to_json({ 47 | 'CLVYL': max(valtraduite) 48 | }) AS parametres_detectes, 49 | max(datetimeprel) AS date_dernier_prel 50 | 51 | FROM cvm_prels 52 | 53 | GROUP BY inseecommune, annee 54 | -------------------------------------------------------------------------------- /webapp/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "13_pollution_eau", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev --turbopack", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@duckdb/node-api": "^1.2.0-alpha.14", 13 | "@radix-ui/react-dialog": "^1.1.15", 14 | "@radix-ui/react-hover-card": "^1.1.15", 15 | "@radix-ui/react-popover": "^1.1.15", 16 | "@radix-ui/react-scroll-area": "^1.2.10", 17 | "@radix-ui/react-select": "^2.2.6", 18 | "@radix-ui/react-slot": "^1.2.3", 19 | "@radix-ui/react-switch": "^1.2.6", 20 | "@radix-ui/react-tooltip": "^1.2.8", 21 | "class-variance-authority": "^0.7.1", 22 | "clsx": "^2.1.1", 23 | "cmdk": "^1.0.4", 24 | "lucide-react": "^0.475.0", 25 | "maplibre-gl": "^5.1.0", 26 | "next": "^15.2.3", 27 | "pmtiles": "^4.2.1", 28 | "protomaps-themes-base": "^4.4.0", 29 | "react": "^19.0.0", 30 | "react-dom": "^19.0.0", 31 | "react-map-gl": "^8.0.0", 32 | "tailwind-merge": "^3.0.1", 33 | "tailwindcss-animate": "^1.0.7" 34 | }, 35 | "devDependencies": { 36 | "@eslint/eslintrc": "^3", 37 | "@types/node": "^20", 38 | "@types/react": "^19", 39 | "@types/react-dom": "^19", 40 | "eslint": "^9", 41 | "eslint-config-next": "15.1.7", 42 | "eslint-config-prettier": "^10.0.1", 43 | "postcss": "^8", 44 | "prettier": "3.5.1", 45 | "tailwindcss": "^3.4", 46 | "typescript": "^5" 47 | }, 48 | "resolutions": { 49 | "react": "^19.0.0", 50 | "react-dom": "^19.0.0", 51 | "@types/react": "^19", 52 | "@types/react-dom": "^19" 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /pipelines/tasks/config/config_geojson.py: -------------------------------------------------------------------------------- 1 | def get_opendatasoft_config() -> dict: 2 | """Get OpenDataSoft configuration parameters. 
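    Example (illustrative):
        config = get_opendatasoft_config()
        config["file"]["table_name"]  # -> "opendatasoft_communes"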
3 | 4 | Returns: 5 | dict: Configuration parameters for OpenDataSoft client 6 | """ 7 | 8 | return { 9 | "source": { 10 | "base_url": "https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/", 11 | "id": "georef-france-commune/exports/geojson", 12 | "datetime": "20240220", 13 | }, 14 | "file": { 15 | "file_name": "georef-france-commune.geojson", 16 | "table_name": "opendatasoft_communes", 17 | }, 18 | } 19 | 20 | 21 | config_merge_geo = { 22 | "communes": { 23 | "result_table": "web__resultats_communes", 24 | "geom_table": "stg_communes__opendatasoft_json", 25 | "groupby_columns": ["commune_code_insee", "commune_nom"], 26 | "result_join_column": "commune_code_insee", 27 | "geom_join_column": "com_code", 28 | "upload_file_name": "georef-france-communes-prelevement.geojson", 29 | }, 30 | "udi": { 31 | "result_table": "web__resultats_udi", 32 | "geom_table": "stg_udi_json", 33 | "groupby_columns": ["cdreseau", "nomreseaux"], 34 | "result_join_column": "cdreseau", 35 | "geom_join_column": "code_udi", 36 | "upload_file_name": "georef-france-udi-prelevement.geojson", 37 | }, 38 | } 39 | 40 | col_input = ["periode", "categorie"] 41 | 42 | list_column_result = [ 43 | "resultat", 44 | "ratio", 45 | "date_dernier_prel", 46 | "nb_parametres", 47 | "nb_prelevements", 48 | "nb_sup_valeur_sanitaire", 49 | "parametres_detectes", 50 | ] 51 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/tous/int__resultats_tous_udi_dernier.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cdreseau, 3 | 'tous' AS categorie, 4 | 'dernier_prel' AS periode, 5 | MAX(date_dernier_prel) AS date_dernier_prel, 6 | SUM(nb_parametres) AS nb_parametres, 7 | CASE 8 | WHEN BOOL_OR(resultat IN ( 9 | 'sup_valeur_sanitaire', 10 | 'sup_valeur_sanitaire_2' 11 | )) THEN 'sup_limite_sanitaire' 12 | 13 | WHEN BOOL_OR(resultat IN ( 14 | 'cvm_sup_0_5', 15 | 'somme_20pfas_sup_0_1', 16 | 'sup_limite_qualite' 17 | )) THEN 'sup_limite_qualite' 18 | 19 | WHEN BOOL_OR(resultat IN ( 20 | 'inf_valeur_sanitaire', 21 | 'inf_limite_qualite', 22 | -- 'inf_limites_sup_0_1', 23 | --'sup_limite_indicative', 24 | 'inf_limites', 25 | 'somme_20pfas_inf_0_1_et_4pfas_sup_0_02', 26 | 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02', 27 | 'sup_limite_qualite_2036', 28 | 'no3_inf_25', 29 | 'no3_inf_40' 30 | 31 | )) THEN 'quantifie' 32 | 33 | WHEN BOOL_AND(resultat IN ( 34 | 'non_quantifie' 35 | )) THEN 'non_quantifie' 36 | 37 | ELSE 'erreur' 38 | END AS resultat 39 | 40 | FROM {{ ref('int__union_resultats_udi') }} 41 | WHERE 42 | periode = 'dernier_prel' 43 | AND 44 | categorie NOT IN ( 45 | 'sub_active', 46 | 'metabolite', 47 | 'metabolite_esa_metolachlore', 48 | 'metabolite_chlorothalonil_r471811', 49 | 'metabolite_chloridazone_desphenyl', 50 | 'metabolite_chloridazone_methyl_desphenyl', 51 | 'metabolite_atrazine_desethyl' 52 | ) 53 | GROUP BY cdreseau 54 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/tous/int__resultats_tous_commune_dernier.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | inseecommune, 3 | 'tous' AS categorie, 4 | 'dernier_prel' AS periode, 5 | MAX(date_dernier_prel) AS date_dernier_prel, 6 | SUM(nb_parametres) AS nb_parametres, 7 | CASE 8 | WHEN BOOL_OR(resultat IN ( 9 | 'sup_valeur_sanitaire', 10 | 'sup_valeur_sanitaire_2' 11 | )) THEN 'sup_limite_sanitaire' 12 | 13 | WHEN BOOL_OR(resultat IN ( 14 | 'cvm_sup_0_5', 15 | 
'somme_20pfas_sup_0_1', 16 | 'sup_limite_qualite' 17 | )) THEN 'sup_limite_qualite' 18 | 19 | WHEN BOOL_OR(resultat IN ( 20 | 'inf_valeur_sanitaire', 21 | 'inf_limite_qualite', 22 | -- 'inf_limites_sup_0_1', 23 | --'sup_limite_indicative', 24 | 'inf_limites', 25 | 'somme_20pfas_inf_0_1_et_4pfas_sup_0_02', 26 | 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02', 27 | 'sup_limite_qualite_2036', 28 | 'no3_inf_25', 29 | 'no3_inf_40' 30 | 31 | )) THEN 'quantifie' 32 | 33 | WHEN BOOL_AND(resultat IN ( 34 | 'non_quantifie' 35 | )) THEN 'non_quantifie' 36 | 37 | ELSE 'erreur' 38 | END AS resultat 39 | 40 | FROM {{ ref('int__union_resultats_commune') }} 41 | WHERE 42 | periode = 'dernier_prel' 43 | AND 44 | categorie NOT IN ( 45 | 'sub_active', 46 | 'metabolite', 47 | 'metabolite_esa_metolachlore', 48 | 'metabolite_chlorothalonil_r471811', 49 | 'metabolite_chloridazone_desphenyl', 50 | 'metabolite_chloridazone_methyl_desphenyl', 51 | 'metabolite_atrazine_desethyl' 52 | ) 53 | GROUP BY inseecommune 54 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/cvm/int__resultats_cvm_udi_dernier.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | last_pvl AS ( 3 | SELECT 4 | cdreseau, 5 | categorie, 6 | cdparametresiseeaux, 7 | datetimeprel, 8 | limite_qualite, 9 | valtraduite, 10 | ROW_NUMBER() 11 | OVER ( 12 | PARTITION BY cdreseau, cdparametresiseeaux 13 | ORDER BY datetimeprel DESC 14 | ) 15 | AS row_number 16 | FROM 17 | {{ ref('int__resultats_udi_communes') }} 18 | WHERE 19 | categorie = 'cvm' 20 | AND 21 | -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement 22 | datetimeprel >= DATE_TRUNC('day', ( 23 | SELECT MAX(sub.datetimeprel) 24 | FROM {{ ref('int__resultats_udi_communes') }} AS sub 25 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY 26 | ) 27 | 28 | SELECT 29 | last_pvl.cdreseau, 30 | last_pvl.categorie, 31 | 'dernier_prel' AS periode, 32 | last_pvl.datetimeprel AS date_dernier_prel, 33 | 1 AS nb_parametres, 34 | CASE 35 | WHEN 36 | last_pvl.valtraduite = 0 37 | OR last_pvl.valtraduite IS NULL 38 | THEN 'non_quantifie' 39 | WHEN 40 | last_pvl.valtraduite > last_pvl.limite_qualite 41 | THEN 'cvm_sup_0_5' 42 | WHEN 43 | last_pvl.valtraduite <= last_pvl.limite_qualite 44 | THEN 'inf_limites' 45 | ELSE 'erreur' 46 | END AS resultat, 47 | CASE 48 | WHEN 49 | last_pvl.valtraduite > 0 50 | THEN TO_JSON(MAP([last_pvl.cdparametresiseeaux], [last_pvl.valtraduite])) 51 | ELSE TO_JSON(MAP([], [])) 52 | END AS parametres_detectes 53 | FROM 54 | last_pvl 55 | WHERE 56 | last_pvl.row_number = 1 57 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/cvm/int__resultats_cvm_commune_dernier.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | last_pvl AS ( 3 | SELECT 4 | inseecommune, 5 | categorie, 6 | cdparametresiseeaux, 7 | datetimeprel, 8 | valtraduite, 9 | limite_qualite, 10 | ROW_NUMBER() 11 | OVER ( 12 | PARTITION BY inseecommune, cdparametresiseeaux 13 | ORDER BY datetimeprel DESC 14 | ) 15 | AS row_number 16 | FROM 17 | {{ ref('int__resultats_udi_communes') }} 18 | WHERE 19 | categorie = 'cvm' 20 | AND 21 | -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement 22 | datetimeprel >= DATE_TRUNC('day', ( 23 | SELECT MAX(sub.datetimeprel) 24 | FROM {{ ref('int__resultats_udi_communes') }} AS sub 25 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY 26 | ) 27 | 28 | SELECT 29 | 
last_pvl.inseecommune, 30 | last_pvl.categorie, 31 | 'dernier_prel' AS periode, 32 | last_pvl.datetimeprel AS date_dernier_prel, 33 | 1 AS nb_parametres, 34 | CASE 35 | WHEN 36 | last_pvl.valtraduite = 0 37 | OR last_pvl.valtraduite IS NULL 38 | THEN 'non_quantifie' 39 | WHEN 40 | last_pvl.valtraduite > last_pvl.limite_qualite 41 | THEN 'cvm_sup_0_5' 42 | WHEN 43 | last_pvl.valtraduite <= last_pvl.limite_qualite 44 | THEN 'inf_limites' 45 | ELSE 'erreur' 46 | END AS resultat, 47 | CASE 48 | WHEN 49 | last_pvl.valtraduite > 0 50 | THEN TO_JSON(MAP([last_pvl.cdparametresiseeaux], [last_pvl.valtraduite])) 51 | ELSE TO_JSON(MAP([], [])) 52 | END AS parametres_detectes 53 | FROM 54 | last_pvl 55 | WHERE 56 | last_pvl.row_number = 1 57 | -------------------------------------------------------------------------------- /webapp/components/ui/scroll-area.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import * as React from "react"; 4 | import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area"; 5 | 6 | import { cn } from "@/lib/utils"; 7 | 8 | const ScrollArea = React.forwardRef< 9 | React.ElementRef, 10 | React.ComponentPropsWithoutRef 11 | >(({ className, children, ...props }, ref) => ( 12 | 17 | 18 | {children} 19 | 20 | 21 | 22 | 23 | )); 24 | ScrollArea.displayName = ScrollAreaPrimitive.Root.displayName; 25 | 26 | const ScrollBar = React.forwardRef< 27 | React.ElementRef, 28 | React.ComponentPropsWithoutRef 29 | >(({ className, orientation = "vertical", ...props }, ref) => ( 30 | 43 | 44 | 45 | )); 46 | ScrollBar.displayName = ScrollAreaPrimitive.ScrollAreaScrollbar.displayName; 47 | 48 | export { ScrollArea, ScrollBar }; 49 | -------------------------------------------------------------------------------- /dbt_/tests/test__coverage_20pfas_4pfas_98pct.sql: -------------------------------------------------------------------------------- 1 | -- Dans le calcul des résultats PFAS derniers prélèvements 2 | -- (int__resultats_pfas_udi_dernier.sql), on présuppose que la plupart du temps 3 | -- la somme des 20 PFAS (SPFAS) et la somme des 4 PFAS (PFOA, PFOS, PFNA, 4 | -- PFHXS) sont bien présentes. Ce test permet de vérifier que pour au moins 98% 5 | -- des couples cdreseau/referenceprel c'est le cas. 
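-- Exemple illustratif : sur 10 000 couples cdreseau/referenceprel, le test
-- échoue (renvoie des lignes) dès que moins de 9 800 d'entre eux (98 %)
-- contiennent le paramètre SPFAS, ou que moins de 9 800 contiennent les
-- quatre paramètres PFOA, PFOS, PFNA et PFHXS.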
6 | 7 | WITH yearly_pfas_results AS ( 8 | SELECT 9 | cdreseau, 10 | referenceprel, 11 | -- Vérifie si la somme des 20 PFAS est disponible 12 | COUNT( 13 | DISTINCT CASE 14 | WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux 15 | END 16 | ) AS has_sum_20_pfas, 17 | -- Vérifie si tous les 4 PFAS spécifiques sont disponibles 18 | COUNT( 19 | DISTINCT CASE 20 | WHEN 21 | cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS') 22 | THEN cdparametresiseeaux 23 | END 24 | ) AS count_4_pfas 25 | FROM {{ ref('int__resultats_udi_communes') }} 26 | WHERE 27 | categorie = 'pfas' 28 | AND CURRENT_DATE - datetimeprel < INTERVAL 1 YEAR 29 | GROUP BY cdreseau, referenceprel 30 | ) 31 | 32 | SELECT 33 | 34 | COUNT(*) AS total_aggregations, 35 | -- Pourcentage d'agrégations avec la somme des 20 PFAS présente 36 | ROUND( 37 | ( 38 | SUM(CASE WHEN has_sum_20_pfas = 1 THEN 1 ELSE 0 END) 39 | * 100.0 40 | / COUNT(*) 41 | ), 42 | 2 43 | ) AS pct_with_sum_20_pfas, 44 | -- Pourcentage d'agrégations avec tous les 4 PFAS spécifiques présents 45 | ROUND( 46 | (SUM(CASE WHEN count_4_pfas = 4 THEN 1 ELSE 0 END) * 100.0 / COUNT(*)), 47 | 2 48 | ) AS pct_with_all_4_pfas 49 | FROM yearly_pfas_results 50 | 51 | HAVING pct_with_sum_20_pfas < 98 OR pct_with_all_4_pfas < 98 52 | -------------------------------------------------------------------------------- /webapp/next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | serverExternalPackages: ["@duckdb/node-api"], 5 | eslint: { 6 | // Warning: This allows production builds to successfully complete even if 7 | // your project has ESLint errors. 8 | ignoreDuringBuilds: true, 9 | }, 10 | output: "standalone", 11 | headers: async () => { 12 | return [ 13 | { 14 | source: "/pmtiles/:path*.pmtiles", 15 | headers: [ 16 | { 17 | key: "Cache-Control", 18 | value: "public, max-age=120, s-maxage=60", 19 | }, 20 | { 21 | key: "Accept-Ranges", 22 | value: "bytes", 23 | }, 24 | ], 25 | }, 26 | // { 27 | // source: "/_next/static/:path*", 28 | // headers: [ 29 | // { 30 | // key: "Cache-Control", 31 | // value: "public, max-age=31536000, immutable", 32 | // }, 33 | // ], 34 | // }, 35 | { 36 | source: "/embed", 37 | headers: [ 38 | { 39 | key: "Cache-Control", 40 | value: "public, max-age=120, s-maxage=60", 41 | }, 42 | { 43 | key: "Content-Security-Policy", 44 | value: 45 | "frame-ancestors 'self' https://dansmoneau.fr https://*.dansmoneau.fr", 46 | }, 47 | ], 48 | }, 49 | { 50 | source: "/embed-external", 51 | headers: [ 52 | { 53 | key: "Cache-Control", 54 | value: "public, max-age=120, s-maxage=60", 55 | }, 56 | ], 57 | }, 58 | ]; 59 | }, 60 | rewrites: async () => { 61 | return [ 62 | { 63 | source: "/s3/:path*", 64 | destination: "https://s3.fr-par.scw.cloud/pollution-eau-s3/:path*", 65 | }, 66 | ]; 67 | }, 68 | }; 69 | 70 | export default nextConfig; 71 | -------------------------------------------------------------------------------- /pipelines/notebooks/test_geojson_from_db.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pipelines.tasks.client.core.duckdb_client import DuckDBClient\n", 10 | "\n", 11 | "duckdb_client = DuckDBClient()" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | 
"source": [ 20 | "from pipelines.tasks.client.geojson_processor import GeoJSONProcessor\n", 21 | "\n", 22 | "geojson_processor = GeoJSONProcessor(\"communes\", duckdb_client)\n", 23 | "geojson_communes = geojson_processor.generate_geojson()" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "from pipelines.tasks.client.geojson_processor import GeoJSONProcessor\n", 33 | "\n", 34 | "geojson_processor = GeoJSONProcessor(\"udi\", duckdb_client)\n", 35 | "geojson_udi = geojson_processor.generate_geojson()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "geojson_udi" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [] 53 | } 54 | ], 55 | "metadata": { 56 | "kernelspec": { 57 | "display_name": ".venv", 58 | "language": "python", 59 | "name": "python3" 60 | }, 61 | "language_info": { 62 | "codemirror_mode": { 63 | "name": "ipython", 64 | "version": 3 65 | }, 66 | "file_extension": ".py", 67 | "mimetype": "text/x-python", 68 | "name": "python", 69 | "nbconvert_exporter": "python", 70 | "pygments_lexer": "ipython3", 71 | "version": "3.12.7" 72 | } 73 | }, 74 | "nbformat": 4, 75 | "nbformat_minor": 2 76 | } 77 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/sub_indus/int__resultats_sub_indus_udi_annuel.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | prels AS ( 3 | -- Certains prélèvements ont plusieurs analyses pour la même substance 4 | -- Le SELECT DISTINCT ne dédoublonne pas ces cas là 5 | -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête 6 | SELECT DISTINCT 7 | de_partition AS annee, 8 | cdreseau, 9 | cdparametresiseeaux, 10 | valeur_sanitaire_1, 11 | referenceprel, 12 | datetimeprel, 13 | valtraduite 14 | FROM 15 | {{ ref('int__resultats_udi_communes') }} 16 | WHERE 17 | cdparametresiseeaux IN ( 18 | -- Les résultats pour le 1,4 dioxane sont ignorés pour l'instant 19 | --'14DAN', 20 | 'PCLAT' 21 | ) 22 | ) 23 | 24 | SELECT 25 | cdreseau, 26 | annee, 27 | CASE 28 | WHEN cdparametresiseeaux = '14DAN' THEN 'sub_indus_14dioxane' 29 | WHEN cdparametresiseeaux = 'PCLAT' THEN 'sub_indus_perchlorate' 30 | END AS categorie, 31 | 'bilan_annuel_' || annee AS periode, 32 | count( 33 | DISTINCT 34 | CASE 35 | WHEN 36 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1 37 | THEN referenceprel 38 | END 39 | ) AS nb_depassements, 40 | count(DISTINCT referenceprel) AS nb_prelevements, 41 | ( 42 | count( 43 | DISTINCT 44 | CASE 45 | WHEN 46 | valtraduite IS NOT NULL 47 | AND valtraduite > valeur_sanitaire_1 48 | THEN referenceprel 49 | END 50 | )::float 51 | / 52 | count(DISTINCT referenceprel)::float 53 | ) AS ratio_limite_sanitaire, 54 | json_object( 55 | max(cdparametresiseeaux), max(valtraduite) 56 | ) AS parametres_detectes, 57 | date_trunc('day', max(datetimeprel)) AS date_dernier_prel 58 | 59 | FROM prels 60 | 61 | GROUP BY cdreseau, annee, categorie 62 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/nitrate/int__resultats_nitrate_udi_dernier.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | last_pvl AS ( 3 | SELECT 4 | cdreseau, 5 | categorie, 6 | cdparametresiseeaux, 7 | valeur_sanitaire_1, 8 
| datetimeprel, 9 | valtraduite, 10 | ROW_NUMBER() 11 | OVER ( 12 | PARTITION BY cdreseau, cdparametresiseeaux 13 | ORDER BY datetimeprel DESC 14 | ) 15 | AS row_number 16 | FROM 17 | {{ ref('int__resultats_udi_communes') }} 18 | WHERE 19 | categorie = 'nitrate' 20 | AND 21 | cdparametresiseeaux = 'NO3' 22 | AND 23 | -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement 24 | datetimeprel >= DATE_TRUNC('day', ( 25 | SELECT MAX(sub.datetimeprel) 26 | FROM {{ ref('int__resultats_udi_communes') }} AS sub 27 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY 28 | AND 29 | -- Cf cas cdreseau IN( '034005906') , referenceprel= 03400327764 30 | valtraduite IS NOT NULL 31 | ) 32 | 33 | SELECT 34 | cdreseau, 35 | 'dernier_prel' AS periode, 36 | datetimeprel AS date_dernier_prel, 37 | 1 AS nb_parametres, 38 | categorie, 39 | CASE 40 | WHEN 41 | valtraduite > valeur_sanitaire_1 42 | THEN 'sup_valeur_sanitaire' 43 | WHEN 44 | valtraduite <= 10 45 | THEN 'non_quantifie' 46 | WHEN 47 | valtraduite <= 25 48 | THEN 'no3_inf_25' 49 | WHEN 50 | valtraduite > 25 AND valtraduite <= 40 51 | THEN 'no3_inf_40' 52 | WHEN 53 | valtraduite > 40 AND valtraduite <= valeur_sanitaire_1 54 | THEN 'inf_valeur_sanitaire' 55 | ELSE 'error' 56 | END AS resultat, 57 | JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite) 58 | AS parametres_detectes 59 | FROM 60 | last_pvl 61 | WHERE 62 | row_number = 1 63 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/sub_indus/int__resultats_sub_indus_commune_annuel.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | prels AS ( 3 | -- Certains prélèvements ont plusieurs analyses pour la même substance 4 | -- Le SELECT DISTINCT ne dédoublonne pas ces cas là 5 | -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête 6 | SELECT DISTINCT 7 | de_partition AS annee, 8 | inseecommune, 9 | cdparametresiseeaux, 10 | valeur_sanitaire_1, 11 | referenceprel, 12 | datetimeprel, 13 | valtraduite 14 | FROM 15 | {{ ref('int__resultats_udi_communes') }} 16 | WHERE 17 | cdparametresiseeaux IN ( 18 | -- Les résultats pour le 1,4 dioxane sont ignorés pour l'instant 19 | --'14DAN', 20 | 'PCLAT' 21 | ) 22 | ) 23 | 24 | SELECT 25 | inseecommune, 26 | annee, 27 | CASE 28 | WHEN cdparametresiseeaux = '14DAN' THEN 'sub_indus_14dioxane' 29 | WHEN cdparametresiseeaux = 'PCLAT' THEN 'sub_indus_perchlorate' 30 | END AS categorie, 31 | 'bilan_annuel_' || annee AS periode, 32 | count( 33 | DISTINCT 34 | CASE 35 | WHEN 36 | valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1 37 | THEN referenceprel 38 | END 39 | ) AS nb_depassements, 40 | count(DISTINCT referenceprel) AS nb_prelevements, 41 | ( 42 | count( 43 | DISTINCT 44 | CASE 45 | WHEN 46 | valtraduite IS NOT NULL 47 | AND valtraduite > valeur_sanitaire_1 48 | THEN referenceprel 49 | END 50 | )::float 51 | / 52 | count(DISTINCT referenceprel)::float 53 | ) AS ratio_limite_sanitaire, 54 | json_object( 55 | max(cdparametresiseeaux), max(valtraduite) 56 | ) AS parametres_detectes, 57 | date_trunc('day', max(datetimeprel)) AS date_dernier_prel 58 | 59 | FROM prels 60 | 61 | GROUP BY inseecommune, annee, categorie 62 | -------------------------------------------------------------------------------- /webapp/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata, Viewport } from "next"; 2 | import "./globals.css"; 3 | import Script from 
"next/script"; 4 | 5 | export const metadata: Metadata = { 6 | title: "Pollution de l'Eau Potable en France", 7 | description: "", 8 | robots: { 9 | index: false, 10 | follow: false, 11 | }, 12 | icons: { 13 | icon: "/images/dfg.png", 14 | shortcut: "/images/dfg.png", 15 | apple: "/images/dfg.png", 16 | }, 17 | }; 18 | 19 | export const viewport: Viewport = { 20 | width: "device-width", 21 | initialScale: 1, 22 | maximumScale: 1, 23 | userScalable: false, 24 | }; 25 | 26 | export default function RootLayout({ 27 | children, 28 | }: Readonly<{ 29 | children: React.ReactNode; 30 | }>) { 31 | return ( 32 | 33 | 34 | {/* Hard fix to prevent iframe scrolling: When the PollutionMapSearchBox Popover appears, it causes unwanted scrolling in the parent window containing the iframe. Despite attempts to find a more elegant solution, this override is the only reliable way to prevent this behavior when the app is embedded in an iframe. */} 35 | 55 | {children} 56 | 57 | 58 | ); 59 | } 60 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/nitrate/int__resultats_nitrate_commune_dernier.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | last_pvl AS ( 3 | SELECT 4 | inseecommune, 5 | categorie, 6 | cdparametresiseeaux, 7 | valeur_sanitaire_1, 8 | datetimeprel, 9 | valtraduite, 10 | ROW_NUMBER() 11 | OVER ( 12 | PARTITION BY inseecommune, cdparametresiseeaux 13 | ORDER BY datetimeprel DESC 14 | ) 15 | AS row_number 16 | FROM 17 | {{ ref('int__resultats_udi_communes') }} 18 | WHERE 19 | categorie = 'nitrate' 20 | AND 21 | cdparametresiseeaux = 'NO3' 22 | AND 23 | -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement 24 | datetimeprel >= DATE_TRUNC('day', ( 25 | SELECT MAX(sub.datetimeprel) 26 | FROM {{ ref('int__resultats_udi_communes') }} AS sub 27 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY 28 | AND 29 | -- Cf cas cdreseau IN( '034005906') , referenceprel= 03400327764 30 | valtraduite IS NOT NULL 31 | ) 32 | 33 | SELECT 34 | inseecommune, 35 | 'dernier_prel' AS periode, 36 | datetimeprel AS date_dernier_prel, 37 | 1 AS nb_parametres, 38 | categorie, 39 | CASE 40 | WHEN 41 | valtraduite > valeur_sanitaire_1 42 | THEN 'sup_valeur_sanitaire' 43 | WHEN 44 | valtraduite <= 10 45 | THEN 'non_quantifie' 46 | WHEN 47 | valtraduite <= 25 48 | THEN 'no3_inf_25' 49 | WHEN 50 | valtraduite > 25 AND valtraduite <= 40 51 | THEN 'no3_inf_40' 52 | WHEN 53 | valtraduite > 40 AND valtraduite <= valeur_sanitaire_1 54 | THEN 'inf_valeur_sanitaire' 55 | ELSE 'error' 56 | END AS resultat, 57 | JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite) 58 | AS parametres_detectes 59 | FROM 60 | last_pvl 61 | WHERE 62 | row_number = 1 63 | -------------------------------------------------------------------------------- /webapp/components/EmbedBanner.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { ExternalLink, InfoIcon } from "lucide-react"; 4 | 5 | export default function EmbedBanner() { 6 | return ( 7 |
16 | Générations Futures et Data For Good ont créé Dans Mon Eau.
19 | Un outil pour connaître la qualité de votre eau du robinet.
52 | ); 53 | } 54 | -------------------------------------------------------------------------------- /dbt_/tests/test_sub_active_results.sql: -------------------------------------------------------------------------------- 1 | -- dernier udi 2 | SELECT 3 | 'dernier_prel' AS periode, 4 | cdreseau, 5 | resultat, 6 | 0 AS ratio_limite_qualite, 7 | 0 AS nb_sup_valeur_sanitaire 8 | FROM 9 | {{ ref('int__resultats_sub_active_udi_dernier') }} 10 | WHERE 11 | ( 12 | cdreseau = '051000769' 13 | AND date_dernier_prel = TIMESTAMP '2025-03-31 13:58:00' 14 | AND resultat != 'non_quantifie' 15 | ) 16 | OR 17 | ( 18 | cdreseau = '030000509' 19 | AND date_dernier_prel = TIMESTAMP '2025-03-31 11:56:00' 20 | AND resultat != 'inf_limite_qualite' 21 | ) 22 | OR 23 | ( 24 | cdreseau = '029000947' 25 | AND date_dernier_prel = TIMESTAMP '2025-03-31 11:00:00' 26 | AND resultat != 'sup_limite_qualite' 27 | ) 28 | OR 29 | ( 30 | cdreseau = '060001302' 31 | AND date_dernier_prel = TIMESTAMP '2024-12-19 08:29:00' 32 | AND resultat != 'sup_valeur_sanitaire' 33 | ) 34 | -- annuel udi 35 | UNION ALL 36 | SELECT 37 | 'bilan_annuel' AS periode, 38 | cdreseau, 39 | '' AS resultat, 40 | ratio_limite_qualite, 41 | nb_sup_valeur_sanitaire 42 | FROM 43 | {{ ref('int__resultats_sub_active_udi_annuel') }} 44 | WHERE 45 | ( 46 | cdreseau = '051000769' 47 | AND annee = 2024 48 | AND ( 49 | nb_prelevements != 6 50 | OR nb_depassements != 3 51 | OR nb_sup_valeur_sanitaire != 0 52 | OR ratio_limite_qualite != 0.5 53 | ) 54 | ) 55 | OR 56 | ( 57 | cdreseau = '030000509' 58 | AND annee = 2024 59 | AND ( 60 | nb_prelevements != 7 61 | OR nb_depassements != 0 62 | OR nb_sup_valeur_sanitaire != 0 63 | OR ratio_limite_qualite != 0 64 | ) 65 | ) 66 | OR 67 | ( 68 | cdreseau = '060001302' 69 | AND annee = 2024 70 | AND ( 71 | nb_prelevements != 4 72 | OR nb_depassements != 1 73 | OR nb_sup_valeur_sanitaire != 1 74 | OR ratio_limite_qualite != 0.25 75 | ) 76 | ) 77 | -------------------------------------------------------------------------------- /webapp/components/ui/button.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react"; 2 | import { Slot } from "@radix-ui/react-slot"; 3 | import { cva, type VariantProps } from "class-variance-authority"; 4 | 5 | import { cn } from "@/lib/utils"; 6 | 7 | const buttonVariants = cva( 8 | "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0", 9 | { 10 | variants: { 11 | variant: { 12 | default: 13 | "bg-primary text-primary-foreground shadow hover:bg-primary/90", 14 | destructive: 15 | "bg-destructive text-destructive-foreground shadow-sm hover:bg-destructive/90", 16 | outline: 17 | "border border-input bg-background shadow-sm hover:bg-accent hover:text-accent-foreground", 18 | secondary: 19 | "bg-secondary text-secondary-foreground shadow-sm hover:bg-secondary/80", 20 | ghost: "hover:bg-accent hover:text-accent-foreground", 21 | link: "text-primary underline-offset-4 hover:underline", 22 | }, 23 | size: { 24 | default: "h-9 px-4 py-2", 25 | sm: "h-8 rounded-md px-3 text-xs", 26 | lg: "h-10 rounded-md px-8", 27 | icon: "h-9 w-9", 28 | }, 29 | }, 30 | defaultVariants: { 31 | variant: "default", 32 | size: "default", 33 | }, 34 | }, 35 | ); 36 | 37 | export interface ButtonProps 38 | extends 
React.ButtonHTMLAttributes, 39 | VariantProps { 40 | asChild?: boolean; 41 | } 42 | 43 | const Button = React.forwardRef( 44 | ({ className, variant, size, asChild = false, ...props }, ref) => { 45 | const Comp = asChild ? Slot : "button"; 46 | return ( 47 | 52 | ); 53 | }, 54 | ); 55 | Button.displayName = "Button"; 56 | 57 | export { Button, buttonVariants }; 58 | -------------------------------------------------------------------------------- /dbt_/tests/test_tous_results.sql: -------------------------------------------------------------------------------- 1 | -- dernier relevé 2 | SELECT 3 | 'dernier_prel' AS periode, 4 | cdreseau, 5 | categorie, 6 | resultat, 7 | null AS ratio, 8 | null AS nb_prelevements, 9 | null AS nb_sup_valeur_sanitaire 10 | FROM 11 | {{ ref('int__resultats_tous_udi_dernier') }} 12 | WHERE 13 | ( 14 | cdreseau = '001000598' 15 | AND date_dernier_prel = '2025-03-26 10:59:00' 16 | AND resultat != 'sup_limite_qualite' 17 | ) 18 | OR 19 | ( 20 | cdreseau = '049000506' 21 | AND date_dernier_prel = '2025-07-08 08:30:00' 22 | AND resultat != 'quantifie' 23 | ) 24 | OR 25 | ( 26 | cdreseau = '033000400' 27 | AND date_dernier_prel = '2025-07-17 09:50:00' 28 | AND resultat != 'non_quantifie' 29 | ) 30 | OR 31 | ( 32 | cdreseau = '088002246' 33 | AND date_dernier_prel = '2025-04-22 08:11:00' 34 | AND resultat != 'sup_limite_sanitaire' 35 | ) 36 | UNION ALL 37 | -- annuel 38 | SELECT 39 | periode, 40 | cdreseau, 41 | categorie, 42 | null AS resultat, 43 | ratio, 44 | nb_prelevements, 45 | nb_sup_valeur_sanitaire 46 | FROM 47 | {{ ref('int__resultats_tous_udi_annuel') }} 48 | WHERE 49 | ( 50 | cdreseau = '054000780' 51 | AND periode = 'bilan_annuel_2024' 52 | AND ( 53 | nb_prelevements != 7 54 | -- cvm : 1 55 | -- metaux_lourds_as : 1 56 | -- nitrate : 5 57 | -- pesticide : 1 58 | OR 59 | ratio != 0 60 | OR 61 | nb_sup_valeur_sanitaire != 0 62 | ) 63 | ) 64 | OR 65 | ( 66 | cdreseau = '061000423' 67 | AND periode = 'bilan_annuel_2024' 68 | AND ( 69 | nb_prelevements != 27 70 | -- cvm: 1 71 | -- metaux_lourds_as: 1 72 | -- nitrate: 14 73 | -- pesticide: 12 74 | OR 75 | ratio <= 0.4 76 | OR 77 | ratio >= 0.5 78 | -- ratio = 12/28 ~= 0.42 79 | OR 80 | nb_sup_valeur_sanitaire != 0 81 | ) 82 | ) 83 | -------------------------------------------------------------------------------- /webapp/components/ui/card.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react"; 2 | 3 | import { cn } from "@/lib/utils"; 4 | 5 | const Card = React.forwardRef< 6 | HTMLDivElement, 7 | React.HTMLAttributes 8 | >(({ className, ...props }, ref) => ( 9 |
17 | )); 18 | Card.displayName = "Card"; 19 | 20 | const CardHeader = React.forwardRef< 21 | HTMLDivElement, 22 | React.HTMLAttributes 23 | >(({ className, ...props }, ref) => ( 24 |
29 | )); 30 | CardHeader.displayName = "CardHeader"; 31 | 32 | const CardTitle = React.forwardRef< 33 | HTMLDivElement, 34 | React.HTMLAttributes 35 | >(({ className, ...props }, ref) => ( 36 |
41 | )); 42 | CardTitle.displayName = "CardTitle"; 43 | 44 | const CardDescription = React.forwardRef< 45 | HTMLDivElement, 46 | React.HTMLAttributes 47 | >(({ className, ...props }, ref) => ( 48 |
53 | )); 54 | CardDescription.displayName = "CardDescription"; 55 | 56 | const CardContent = React.forwardRef< 57 | HTMLDivElement, 58 | React.HTMLAttributes 59 | >(({ className, ...props }, ref) => ( 60 |
61 | )); 62 | CardContent.displayName = "CardContent"; 63 | 64 | const CardFooter = React.forwardRef< 65 | HTMLDivElement, 66 | React.HTMLAttributes 67 | >(({ className, ...props }, ref) => ( 68 |
73 | )); 74 | CardFooter.displayName = "CardFooter"; 75 | 76 | export { 77 | Card, 78 | CardHeader, 79 | CardFooter, 80 | CardTitle, 81 | CardDescription, 82 | CardContent, 83 | }; 84 | -------------------------------------------------------------------------------- /webapp/app/config.ts: -------------------------------------------------------------------------------- 1 | import layers from "protomaps-themes-base"; 2 | 3 | export const MAPLIBRE_MAP = { 4 | protomaps: { 5 | // https://protomaps.com/api 6 | api_key: process.env.NEXT_PUBLIC_PROTOMAPS_API_KEY || "", 7 | maxzoom: 15, 8 | theme: "white", // unsure between "white" and "light" 9 | language: "fr", 10 | }, 11 | initialViewState: { 12 | longitude: 0.882755215151974, 13 | latitude: 46.489410422633256, 14 | zoom: 5.2, 15 | }, 16 | countryBorderWidth: 2, 17 | countryBorderColor: "#bdb8b8", 18 | }; 19 | 20 | // Default map style without layers (will be added dynamically) 21 | export const DEFAULT_MAP_STYLE: maplibregl.StyleSpecification = { 22 | version: 8, 23 | glyphs: 24 | "https://protomaps.github.io/basemaps-assets/fonts/{fontstack}/{range}.pbf", 25 | sprite: "https://protomaps.github.io/basemaps-assets/sprites/v4/light", 26 | sources: { 27 | protomaps: { 28 | type: "vector", 29 | maxzoom: MAPLIBRE_MAP.protomaps.maxzoom, 30 | url: `https://api.protomaps.com/tiles/v4.json?key=${MAPLIBRE_MAP.protomaps.api_key}`, 31 | attribution: 32 | '© OpenStreetMap', 33 | }, 34 | communes: { 35 | type: "vector", 36 | url: "pmtiles:///pmtiles/commune_data.pmtiles", 37 | }, 38 | udis: { 39 | type: "vector", 40 | url: "pmtiles:///pmtiles/udi_data.pmtiles", 41 | }, 42 | }, 43 | layers: [], // layers will be added dynamically in the Map component 44 | } satisfies maplibregl.StyleSpecification; 45 | 46 | // Helper function to get the default base layers 47 | export const getDefaultLayers = () => { 48 | return [ 49 | ...layers( 50 | "protomaps", 51 | MAPLIBRE_MAP.protomaps.theme, 52 | MAPLIBRE_MAP.protomaps.language, 53 | ).filter((layer) => !["boundaries_country"].includes(layer.id)), 54 | { 55 | id: "boundaries_country", 56 | type: "line", 57 | source: "protomaps", 58 | "source-layer": "boundaries", 59 | filter: ["<=", "kind_detail", 2], 60 | paint: { 61 | "line-color": MAPLIBRE_MAP.countryBorderColor, 62 | "line-width": MAPLIBRE_MAP.countryBorderWidth, 63 | }, 64 | }, 65 | ]; 66 | }; 67 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/metaux_lourds/int__resultats_metaux_lourds_udi_annuel.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | metaux_lourds_prels AS ( 3 | -- Certains prélèvements ont plusieurs analyses pour la même substance 4 | -- Le SELECT DISTINCT ne dédoublonne pas ces cas là 5 | -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête 6 | SELECT DISTINCT 7 | de_partition AS annee, 8 | cdreseau, 9 | cdparametresiseeaux, 10 | valeur_sanitaire_1, 11 | limite_qualite, 12 | referenceprel, 13 | datetimeprel, 14 | valtraduite 15 | FROM 16 | {{ ref('int__resultats_udi_communes') }} 17 | WHERE 18 | cdparametresiseeaux IN ('PB', 'AS') 19 | ) 20 | 21 | SELECT 22 | cdreseau, 23 | annee, 24 | CASE 25 | WHEN cdparametresiseeaux = 'PB' THEN 'metaux_lourds_pb' 26 | WHEN cdparametresiseeaux = 'AS' THEN 'metaux_lourds_as' 27 | END AS categorie, 28 | 'bilan_annuel_' || annee AS periode, 29 | count( 30 | DISTINCT 31 | CASE 32 | WHEN 33 | cdparametresiseeaux = 'PB' 34 | AND valtraduite IS NOT NULL AND valtraduite >= limite_qualite 35 | THEN 
referenceprel 36 | WHEN 37 | cdparametresiseeaux = 'AS' 38 | AND valtraduite IS NOT NULL 39 | AND valtraduite >= valeur_sanitaire_1 40 | THEN referenceprel 41 | END 42 | ) AS nb_depassements, 43 | count(DISTINCT referenceprel) AS nb_prelevements, 44 | ( 45 | count( 46 | DISTINCT 47 | CASE 48 | WHEN 49 | cdparametresiseeaux = 'PB' 50 | AND valtraduite IS NOT NULL 51 | AND valtraduite >= limite_qualite 52 | THEN referenceprel 53 | WHEN 54 | cdparametresiseeaux = 'AS' 55 | AND valtraduite IS NOT NULL 56 | AND valtraduite >= valeur_sanitaire_1 57 | THEN referenceprel 58 | END 59 | )::float 60 | / 61 | count(DISTINCT referenceprel)::float 62 | ) AS ratio 63 | 64 | FROM metaux_lourds_prels 65 | 66 | GROUP BY cdreseau, annee, categorie 67 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/metaux_lourds/int__resultats_metaux_lourds_commune_annuel.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | metaux_lourds_prels AS ( 3 | -- Certains prélèvements ont plusieurs analyses pour la même substance 4 | -- Le SELECT DISTINCT ne dédoublonne pas ces cas là 5 | -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête 6 | SELECT DISTINCT 7 | de_partition AS annee, 8 | inseecommune, 9 | cdparametresiseeaux, 10 | valeur_sanitaire_1, 11 | limite_qualite, 12 | referenceprel, 13 | datetimeprel, 14 | valtraduite 15 | FROM 16 | {{ ref('int__resultats_udi_communes') }} 17 | WHERE 18 | cdparametresiseeaux IN ('PB', 'AS') 19 | ) 20 | 21 | SELECT 22 | inseecommune, 23 | annee, 24 | CASE 25 | WHEN cdparametresiseeaux = 'PB' THEN 'metaux_lourds_pb' 26 | WHEN cdparametresiseeaux = 'AS' THEN 'metaux_lourds_as' 27 | END AS categorie, 28 | 'bilan_annuel_' || annee AS periode, 29 | count( 30 | DISTINCT 31 | CASE 32 | WHEN 33 | cdparametresiseeaux = 'PB' 34 | AND valtraduite IS NOT NULL AND valtraduite >= limite_qualite 35 | THEN referenceprel 36 | WHEN 37 | cdparametresiseeaux = 'AS' 38 | AND valtraduite IS NOT NULL 39 | AND valtraduite >= valeur_sanitaire_1 40 | THEN referenceprel 41 | END 42 | ) AS nb_depassements, 43 | count(DISTINCT referenceprel) AS nb_prelevements, 44 | ( 45 | count( 46 | DISTINCT 47 | CASE 48 | WHEN 49 | cdparametresiseeaux = 'PB' 50 | AND valtraduite IS NOT NULL 51 | AND valtraduite >= limite_qualite 52 | THEN referenceprel 53 | WHEN 54 | cdparametresiseeaux = 'AS' 55 | AND valtraduite IS NOT NULL 56 | AND valtraduite >= valeur_sanitaire_1 57 | THEN referenceprel 58 | END 59 | )::float 60 | / 61 | count(DISTINCT referenceprel)::float 62 | ) AS ratio 63 | 64 | FROM metaux_lourds_prels 65 | 66 | GROUP BY inseecommune, annee, categorie 67 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/sub_indus/int__resultats_sub_indus_udi_dernier.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | last_pvl AS ( 3 | SELECT 4 | cdreseau, 5 | categorie, 6 | cdparametresiseeaux, 7 | valeur_sanitaire_1, 8 | valeur_sanitaire_2, 9 | datetimeprel, 10 | valtraduite, 11 | ROW_NUMBER() 12 | OVER ( 13 | PARTITION BY cdreseau, cdparametresiseeaux 14 | ORDER BY datetimeprel DESC 15 | ) 16 | AS row_number 17 | FROM 18 | {{ ref('int__resultats_udi_communes') }} 19 | WHERE 20 | cdparametresiseeaux IN ( 21 | -- Les résultats pour le 1,4 dioxane sont ignorés pour l'instant 22 | --'14DAN', 23 | 'PCLAT' 24 | ) 25 | AND 26 | -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement 27 | 
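-- As an illustration: if the most recent sample in the table is
-- 2025-06-17 10:02:00, the expression below keeps every sample taken on or
-- after 2024-06-18 00:00:00, i.e. a rolling one-year window aligned on whole days.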
datetimeprel >= DATE_TRUNC('day', ( 28 | SELECT MAX(sub.datetimeprel) 29 | FROM {{ ref('int__resultats_udi_communes') }} AS sub 30 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY 31 | ) 32 | 33 | SELECT 34 | cdreseau, 35 | 'dernier_prel' AS periode, 36 | datetimeprel AS date_dernier_prel, 37 | 1 AS nb_parametres, 38 | CASE 39 | WHEN cdparametresiseeaux = '14DAN' THEN 'sub_indus_14dioxane' 40 | WHEN cdparametresiseeaux = 'PCLAT' THEN 'sub_indus_perchlorate' 41 | END AS categorie, 42 | CASE 43 | WHEN 44 | valtraduite = 0 OR valtraduite IS NULL 45 | THEN 'non_quantifie' 46 | WHEN 47 | valtraduite > valeur_sanitaire_2 48 | THEN 'sup_valeur_sanitaire_2' 49 | WHEN 50 | -- by construction, valeur_sanitaire_2 > valeur_sanitaire_1 51 | -- so here the result is actually: 52 | -- valeur_sanitaire_1 < valtraduite <= valeur_sanitaire_2 53 | valtraduite > valeur_sanitaire_1 54 | THEN 'sup_valeur_sanitaire' 55 | WHEN 56 | valtraduite <= valeur_sanitaire_1 57 | THEN 'inf_valeur_sanitaire' 58 | ELSE 'error' 59 | END AS resultat, 60 | JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite) 61 | AS parametres_detectes 62 | FROM 63 | last_pvl 64 | WHERE 65 | row_number = 1 66 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/sub_indus/int__resultats_sub_indus_commune_dernier.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | last_pvl AS ( 3 | SELECT 4 | inseecommune, 5 | categorie, 6 | cdparametresiseeaux, 7 | valeur_sanitaire_1, 8 | valeur_sanitaire_2, 9 | datetimeprel, 10 | valtraduite, 11 | ROW_NUMBER() 12 | OVER ( 13 | PARTITION BY inseecommune, cdparametresiseeaux 14 | ORDER BY datetimeprel DESC 15 | ) 16 | AS row_number 17 | FROM 18 | {{ ref('int__resultats_udi_communes') }} 19 | WHERE 20 | cdparametresiseeaux IN ( 21 | -- Les résultats pour le 1,4 dioxane sont ignorés pour l'instant 22 | --'14DAN', 23 | 'PCLAT' 24 | ) 25 | AND 26 | -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement 27 | datetimeprel >= DATE_TRUNC('day', ( 28 | SELECT MAX(sub.datetimeprel) 29 | FROM {{ ref('int__resultats_udi_communes') }} AS sub 30 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY 31 | ) 32 | 33 | SELECT 34 | inseecommune, 35 | 'dernier_prel' AS periode, 36 | datetimeprel AS date_dernier_prel, 37 | 1 AS nb_parametres, 38 | CASE 39 | WHEN cdparametresiseeaux = '14DAN' THEN 'sub_indus_14dioxane' 40 | WHEN cdparametresiseeaux = 'PCLAT' THEN 'sub_indus_perchlorate' 41 | END AS categorie, 42 | CASE 43 | WHEN 44 | valtraduite = 0 OR valtraduite IS NULL 45 | THEN 'non_quantifie' 46 | WHEN 47 | valtraduite > valeur_sanitaire_2 48 | THEN 'sup_valeur_sanitaire_2' 49 | WHEN 50 | -- by construction, valeur_sanitaire_2 > valeur_sanitaire_1 51 | -- so here the result is actually: 52 | -- valeur_sanitaire_1 < valtraduite <= valeur_sanitaire_2 53 | valtraduite > valeur_sanitaire_1 54 | THEN 'sup_valeur_sanitaire' 55 | WHEN 56 | valtraduite <= valeur_sanitaire_1 57 | THEN 'inf_valeur_sanitaire' 58 | ELSE 'error' 59 | END AS resultat, 60 | JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite) 61 | AS parametres_detectes 62 | FROM 63 | last_pvl 64 | WHERE 65 | row_number = 1 66 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/int__resultats_udi_communes.sql: -------------------------------------------------------------------------------- 1 | WITH resultats AS ( 2 | SELECT 3 | referenceprel, 4 | 
cdparametresiseeaux, 5 | de_partition, 6 | 7 | -- Correction de la colonne valtraduite qui contient les valeurs 8 | -- textuelles de rqana converties en valeurs numériques. 9 | -- Certaines valeurs textuelles telles que "Changement anormal", "OUI", 10 | -- "PRESENCE" étaient converties en 1. 11 | -- Ces valeurs sont corrigées en 0 car on veut les considérer comme 12 | -- des valeurs non quantifiées. 13 | -- Les valeurs purement numériques restent inchangées. 14 | -- Exemples après correction : 15 | -- 'Changement anormal' → 0 16 | -- 'OUI' → 0 17 | -- 'PRESENCE' → 0 18 | -- '1,0' → 1 19 | -- '>1' → 1 20 | CASE 21 | WHEN valtraduite = 1 AND REGEXP_MATCHES(rqana, '[a-zA-Z]') THEN 0 22 | ELSE valtraduite 23 | END AS valtraduite 24 | 25 | -- On n'utilise plus limitequal des données d'origine 26 | -- car on se base sur des valeurs fournies par Générations Futures 27 | -- 28 | -- CAST( 29 | -- REGEXP_EXTRACT( 30 | -- REPLACE(limitequal, ',', '.'), '-?\d+(\.\d+)?' 31 | -- ) AS FLOAT 32 | -- ) AS limitequal_float, 33 | -- REGEXP_EXTRACT(limitequal, '[a-zA-Zµg]+/?[a-zA-Z/L]+$') AS unite 34 | FROM 35 | {{ ref("stg_edc__resultats") }} 36 | ), 37 | 38 | resultats_with_ref AS ( 39 | SELECT 40 | resultats.*, 41 | r.categorie_1 AS categorie, 42 | r.categorie_2, 43 | r.categorie_3, 44 | r.limite_qualite, 45 | r.limite_indicative, 46 | r.valeur_sanitaire_1, 47 | r.valeur_sanitaire_2 48 | FROM 49 | resultats 50 | INNER JOIN 51 | {{ ref("int__valeurs_de_reference") }} AS r 52 | ON 53 | resultats.cdparametresiseeaux = r.cdparametresiseeaux 54 | ) 55 | 56 | 57 | SELECT 58 | resultats_with_ref.*, 59 | udi.cdreseau, 60 | udi.inseecommune, 61 | plv.datetimeprel 62 | FROM 63 | resultats_with_ref 64 | INNER JOIN 65 | {{ ref("int__lien_cdreseau_refreneceprel") }} AS plv 66 | ON 67 | resultats_with_ref.referenceprel = plv.referenceprel 68 | AND 69 | resultats_with_ref.de_partition = plv.de_partition 70 | 71 | LEFT JOIN 72 | {{ ref("int__lien_commune_cdreseau") }} AS udi 73 | ON 74 | plv.cdreseau = udi.cdreseau 75 | AND plv.de_partition = udi.de_partition 76 | -------------------------------------------------------------------------------- /webapp/app/duckdb-example/page.tsx: -------------------------------------------------------------------------------- 1 | import { fetchExample } from "../lib/data"; 2 | 3 | export default async function Page() { 4 | //using api route 5 | // try { 6 | 7 | // const response = await fetch("http://localhost:3001/api/db-example", { cache: "no-store" }) 8 | // const results = response.json(); 9 | // } catch (err) { 10 | // console.error("Error fetching DB status:", err) 11 | // } 12 | 13 | // using directly the data layer 14 | const reader = await fetchExample(); 15 | 16 | return ( 17 |
    <div>
18 |       <table>
19 |         <thead>
20 |           <tr>
21 |             <th>
22 |               Row
23 |             </th>
24 |             {Array.from({ length: reader.columnCount }, (_, i) => (
25 |               <th key={i}>
26 |                 {reader.columnName(i)}
27 |               </th>
28 |             ))}
29 |           </tr>
30 |         </thead>
31 |         <tbody>
32 |           {Object.entries(reader.getRows()).map(([key, value]) => (
33 |             <tr key={key}>
34 |               <td>
35 |                 {key}
36 |               </td>
37 |               {Array.from({ length: reader.columnCount }, (_, i) => (
38 |                 <td key={i}>
39 |                   {/* Display by type - example using methods specific to certain types => no error, the typing looks correct */}
40 |                   {/* {value[i] != null &&
41 |                     ((reader.columnType(i).typeId === DuckDBTypeId.VARCHAR &&
42 |                       String(value[i]).slice(0, 3)) ||
43 |                       (reader.columnType(i).typeId === DuckDBTypeId.BIGINT &&
44 |                         (value[i] as bigint) * BigInt(100000)) ||
45 |                       (reader.columnType(i).typeId === DuckDBTypeId.DOUBLE &&
46 |                         (value[i] as number)?.toExponential()))} */}
47 |                   {/* Simple display */}
48 |                   {String(value[i])}
49 |                 </td>
50 |               ))}
51 |             </tr>
52 |           ))}
53 |         </tbody>
54 |       </table>
55 |     </div>
56 | ); 57 | } 58 | -------------------------------------------------------------------------------- /pipelines/tasks/config/common.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from pathlib import Path 4 | from typing import Union 5 | from zipfile import ZipFile 6 | 7 | import requests 8 | from tqdm import tqdm 9 | 10 | from pipelines.utils.logger import get_logger 11 | 12 | logger = get_logger(__name__) 13 | 14 | ROOT_FOLDER = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")) 15 | DATABASE_FOLDER = os.path.join(ROOT_FOLDER, "database") 16 | DUCKDB_FILE = os.path.join(DATABASE_FOLDER, "data.duckdb") 17 | CACHE_FOLDER = os.path.join(ROOT_FOLDER, "database", "cache") 18 | 19 | os.makedirs(CACHE_FOLDER, exist_ok=True) 20 | os.makedirs(DATABASE_FOLDER, exist_ok=True) 21 | 22 | 23 | # common style for the progressbar dans cli 24 | tqdm_common = { 25 | "ncols": 100, 26 | "bar_format": "{l_bar}{bar}| {n_fmt}/{total_fmt}", 27 | "mininterval": 2.0, # Log progress every 2 second 28 | } 29 | 30 | 31 | def clear_cache(recreate_folder: bool = True): 32 | """Clear the cache folder.""" 33 | shutil.rmtree(CACHE_FOLDER) 34 | if recreate_folder: 35 | os.makedirs(CACHE_FOLDER, exist_ok=True) 36 | 37 | 38 | def download_file_from_https(url: str, filepath: Union[str, Path]): 39 | """ 40 | Downloads a file from a https link to a local file. 41 | :param url: The url where to download the file. 42 | :param filepath: The path to the local file. 43 | :return: Downloaded file filename. 44 | """ 45 | logger.info(f"Downloading file from {url} to {filepath}") 46 | response = requests.get( 47 | url, stream=True, headers={"Accept-Encoding": "gzip, deflate"} 48 | ) 49 | response.raise_for_status() 50 | response_size = int(response.headers.get("content-length", 0)) 51 | filepath = Path(filepath) 52 | with open(filepath, "wb") as f: 53 | with tqdm( 54 | total=response_size, 55 | unit="B", 56 | unit_scale=True, 57 | desc=filepath.name, 58 | **tqdm_common, 59 | ) as pbar: 60 | for chunk in response.iter_content(chunk_size=8192): 61 | f.write(chunk) 62 | pbar.update(len(chunk)) 63 | 64 | return filepath.name 65 | 66 | 67 | def extract_file(zip_file, extract_folder): 68 | with ZipFile(zip_file, "r") as zip_ref: 69 | file_list = zip_ref.namelist() 70 | with tqdm( 71 | total=len(file_list), unit="file", desc="Extracting", **tqdm_common 72 | ) as pbar: 73 | for file in file_list: 74 | zip_ref.extract(file, extract_folder) # Extract each file 75 | pbar.update(1) 76 | return True 77 | -------------------------------------------------------------------------------- /pipelines/tasks/client/pmtiles_processor.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from pathlib import Path 3 | 4 | from pipelines.config.config import get_s3_path_pmtiles 5 | from pipelines.tasks.config.config_geojson import ( 6 | config_merge_geo, 7 | ) 8 | from pipelines.utils.logger import get_logger 9 | from pipelines.utils.storage_client import ObjectStorageClient 10 | 11 | logger = get_logger(__name__) 12 | 13 | types = config_merge_geo.keys() 14 | 15 | 16 | class PmtilesProcessor: 17 | def __init__(self, type="communes"): 18 | if type not in types: 19 | raise Exception(f"type {type} must be in {types}") 20 | self.upload_file_path = f"georef-france-{type}-prelevement.pmtiles" 21 | 22 | def convert_geojson_to_pmtiles( 23 | self, geojson_file: str, pmtiles_file: str, layer="data_communes" 24 | ): 25 | 
"""Convert a GeoJSON file to PMTiles format using Tippecanoe.""" 26 | # try: 27 | # Construct the Tippecanoe command 28 | command = [ 29 | "tippecanoe", 30 | "-zg", # Zoom levels 31 | "-o", # output 32 | pmtiles_file, # Output PMTiles file 33 | "--layer", # le nom de la couche dans les tuiles vectorielles 34 | layer, 35 | "--coalesce-densest-as-needed", 36 | "--extend-zooms-if-still-dropping", 37 | geojson_file, # Input GeoJSON file 38 | ] 39 | # if file already exists then remove it 40 | if Path(pmtiles_file).exists(): 41 | Path(pmtiles_file).unlink() 42 | # Run the command 43 | subprocess.run(command, check=True) 44 | logger.info(f"Successfully converted '{geojson_file}' to '{pmtiles_file}'.") 45 | 46 | # except subprocess.CalledProcessError as e: 47 | # logger.error(f"Error during conversion: {e}") 48 | # except Exception as e: 49 | # logger.error(f"An error occurred: {e}") 50 | 51 | def upload_pmtils_to_storage(self, env: str, pmtils_path: str): 52 | """ 53 | Upload the Pmtiles file to Storage Object depending on the environment 54 | This requires setting the correct environment variables for the Scaleway credentials 55 | """ 56 | s3 = ObjectStorageClient() 57 | s3_path = get_s3_path_pmtiles(env, self.upload_file_path) 58 | 59 | s3.upload_object(local_path=pmtils_path, file_key=s3_path, public_read=True) 60 | logger.info(f"✅ pmtils uploaded to s3://{s3.bucket_name}/{s3_path}") 61 | url = ( 62 | f"https://{s3.bucket_name}.{s3.endpoint_url.split('https://')[1]}/{s3_path}" 63 | ) 64 | return url 65 | -------------------------------------------------------------------------------- /pipelines/tasks/generate_pmtiles_legacy.py: -------------------------------------------------------------------------------- 1 | """Generate and upload merged new PMtiles file. LEGACY method. 2 | For both UDI and communes data: 3 | - Get geom data from duck db 4 | - Get sample results from duckdb, merge with geom, convert to pmtiles and uploads the new Pmtiles to S3. 5 | 6 | Args: 7 | - env (str): Environment to download from ("dev" or "prod") 8 | """ 9 | 10 | import json 11 | import os 12 | 13 | from tasks.config.common import CACHE_FOLDER 14 | 15 | from pipelines.tasks.client.core.duckdb_client import DuckDBClient 16 | from pipelines.tasks.client.geojson_processor import GeoJSONProcessor 17 | from pipelines.tasks.client.pmtiles_processor import PmtilesProcessor 18 | from pipelines.utils.logger import get_logger 19 | 20 | logger = get_logger(__name__) 21 | 22 | 23 | def execute(env: str): 24 | """ 25 | Execute GeoJSON generation and upload process. 
26 | 27 | Args: 28 | env: Environment to use ("dev" or "prod") 29 | """ 30 | duckdb_client = DuckDBClient() 31 | generate_pmtiles(env, "communes", duckdb_client) 32 | generate_pmtiles(env, "udi", duckdb_client) 33 | duckdb_client.close() 34 | 35 | 36 | def generate_pmtiles(env, type, duckdb_client): 37 | logger.info(f"Starting {type} GeoJSON generation process") 38 | 39 | # Initialize clients 40 | geojson_processor = GeoJSONProcessor(type, duckdb_client) 41 | pmtiles_processor = PmtilesProcessor(type) 42 | 43 | # Process and merge data 44 | logger.info(f"Merging GeoJSON with {type} results") 45 | geojson_output_path = os.path.join( 46 | CACHE_FOLDER, f"new-georef-france-{type}-prelevement.geojson" 47 | ) 48 | geojson = geojson_processor.generate_geojson() 49 | 50 | with open(geojson_output_path, "w", encoding="utf-8") as f: 51 | json.dump(geojson, f) 52 | 53 | logger.info(f"✅ GeoJSON processed and stored at: {geojson_output_path}") 54 | 55 | # logger.info("Uploading geojson to S3") 56 | # url = geojson_processor.upload_geojson_to_storage( 57 | # env, file_path=geojson_output_path 58 | # ) 59 | # logger.info(f"geojson in s3 pubic Url: {url}") 60 | 61 | logger.info("Convert new-GeoJSON to pmtiles") 62 | pmtils_output_path = os.path.join( 63 | CACHE_FOLDER, f"georef-france-{type}-prelevement.pmtiles" 64 | ) 65 | pmtiles_processor.convert_geojson_to_pmtiles( 66 | geojson_output_path, pmtils_output_path, f"data_{type}" 67 | ) 68 | 69 | logger.info("Uploading pmtiles to S3") 70 | url = pmtiles_processor.upload_pmtils_to_storage( 71 | env, pmtils_path=pmtils_output_path 72 | ) 73 | logger.info(f"pmtiles in s3 pubic Url: {url}") 74 | -------------------------------------------------------------------------------- /dbt_/models/staging/communes/_communes_models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_communes__cog 5 | description: "Liste des communes et leurs unités de distribution chargé depuis le site de l'insee https://www.insee.fr/fr/information/7766585" 6 | columns: 7 | - name: TYPECOM 8 | description: > 9 | Type de commune 10 | COM Commune 11 | COMA Commune associée 12 | COMD Commune déléguée 13 | ARM Arrondissement municipal 14 | - name: COM 15 | description: Code Commune 16 | - name: REG 17 | description: Code Region 18 | - name: DEP 19 | description: Code Departement 20 | - name: CTCD 21 | description: Code de la collectivité territoriale ayant les compétences départementales 22 | - name: ARR 23 | description: Code arrondissement 24 | - name: TNCC 25 | description: > 26 | Type de nom en clair 27 | 0 Pas d'article et le nom commence par une consonne sauf H muet charnière = DE 28 | 1 Pas d'article et le nom commence par une voyelle ou un H muet charnière = D' 29 | 2 Article = LE charnière = DU 30 | 3 Article = LA charnière = DE LA 31 | 4 Article = LES charnière = DES 32 | 5 Article = L' charnière = DE L' 33 | 6 Article = AUX charnière = DES 34 | 7 Article = LAS charnière = DE LAS 35 | 8 Article = LOS charnière = DE LOS 36 | - name: NCC 37 | description: Nom en clair (majuscules) 38 | - name: NCCENR 39 | description: Nom en clair (typographie riche) 40 | - name: LIBELLE 41 | description: Nom en clair (typographie riche) avec article 42 | - name: CAN 43 | description: Code canton. 
Pour les communes « multi-cantonales », code décliné de 99 à 90 (pseudo-canton) ou de 89 à 80 (communes nouvelles) 44 | - name: COMPARENT 45 | description: Code de la commune parente pour les arrondissements municipaux et les communes associées ou déléguées. 46 | 47 | - name: stg_communes__opendatasoft 48 | description: Tracé des communes chargé depuis https://public.opendatasoft.com/explore/dataset/georef-france-commune/information 49 | columns: 50 | - name: com_code 51 | description: "Code de la commune (extrait du champ com_code[1] de la source OpenDataSoft)" 52 | type: VARCHAR 53 | - name: com_name 54 | description: "Nom de la commune (extrait du champ com_name[1] de la source OpenDataSoft)" 55 | type: VARCHAR 56 | - name: geom 57 | description: "Géométrie de la commune" 58 | type: GEOMETRY 59 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/pesticide/sub_active/int__resultats_sub_active_udi_dernier.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | last_pvl AS ( 3 | SELECT DISTINCT 4 | cdreseau, 5 | categorie, 6 | cdparametresiseeaux, 7 | valtraduite, 8 | limite_qualite, 9 | valeur_sanitaire_1, 10 | datetimeprel, 11 | DENSE_RANK() 12 | OVER ( 13 | PARTITION BY cdreseau 14 | ORDER BY datetimeprel DESC 15 | ) 16 | AS row_number 17 | 18 | FROM 19 | {{ ref('int__resultats_udi_communes') }} 20 | WHERE 21 | categorie = 'pesticide' 22 | AND 23 | categorie_2 = 'sub_active' 24 | AND 25 | -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement 26 | datetimeprel >= DATE_TRUNC('day', ( 27 | SELECT MAX(sub.datetimeprel) 28 | FROM {{ ref('int__resultats_udi_communes') }} AS sub 29 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY 30 | ), 31 | 32 | aggregated AS ( 33 | SELECT 34 | cdreseau, 35 | cdparametresiseeaux, 36 | MAX(valtraduite) AS valtraduite, 37 | MAX(limite_qualite) AS limite_qualite, 38 | MAX(valeur_sanitaire_1) AS valeur_sanitaire_1, 39 | MAX(datetimeprel) AS datetimeprel 40 | FROM last_pvl 41 | WHERE row_number = 1 42 | GROUP BY cdreseau, cdparametresiseeaux 43 | ) 44 | 45 | SELECT 46 | cdreseau, 47 | 'sub_active' AS categorie, 48 | 'dernier_prel' AS periode, 49 | MAX(datetimeprel) AS date_dernier_prel, 50 | COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres, 51 | CASE 52 | WHEN BOOL_AND(valtraduite IS NULL OR valtraduite = 0) THEN 'non_quantifie' 53 | WHEN 54 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1) 55 | THEN 'sup_valeur_sanitaire' 56 | WHEN 57 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_qualite) 58 | THEN 'sup_limite_qualite' 59 | WHEN 60 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite <= limite_qualite) 61 | THEN 'inf_limite_qualite' 62 | ELSE 'erreur' 63 | END AS resultat, 64 | TO_JSON( 65 | MAP( 66 | LIST( 67 | cdparametresiseeaux 68 | ORDER BY cdparametresiseeaux 69 | ) FILTER (WHERE valtraduite > 0 70 | ), 71 | LIST( 72 | valtraduite 73 | ORDER BY cdparametresiseeaux 74 | ) FILTER (WHERE valtraduite > 0 75 | ) 76 | ) 77 | ) AS parametres_detectes 78 | 79 | FROM aggregated 80 | GROUP BY cdreseau 81 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/pesticide/sub_active/int__resultats_sub_active_commune_dernier.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | last_pvl AS ( 3 | SELECT DISTINCT 4 | inseecommune, 5 | categorie, 6 | cdparametresiseeaux, 7 | valtraduite, 8 | limite_qualite, 9 | 
valeur_sanitaire_1, 10 | datetimeprel, 11 | DENSE_RANK() 12 | OVER ( 13 | PARTITION BY inseecommune 14 | ORDER BY datetimeprel DESC 15 | ) 16 | AS row_number 17 | 18 | FROM 19 | {{ ref('int__resultats_udi_communes') }} 20 | WHERE 21 | categorie = 'pesticide' 22 | AND 23 | categorie_2 = 'sub_active' 24 | AND 25 | -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement 26 | datetimeprel >= DATE_TRUNC('day', ( 27 | SELECT MAX(sub.datetimeprel) 28 | FROM {{ ref('int__resultats_udi_communes') }} AS sub 29 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY 30 | ), 31 | 32 | aggregated AS ( 33 | SELECT 34 | inseecommune, 35 | cdparametresiseeaux, 36 | MAX(valtraduite) AS valtraduite, 37 | MAX(limite_qualite) AS limite_qualite, 38 | MAX(valeur_sanitaire_1) AS valeur_sanitaire_1, 39 | MAX(datetimeprel) AS datetimeprel 40 | FROM last_pvl 41 | WHERE row_number = 1 42 | GROUP BY inseecommune, cdparametresiseeaux 43 | ) 44 | 45 | SELECT 46 | inseecommune, 47 | 'sub_active' AS categorie, 48 | 'dernier_prel' AS periode, 49 | MAX(datetimeprel) AS date_dernier_prel, 50 | COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres, 51 | CASE 52 | WHEN BOOL_AND(valtraduite IS NULL OR valtraduite = 0) THEN 'non_quantifie' 53 | WHEN 54 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1) 55 | THEN 'sup_valeur_sanitaire' 56 | WHEN 57 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_qualite) 58 | THEN 'sup_limite_qualite' 59 | WHEN 60 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite <= limite_qualite) 61 | THEN 'inf_limite_qualite' 62 | ELSE 'erreur' 63 | END AS resultat, 64 | TO_JSON( 65 | MAP( 66 | LIST( 67 | cdparametresiseeaux 68 | ORDER BY cdparametresiseeaux 69 | ) FILTER (WHERE valtraduite > 0 70 | ), 71 | LIST( 72 | valtraduite 73 | ORDER BY cdparametresiseeaux 74 | ) FILTER (WHERE valtraduite > 0 75 | ) 76 | ) 77 | ) AS parametres_detectes 78 | 79 | FROM aggregated 80 | GROUP BY inseecommune 81 | -------------------------------------------------------------------------------- /webapp/app/api/udi/find/route.tsx: -------------------------------------------------------------------------------- 1 | // an api route fetching data 2 | 3 | import db from "@/app/lib/duckdb"; 4 | import { NextRequest, NextResponse } from "next/server"; 5 | 6 | export async function GET(req: NextRequest) { 7 | // Set CORS headers to allow requests from any origin 8 | const corsHeaders = { 9 | "Access-Control-Allow-Origin": "*", 10 | "Access-Control-Allow-Methods": "GET, OPTIONS", 11 | "Access-Control-Allow-Headers": "Content-Type, Authorization", 12 | }; 13 | 14 | // Handle OPTIONS request for CORS preflight 15 | if (req.method === "OPTIONS") { 16 | return NextResponse.json({}, { headers: corsHeaders }); 17 | } 18 | 19 | const { searchParams } = new URL(req.url); 20 | const lonParam = searchParams.get("lon"); 21 | const latParam = searchParams.get("lat"); 22 | 23 | if (lonParam == null || latParam == null) { 24 | return NextResponse.json( 25 | { message: "Paramètres manquants: lon et lat sont requis" }, 26 | { status: 400, headers: corsHeaders }, 27 | ); 28 | } 29 | const lon = parseFloat(lonParam); 30 | const lat = parseFloat(latParam); 31 | 32 | if ( 33 | isNaN(lon) || 34 | isNaN(lat) || 35 | lon < -180 || 36 | lon > 180 || 37 | lat < -90 || 38 | lat > 90 39 | ) { 40 | return NextResponse.json( 41 | { message: "Paramètres invalides" }, 42 | { status: 400, headers: corsHeaders }, 43 | ); 44 | } 45 | 46 | const connection = await db.connect(); 47 | try { 48 | await connection.run("LOAD spatial;"); 
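    // The prepared statement below finds the UDI polygon that contains the requested
    // point. Ordering by udi_pop DESC presumably picks the most populated UDI when
    // several polygons overlap, and binding the WKT point as a parameter keeps the
    // query parameterized even though lon/lat were already validated above.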
49 | 50 | const prepared = await connection.prepare(` 51 | SELECT code_udi 52 | FROM atlasante_udi 53 | WHERE ST_Contains(geom, ST_GeomFromText($1::VARCHAR)) 54 | ORDER BY udi_pop DESC 55 | LIMIT 1 56 | `); 57 | 58 | const point = `POINT(${lon} ${lat})`; 59 | prepared.bindVarchar(1, point); 60 | 61 | const result = await prepared.runAndReadAll(); 62 | 63 | if (result.currentRowCount > 0) { 64 | return NextResponse.json( 65 | { id: result.getRowObjectsJson()[0]["code_udi"] }, 66 | { status: 200, headers: corsHeaders }, 67 | ); 68 | } else { 69 | return NextResponse.json( 70 | { message: "Aucune UDI ne correspond à ces coordonnées" }, 71 | { status: 404, headers: corsHeaders }, 72 | ); 73 | } 74 | } catch (error) { 75 | console.error("Erreur de base de données:", error); 76 | return NextResponse.json( 77 | { 78 | message: 79 | "Une erreur interne s'est produite. Veuillez réessayer ultérieurement.", 80 | }, 81 | { status: 500, headers: corsHeaders }, 82 | ); 83 | } finally { 84 | await connection.close(); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /webapp/tailwind.config.ts: -------------------------------------------------------------------------------- 1 | import type { Config } from "tailwindcss"; 2 | import TailwindAnimate from "tailwindcss-animate"; 3 | 4 | export default { 5 | darkMode: ["class"], 6 | content: [ 7 | "./pages/**/*.{js,ts,jsx,tsx,mdx}", 8 | "./components/**/*.{js,ts,jsx,tsx,mdx}", 9 | "./app/**/*.{js,ts,jsx,tsx,mdx}", 10 | ], 11 | theme: { 12 | extend: { 13 | colors: { 14 | background: "hsl(var(--background))", 15 | foreground: "hsl(var(--foreground))", 16 | card: { 17 | DEFAULT: "hsl(var(--card))", 18 | foreground: "hsl(var(--card-foreground))", 19 | }, 20 | popover: { 21 | DEFAULT: "hsl(var(--popover))", 22 | foreground: "hsl(var(--popover-foreground))", 23 | }, 24 | primary: { 25 | DEFAULT: "hsl(var(--primary))", 26 | foreground: "hsl(var(--primary-foreground))", 27 | }, 28 | secondary: { 29 | DEFAULT: "hsl(var(--secondary))", 30 | foreground: "hsl(var(--secondary-foreground))", 31 | }, 32 | muted: { 33 | DEFAULT: "hsl(var(--muted))", 34 | foreground: "hsl(var(--muted-foreground))", 35 | }, 36 | accent: { 37 | DEFAULT: "hsl(var(--accent))", 38 | foreground: "hsl(var(--accent-foreground))", 39 | }, 40 | destructive: { 41 | DEFAULT: "hsl(var(--destructive))", 42 | foreground: "hsl(var(--destructive-foreground))", 43 | }, 44 | border: "hsl(var(--border))", 45 | input: "hsl(var(--input))", 46 | ring: "hsl(var(--ring))", 47 | chart: { 48 | "1": "hsl(var(--chart-1))", 49 | "2": "hsl(var(--chart-2))", 50 | "3": "hsl(var(--chart-3))", 51 | "4": "hsl(var(--chart-4))", 52 | "5": "hsl(var(--chart-5))", 53 | }, 54 | "custom-drom": "#22394e", 55 | }, 56 | borderRadius: { 57 | lg: "var(--radius)", 58 | md: "calc(var(--radius) - 2px)", 59 | sm: "calc(var(--radius) - 4px)", 60 | }, 61 | fontFamily: { 62 | sans: [ 63 | "Raleway", 64 | "ui-sans-serif", 65 | "system-ui", 66 | "-apple-system", 67 | "BlinkMacSystemFont", 68 | "Segoe UI", 69 | "Roboto", 70 | "Helvetica Neue", 71 | "Arial", 72 | "Noto Sans", 73 | "sans-serif", 74 | ], 75 | numbers: [ 76 | "ui-sans-serif", 77 | "system-ui", 78 | "-apple-system", 79 | "BlinkMacSystemFont", 80 | "Segoe UI", 81 | "Roboto", 82 | "Helvetica Neue", 83 | "Arial", 84 | "Noto Sans", 85 | "sans-serif", 86 | ], 87 | }, 88 | }, 89 | }, 90 | plugins: [TailwindAnimate], 91 | } satisfies Config; 92 | -------------------------------------------------------------------------------- 
/dbt_/tests/test_cvm_results.sql: -------------------------------------------------------------------------------- 1 | -- dernier relevé 2 | SELECT 3 | 'dernier relevé' AS periode, 4 | cdreseau, 5 | categorie, 6 | resultat, 7 | 0 AS nb_depassements, 8 | 0 AS nb_prelevements, 9 | 0 AS ratio_limite_qualite 10 | FROM 11 | {{ ref('int__resultats_cvm_udi_dernier') }} 12 | WHERE 13 | ( 14 | cdreseau = '976003489' 15 | AND categorie = 'cvm' 16 | AND date_dernier_prel = '2024-07-16 08:30:00' 17 | AND resultat != 'non_quantifie' 18 | ) 19 | OR 20 | ( 21 | cdreseau = '001000241' 22 | AND categorie = 'cvm' 23 | AND date_dernier_prel = '2024-12-31 14:00:00' 24 | AND resultat != 'non_quantifie' 25 | ) 26 | OR 27 | ( 28 | cdreseau = '087003637' 29 | AND categorie = 'cvm' 30 | AND date_dernier_prel = '2024-07-04 10:50:00' 31 | AND resultat != 'cvm_sup_0_5' 32 | ) 33 | OR 34 | ( 35 | cdreseau = '095004048' 36 | AND categorie = 'cvm' 37 | AND date_dernier_prel = '2024-07-23 08:26:00' 38 | AND resultat != 'inf_limites' 39 | ) 40 | UNION ALL 41 | -- annuel 42 | SELECT 43 | 'annuel' AS periode, 44 | cdreseau, 45 | categorie, 46 | '' AS resultat, 47 | nb_depassements, 48 | nb_prelevements, 49 | ratio_limite_qualite 50 | FROM 51 | {{ ref('int__resultats_cvm_udi_annuel') }} 52 | WHERE 53 | ( 54 | cdreseau = '001001073' 55 | AND categorie = 'cvm' 56 | AND annee = '2024' 57 | AND nb_depassements != 0 58 | ) 59 | OR 60 | ( 61 | cdreseau = '001001073' 62 | AND categorie = 'cvm' 63 | AND annee = '2024' 64 | AND ratio_limite_qualite != 0 65 | ) 66 | OR 67 | ( 68 | cdreseau = '001001073' 69 | AND categorie = 'cvm' 70 | AND annee = '2023' 71 | AND nb_depassements != 0 72 | ) 73 | OR 74 | ( 75 | cdreseau = '001001073' 76 | AND categorie = 'cvm' 77 | AND annee = '2022' 78 | AND nb_depassements != 0 79 | ) 80 | OR 81 | ( 82 | cdreseau = '007000088' 83 | AND categorie = 'cvm' 84 | AND annee IN ('2022', '2023', '2024') 85 | AND nb_depassements != 0 86 | ) 87 | OR 88 | ( 89 | cdreseau = '095004048' 90 | AND categorie = 'cvm' 91 | AND annee = '2024' 92 | AND nb_prelevements != 21 93 | ) 94 | OR 95 | ( 96 | cdreseau = '005001358' 97 | AND categorie = 'cvm' 98 | AND annee = '2022' 99 | AND nb_depassements != 2 100 | ) 101 | OR 102 | ( 103 | cdreseau = '032000209' 104 | AND categorie = 'cvm' 105 | AND annee = '2024' 106 | AND ( 107 | ratio_limite_qualite != 0.25 108 | OR 109 | nb_prelevements != 4 110 | ) 111 | ) 112 | -------------------------------------------------------------------------------- /pipelines/tasks/client/uploaded_geojson_client.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from pipelines.tasks.client.core.duckdb_client import DuckDBClient 5 | from pipelines.tasks.config.common import ( 6 | CACHE_FOLDER, 7 | logger, 8 | ) 9 | from pipelines.utils.storage_client import ObjectStorageClient 10 | 11 | 12 | class UploadedGeoJSONClient: 13 | """Client pour télécharger et ingérer plusieurs fichiers GeoJSON uploadés préalablement manuellement sur S3""" 14 | 15 | def __init__(self, config, duckdb_client: DuckDBClient): 16 | self.config = config 17 | self.duckdb_client = duckdb_client 18 | self.storage_client = ObjectStorageClient() 19 | 20 | if "files" not in self.config: 21 | raise ValueError( 22 | "Configuration must contain a 'files' list with the GeoJSON files to process" 23 | ) 24 | 25 | self.files_config = self.config["files"] 26 | logger.info( 27 | f"UploadedGeoJSONClient initialized with {len(self.files_config)} file(s)" 28 | 
) 29 | 30 | def process_datasets(self): 31 | logger.info(f"Processing {self.__class__.__name__} data") 32 | self._download_data() 33 | self._ingest_to_duckdb() 34 | logger.info(f"Finishing processing {self.__class__.__name__} data") 35 | 36 | def _download_data(self): 37 | os.makedirs(CACHE_FOLDER, exist_ok=True) 38 | 39 | for file_config in self.files_config: 40 | s3_key = ( 41 | f"{self.config['source'].get('prefix', 'upload')}/{file_config['path']}" 42 | ) 43 | local_path = Path(CACHE_FOLDER, file_config["local_file_name"]) 44 | logger.info(f"Downloading {s3_key} to {local_path}") 45 | self.storage_client.download_object( 46 | file_key=s3_key, local_path=str(local_path) 47 | ) 48 | 49 | def _ingest_to_duckdb(self): 50 | logger.info( 51 | f"Ingesting {len(self.files_config)} uploaded GeoJSON file(s) into DuckDB" 52 | ) 53 | 54 | # Collect all table names for dropping 55 | table_names = [file_config["table_name"] for file_config in self.files_config] 56 | self.duckdb_client.drop_tables(table_names=table_names) 57 | 58 | # Ingest each file 59 | for file_config in self.files_config: 60 | logger.info( 61 | f"Ingesting {file_config['local_file_name']} into table {file_config['table_name']}" 62 | ) 63 | self.duckdb_client.ingest_from_geojson( 64 | table_name=file_config["table_name"], 65 | filepath=Path(CACHE_FOLDER, file_config["local_file_name"]), 66 | ) 67 | logger.info( 68 | f"✅ {file_config['local_file_name']} has been ingested into table {file_config['table_name']}" 69 | ) 70 | 71 | logger.info("✅ All uploaded GeoJSON files have been ingested in DB") 72 | -------------------------------------------------------------------------------- /dbt_/tests/test_nitrates_results.sql: -------------------------------------------------------------------------------- 1 | -- dernier relevé 2 | SELECT 3 | 'dernier relevé' AS periode, 4 | cdreseau, 5 | resultat, 6 | 0 AS nb_depassements, 7 | 0 AS nb_prelevements, 8 | 0 AS ratio_depassements 9 | FROM 10 | {{ ref('int__resultats_nitrate_udi_dernier') }} 11 | WHERE 12 | ( 13 | cdreseau = '001000003' 14 | AND date_dernier_prel = '2025-05-23 09:06:00' 15 | AND resultat != 'no3_inf_25' 16 | ) 17 | OR 18 | ( 19 | cdreseau = '037000175' 20 | AND date_dernier_prel = '2025-06-17 10:02:00' 21 | AND resultat != 'no3_inf_40' 22 | ) 23 | OR 24 | ( 25 | cdreseau = '002000060' 26 | AND date_dernier_prel = '2025-04-10 09:22:00' 27 | AND resultat != 'sup_valeur_sanitaire' 28 | ) 29 | OR 30 | ( 31 | cdreseau = '060001271' 32 | AND date_dernier_prel = '2025-04-09 13:44:00' 33 | AND resultat != 'inf_valeur_sanitaire' 34 | ) 35 | OR 36 | ( 37 | cdreseau = '973000028' 38 | AND date_dernier_prel = '2025-05-20 10:44:00' 39 | AND resultat != 'non_quantifie' 40 | ) 41 | UNION ALL 42 | -- annuel 43 | SELECT 44 | 'annuel' AS periode, 45 | cdreseau, 46 | '' AS resultat, 47 | nb_depassements, 48 | nb_prelevements, 49 | ratio 50 | FROM 51 | {{ ref('int__resultats_nitrate_udi_annuel') }} 52 | WHERE 53 | ( 54 | cdreseau = '092003070' 55 | AND annee = '2024' 56 | AND ( 57 | nb_prelevements != 806 58 | OR nb_depassements != 0 59 | OR ratio != 0 60 | ) 61 | ) 62 | OR 63 | ( 64 | cdreseau = '071001155' 65 | AND annee = '2023' 66 | AND ( 67 | nb_prelevements != 1 68 | OR nb_depassements != 0 69 | OR ratio != 0 70 | ) 71 | ) 72 | OR 73 | ( 74 | cdreseau = '036000670' 75 | AND annee = '2024' 76 | AND ( 77 | nb_prelevements != 27 78 | OR nb_depassements != 25 79 | -- il y a 2 prélèvements = à 50 (= valeur_sanitaire_1) 80 | -- comme c'est un strict > dans la requête, on a 25 et pas 27 81 | OR 
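        -- 25 exceedances out of 27 samples give 25/27 ≈ 0.926, hence the lower
        -- bound of 0.92 below instead of an exact floating-point equality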
ratio < 0.92 82 | ) 83 | ) 84 | OR 85 | ( 86 | cdreseau = '089003503' 87 | AND annee = '2020' 88 | AND ( 89 | nb_prelevements != 12 90 | OR nb_depassements != 3 91 | OR ratio != 0.25 92 | ) 93 | ) 94 | OR 95 | ( 96 | cdreseau = '055000713' 97 | AND annee = '2023' 98 | AND ( 99 | nb_prelevements != 4 100 | OR nb_depassements != 0 101 | OR ratio != 0 102 | ) 103 | ) 104 | OR 105 | ( 106 | cdreseau = '027000943' 107 | AND annee = '2021' 108 | AND ( 109 | nb_prelevements != 63 110 | OR nb_depassements != 1 111 | -- il y a 1 prélèvement = à 50 (= valeur_sanitaire_1) 112 | ) 113 | ) 114 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/metaux_lourds/int__resultats_metaux_lourds_udi_dernier.sql: -------------------------------------------------------------------------------- 1 | -- Ici on ne garde que le dernier prélèvement 2 | -- pour chaque UDI dans la dernière année 3 | WITH metaux_lourds_dernier_prel AS ( 4 | SELECT 5 | cdreseau, 6 | categorie, 7 | cdparametresiseeaux, 8 | limite_qualite, 9 | valeur_sanitaire_1, 10 | valeur_sanitaire_2, 11 | datetimeprel, 12 | valtraduite, 13 | ROW_NUMBER() 14 | OVER ( 15 | PARTITION BY cdreseau, cdparametresiseeaux 16 | ORDER BY datetimeprel DESC 17 | ) 18 | AS row_number 19 | FROM 20 | {{ ref('int__resultats_udi_communes') }} 21 | WHERE 22 | cdparametresiseeaux IN ('PB', 'AS') 23 | AND 24 | -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement 25 | datetimeprel >= DATE_TRUNC('day', ( 26 | SELECT MAX(sub.datetimeprel) 27 | FROM {{ ref('int__resultats_udi_communes') }} AS sub 28 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY 29 | ) 30 | 31 | -- Ici on ne prend que le prélèvement le plus récent (avec row_number = 1) 32 | -- pour chaque type de métaux lourds 33 | SELECT 34 | cdreseau, 35 | datetimeprel AS date_dernier_prel, 36 | 'dernier_prel' AS periode, 37 | 1 AS nb_parametres, 38 | CASE 39 | WHEN 40 | cdparametresiseeaux = 'PB' 41 | THEN 'metaux_lourds_pb' 42 | WHEN 43 | cdparametresiseeaux = 'AS' 44 | THEN 'metaux_lourds_as' 45 | END AS categorie, 46 | CASE 47 | WHEN 48 | -- Pas de distinction PB/AS car même résultat 49 | valtraduite IS NULL 50 | OR valtraduite = 0 51 | THEN 'non_quantifie' 52 | WHEN 53 | cdparametresiseeaux = 'PB' 54 | AND valtraduite >= limite_qualite 55 | THEN 'sup_limite_qualite' 56 | WHEN 57 | -- 5 est la future limite de qualité appliquée 58 | -- à partir de 2036 59 | cdparametresiseeaux = 'PB' 60 | AND valtraduite >= 5 61 | AND valtraduite < limite_qualite 62 | THEN 'sup_limite_qualite_2036' 63 | WHEN 64 | cdparametresiseeaux = 'PB' 65 | AND valtraduite < 5 66 | THEN 'inf_limite_qualite' 67 | WHEN 68 | cdparametresiseeaux = 'AS' 69 | AND valtraduite >= valeur_sanitaire_1 70 | THEN 'sup_valeur_sanitaire' 71 | WHEN 72 | cdparametresiseeaux = 'AS' 73 | AND valtraduite >= limite_qualite 74 | AND valtraduite < valeur_sanitaire_1 75 | THEN 'sup_limite_qualite' 76 | WHEN 77 | cdparametresiseeaux = 'AS' 78 | AND valtraduite < limite_qualite 79 | THEN 'inf_limite_qualite' 80 | ELSE 'erreur' 81 | END AS resultat, 82 | JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite) 83 | AS parametres_detectes 84 | FROM 85 | metaux_lourds_dernier_prel 86 | WHERE 87 | row_number = 1 88 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/pesticide/metabolite/int__resultats_metabolite_udi_dernier.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | 
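-- Note: DENSE_RANK (rather than ROW_NUMBER) is used so that row_number = 1 keeps
-- every parameter analysed at the network's most recent sampling datetime,
-- not just one arbitrary row per cdreseau.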
last_pvl AS ( 3 | SELECT DISTINCT 4 | cdreseau, 5 | categorie, 6 | cdparametresiseeaux, 7 | valtraduite, 8 | limite_qualite, 9 | limite_indicative, 10 | valeur_sanitaire_1, 11 | datetimeprel, 12 | DENSE_RANK() 13 | OVER ( 14 | PARTITION BY cdreseau 15 | ORDER BY datetimeprel DESC 16 | ) 17 | AS row_number 18 | 19 | FROM 20 | {{ ref('int__resultats_udi_communes') }} 21 | WHERE 22 | categorie = 'pesticide' 23 | AND 24 | categorie_2 = 'metabolite' 25 | AND 26 | -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement 27 | datetimeprel >= DATE_TRUNC('day', ( 28 | SELECT MAX(sub.datetimeprel) 29 | FROM {{ ref('int__resultats_udi_communes') }} AS sub 30 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY 31 | ), 32 | 33 | aggregated AS ( 34 | SELECT 35 | cdreseau, 36 | cdparametresiseeaux, 37 | MAX(valtraduite) AS valtraduite, 38 | MAX(limite_qualite) AS limite_qualite, 39 | MAX(limite_indicative) AS limite_indicative, 40 | MAX(valeur_sanitaire_1) AS valeur_sanitaire_1, 41 | MAX(datetimeprel) AS datetimeprel 42 | FROM last_pvl 43 | WHERE row_number = 1 44 | GROUP BY cdreseau, cdparametresiseeaux 45 | ) 46 | 47 | SELECT 48 | cdreseau, 49 | 'metabolite' AS categorie, 50 | 'dernier_prel' AS periode, 51 | MAX(datetimeprel) AS date_dernier_prel, 52 | COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres, 53 | CASE 54 | WHEN BOOL_AND(valtraduite IS NULL OR valtraduite = 0) THEN 'non_quantifie' 55 | WHEN 56 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1) 57 | THEN 'sup_valeur_sanitaire' 58 | WHEN 59 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_qualite) 60 | THEN 'sup_limite_qualite' 61 | WHEN 62 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_indicative) 63 | THEN 'sup_limite_indicative' 64 | WHEN 65 | BOOL_OR( 66 | valtraduite IS NOT NULL 67 | AND (limite_qualite IS NULL OR valtraduite <= limite_qualite) 68 | AND (limite_indicative IS NULL OR valtraduite <= limite_indicative) 69 | AND (limite_qualite IS NOT NULL OR limite_indicative IS NOT NULL) 70 | ) 71 | THEN 'inf_limites' 72 | ELSE 'erreur' 73 | END AS resultat, 74 | TO_JSON( 75 | MAP( 76 | LIST( 77 | cdparametresiseeaux 78 | ORDER BY cdparametresiseeaux 79 | ) FILTER (WHERE valtraduite > 0 80 | ), 81 | LIST( 82 | valtraduite 83 | ORDER BY cdparametresiseeaux 84 | ) FILTER (WHERE valtraduite > 0 85 | ) 86 | ) 87 | ) AS parametres_detectes 88 | 89 | FROM aggregated 90 | GROUP BY cdreseau 91 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/metaux_lourds/int__resultats_metaux_lourds_commune_dernier.sql: -------------------------------------------------------------------------------- 1 | -- Ici on ne garde que le dernier prélèvement 2 | -- pour chaque UDI dans la dernière année 3 | WITH metaux_lourds_dernier_prel AS ( 4 | SELECT 5 | inseecommune, 6 | categorie, 7 | cdparametresiseeaux, 8 | limite_qualite, 9 | valeur_sanitaire_1, 10 | valeur_sanitaire_2, 11 | datetimeprel, 12 | valtraduite, 13 | ROW_NUMBER() 14 | OVER ( 15 | PARTITION BY inseecommune, cdparametresiseeaux 16 | ORDER BY datetimeprel DESC 17 | ) 18 | AS row_number 19 | FROM 20 | {{ ref('int__resultats_udi_communes') }} 21 | WHERE 22 | cdparametresiseeaux IN ('PB', 'AS') 23 | AND 24 | -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement 25 | datetimeprel >= DATE_TRUNC('day', ( 26 | SELECT MAX(sub.datetimeprel) 27 | FROM {{ ref('int__resultats_udi_communes') }} AS sub 28 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY 29 | ) 30 | 31 | -- Ici on ne prend que 
le prélèvement le plus récent (avec row_number = 1) 32 | -- pour chaque type de métaux lourds 33 | SELECT 34 | inseecommune, 35 | datetimeprel AS date_dernier_prel, 36 | 'dernier_prel' AS periode, 37 | 1 AS nb_parametres, 38 | CASE 39 | WHEN 40 | cdparametresiseeaux = 'PB' 41 | THEN 'metaux_lourds_pb' 42 | WHEN 43 | cdparametresiseeaux = 'AS' 44 | THEN 'metaux_lourds_as' 45 | END AS categorie, 46 | CASE 47 | WHEN 48 | -- Pas de distinction PB/AS car même résultat 49 | valtraduite IS NULL 50 | OR valtraduite = 0 51 | THEN 'non_quantifie' 52 | WHEN 53 | cdparametresiseeaux = 'PB' 54 | AND valtraduite >= limite_qualite 55 | THEN 'sup_limite_qualite' 56 | WHEN 57 | -- 5 est la future limite de qualité appliquée 58 | -- à partir de 2036 59 | cdparametresiseeaux = 'PB' 60 | AND valtraduite >= 5 61 | AND valtraduite < limite_qualite 62 | THEN 'sup_limite_qualite_2036' 63 | WHEN 64 | cdparametresiseeaux = 'PB' 65 | AND valtraduite < 5 66 | THEN 'inf_limite_qualite' 67 | WHEN 68 | cdparametresiseeaux = 'AS' 69 | AND valtraduite >= valeur_sanitaire_1 70 | THEN 'sup_valeur_sanitaire' 71 | WHEN 72 | cdparametresiseeaux = 'AS' 73 | AND valtraduite >= limite_qualite 74 | AND valtraduite < valeur_sanitaire_1 75 | THEN 'sup_limite_qualite' 76 | WHEN 77 | cdparametresiseeaux = 'AS' 78 | AND valtraduite < limite_qualite 79 | THEN 'inf_limite_qualite' 80 | ELSE 'erreur' 81 | END AS resultat, 82 | JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite) 83 | AS parametres_detectes 84 | FROM 85 | metaux_lourds_dernier_prel 86 | WHERE 87 | row_number = 1 88 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/pesticide/metabolite/int__resultats_metabolite_commune_dernier.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | last_pvl AS ( 3 | SELECT DISTINCT 4 | inseecommune, 5 | categorie, 6 | cdparametresiseeaux, 7 | valtraduite, 8 | limite_qualite, 9 | limite_indicative, 10 | valeur_sanitaire_1, 11 | datetimeprel, 12 | DENSE_RANK() 13 | OVER ( 14 | PARTITION BY inseecommune 15 | ORDER BY datetimeprel DESC 16 | ) 17 | AS row_number 18 | 19 | FROM 20 | {{ ref('int__resultats_udi_communes') }} 21 | WHERE 22 | categorie = 'pesticide' 23 | AND 24 | categorie_2 = 'metabolite' 25 | AND 26 | -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement 27 | datetimeprel >= DATE_TRUNC('day', ( 28 | SELECT MAX(sub.datetimeprel) 29 | FROM {{ ref('int__resultats_udi_communes') }} AS sub 30 | ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY 31 | ), 32 | 33 | aggregated AS ( 34 | SELECT 35 | inseecommune, 36 | cdparametresiseeaux, 37 | MAX(valtraduite) AS valtraduite, 38 | MAX(limite_qualite) AS limite_qualite, 39 | MAX(limite_indicative) AS limite_indicative, 40 | MAX(valeur_sanitaire_1) AS valeur_sanitaire_1, 41 | MAX(datetimeprel) AS datetimeprel 42 | FROM last_pvl 43 | WHERE row_number = 1 44 | GROUP BY inseecommune, cdparametresiseeaux 45 | ) 46 | 47 | SELECT 48 | inseecommune, 49 | 'metabolite' AS categorie, 50 | 'dernier_prel' AS periode, 51 | MAX(datetimeprel) AS date_dernier_prel, 52 | COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres, 53 | CASE 54 | WHEN BOOL_AND(valtraduite IS NULL OR valtraduite = 0) THEN 'non_quantifie' 55 | WHEN 56 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1) 57 | THEN 'sup_valeur_sanitaire' 58 | WHEN 59 | BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_qualite) 60 | THEN 'sup_limite_qualite' 61 | WHEN 62 | BOOL_OR(valtraduite 
IS NOT NULL AND valtraduite > limite_indicative) 63 | THEN 'sup_limite_indicative' 64 | WHEN 65 | BOOL_OR( 66 | valtraduite IS NOT NULL 67 | AND (limite_qualite IS NULL OR valtraduite <= limite_qualite) 68 | AND (limite_indicative IS NULL OR valtraduite <= limite_indicative) 69 | AND (limite_qualite IS NOT NULL OR limite_indicative IS NOT NULL) 70 | ) 71 | THEN 'inf_limites' 72 | ELSE 'erreur' 73 | END AS resultat, 74 | TO_JSON( 75 | MAP( 76 | LIST( 77 | cdparametresiseeaux 78 | ORDER BY cdparametresiseeaux 79 | ) FILTER (WHERE valtraduite > 0 80 | ), 81 | LIST( 82 | valtraduite 83 | ORDER BY cdparametresiseeaux 84 | ) FILTER (WHERE valtraduite > 0 85 | ) 86 | ) 87 | ) AS parametres_detectes 88 | 89 | FROM aggregated 90 | GROUP BY inseecommune 91 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | pfas_prels AS ( 3 | SELECT DISTINCT 4 | de_partition AS annee, 5 | cdreseau, 6 | referenceprel, 7 | datetimeprel, 8 | cdparametresiseeaux, 9 | limite_qualite, 10 | valeur_sanitaire_1, 11 | valtraduite 12 | FROM 13 | {{ ref('int__resultats_udi_communes') }} 14 | WHERE 15 | categorie = 'pfas' 16 | ), 17 | 18 | -- 1 : Agrégation des résultats en une seule ligne par prélèvement / udi / année 19 | pfas_results_udi_agg AS ( 20 | SELECT 21 | referenceprel, 22 | cdreseau, 23 | annee, 24 | -- La somme des 20 PFAS est disponible comme un paramètre (SPFAS) 25 | MAX( 26 | CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END 27 | ) AS sum_20_pfas, 28 | COUNT( 29 | DISTINCT CASE 30 | WHEN cdparametresiseeaux = 'SPFAS' THEN referenceprel 31 | END 32 | ) AS count_20_pfas, 33 | -- On calcule une somme de 4 PFAS pour une limite recommandée par le 34 | -- haut conseil de la santé public 35 | SUM( 36 | CASE 37 | WHEN 38 | cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS') 39 | THEN valtraduite 40 | ELSE 0 41 | END 42 | ) AS sum_4_pfas, 43 | -- On check si la somme des 20 PFAS est supérieure 44 | -- à la limite reglementaire 45 | MAX( 46 | CASE 47 | WHEN 48 | cdparametresiseeaux = 'SPFAS' 49 | AND limite_qualite IS NOT NULL 50 | AND valtraduite IS NOT NULL 51 | AND valtraduite > limite_qualite 52 | THEN 1 53 | ELSE 0 54 | END 55 | ) AS sum_20_pfas_above_limit, 56 | MAX( 57 | CASE 58 | WHEN 59 | valeur_sanitaire_1 IS NOT NULL 60 | AND valtraduite IS NOT NULL 61 | AND valtraduite > valeur_sanitaire_1 62 | THEN 1 63 | ELSE 0 64 | END 65 | ) AS has_pfas_above_vs, 66 | MAX(datetimeprel) AS max_datetimeprel 67 | FROM pfas_prels 68 | GROUP BY referenceprel, cdreseau, annee 69 | -- On drop les très rares cas où il n'y a pas la somme des 20 PFAS 70 | HAVING count_20_pfas = 1 71 | ) 72 | 73 | SELECT 74 | cdreseau, 75 | annee, 76 | 'pfas' AS categorie, 77 | 'bilan_annuel_' || annee AS periode, 78 | COUNT(DISTINCT referenceprel) AS nb_prelevements, 79 | ROUND(( 80 | SUM(CASE WHEN sum_20_pfas_above_limit = 1 THEN 1 ELSE 0 END) 81 | / 82 | COUNT(DISTINCT referenceprel) 83 | ), 2) AS ratio_limite_qualite, 84 | SUM(has_pfas_above_vs) AS nb_sup_valeur_sanitaire, 85 | TO_JSON({ 86 | 'SPFAS': MAX(sum_20_pfas), 87 | 'SUM_4_PFAS': MAX(sum_4_pfas) 88 | }) AS parametres_detectes, 89 | MAX(max_datetimeprel) AS date_dernier_prel 90 | 91 | FROM pfas_results_udi_agg 92 | GROUP BY cdreseau, annee 93 | -------------------------------------------------------------------------------- /dbt_/tests/test_pfas_results.sql: 
-------------------------------------------------------------------------------- 1 | -- dernier udi 2 | SELECT 3 | 'dernier_prel' AS periode, 4 | cdreseau, 5 | categorie, 6 | resultat, 7 | 0 AS ratio_limite_qualite, 8 | 0 AS nb_sup_valeur_sanitaire 9 | FROM 10 | {{ ref('int__resultats_pfas_udi_dernier') }} 11 | WHERE 12 | ( 13 | -- test 14 | -- l'UDI 013001457 a un prélevement le 2025-02-19 09:58:00 15 | -- avec un dépassement de valeur sanitaire pour PFOS 16 | cdreseau = '013001457' 17 | AND date_dernier_prel = TIMESTAMP '2025-02-19 09:58:00' 18 | AND resultat != 'sup_valeur_sanitaire' 19 | ) 20 | OR ( 21 | cdreseau = '004001032' 22 | AND date_dernier_prel = TIMESTAMP '2025-02-28 12:33:00' 23 | AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02' 24 | ) 25 | OR ( 26 | cdreseau = '008000855' 27 | AND date_dernier_prel = TIMESTAMP '2025-02-27 09:24:00' 28 | AND resultat != 'sup_valeur_sanitaire' 29 | ) 30 | OR 31 | ( 32 | cdreseau = '00800107747' 33 | AND date_dernier_prel = '2025-02-27 09:24:00' 34 | AND resultat != 'sup_valeur_sanitaire' 35 | ) 36 | OR 37 | ( 38 | cdreseau = '011004114' 39 | AND date_dernier_prel = '2025-02-24 13:55:00' 40 | AND resultat != 'somme_20pfas_sup_0_1' 41 | ) 42 | OR 43 | ( 44 | cdreseau = '001000404' 45 | AND date_dernier_prel = '2024-11-29 08:08:00' 46 | AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_sup_0_02' 47 | ) 48 | OR 49 | ( 50 | cdreseau = '001000511' 51 | AND date_dernier_prel = '2024-11-28 09:58:00' 52 | AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02' 53 | ) 54 | OR 55 | ( 56 | cdreseau = '003000370' 57 | AND date_dernier_prel = TIMESTAMP '2025-02-18 08:45:00' 58 | AND resultat != 'non_quantifie' 59 | ) 60 | UNION ALL 61 | -- annuel udi 62 | SELECT 63 | 'bilan_annuel' AS periode, 64 | cdreseau, 65 | categorie, 66 | '' AS resultat, 67 | ratio_limite_qualite, 68 | nb_sup_valeur_sanitaire 69 | FROM 70 | {{ ref('int__resultats_pfas_udi_annuel') }} 71 | WHERE 72 | ( 73 | cdreseau = '001000356' 74 | AND annee = '2025' 75 | AND 76 | ( 77 | ratio_limite_qualite != 0 78 | OR nb_sup_valeur_sanitaire != 0 79 | ) 80 | ) 81 | OR 82 | ( 83 | cdreseau = '074000043' 84 | AND annee = '2022' 85 | AND ( 86 | ratio_limite_qualite != 0.1 87 | OR nb_sup_valeur_sanitaire != 2 88 | ) 89 | ) 90 | OR 91 | ( 92 | cdreseau = '030000200' 93 | AND annee = '2024' 94 | AND ( 95 | nb_sup_valeur_sanitaire != 0 96 | OR ratio_limite_qualite != 0.25 97 | ) 98 | ) 99 | OR 100 | ( 101 | cdreseau = '069000025' 102 | AND annee IN ('2022', '2023', '2024') 103 | AND ( 104 | nb_sup_valeur_sanitaire != 0 105 | OR ratio_limite_qualite != 0 106 | ) 107 | ) 108 | -------------------------------------------------------------------------------- /dbt_/models/intermediate/pfas/int__resultats_pfas_commune_annuel.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | pfas_prels AS ( 3 | SELECT DISTINCT 4 | de_partition AS annee, 5 | inseecommune, 6 | referenceprel, 7 | datetimeprel, 8 | cdparametresiseeaux, 9 | limite_qualite, 10 | valeur_sanitaire_1, 11 | valtraduite 12 | FROM 13 | {{ ref('int__resultats_udi_communes') }} 14 | WHERE 15 | categorie = 'pfas' 16 | ), 17 | 18 | -- 1 : Agrégation des résultats en une seule ligne par prélèvement / udi / année 19 | pfas_results_udi_agg AS ( 20 | SELECT 21 | referenceprel, 22 | inseecommune, 23 | annee, 24 | -- La somme des 20 PFAS est disponible comme un paramètre (SPFAS) 25 | MAX( 26 | CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END 27 | ) AS sum_20_pfas, 28 | COUNT( 29 | DISTINCT 
CASE 30 | WHEN cdparametresiseeaux = 'SPFAS' THEN referenceprel 31 | END 32 | ) AS count_20_pfas, 33 | -- On calcule une somme de 4 PFAS pour une limite recommandée par le 34 | -- haut conseil de la santé public 35 | SUM( 36 | CASE 37 | WHEN 38 | cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS') 39 | THEN valtraduite 40 | ELSE 0 41 | END 42 | ) AS sum_4_pfas, 43 | -- On check si la somme des 20 PFAS est supérieure 44 | -- à la limite reglementaire 45 | MAX( 46 | CASE 47 | WHEN 48 | cdparametresiseeaux = 'SPFAS' 49 | AND limite_qualite IS NOT NULL 50 | AND valtraduite IS NOT NULL 51 | AND valtraduite > limite_qualite 52 | THEN 1 53 | ELSE 0 54 | END 55 | ) AS sum_20_pfas_above_limit, 56 | MAX( 57 | CASE 58 | WHEN 59 | valeur_sanitaire_1 IS NOT NULL 60 | AND valtraduite IS NOT NULL 61 | AND valtraduite > valeur_sanitaire_1 62 | THEN 1 63 | ELSE 0 64 | END 65 | ) AS has_pfas_above_vs, 66 | MAX(datetimeprel) AS max_datetimeprel 67 | FROM pfas_prels 68 | GROUP BY referenceprel, inseecommune, annee 69 | -- On drop les très rares cas où il n'y a pas la somme des 20 PFAS 70 | HAVING count_20_pfas = 1 71 | ) 72 | 73 | SELECT 74 | inseecommune, 75 | annee, 76 | 'pfas' AS categorie, 77 | 'bilan_annuel_' || annee AS periode, 78 | COUNT(DISTINCT referenceprel) AS nb_prelevements, 79 | ROUND(( 80 | SUM(CASE WHEN sum_20_pfas_above_limit = 1 THEN 1 ELSE 0 END) 81 | / 82 | COUNT(DISTINCT referenceprel) 83 | ), 2) AS ratio_limite_qualite, 84 | SUM(has_pfas_above_vs) AS nb_sup_valeur_sanitaire, 85 | TO_JSON({ 86 | 'SPFAS': MAX(sum_20_pfas), 87 | 'SUM_4_PFAS': MAX(sum_4_pfas) 88 | }) AS parametres_detectes, 89 | MAX(max_datetimeprel) AS date_dernier_prel 90 | 91 | FROM pfas_results_udi_agg 92 | GROUP BY inseecommune, annee 93 | -------------------------------------------------------------------------------- /pipelines/notebooks/test_atlasante_udi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import duckdb\n", 10 | "from pipelines.tasks.config.common import DUCKDB_FILE\n", 11 | "\n", 12 | "con = duckdb.connect(database=DUCKDB_FILE, read_only=True)\n", 13 | "# show all tables in DB\n", 14 | "con.sql(\"SHOW TABLES;\").show()" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# describe atlasante_udi table\n", 24 | "df = con.sql(\"DESCRIBE atlasante_udi;\").df()\n", 25 | "print(df)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "# install spatial extention for spactial functions\n", 35 | "con.sql(\"INSTALL spatial;\")\n", 36 | "# Load spatial extension\n", 37 | "con.sql(\"LOAD spatial;\")" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# show same paris's UDI\n", 47 | "df = con.sql(\"Select * from atlasante_udi where uge_nom like '%EAU DE PARIS%'\").df()\n", 48 | "df.head()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# Paris North (Nord - near Saint-Denis):\n", 58 | "# latitudeN = 48.9358\n", 59 | "# longitudeN = 2.3538\n", 60 | "# Paris South\n", 61 | "# latitudeS = 48.8186\n", 62 | "# longitudeS = 2.3326\n", 63 | "# Paris West (Ouest - near Porte 
Maillot/Neuilly):\n", 64 | "# latitudeW = 48.8781\n", 65 | "# longitudeW = 2.2785\n", 66 | "# Central Paris (Centre - Notre-Dame):\n", 67 | "latitude = 48.8566\n", 68 | "longitude = 2.3522\n", 69 | "\n", 70 | "sql = f\"\"\"\n", 71 | "SELECT *\n", 72 | "FROM atlasante_udi\n", 73 | "WHERE ST_Contains(geom, ST_GeomFromText('POINT({longitude} {latitude})'));\n", 74 | "\"\"\"\n", 75 | "df = con.sql(sql).df()\n", 76 | "df.head()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "con.close()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": ".venv", 99 | "language": "python", 100 | "name": "python3" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 3 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython3", 112 | "version": "3.12.7" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 2 117 | } 118 | -------------------------------------------------------------------------------- /dbt_/models/website/web__stats_udi.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | -- Dernière mise à jour 3 | derniere_maj AS ( 4 | SELECT 5 | 'derniere_mise_a_jour' AS stat_nom, 6 | NULL AS stat_chiffre, 7 | max(date_dernier_prel)::VARCHAR AS stat_texte 8 | FROM {{ ref('web__resultats_udi') }} 9 | WHERE periode = 'dernier_prel' 10 | ), 11 | 12 | -- Total UDIs 13 | total_udis AS ( 14 | SELECT 15 | 'total_udis' AS stat_nom, 16 | NULL AS stat_texte, 17 | count(DISTINCT cdreseau) AS stat_chiffre 18 | FROM {{ ref('web__resultats_udi') }} 19 | WHERE periode = 'dernier_prel' 20 | ), 21 | 22 | -- Statistiques par catégorie pour dernier prélèvement 23 | stats_dernier_prel AS ( 24 | SELECT 25 | NULL AS stat_texte, 26 | 'dernier_prel_' || categorie || '_' || coalesce(resultat, 'non_recherche') 27 | AS stat_nom, 28 | count(*) AS stat_chiffre 29 | FROM {{ ref('web__resultats_udi') }} 30 | WHERE 31 | periode = 'dernier_prel' 32 | 33 | GROUP BY categorie, resultat 34 | 35 | ), 36 | 37 | -- Statistiques par catégorie et année pour bilan annuel - ratios par intervalles 38 | stats_bilan_annuel_ratio AS ( 39 | SELECT 40 | NULL AS stat_texte, 41 | periode || '_' || categorie || '_ratio_' 42 | || CASE 43 | WHEN ratio = 0 THEN '0' 44 | WHEN ratio <= 0.25 THEN '0.25' 45 | WHEN ratio <= 0.5 THEN '0.5' 46 | WHEN ratio <= 0.75 THEN '0.75' 47 | WHEN ratio <= 1 THEN '1' 48 | ELSE 'erreur' 49 | END AS stat_nom, 50 | count(*) AS stat_chiffre 51 | FROM {{ ref('web__resultats_udi') }} 52 | WHERE 53 | periode LIKE 'bilan_annuel_%' 54 | AND ratio IS NOT NULL 55 | GROUP BY 56 | periode, 57 | categorie, 58 | CASE 59 | WHEN ratio = 0 THEN '0' 60 | WHEN ratio <= 0.25 THEN '0.25' 61 | WHEN ratio <= 0.5 THEN '0.5' 62 | WHEN ratio <= 0.75 THEN '0.75' 63 | WHEN ratio <= 1 THEN '1' 64 | ELSE 'erreur' 65 | END 66 | ), 67 | 68 | -- Statistiques par catégorie et année pour bilan annuel - non recherche (ratio null) 69 | stats_bilan_annuel_non_recherche AS ( 70 | SELECT 71 | NULL AS stat_texte, 72 | periode || '_' || categorie || '_non_recherche' AS stat_nom, 73 | count(*) AS stat_chiffre 74 | FROM {{ ref('web__resultats_udi') }} 75 | WHERE 76 | periode LIKE 'bilan_annuel_%' 77 | AND ratio 
IS NULL 78 | GROUP BY periode, categorie 79 | ) 80 | 81 | -- Union de toutes les statistiques 82 | SELECT 83 | stat_nom, 84 | stat_chiffre, 85 | stat_texte 86 | FROM derniere_maj 87 | UNION ALL 88 | SELECT 89 | stat_nom, 90 | stat_chiffre, 91 | stat_texte 92 | FROM total_udis 93 | UNION ALL 94 | SELECT 95 | stat_nom, 96 | stat_chiffre, 97 | stat_texte 98 | FROM stats_dernier_prel 99 | UNION ALL 100 | SELECT 101 | stat_nom, 102 | stat_chiffre, 103 | stat_texte 104 | FROM stats_bilan_annuel_ratio 105 | UNION ALL 106 | SELECT 107 | stat_nom, 108 | stat_chiffre, 109 | stat_texte 110 | FROM stats_bilan_annuel_non_recherche 111 | -------------------------------------------------------------------------------- /dbt_/seeds/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | seeds: 4 | - name: references_generations_futures 5 | config: 6 | quote_char: '"' 7 | delimiter: "," 8 | full_refresh: true 9 | description: "Table de référence fournie par Générations Futures" 10 | columns: 11 | - name: cdparametresiseeaux 12 | description: "Code SISE-Eaux (Système d'Information des services Santé-Environnement Eaux) du paramètre" 13 | - name: cdparametre 14 | description: "Code SANDRE (Service National d'Administration des Données et Référentiels sur l'Eau) du paramètre" 15 | - name: libmajparametre 16 | description: "Nom du paramètre en majuscule" 17 | tests: 18 | - dbt_expectations.expect_column_values_to_be_of_type: 19 | column_type: VARCHAR 20 | - name: libminparametre 21 | description: "Nom du paramètre en minuscule" 22 | tests: 23 | - dbt_expectations.expect_column_values_to_be_of_type: 24 | column_type: VARCHAR 25 | - name: casparam 26 | description: "Code CAS (Chemical Abstracts Service) de la substance chimique" 27 | - name: categorie_1 28 | description: "Catégorie du paramètre" 29 | tests: 30 | - not_null 31 | - accepted_values: 32 | values: 33 | - "pfas" 34 | - "cvm" 35 | - "nitrate" 36 | - "metaux_lourds" 37 | - "substances_indus" 38 | - "pesticide" 39 | - name: categorie_2 40 | description: "Sous-catégorie" 41 | - name: categorie_3 42 | description: "Détail de la sous-catégorie" 43 | - name: limite_qualite 44 | description: "Limite de qualité du paramètre" 45 | tests: 46 | - dbt_expectations.expect_column_values_to_be_of_type: 47 | column_type: double 48 | - name: limite_qualite_unite 49 | description: "Unité de la limite de qualité" 50 | - name: limite_qualite_commentaire 51 | description: "Commentaire sur la limite de qualité" 52 | - name: limite_indicative 53 | description: "Limite indicative du paramètre" 54 | tests: 55 | - dbt_expectations.expect_column_values_to_be_of_type: 56 | column_type: double 57 | - name: limite_indicative_unite 58 | description: "Unité de la limite indicative" 59 | - name: valeur_sanitaire_1 60 | description: "Valeur sanitaire" 61 | tests: 62 | - dbt_expectations.expect_column_values_to_be_of_type: 63 | column_type: double 64 | - name: valeur_sanitaire_1_unite 65 | description: "Unité de la valeur sanitaire 1" 66 | - name: valeur_sanitaire_1_commentaire 67 | description: "Commentaire sur la valeur sanitaire 1" 68 | - name: valeur_sanitaire_2 69 | description: "Deuxième valeur sanitaire" 70 | tests: 71 | - dbt_expectations.expect_column_values_to_be_of_type: 72 | column_type: integer 73 | - name: valeur_sanitaire_2_unite 74 | description: "Unité de la valeur sanitaire 2" 75 | - name: valeur_sanitaire_2_commentaire 76 | description: "Commentaire sur la valeur sanitaire 2" 77 | - name: web_label 78 | 
description: "Libellé utilisé dans le site web" 79 | -------------------------------------------------------------------------------- /pipelines/tasks/config/config_uploaded_geojson.py: -------------------------------------------------------------------------------- 1 | """Configuration for uploaded GeoJSON files. 2 | 3 | Cette configuration supporte plusieurs fichiers GeoJSON uploadés manuellement sur S3. 4 | Pour ajouter un nouveau fichier, ajoutez simplement un dictionnaire dans la liste 'files'. 5 | 6 | Format de chaque fichier: 7 | - path: chemin relatif du fichier sur S3 (sera combiné avec prefix) 8 | - table_name: nom de la table à créer dans DuckDB 9 | - file_name: nom du fichier local à télécharger 10 | """ 11 | 12 | uploaded_geojson_config = { 13 | "source": { 14 | "prefix": "upload", # Préfixe S3 15 | }, 16 | "files": [ 17 | # { 18 | # # Cette première source contient le GeoJSON des UDIs de Atlasante issu des infofactures pour la métropole pour l'année 2023. 19 | # # Pour l'obtenir: 20 | # # - partir de la source suivante: https://catalogue.atlasante.fr/geonetwork/srv/fre/catalog.search#/metadata/1d02cd8b-137d-4360-b566-f6082a47ee32 21 | # # - cliquer sur "accès à la carte" (normalement on arrive sur cette URL: https://carto.atlasante.fr/1/ars_metropole_udi_infofactures.map) 22 | # # - à gauche de la carte, cliquer sur le bouton avec les trois "couches" 23 | # # - choisir le layer "Réseaux (UDI) - 2023" dans la liste (dans "Historique"), puis cliquer sur les trois points à droite, puis cliquer sur "Télécharger la donnée" 24 | # # - choisir format "GeoJSON" et projection "WGS84 - GPS (EPSG 4326)" puis cliquer sur "Exécution directe" pour télécharger le fichier 25 | # # - extraire le fichier "dgs_metropole_udi_infofactures_j.json" du zip téléchargé 26 | # # - renommer le fichier téléchargé et l'uploader dans le dossier approprié (cf `path` ci-dessous) 27 | # # 28 | # "path": "atlasante/udi_infofactures_2023.json", 29 | # "table_name": "atlasante_udi_2023", 30 | # "local_file_name": "udi_infofactures_2023.json", 31 | # }, 32 | # { 33 | # # UDIs de la Corse 34 | # # Pour l'obtenir: 35 | # # - partir de la source suivante: https://catalogue.atlasante.fr/geonetwork/srv/fre/catalog.search#/metadata/67a6998e-15b2-4796-9584-c87af156f549 36 | # # - sur "Accès au téléchargement des données", cliquer sur "Télécharger" 37 | # # - choisir format "GeoJSON" et projection "WGS84 - GPS (EPSG 4326)" puis cliquer sur "Exécution directe" pour télécharger le fichier 38 | # # - extraire le fichier "ars_r94_udi_2018_z.json" du zip téléchargé 39 | # # - renommer le fichier téléchargé et l'uploader dans le dossier approprié (cf `path` ci-dessous) 40 | # # 41 | # "path": "atlasante/udi_corse.json", 42 | # "table_name": "atlasante_udi_corse", 43 | # "local_file_name": "udi_corse.json", 44 | # }, 45 | { 46 | # GeoJSON des UDIs de Atlasante issu des infofactures pour l'année 2024. 47 | # S'obtient de la même manière que pour l'année 2023 (cf. commentaire dans le premier bloc). 48 | # Concernant la couverture géographique, il s'agit de la métropole + Corse. 49 | "path": "atlasante/udi_infofactures_2024.json", 50 | "table_name": "atlasante_udi_2024", 51 | "local_file_name": "udi_infofactures_2024.json", 52 | } 53 | ], 54 | } 55 | -------------------------------------------------------------------------------- /pipelines/tasks/download_pmtiles.py: -------------------------------------------------------------------------------- 1 | """ 2 | Download PMtiles files. 
3 | 4 | Args: 5 | - env (str): Environment to download from ("dev" or "prod") 6 | - use-boto3 (bool): Use boto3 library to download from S3 storage, instead of using public HTTPS URL (default: False) 7 | 8 | Examples: 9 | - download_pmtiles --env prod : Download PMtiles from production environment 10 | - download_pmtiles --env dev : Download PMtiles from development environment 11 | - download_pmtiles --use-boto3 : Download PMtiles from S3 storage 12 | """ 13 | 14 | import os 15 | from abc import ABC, abstractmethod 16 | 17 | from pipelines.config.config import get_s3_path_pmtiles 18 | from pipelines.tasks.config.common import CACHE_FOLDER, download_file_from_https 19 | from pipelines.utils.logger import get_logger 20 | from pipelines.utils.storage_client import ObjectStorageClient 21 | 22 | logger = get_logger(__name__) 23 | 24 | 25 | class PMtilesDownloadStrategy(ABC): 26 | """Interface for PMtiles download strategies.""" 27 | 28 | def __init__(self): 29 | super().__init__() 30 | self.s3 = ObjectStorageClient() 31 | 32 | @abstractmethod 33 | def download(self, env: str, local_path: str): 34 | pass 35 | 36 | 37 | class Boto3DownloadStrategy(PMtilesDownloadStrategy): 38 | """Strategy for downloading PMtiles from S3 storage using boto3.""" 39 | 40 | def download(self, env: str, local_path: str): 41 | logger.info(f"Downloading PMtiles from S3 in environment {env}") 42 | remote_s3_path = get_s3_path_pmtiles(env) 43 | self.s3.download_object(remote_s3_path, local_path) 44 | logger.info( 45 | f"✅ PMtiles downloaded from s3://{self.s3.bucket_name}/{remote_s3_path}" 46 | ) 47 | 48 | 49 | class HTTPSDownloadStrategy(PMtilesDownloadStrategy): 50 | """Strategy for downloading PMtiles via HTTPS.""" 51 | 52 | def download(self, env: str, local_path: str): 53 | logger.info("Downloading PMtiles via HTTPS") 54 | remote_s3_path = get_s3_path_pmtiles(env) 55 | url = f"https://{self.s3.bucket_name}.{self.s3.endpoint_url.split('https://')[1]}/{remote_s3_path}" 56 | download_file_from_https(url=url, filepath=local_path) 57 | logger.info(f"✅ PMtiles downloaded via HTTPS: {url} -> {local_path}") 58 | 59 | 60 | class PMtilesDownloader: 61 | """Manages the PMtiles download process.""" 62 | 63 | def __init__(self, strategy: PMtilesDownloadStrategy, env: str): 64 | self.strategy = strategy 65 | self.local_pmtiles_path = os.path.join( 66 | CACHE_FOLDER, "new-georef-france-commune-prelevement.pmtiles" 67 | ) 68 | if env not in ("dev", "prod"): 69 | raise ValueError("'env' must be 'dev' or 'prod'") 70 | self.env = env 71 | 72 | def download(self): 73 | self.strategy.download(self.env, self.local_pmtiles_path) 74 | 75 | 76 | def execute(env: str, use_boto3: bool = False): 77 | """ 78 | Execute PMtiles download using the appropriate strategy. 79 | 80 | Args: 81 | env (str): Environment to download from ("dev" or "prod") 82 | use_boto3 (bool): Whether to use boto3 instead of HTTPS. Default is False.
83 | """ 84 | strategy = Boto3DownloadStrategy() if use_boto3 else HTTPSDownloadStrategy() 85 | downloader = PMtilesDownloader(strategy, env) 86 | downloader.download() 87 | -------------------------------------------------------------------------------- /Dockerfile.unified: -------------------------------------------------------------------------------- 1 | # Unified Dockerfile - Embeds database and pmtiles for atomic deployments 2 | 3 | # Builder stage for compiling the application 4 | # Note: debian bookworm is supported until 2028-06-30 5 | FROM debian:bookworm-slim AS builder 6 | 7 | # Define build argument for API key 8 | ARG NEXT_PUBLIC_PROTOMAPS_API_KEY 9 | 10 | # Install UV 11 | COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ 12 | ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy 13 | ENV UV_PYTHON_INSTALL_DIR=/python 14 | ENV UV_PYTHON_PREFERENCE=only-managed 15 | ENV UV_NO_CACHE=1 16 | RUN uv python install 3.12 17 | 18 | # Install Node.js and other required dependencies 19 | RUN apt-get update && apt-get install -y --no-install-recommends \ 20 | nodejs \ 21 | npm \ 22 | ca-certificates \ 23 | curl \ 24 | && rm -rf /var/lib/apt/lists/* 25 | 26 | # Set up Node.js environment 27 | WORKDIR /app/webapp 28 | COPY webapp/package.json webapp/package-lock.json /app/webapp/ 29 | RUN npm ci 30 | 31 | # Set up Python environment with UV 32 | WORKDIR /app 33 | COPY README.md pyproject.toml uv.lock /app/ 34 | COPY pipelines /app/pipelines 35 | RUN uv sync 36 | 37 | # Copy pre-built database and pmtiles 38 | COPY database/data.duckdb /app/database/data.duckdb 39 | COPY database/cache/*.pmtiles /app/public/pmtiles/ 40 | 41 | # Create trimmed database for website 42 | RUN uv run pipelines/run.py run trim_database_for_website --output-file=database/data_for_website.duckdb 43 | 44 | # Copy next.js app and build it 45 | WORKDIR /app/webapp 46 | COPY webapp /app/webapp 47 | ENV NEXT_TELEMETRY_DISABLED=1 48 | ENV NODE_ENV=production 49 | ENV NEXT_PUBLIC_PROTOMAPS_API_KEY=$NEXT_PUBLIC_PROTOMAPS_API_KEY 50 | ENV DUCKDB_PATH="/app/database/data_for_website.duckdb" 51 | RUN npm run build 52 | 53 | 54 | 55 | # Runner stage - only contains the necessary runtime files 56 | FROM debian:bookworm-slim AS runner 57 | 58 | # Define build argument for API key 59 | ARG NEXT_PUBLIC_PROTOMAPS_API_KEY 60 | 61 | # Install Node.js (minimal dependencies for runtime) 62 | RUN apt-get update && apt-get install -y --no-install-recommends \ 63 | nodejs \ 64 | ca-certificates \ 65 | && rm -rf /var/lib/apt/lists/* 66 | 67 | # Create non-root user 68 | RUN addgroup --system --gid 1000 appgroup && \ 69 | adduser --system --uid 1000 appuser 70 | 71 | WORKDIR /app 72 | 73 | # Create directories 74 | RUN mkdir -p /app/database /app/public/pmtiles 75 | RUN chown -R appuser:appgroup /app 76 | 77 | # Copy webapp files 78 | COPY --from=builder --chown=appuser:appgroup /app/webapp/.next/standalone /app 79 | COPY --from=builder --chown=appuser:appgroup /app/webapp/.next/static /app/.next/static 80 | COPY --from=builder --chown=appuser:appgroup /app/webapp/public /app/public 81 | 82 | # Copy database and pmtiles 83 | COPY --from=builder --chown=appuser:appgroup /app/database/data_for_website.duckdb /app/database/data_for_website.duckdb 84 | COPY --from=builder --chown=appuser:appgroup /app/public/pmtiles/ /app/public/pmtiles/ 85 | 86 | # Set environment variables 87 | ENV NODE_ENV=production 88 | ENV NEXT_TELEMETRY_DISABLED=1 89 | ENV PORT=8080 90 | ENV HOSTNAME="0.0.0.0" 91 | ENV 
NEXT_PUBLIC_PROTOMAPS_API_KEY=$NEXT_PUBLIC_PROTOMAPS_API_KEY 92 | ENV DUCKDB_PATH="/app/database/data_for_website.duckdb" 93 | ENV HOME="/app" 94 | 95 | # Switch to non-root user 96 | USER appuser 97 | 98 | # Expose the port 99 | EXPOSE 8080 100 | 101 | # Start the application 102 | CMD ["node", "server.js"] -------------------------------------------------------------------------------- /dbt_/models/website/web__resultats_udi.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | periodes AS ( 3 | SELECT unnest(ARRAY[ 4 | 'bilan_annuel_2020', 5 | 'bilan_annuel_2021', 6 | 'bilan_annuel_2022', 7 | 'bilan_annuel_2023', 8 | 'bilan_annuel_2024', 9 | 'bilan_annuel_2025', 10 | 'dernier_prel' 11 | ]) AS periode 12 | ), 13 | 14 | categories AS ( 15 | SELECT unnest(ARRAY[ 16 | 'cvm', 17 | 'pfas', 18 | 'sub_indus_perchlorate', 19 | -- Les résultats pour le 1,4 dioxane sont ignorés pour l'instant 20 | --'sub_indus_14dioxane', 21 | 'pesticide', 22 | 'sub_active', 23 | 'metabolite', 24 | 'metabolite_esa_metolachlore', 25 | 'metabolite_chlorothalonil_r471811', 26 | 'metabolite_chloridazone_desphenyl', 27 | 'metabolite_chloridazone_methyl_desphenyl', 28 | 'metabolite_atrazine_desethyl', 29 | --'metaux_lourds_as', 30 | --'metaux_lourds_pb', 31 | 'nitrate', 32 | 'tous' 33 | ]) AS categorie 34 | ), 35 | 36 | udi AS ( 37 | SELECT 38 | cdreseau, 39 | nomreseaux 40 | FROM 41 | {{ ref('int__udi') }} 42 | ), 43 | 44 | -- Cross join to ensure all combinations exist 45 | udi_periodes_categories AS ( 46 | SELECT 47 | u.cdreseau, 48 | u.nomreseaux, 49 | p.periode, 50 | categories.categorie 51 | FROM 52 | udi AS u 53 | CROSS JOIN 54 | periodes AS p 55 | CROSS JOIN 56 | categories 57 | ), 58 | 59 | -- Append results from 'tous' category (in another model to avoid circular dependency) 60 | results AS ( 61 | SELECT 62 | cdreseau, 63 | periode, 64 | categorie, 65 | resultat, 66 | ratio, 67 | date_dernier_prel, 68 | nb_parametres, 69 | nb_prelevements, 70 | nb_sup_valeur_sanitaire, 71 | parametres_detectes 72 | FROM {{ ref('int__union_resultats_udi') }} 73 | UNION ALL 74 | SELECT 75 | cdreseau, 76 | periode, 77 | categorie, 78 | null AS resultat, 79 | ratio, 80 | null AS date_dernier_prel, 81 | null AS nb_parametres, 82 | nb_prelevements, 83 | nb_sup_valeur_sanitaire, 84 | null AS parametres_detectes 85 | FROM {{ ref('int__resultats_tous_udi_annuel') }} 86 | UNION ALL 87 | SELECT 88 | cdreseau, 89 | periode, 90 | categorie, 91 | resultat, 92 | null AS ratio, 93 | date_dernier_prel, 94 | nb_parametres, 95 | null AS nb_prelevements, 96 | null AS nb_sup_valeur_sanitaire, 97 | null AS parametres_detectes 98 | FROM {{ ref('int__resultats_tous_udi_dernier') }} 99 | ) 100 | 101 | -- Final output with all UDI-periodes-categories combinations 102 | SELECT 103 | upc.cdreseau, 104 | upc.nomreseaux, 105 | upc.periode, 106 | upc.categorie, 107 | r.resultat, 108 | r.ratio, 109 | r.date_dernier_prel, 110 | r.nb_parametres, 111 | r.nb_prelevements, 112 | r.nb_sup_valeur_sanitaire, 113 | r.parametres_detectes 114 | FROM 115 | udi_periodes_categories AS upc 116 | LEFT JOIN 117 | results AS r 118 | ON 119 | upc.cdreseau = r.cdreseau 120 | AND upc.periode = r.periode 121 | AND upc.categorie = r.categorie 122 | ORDER BY 123 | upc.cdreseau, 124 | upc.periode, 125 | r.categorie 126 | -------------------------------------------------------------------------------- /dbt_/models/website/web__resultats_communes.sql: -------------------------------------------------------------------------------- 1 | WITH 
2 | periodes AS ( 3 | SELECT unnest(ARRAY[ 4 | 'bilan_annuel_2020', 5 | 'bilan_annuel_2021', 6 | 'bilan_annuel_2022', 7 | 'bilan_annuel_2023', 8 | 'bilan_annuel_2024', 9 | 'bilan_annuel_2025', 10 | 'dernier_prel' 11 | ]) AS periode 12 | ), 13 | 14 | categories AS ( 15 | SELECT unnest(ARRAY[ 16 | 'cvm', 17 | 'pfas', 18 | 'sub_indus_perchlorate', 19 | -- Les résultats pour le 1,4 dioxane sont ignorés pour l'instant 20 | --'sub_indus_14dioxane', 21 | 'pesticide', 22 | 'sub_active', 23 | 'metabolite', 24 | 'metabolite_esa_metolachlore', 25 | 'metabolite_chlorothalonil_r471811', 26 | 'metabolite_chloridazone_desphenyl', 27 | 'metabolite_chloridazone_methyl_desphenyl', 28 | 'metabolite_atrazine_desethyl', 29 | --'metaux_lourds_as', 30 | --'metaux_lourds_pb', 31 | 'nitrate', 32 | 'tous' 33 | ]) AS categorie 34 | ), 35 | 36 | cog_communes AS ( 37 | SELECT 38 | com AS commune_code_insee, 39 | libelle AS commune_nom 40 | FROM {{ ref("stg_communes__cog") }} 41 | WHERE typecom = 'COM' 42 | ), 43 | 44 | -- Cross join to ensure all combinations exist 45 | communes_periodes_categories AS ( 46 | SELECT 47 | cog.commune_code_insee, 48 | cog.commune_nom, 49 | p.periode, 50 | categories.categorie 51 | FROM 52 | cog_communes AS cog 53 | CROSS JOIN 54 | periodes AS p 55 | CROSS JOIN 56 | categories 57 | ), 58 | 59 | -- Append results from 'tous' category (in another model to avoid circular dependency) 60 | results AS ( 61 | SELECT 62 | inseecommune, 63 | periode, 64 | categorie, 65 | resultat, 66 | ratio, 67 | date_dernier_prel, 68 | nb_parametres, 69 | nb_prelevements, 70 | nb_sup_valeur_sanitaire, 71 | parametres_detectes 72 | FROM {{ ref('int__union_resultats_commune') }} 73 | UNION ALL 74 | SELECT 75 | inseecommune, 76 | periode, 77 | categorie, 78 | null AS resultat, 79 | ratio, 80 | null AS date_dernier_prel, 81 | null AS nb_parametres, 82 | nb_prelevements, 83 | nb_sup_valeur_sanitaire, 84 | null AS parametres_detectes 85 | FROM {{ ref('int__resultats_tous_commune_annuel') }} 86 | UNION ALL 87 | SELECT 88 | inseecommune, 89 | periode, 90 | categorie, 91 | resultat, 92 | null AS ratio, 93 | date_dernier_prel, 94 | nb_parametres, 95 | null AS nb_prelevements, 96 | null AS nb_sup_valeur_sanitaire, 97 | null AS parametres_detectes 98 | FROM {{ ref('int__resultats_tous_commune_dernier') }} 99 | ) 100 | 101 | -- Final output with all inseecommune-periodes-categories combinations 102 | SELECT 103 | cpc.commune_code_insee, 104 | cpc.commune_nom, 105 | cpc.periode, 106 | cpc.categorie, 107 | r.resultat, 108 | r.ratio, 109 | r.date_dernier_prel, 110 | r.nb_parametres, 111 | r.nb_prelevements, 112 | r.nb_sup_valeur_sanitaire, 113 | r.parametres_detectes 114 | FROM 115 | communes_periodes_categories AS cpc 116 | LEFT JOIN 117 | results AS r 118 | ON 119 | cpc.commune_code_insee = r.inseecommune 120 | AND cpc.periode = r.periode 121 | AND cpc.categorie = r.categorie 122 | ORDER BY 123 | cpc.commune_code_insee, 124 | cpc.periode, 125 | r.categorie 126 | -------------------------------------------------------------------------------- /pipelines/tasks/config/config_edc.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | 4 | def get_edc_config() -> Dict: 5 | """ 6 | Returns various configuration for processing the EDC (Eau distribuée par commune) datasets. 
7 | The data comes from https://www.data.gouv.fr/fr/datasets/resultats-du-controle-sanitaire-de-leau-distribuee-commune-par-commune/ 8 | For each year a dataset is downloadable on a URL like this (ex. 2024): 9 | https://www.data.gouv.fr/fr/datasets/r/84a67a3b-08a7-4001-98e6-231c74a98139 10 | :return: A dict with the config used for processing. 11 | The "source" part is related to the data.gouv datasource 12 | The "files" part is related to the extracted files information and sql table names 13 | """ 14 | 15 | edc_config = { 16 | "source": { 17 | "base_url": "https://www.data.gouv.fr/fr/datasets/r/", 18 | "available_years": [ 19 | # "2016", 20 | # "2017", 21 | # "2018", 22 | # "2019", it was decided to use dataset from 2020 23 | "2020", 24 | "2021", 25 | "2022", 26 | "2023", 27 | "2024", 28 | "2025", 29 | ], 30 | "yearly_files_infos": { 31 | "2025": { 32 | "id": "7e38c236-dd3c-455e-a728-f0ecb84b1a7c", 33 | "zipfile": "dis-2025.zip", 34 | }, 35 | "2024": { 36 | "id": "a631e486-c790-42d0-8368-6a42b1a3dc1d", 37 | "zipfile": "dis-2024.zip", 38 | }, 39 | "2023": { 40 | "id": "c89dec4a-d985-447c-a102-75ba814c398e", 41 | "zipfile": "dis-2023.zip", 42 | }, 43 | "2022": { 44 | "id": "a97b6074-c4dd-4ef2-8922-b0cf04dbff9a", 45 | "zipfile": "dis-2022.zip", 46 | }, 47 | "2021": { 48 | "id": "d2b432cc-3761-44d3-8e66-48bc15300bb5", 49 | "zipfile": "dis-2021.zip", 50 | }, 51 | "2020": { 52 | "id": "a6cb4fea-ef8c-47a5-acb3-14e49ccad801", 53 | "zipfile": "dis-2020.zip", 54 | }, 55 | "2019": { 56 | "id": "861f2a7d-024c-4bf0-968b-9e3069d9de07", 57 | "zipfile": "dis-2019.zip", 58 | }, 59 | "2018": { 60 | "id": "0513b3c0-dc18-468d-a969-b3508f079792", 61 | "zipfile": "dis-2018.zip", 62 | }, 63 | "2017": { 64 | "id": "5785427b-3167-49fa-a581-aef835f0fb04", 65 | "zipfile": "dis-2017.zip", 66 | }, 67 | "2016": { 68 | "id": "483c84dd-7912-483b-b96f-4fa5e1d8651f", 69 | "zipfile": "dis-2016.zip", 70 | }, 71 | }, 72 | }, 73 | "files": { 74 | "communes": { 75 | "file_name_prefix": "DIS_COM_UDI_", 76 | "file_extension": ".txt", 77 | "table_name": "edc_communes", 78 | }, 79 | "prelevements": { 80 | "file_name_prefix": "DIS_PLV_", 81 | "file_extension": ".txt", 82 | "table_name": "edc_prelevements", 83 | }, 84 | "resultats": { 85 | "file_name_prefix": "DIS_RESULT_", 86 | "file_extension": ".txt", 87 | "table_name": "edc_resultats", 88 | }, 89 | }, 90 | } 91 | 92 | return edc_config 93 | --------------------------------------------------------------------------------
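For illustration, a minimal sketch of how this config might be consumed downstream (not a file from the repository): the helper names below are hypothetical, and the assumption that each yearly archive extracts to files named <file_name_prefix><year><file_extension> follows the prefixes above but is not guaranteed by this config alone.

from pipelines.tasks.config.config_edc import get_edc_config


def edc_download_url(year: str) -> str:
    """Build the data.gouv.fr download URL for a given year (base_url + resource id)."""
    source = get_edc_config()["source"]
    return f"{source['base_url']}{source['yearly_files_infos'][year]['id']}"


def edc_expected_file_names(year: str) -> dict[str, str]:
    """Map each dataset kind (communes, prelevements, resultats) to its assumed extracted file name."""
    files = get_edc_config()["files"]
    return {
        kind: f"{infos['file_name_prefix']}{year}{infos['file_extension']}"
        for kind, infos in files.items()
    }


# Hypothetical usage:
# edc_download_url("2024")
#   -> "https://www.data.gouv.fr/fr/datasets/r/a631e486-c790-42d0-8368-6a42b1a3dc1d"
# edc_expected_file_names("2024")
#   -> {"communes": "DIS_COM_UDI_2024.txt", "prelevements": "DIS_PLV_2024.txt", "resultats": "DIS_RESULT_2024.txt"}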