├── database
    └── .gitkeep
├── analytics
    ├── __init__.py
    └── notebooks
    │   └── __init__.py
├── dbt_
    ├── analyses
    │   └── .gitkeep
    ├── macros
    │   └── .gitkeep
    ├── seeds
    │   ├── .gitkeep
    │   └── schema.yml
    ├── snapshots
    │   └── .gitkeep
    ├── tests
    │   ├── .gitkeep
    │   ├── test_valeur_saniraire_2.sql
    │   ├── unique_datetimeprel_per_cdreseau_referenceprel.sql
    │   ├── test__coverage_20pfas_4pfas_98pct.sql
    │   ├── test_sub_active_results.sql
    │   ├── test_tous_results.sql
    │   ├── test_cvm_results.sql
    │   ├── test_nitrates_results.sql
    │   └── test_pfas_results.sql
    ├── .gitignore
    ├── packages.yml
    ├── profiles.yml
    ├── models
    │   ├── staging
    │   │   ├── communes
    │   │   │   ├── stg_communes__opendatasoft.sql
    │   │   │   ├── stg_communes__cog.sql
    │   │   │   └── _communes_models.yml
    │   │   ├── atlasante
    │   │   │   ├── stg_atlasante_udi_corse.sql
    │   │   │   ├── stg_atlasante_udi_2023.sql
    │   │   │   ├── stg_atlasante_udi_2024.sql
    │   │   │   └── _atlasante_models.yml
    │   │   └── edc
    │   │   │   ├── stg_edc__communes.sql
    │   │   │   ├── stg_edc__resultats.sql
    │   │   │   ├── stg_edc__prevelevements.sql
    │   │   │   └── val_traduite__docs.md
    │   ├── intermediate
    │   │   ├── int__udi.sql
    │   │   ├── int__commune_geom.sql
    │   │   ├── tous
    │   │   │   ├── int__resultats_tous_udi_annuel.sql
    │   │   │   ├── int__resultats_tous_commune_annuel.sql
    │   │   │   ├── int__resultats_tous_udi_dernier.sql
    │   │   │   └── int__resultats_tous_commune_dernier.sql
    │   │   ├── int__udi_geom.sql
    │   │   ├── int__lien_commune_cdreseau.sql
    │   │   ├── int__valeurs_de_reference.sql
    │   │   ├── int__lien_cdreseau_refreneceprel.sql
    │   │   ├── int__prelevements_uniques.sql
    │   │   ├── int__parametres_non_references.sql
    │   │   ├── nitrate
    │   │   │   ├── int__resultats_nitrate_udi_annuel.sql
    │   │   │   ├── int__resultats_nitrate_commune_annuel.sql
    │   │   │   ├── int__resultats_nitrate_udi_dernier.sql
    │   │   │   └── int__resultats_nitrate_commune_dernier.sql
    │   │   ├── pesticide
    │   │   │   ├── sub_active
    │   │   │   │   ├── int__resultats_sub_active_udi_annuel.sql
    │   │   │   │   ├── int__resultats_sub_active_commune_annuel.sql
    │   │   │   │   ├── int__resultats_sub_active_udi_dernier.sql
    │   │   │   │   └── int__resultats_sub_active_commune_dernier.sql
    │   │   │   └── metabolite
    │   │   │   │   ├── int__resultats_metabolite_udi_dernier.sql
    │   │   │   │   └── int__resultats_metabolite_commune_dernier.sql
    │   │   ├── cvm
    │   │   │   ├── int__resultats_cvm_udi_annuel.sql
    │   │   │   ├── int__resultats_cvm_commune_annuel.sql
    │   │   │   ├── int__resultats_cvm_udi_dernier.sql
    │   │   │   └── int__resultats_cvm_commune_dernier.sql
    │   │   ├── sub_indus
    │   │   │   ├── int__resultats_sub_indus_udi_annuel.sql
    │   │   │   ├── int__resultats_sub_indus_commune_annuel.sql
    │   │   │   ├── int__resultats_sub_indus_udi_dernier.sql
    │   │   │   └── int__resultats_sub_indus_commune_dernier.sql
    │   │   ├── metaux_lourds
    │   │   │   ├── int__resultats_metaux_lourds_udi_annuel.sql
    │   │   │   ├── int__resultats_metaux_lourds_commune_annuel.sql
    │   │   │   ├── int__resultats_metaux_lourds_udi_dernier.sql
    │   │   │   └── int__resultats_metaux_lourds_commune_dernier.sql
    │   │   ├── int__resultats_udi_communes.sql
    │   │   └── pfas
    │   │   │   ├── int__resultats_pfas_udi_annuel.sql
    │   │   │   └── int__resultats_pfas_commune_annuel.sql
    │   ├── sources
    │   │   └── __sources.yml
    │   └── website
    │   │   ├── web__stats_udi.sql
    │   │   ├── web__resultats_udi.sql
    │   │   └── web__resultats_communes.sql
    └── dbt_project.yml
├── .python-version
├── webapp
    ├── .prettierrc.json
    ├── app
    │   ├── duckdb-example
    │   │   ├── loading.tsx
    │   │   └── page.tsx
    │   ├── page.tsx
    │   ├── api
    │   │   ├── udi
    │   │   │   ├── dummy
    │   │   │   │   └── route.ts
    │   │   │   └── find
    │   │   │   │   └── route.tsx
    │   │   └── db-example
    │   │   │   └── route.ts
    │   ├── embed
    │   │   └── page.tsx
    │   ├── embed-external
    │   │   └── page.tsx
    │   ├── lib
    │   │   └── duckdb.ts
    │   ├── layout.tsx
    │   └── config.ts
    ├── public
    │   ├── images
    │   │   └── dfg.png
    │   └── fonts
    │   │   ├── raleway-v37-latin-300.woff2
    │   │   ├── raleway-v37-latin-600.woff2
    │   │   ├── raleway-v37-latin-700.woff2
    │   │   └── raleway-v37-latin-regular.woff2
    ├── postcss.config.mjs
    ├── lib
    │   ├── utils.ts
    │   ├── iframe-scroll.ts
    │   ├── mapLocale.ts
    │   └── property.ts
    ├── .env
    ├── eslint.config.mjs
    ├── components.json
    ├── tsconfig.json
    ├── .gitignore
    ├── components
    │   ├── ui
    │   │   ├── input.tsx
    │   │   ├── switch.tsx
    │   │   ├── hover-card.tsx
    │   │   ├── tooltip.tsx
    │   │   ├── popover.tsx
    │   │   ├── scroll-area.tsx
    │   │   ├── button.tsx
    │   │   └── card.tsx
    │   └── EmbedBanner.tsx
    ├── package.json
    ├── next.config.ts
    └── tailwind.config.ts
├── .sqlfluff
├── pipelines
    ├── config
    │   ├── .env.example
    │   ├── __init__.py
    │   └── config.py
    ├── tasks
    │   ├── __init__.py
    │   ├── config
    │   │   ├── config_insee.py
    │   │   ├── config_pmtiles.py
    │   │   ├── config_geojson.py
    │   │   ├── common.py
    │   │   ├── config_uploaded_geojson.py
    │   │   └── config_edc.py
    │   ├── client
    │   │   ├── core
    │   │   │   ├── https_to_duck_client.py
    │   │   │   └── https_client.py
    │   │   ├── commune_client.py
    │   │   ├── opendatasoft_client.py
    │   │   ├── pmtiles_processor.py
    │   │   └── uploaded_geojson_client.py
    │   ├── upload_database.py
    │   ├── generate_pmtiles.py
    │   ├── generate_pmtiles_legacy.py
    │   └── download_pmtiles.py
    ├── utils
    │   ├── __init__.py
    │   ├── logger.py
    │   └── utils.py
    ├── __init__.py
    ├── test_pipelines.py
    └── notebooks
    │   ├── test_geojson_from_db.ipynb
    │   └── test_atlasante_udi.ipynb
├── .dockerignore
├── .vscode
    ├── extensions.json
    ├── tasks.json
    └── settings.json
├── Dockerfile.clevercloud
├── .github
    └── workflows
    │   ├── pre-commit.yaml
    │   ├── lint_nextjs.yml
    │   ├── test_dbt.yaml
    │   └── test_pipelines.yaml
├── .gitignore
├── LICENSE
├── pyproject.toml
├── .pre-commit-config.yaml
└── Dockerfile.unified


/database/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/analytics/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/dbt_/analyses/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/dbt_/macros/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/dbt_/seeds/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/dbt_/snapshots/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/dbt_/tests/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.12
2 | 


--------------------------------------------------------------------------------
/analytics/notebooks/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/webapp/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/.sqlfluff:
--------------------------------------------------------------------------------
1 | [sqlfluff]
2 | dialect = duckdb
3 | max_line_length = 100
4 | 


--------------------------------------------------------------------------------
/pipelines/config/.env.example:
--------------------------------------------------------------------------------
1 | SCW_ACCESS_KEY=MyKey
2 | SCW_SECRET_KEY=MySecret


--------------------------------------------------------------------------------
/pipelines/config/__init__.py:
--------------------------------------------------------------------------------
1 | # tasks/__init__.py
2 | 
3 | # Initialize the tasks package
4 | 


--------------------------------------------------------------------------------
/pipelines/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | # tasks/__init__.py
2 | 
3 | # Initialize the tasks package
4 | 


--------------------------------------------------------------------------------
/pipelines/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # tasks/__init__.py
2 | 
3 | # Initialize the tasks package
4 | 


--------------------------------------------------------------------------------
/dbt_/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | target/
3 | dbt_packages/
4 | logs/
5 | package-lock.yml
6 | .user.yml
7 | !seeds/*.csv


--------------------------------------------------------------------------------
/webapp/app/duckdb-example/loading.tsx:
--------------------------------------------------------------------------------
1 | export default function Loading() {
2 |   return "Loading...";
3 | }
4 | 


--------------------------------------------------------------------------------
/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | # init the pipelines package
2 | from .utils.logger import setup_logger
3 | 
4 | setup_logger()
5 | 


--------------------------------------------------------------------------------
/webapp/public/images/dfg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/images/dfg.png


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | **/node_modules
 2 | .git
 3 | .github
 4 | webapp/.next
 5 | .vscode
 6 | .env*
 7 | **/.env*
 8 | npm-debug.log
 9 | .DS_Store
10 | 


--------------------------------------------------------------------------------
/webapp/app/page.tsx:
--------------------------------------------------------------------------------
1 | import { redirect } from "next/navigation";
2 | 
3 | export default async function Home() {
4 |   redirect("/embed");
5 | }
6 | 


--------------------------------------------------------------------------------
/dbt_/packages.yml:
--------------------------------------------------------------------------------
1 | packages:
2 |   - package: calogica/dbt_expectations
3 |     version: 0.10.4
4 |   
5 |   - package: dbt-labs/dbt_utils
6 |     version: 1.3.0


--------------------------------------------------------------------------------
/webapp/public/fonts/raleway-v37-latin-300.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/fonts/raleway-v37-latin-300.woff2


--------------------------------------------------------------------------------
/webapp/public/fonts/raleway-v37-latin-600.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/fonts/raleway-v37-latin-600.woff2


--------------------------------------------------------------------------------
/webapp/public/fonts/raleway-v37-latin-700.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/fonts/raleway-v37-latin-700.woff2


--------------------------------------------------------------------------------
/webapp/public/fonts/raleway-v37-latin-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataforgoodfr/13_pollution_eau/HEAD/webapp/public/fonts/raleway-v37-latin-regular.woff2


--------------------------------------------------------------------------------
/webapp/postcss.config.mjs:
--------------------------------------------------------------------------------
1 | /** @type {import('postcss-load-config').Config} */
2 | const config = {
3 |   plugins: {
4 |     tailwindcss: {},
5 |   },
6 | };
7 | 
8 | export default config;
9 | 


--------------------------------------------------------------------------------
/dbt_/profiles.yml:
--------------------------------------------------------------------------------
 1 | dbt_:
 2 |   outputs:
 3 |     dev:
 4 |       type: duckdb
 5 |       path: ../database/data.duckdb
 6 |       threads: 1
 7 |       extensions:
 8 |         - spatial
 9 |   target: dev
10 | 


--------------------------------------------------------------------------------
/webapp/lib/utils.ts:
--------------------------------------------------------------------------------
1 | import { clsx, type ClassValue } from "clsx";
2 | import { twMerge } from "tailwind-merge";
3 | 
4 | export function cn(...inputs: ClassValue[]) {
5 |   return twMerge(clsx(inputs));
6 | }
7 | 


--------------------------------------------------------------------------------
/dbt_/models/staging/communes/stg_communes__opendatasoft.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 |     com_code[1]::VARCHAR AS com_code,
3 |     com_name[1]::VARCHAR AS com_name,
4 |     geom::GEOMETRY AS geom
5 | FROM {{ source('communes', 'opendatasoft_communes') }}
6 | 


--------------------------------------------------------------------------------
/webapp/app/api/udi/dummy/route.ts:
--------------------------------------------------------------------------------
1 | import { NextResponse } from "next/server";
2 | import { mockData } from "@/app/lib/mock-data";
3 | 
4 | export async function GET() {
5 |   return NextResponse.json(mockData["UDI12345"], { status: 200 });
6 | }
7 | 


--------------------------------------------------------------------------------
/dbt_/models/staging/atlasante/stg_atlasante_udi_corse.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 |     gid::INTEGER AS gid,
3 |     cn_udi::VARCHAR AS cn_udi,
4 |     nom_udi::VARCHAR AS nom_udi,
5 |     geom::GEOMETRY AS geom,
6 |     ingestion_date::DATE AS ingestion_date
7 | FROM {{ source('atlasante', 'atlasante_udi_corse') }}
8 | 


--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "recommendations": [
 3 |     "ms-python.python",
 4 |     "ms-toolsai.jupyter",
 5 |     "charliermarsh.ruff",
 6 |     "github.vscode-pull-request-github",
 7 |     "actboy168.tasks",
 8 |     "dbaeumer.vscode-eslint",
 9 |     "esbenp.prettier-vscode"
10 |   ]
11 | }
12 | 


--------------------------------------------------------------------------------
/Dockerfile.clevercloud:
--------------------------------------------------------------------------------
 1 | # Dockerfile for Clever Cloud - pulls pre-built unified image
 2 | 
 3 | ARG IMAGE_TAG=latest
 4 | FROM ghcr.io/dataforgoodfr/13_pollution_eau/pollution-eau-unified:${IMAGE_TAG}
 5 | 
 6 | # Expose the port
 7 | EXPOSE 8080
 8 | 
 9 | # Use the same entrypoint as the unified image
10 | CMD ["node", "server.js"]


--------------------------------------------------------------------------------
/pipelines/utils/logger.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | 
 4 | def setup_logger(
 5 |     level=logging.INFO,
 6 |     log_format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
 7 | ):
 8 |     "config log"
 9 |     logging.basicConfig(level=level, format=log_format)
10 | 
11 | 
12 | def get_logger(name):
13 |     return logging.getLogger(name)
14 | 


--------------------------------------------------------------------------------
/dbt_/models/staging/atlasante/stg_atlasante_udi_2023.sql:
--------------------------------------------------------------------------------
 1 | SELECT
 2 |     gid::INTEGER AS gid,
 3 |     code_udi::VARCHAR AS code_udi,
 4 |     ins_nom::VARCHAR AS ins_nom,
 5 |     uge_nom::VARCHAR AS uge_nom,
 6 |     udi_pop::VARCHAR AS udi_pop,
 7 |     geom::GEOMETRY AS geom,
 8 |     ingestion_date::DATE AS ingestion_date
 9 | FROM {{ source('atlasante', 'atlasante_udi_2023') }}
10 | 


--------------------------------------------------------------------------------
/dbt_/models/staging/atlasante/stg_atlasante_udi_2024.sql:
--------------------------------------------------------------------------------
 1 | SELECT
 2 |     gid::INTEGER AS gid,
 3 |     code_udi::VARCHAR AS code_udi,
 4 |     ins_nom::VARCHAR AS ins_nom,
 5 |     uge_nom::VARCHAR AS uge_nom,
 6 |     udi_pop::VARCHAR AS udi_pop,
 7 |     geom::GEOMETRY AS geom,
 8 |     ingestion_date::DATE AS ingestion_date
 9 | FROM {{ source('atlasante', 'atlasante_udi_2024') }}
10 | 


--------------------------------------------------------------------------------
/webapp/.env:
--------------------------------------------------------------------------------
1 | # Utiliser le fichier .env est pour ajouter des variables non-secrètes
2 | # voir https://nextjs.org/docs/pages/building-your-application/configuring/environment-variables
3 | 
4 | # variables disponible sur le navigateur et le serveur
5 | NEXT_PUBLIC_PROTOMAPS_API_KEY=707d8bc70b393fc0
6 | 
7 | # variables disponibles uniquement sur le côté serveur
8 | DUCKDB_PATH=../database/data.duckdb
9 | 


--------------------------------------------------------------------------------
/dbt_/tests/test_valeur_saniraire_2.sql:
--------------------------------------------------------------------------------
 1 | -- we make sure that valeur_sanitaire_2 is > valeur_sanitaire_1
 2 | -- when they are not null
 3 | -- cf int__resultats_sub_indus_udi_dernier.sql why it is required
 4 | 
 5 | 
 6 | select *
 7 | from {{ ref('int__valeurs_de_reference') }}
 8 | where
 9 |     valeur_sanitaire_1 is not null
10 |     and valeur_sanitaire_2 is not null
11 |     and valeur_sanitaire_1 >= valeur_sanitaire_2
12 | 


--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "2.0.0",
 3 |   "tasks": [
 4 |     {
 5 |       "label": "Download database",
 6 |       "type": "shell",
 7 |       "command": "uv run pipelines/run.py run download_database",
 8 |       "group": "none",
 9 |       "icon": {
10 |         "id": "cloud-download"
11 |       },
12 |       "presentation": {
13 |         "reveal": "always",
14 |         "panel": "new"
15 |       }
16 |     }
17 |   ],
18 | }


--------------------------------------------------------------------------------
/webapp/lib/iframe-scroll.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Utility function to request the parent window to scroll the iframe into view
 3 |  * This should be called when user interacts with map components
 4 |  */
 5 | export function scrollIframeToFullscreen() {
 6 |   // Check if we're in an iframe
 7 |   if (window.self !== window.top) {
 8 |     // Send message to parent window to scroll this iframe into view
 9 |     window.parent.postMessage({ type: "scrollToIframe" }, "*");
10 |   }
11 | }
12 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__udi.sql:
--------------------------------------------------------------------------------
 1 | select
 2 |     cdreseau,
 3 |     string_agg(distinct inseecommune) as inseecommunes,
 4 |     string_agg(distinct quartiers) as quartiers,
 5 |     string_agg(distinct nomreseaux) as nomreseaux
 6 | 
 7 | 
 8 | from {{ ref('int__lien_commune_cdreseau') }}
 9 | group by cdreseau
10 | 
11 | -- TODO: on pourrait garder une partition avec "de_partition".
12 | -- A noter néanmoins que la seule dépendance à ce modèle (web__resultats_udi)
13 | -- ne le requiert pas.
14 | 


--------------------------------------------------------------------------------
/pipelines/tasks/config/config_insee.py:
--------------------------------------------------------------------------------
 1 | def get_insee_config() -> dict:
 2 |     """Configuration for La Poste dataset"""
 3 |     return {
 4 |         "source": {
 5 |             "base_url": "https://www.insee.fr/fr/statistiques/fichier/7766585/",
 6 |             "id": "v_commune_2024.csv",
 7 |             "datetime": "20240220",
 8 |         },
 9 |         "file": {
10 |             "file_name": "insee_communes_2024.csv",
11 |             "table_name": "cog_communes",
12 |         },
13 |     }
14 | 


--------------------------------------------------------------------------------
/webapp/eslint.config.mjs:
--------------------------------------------------------------------------------
 1 | import { dirname } from "path";
 2 | import { fileURLToPath } from "url";
 3 | import { FlatCompat } from "@eslint/eslintrc";
 4 | 
 5 | const __filename = fileURLToPath(import.meta.url);
 6 | const __dirname = dirname(__filename);
 7 | 
 8 | const compat = new FlatCompat({
 9 |   baseDirectory: __dirname,
10 | });
11 | 
12 | const eslintConfig = [
13 |   ...compat.extends("next/core-web-vitals", "next/typescript", "prettier"),
14 | ];
15 | 
16 | export default eslintConfig;
17 | 


--------------------------------------------------------------------------------
/dbt_/models/staging/edc/stg_edc__communes.sql:
--------------------------------------------------------------------------------
 1 | SELECT
 2 |     inseecommune::VARCHAR(5) AS inseecommune,
 3 |     nomcommune::VARCHAR AS nomcommune,
 4 |     quartier::VARCHAR AS quartier,
 5 |     cdreseau::VARCHAR(9) AS cdreseau,
 6 |     nomreseau::VARCHAR AS nomreseau,
 7 |     debutalim::VARCHAR AS debutalim,
 8 |     de_partition::SMALLINT AS de_partition,
 9 |     de_ingestion_date::DATE AS de_ingestion_date,
10 |     de_dataset_datetime::VARCHAR AS de_dataset_datetime
11 | FROM {{ source('edc', 'edc_communes') }}
12 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__commune_geom.sql:
--------------------------------------------------------------------------------
 1 | WITH ranked_communes AS (
 2 |     SELECT
 3 |         com_code,
 4 |         com_name,
 5 |         geom,
 6 |         ROW_NUMBER() OVER (
 7 |             PARTITION BY com_code
 8 |             ORDER BY com_code
 9 |         ) AS row_num
10 |     FROM {{ ref('stg_communes__opendatasoft') }}
11 |     WHERE com_code IS NOT NULL AND com_code != ''
12 | )
13 | 
14 | SELECT
15 |     com_code,
16 |     com_name,
17 |     ST_ASGEOJSON(geom) AS geom
18 | FROM ranked_communes
19 | WHERE row_num = 1
20 | 


--------------------------------------------------------------------------------
/dbt_/models/staging/communes/stg_communes__cog.sql:
--------------------------------------------------------------------------------
 1 | SELECT
 2 |     TYPECOM::VARCHAR AS TYPECOM,
 3 |     COM::VARCHAR AS COM,
 4 |     REG::SMALLINT AS REG,
 5 |     DEP::VARCHAR AS DEP,
 6 |     CTCD::VARCHAR AS CTCD,
 7 |     ARR::VARCHAR AS ARR,
 8 |     TNCC::SMALLINT AS TNCC,
 9 |     NCC::VARCHAR AS NCC,
10 |     NCCENR::VARCHAR AS NCCENR,
11 |     LIBELLE::VARCHAR AS LIBELLE,
12 |     CAN::VARCHAR AS CAN,
13 |     COMPARENT::VARCHAR AS COMPARENT,
14 |     DE_PARTITION::SMALLINT AS DE_PARTITION
15 | FROM {{ source('communes', 'cog_communes') }}
16 | 


--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yaml:
--------------------------------------------------------------------------------
 1 | name: pre-commit
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   push:
 6 |     branches: [main]
 7 | 
 8 | jobs:
 9 |   pre-commit:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |     - name: Checkout code
13 |       uses: actions/checkout@v3
14 | 
15 |     - name: Install a specific version of uv
16 |       uses: astral-sh/setup-uv@v5
17 |       with:
18 |         version: ">=0.4.0"
19 |     - name: Install dependencies
20 |       run: uv sync 
21 |     - name: Run pre-commit
22 |       run: uv run pre-commit run --all-files


--------------------------------------------------------------------------------
/webapp/components.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://ui.shadcn.com/schema.json",
 3 |   "style": "new-york",
 4 |   "rsc": true,
 5 |   "tsx": true,
 6 |   "tailwind": {
 7 |     "config": "tailwind.config.ts",
 8 |     "css": "app/globals.css",
 9 |     "baseColor": "neutral",
10 |     "cssVariables": true,
11 |     "prefix": ""
12 |   },
13 |   "aliases": {
14 |     "components": "@/components",
15 |     "utils": "@/lib/utils",
16 |     "ui": "@/components/ui",
17 |     "lib": "@/lib",
18 |     "hooks": "@/hooks"
19 |   },
20 |   "iconLibrary": "lucide"
21 | }
22 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "[python]": {
 3 |     "editor.formatOnSave": true,
 4 |     "editor.defaultFormatter": "charliermarsh.ruff",
 5 |     "editor.codeActionsOnSave": {
 6 |       "source.organizeImports": "explicit"
 7 |     }
 8 |   },
 9 |   "notebook.formatOnSave.enabled": true,
10 |   "python.testing.pytestArgs": ["-sv"],
11 |   "python.testing.unittestEnabled": false,
12 |   "python.testing.pytestEnabled": true,
13 |   // Config for dorzey.vscode-sqlfluff extension
14 |   "sqlfluff.executablePath": "${workspaceFolder}/.venv/bin/sqlfluff",
15 |   "sqlfluff.linter.run": "onSave"
16 | }
17 | 


--------------------------------------------------------------------------------
/dbt_/models/sources/__sources.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | sources:
 4 |   - name: edc
 5 |     database: data
 6 |     schema: main
 7 |     tables:
 8 |       - name: edc_communes
 9 |       - name: edc_prelevements
10 |       - name: edc_resultats
11 |   - name: communes
12 |     database: data
13 |     schema: main
14 |     tables:
15 |       - name: cog_communes
16 |       - name: opendatasoft_communes
17 |   - name: atlasante
18 |     database: data
19 |     schema: main
20 |     tables:
21 |       - name: atlasante_udi_2023
22 |       - name: atlasante_udi_corse
23 |       - name: atlasante_udi_2024
24 | 


--------------------------------------------------------------------------------
/webapp/app/api/db-example/route.ts:
--------------------------------------------------------------------------------
 1 | import { fetchExample } from "@/app/lib/data";
 2 | 
 3 | // an api route fetching data
 4 | export async function GET() {
 5 |   try {
 6 |     const reader = await fetchExample();
 7 |     return Response.json({
 8 |       status: "OK",
 9 |       rows: reader.getRowObjectsJson(),
10 |       columnNames: reader.columnNames(),
11 |       columnTypes: reader.columnTypes(),
12 |       count: reader.columnCount,
13 |     });
14 |   } catch (error) {
15 |     console.error("Error while retrieving data:", error);
16 |     return Response.json({ error }, { status: 500 });
17 |   }
18 | }
19 | 


--------------------------------------------------------------------------------
/webapp/lib/mapLocale.ts:
--------------------------------------------------------------------------------
 1 | import { defaultLocale } from "maplibre-gl/src/ui/default_locale";
 2 | 
 3 | // French locale for MapLibre, inheriting from default locale
 4 | // Only overriding the CooperativeGesturesHandler messages
 5 | export const frenchLocale = {
 6 |   ...defaultLocale,
 7 |   // French overrides for CooperativeGesturesHandler
 8 |   "CooperativeGesturesHandler.WindowsHelpText":
 9 |     "Utilisez Ctrl + molette pour zoomer sur la carte",
10 |   "CooperativeGesturesHandler.MacHelpText":
11 |     "Utilisez ⌘ + molette pour zoomer sur la carte",
12 |   "CooperativeGesturesHandler.MobileHelpText":
13 |     "Utilisez deux doigts pour déplacer la carte",
14 | };
15 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | __pycache__/
 3 | *.py[cod]
 4 | .pytest_cache/
 5 | .mypy_cache/
 6 | .ipynb_checkpoints/
 7 | .venv/
 8 | .idea
 9 | .ruff_cache/
10 | 
11 | 
12 | # JavaScript/Node
13 | node_modules/
14 | npm-debug.log
15 | yarn-debug.log*
16 | yarn-error.log*
17 | build/
18 | dist/
19 | .next/
20 | 
21 | # Database & Data
22 | *.sqlite3
23 | *.db
24 | *.duckdb
25 | *.duckdb.*
26 | *.csv
27 | *.parquet
28 | *.xlsx
29 | *.xls
30 | *.pmtiles
31 | logs/
32 | 
33 | !database/.gitkeep
34 | cache/
35 | 
36 | # OS
37 | .DS_Store
38 | Thumbs.db
39 | *.tmp
40 | 
41 | # Environment & Secrets
42 | .env
43 | .env.*
44 | !.env.example
45 | *.pem
46 | secrets.yaml
47 | config.local.yaml


--------------------------------------------------------------------------------
/dbt_/tests/unique_datetimeprel_per_cdreseau_referenceprel.sql:
--------------------------------------------------------------------------------
 1 | -- Nous vérifiions que pour chaque couple cdreseau, referenceprel,
 2 | -- il n'y a qu'une seule date datetimeprel.
 3 | -- En effet, pour trouver tous les paramètres analysés lors du
 4 | -- prélèvement le plus récent, on se base sur la date datetimeprel.
 5 | -- Si on a plusieurs dates pour un même prélèvement, on ne peut pas
 6 | -- savoir quel est le bon.
 7 | -- 
 8 | -- cf _int__resultats_metabolite_divers_udi_dernier.sql
 9 | 
10 | SELECT
11 |     cdreseau,
12 |     referenceprel,
13 |     count(DISTINCT datetimeprel) AS count_datetimeprel
14 | FROM
15 |     {{ ref('int__resultats_udi_communes') }}
16 | GROUP BY 1, 2
17 | HAVING count(DISTINCT datetimeprel) > 1
18 | 


--------------------------------------------------------------------------------
/webapp/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2017",
 4 |     "lib": ["dom", "dom.iterable", "esnext"],
 5 |     "allowJs": true,
 6 |     "skipLibCheck": true,
 7 |     "strict": true,
 8 |     "noEmit": true,
 9 |     "esModuleInterop": true,
10 |     "module": "esnext",
11 |     "moduleResolution": "bundler",
12 |     "resolveJsonModule": true,
13 |     "isolatedModules": true,
14 |     "jsx": "preserve",
15 |     "incremental": true,
16 |     "plugins": [
17 |       {
18 |         "name": "next"
19 |       }
20 |     ],
21 |     "paths": {
22 |       "@/*": ["./*"]
23 |     }
24 |   },
25 |   "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
26 |   "exclude": ["node_modules"]
27 | }
28 | 


--------------------------------------------------------------------------------
/webapp/app/embed/page.tsx:
--------------------------------------------------------------------------------
 1 | import PollutionMap from "@/components/PollutionMap";
 2 | import { fetchPollutionStats, fetchParameterValues } from "../lib/data";
 3 | 
 4 | // Mise en cache de la page pour 24 heures
 5 | export const revalidate = 86400;
 6 | 
 7 | export default async function Embed() {
 8 |   const stats = await fetchPollutionStats();
 9 |   const parameterValues = await fetchParameterValues();
10 | 
11 |   return (
12 |     <div className="flex flex-col min-h-screen w-screen h-screen">
13 |       <main className="flex-1 w-full h-full">
14 |         <PollutionMap
15 |           pollutionStats={stats}
16 |           parameterValues={parameterValues}
17 |           showBanner={false}
18 |         />
19 |       </main>
20 |     </div>
21 |   );
22 | }
23 | 


--------------------------------------------------------------------------------
/.github/workflows/lint_nextjs.yml:
--------------------------------------------------------------------------------
 1 | name: Lint Next.js
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches: [main]
 6 |     paths:
 7 |       - "webapp/**"
 8 | 
 9 | jobs:
10 |   lint:
11 |     name: Next.js Lint
12 |     runs-on: ubuntu-latest
13 | 
14 |     steps:
15 |       - name: Checkout repository
16 |         uses: actions/checkout@v4
17 | 
18 |       - name: Setup Node.js
19 |         uses: actions/setup-node@v4
20 |         with:
21 |           node-version: "20"
22 |           cache: "npm"
23 |           cache-dependency-path: ./webapp/package.json
24 | 
25 |       - name: Install dependencies
26 |         run: |
27 |           cd webapp
28 |           npm ci
29 | 
30 |       - name: Run Next.js lint
31 |         run: |
32 |           cd webapp
33 |           npm run lint
34 | 


--------------------------------------------------------------------------------
/webapp/.gitignore:
--------------------------------------------------------------------------------
 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
 2 | 
 3 | # dependencies
 4 | /node_modules
 5 | /.pnp
 6 | .pnp.*
 7 | .yarn/*
 8 | !.yarn/patches
 9 | !.yarn/plugins
10 | !.yarn/releases
11 | !.yarn/versions
12 | 
13 | # testing
14 | /coverage
15 | 
16 | # next.js
17 | /.next/
18 | /out/
19 | 
20 | # production
21 | /build
22 | 
23 | # misc
24 | .DS_Store
25 | *.pem
26 | 
27 | # debug
28 | npm-debug.log*
29 | yarn-debug.log*
30 | yarn-error.log*
31 | .pnpm-debug.log*
32 | 
33 | # next.js lets you commit env variables: https://nextjs.org/docs/pages/building-your-application/configuring/environment-variables#loading-environment-variables
34 | !.env*
35 | 
36 | # vercel
37 | .vercel
38 | 
39 | # typescript
40 | *.tsbuildinfo
41 | next-env.d.ts
42 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/tous/int__resultats_tous_udi_annuel.sql:
--------------------------------------------------------------------------------
 1 | SELECT
 2 |     cdreseau,
 3 |     'tous' AS categorie,
 4 |     periode,
 5 |     sum(round(ratio * nb_prelevements))::float / sum(nb_prelevements)::float AS ratio,
 6 |     sum(nb_prelevements) AS nb_prelevements,
 7 |     sum(nb_sup_valeur_sanitaire) AS nb_sup_valeur_sanitaire
 8 | 
 9 | FROM {{ ref('int__union_resultats_udi') }}
10 | WHERE
11 |     periode LIKE 'bilan_annuel%'
12 |     AND
13 |     categorie NOT IN (
14 |         'sub_active',
15 |         'metabolite',
16 |         'metabolite_esa_metolachlore',
17 |         'metabolite_chlorothalonil_r471811',
18 |         'metabolite_chloridazone_desphenyl',
19 |         'metabolite_chloridazone_methyl_desphenyl',
20 |         'metabolite_atrazine_desethyl'
21 |     )
22 | GROUP BY cdreseau, periode
23 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/tous/int__resultats_tous_commune_annuel.sql:
--------------------------------------------------------------------------------
 1 | SELECT
 2 |     inseecommune,
 3 |     'tous' AS categorie,
 4 |     periode,
 5 |     sum(round(ratio * nb_prelevements))::float / sum(nb_prelevements)::float AS ratio,
 6 |     sum(nb_prelevements) AS nb_prelevements,
 7 |     sum(nb_sup_valeur_sanitaire) AS nb_sup_valeur_sanitaire
 8 | 
 9 | FROM {{ ref('int__union_resultats_commune') }}
10 | WHERE
11 |     periode LIKE 'bilan_annuel%'
12 |     AND
13 |     categorie NOT IN (
14 |         'sub_active',
15 |         'metabolite',
16 |         'metabolite_esa_metolachlore',
17 |         'metabolite_chlorothalonil_r471811',
18 |         'metabolite_chloridazone_desphenyl',
19 |         'metabolite_chloridazone_methyl_desphenyl',
20 |         'metabolite_atrazine_desethyl'
21 |     )
22 | GROUP BY inseecommune, periode
23 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__udi_geom.sql:
--------------------------------------------------------------------------------
 1 | WITH combined_data AS (
 2 |     -- SELECT
 3 |     --     code_udi,
 4 |     --     geom
 5 |     -- FROM {{ ref("stg_atlasante_udi_2023") }}
 6 |     -- UNION ALL
 7 |     -- SELECT
 8 |     --     cn_udi AS code_udi,
 9 |     --     geom
10 |     -- FROM {{ ref("stg_atlasante_udi_corse") }}
11 | 
12 |     SELECT
13 |         code_udi,
14 |         geom
15 |     FROM {{ ref("stg_atlasante_udi_2024") }}
16 | ),
17 | 
18 | ranked_data AS (
19 |     SELECT
20 |         code_udi,
21 |         geom,
22 |         ROW_NUMBER() OVER (
23 |             PARTITION BY code_udi
24 |             ORDER BY code_udi
25 |         ) AS row_num
26 |     FROM combined_data
27 |     WHERE code_udi IS NOT null AND code_udi != ''
28 | )
29 | 
30 | SELECT
31 |     code_udi,
32 |     ST_ASGEOJSON(geom) AS geom
33 | FROM ranked_data
34 | WHERE row_num = 1
35 | 


--------------------------------------------------------------------------------
/webapp/components/ui/input.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react";
 2 | 
 3 | import { cn } from "@/lib/utils";
 4 | 
 5 | const Input = React.forwardRef<HTMLInputElement, React.ComponentProps<"input">>(
 6 |   ({ className, type, ...props }, ref) => {
 7 |     return (
 8 |       <input
 9 |         type={type}
10 |         className={cn(
11 |           "flex h-9 w-full rounded-md border border-input bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50 md:text-sm",
12 |           className,
13 |         )}
14 |         ref={ref}
15 |         {...props}
16 |       />
17 |     );
18 |   },
19 | );
20 | Input.displayName = "Input";
21 | 
22 | export { Input };
23 | 


--------------------------------------------------------------------------------
/webapp/app/embed-external/page.tsx:
--------------------------------------------------------------------------------
 1 | import PollutionMap from "@/components/PollutionMap";
 2 | import { fetchPollutionStats, fetchParameterValues } from "../lib/data";
 3 | 
 4 | // Mise en cache de la page pour 24 heures
 5 | export const revalidate = 86400;
 6 | 
 7 | export default async function EmbedExternal({
 8 |   searchParams,
 9 | }: {
10 |   searchParams: Promise<{ category?: string }>;
11 | }) {
12 |   const stats = await fetchPollutionStats();
13 |   const parameterValues = await fetchParameterValues();
14 |   const { category } = await searchParams;
15 | 
16 |   return (
17 |     <div className="flex flex-col min-h-screen w-screen h-screen">
18 |       <main className="flex-1 w-full h-full">
19 |         <PollutionMap
20 |           pollutionStats={stats}
21 |           parameterValues={parameterValues}
22 |           showBanner={true}
23 |           initialCategory={category}
24 |         />
25 |       </main>
26 |     </div>
27 |   );
28 | }
29 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__lien_commune_cdreseau.sql:
--------------------------------------------------------------------------------
 1 | {{
 2 |   config(
 3 |     materialized='table'
 4 |   )
 5 | }}
 6 | 
 7 | SELECT
 8 |     inseecommune,
 9 |     cdreseau,
10 |     de_partition,
11 |     -- Prenons toujours le même nom de commune pour une inseecommune donnée
12 |     MIN(nomcommune) AS nomcommune,
13 |     -- Agréger les différentes valeurs de quartier en une liste sans doublons
14 |     STRING_AGG(DISTINCT quartier) FILTER (
15 |         WHERE quartier IS NOT NULL AND quartier != '' AND quartier != '-'
16 |     ) AS quartiers,
17 |     -- Agréger les différentes valeurs de nomreseau en une liste sans doublons
18 |     STRING_AGG(DISTINCT nomreseau) FILTER (
19 |         WHERE nomreseau IS NOT NULL AND nomreseau != ''
20 |     ) AS nomreseaux,
21 |     -- Prendre la première date de début d'alimentation
22 |     MIN(debutalim) AS debutalim
23 | FROM
24 |     {{ ref('stg_edc__communes') }}
25 | GROUP BY
26 |     inseecommune,
27 |     cdreseau,
28 |     de_partition
29 | 


--------------------------------------------------------------------------------
/pipelines/test_pipelines.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | 
 3 | import pytest
 4 | 
 5 | 
 6 | @pytest.mark.parametrize(
 7 |     "task",
 8 |     ["build_database", "upload_database", "download_database"],
 9 | )
10 | def test_pipeline_task(task):
11 |     """
12 |     Test the specified pipeline task.
13 | 
14 |     This function tests the execution of the specified pipeline task from the
15 |     pipelines/run.py script. It ensures that the task runs without raising any exceptions.
16 | 
17 |     Args:
18 |         task (str): The name of the pipeline task to test.
19 |     """
20 |     commands_list = ["uv", "run", "pipelines/run.py", "run", task]
21 | 
22 |     # add options
23 |     if task == "build_database":
24 |         commands_list.extend(["--refresh-type", "last"])
25 |     elif task in ("download_database", "upload_database"):
26 |         commands_list.extend(["--env", "dev"])
27 | 
28 |     process = subprocess.run(commands_list)
29 | 
30 |     assert process.returncode == 0, f"{task} script failed"
31 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__valeurs_de_reference.sql:
--------------------------------------------------------------------------------
 1 | {{
 2 |   config(
 3 |     materialized='table'
 4 |   )
 5 | }}
 6 | 
 7 | SELECT
 8 |     cdparametresiseeaux,
 9 |     MAX(categorie_1) AS categorie_1,
10 |     MAX(categorie_2) AS categorie_2,
11 |     MAX(categorie_3) AS categorie_3,
12 |     MAX(limite_qualite) AS limite_qualite,
13 |     MAX(limite_qualite_unite) AS limite_qualite_unite,
14 |     MAX(limite_indicative) AS limite_indicative,
15 |     MAX(limite_indicative_unite) AS limite_indicative_unite,
16 |     MAX(valeur_sanitaire_1) AS valeur_sanitaire_1,
17 |     MAX(valeur_sanitaire_1_unite) AS valeur_sanitaire_1_unite,
18 |     MAX(valeur_sanitaire_2) AS valeur_sanitaire_2,
19 |     MAX(valeur_sanitaire_2_unite) AS valeur_sanitaire_2_unite,
20 |     MAX(web_label) AS web_label,
21 |     COUNT(*) AS nb_rows -- we enforce this to be 1 in a dbt test
22 | FROM
23 |     {{ ref('references_generations_futures') }}
24 | WHERE
25 |     cdparametresiseeaux IS NOT NULL
26 |     AND cdparametresiseeaux != ''
27 | GROUP BY
28 |     cdparametresiseeaux
29 | 


--------------------------------------------------------------------------------
/dbt_/models/staging/edc/stg_edc__resultats.sql:
--------------------------------------------------------------------------------
 1 | SELECT
 2 |     cddept::VARCHAR(3) AS cddept,
 3 |     referenceprel::VARCHAR(11) AS referenceprel,
 4 |     cdparametresiseeaux::VARCHAR(10) AS cdparametresiseeaux,
 5 |     cdparametre::INT AS cdparametre,
 6 |     libmajparametre::VARCHAR AS libmajparametre,
 7 |     libminparametre::VARCHAR AS libminparametre,
 8 |     libwebparametre::VARCHAR AS libwebparametre,
 9 |     qualitparam::VARCHAR(1) AS qualitparam,
10 |     insituana::VARCHAR(1) AS insituana,
11 |     rqana::VARCHAR(8) AS rqana,
12 |     cdunitereferencesiseeaux::VARCHAR(7) AS cdunitereferencesiseeaux,
13 |     cdunitereference::VARCHAR AS cdunitereference,
14 |     limitequal::VARCHAR AS limitequal,
15 |     refqual::VARCHAR AS refqual,
16 |     valtraduite::NUMERIC AS valtraduite,
17 |     casparam::VARCHAR AS casparam,
18 |     referenceanl::VARCHAR AS referenceanl,
19 |     de_partition::SMALLINT AS de_partition,
20 |     de_ingestion_date::DATE AS de_ingestion_date,
21 |     de_dataset_datetime::VARCHAR AS de_dataset_datetime
22 | FROM {{ source('edc', 'edc_resultats') }}
23 | 


--------------------------------------------------------------------------------
/webapp/app/lib/duckdb.ts:
--------------------------------------------------------------------------------
 1 | import { DuckDBInstance } from "@duckdb/node-api";
 2 | import fs from "fs";
 3 | import path from "path";
 4 | 
 5 | // Get database path from environment variable or use default
 6 | const envDbPath = process.env.DUCKDB_PATH;
 7 | const defaultDbPath = path.join(process.cwd(), "../database/data.duckdb");
 8 | const dbFilePath = envDbPath || defaultDbPath;
 9 | 
10 | console.log(`Using database path: ${dbFilePath}`);
11 | 
12 | // Check if the file exists
13 | if (!fs.existsSync(dbFilePath)) {
14 |   throw new Error(
15 |     `Database file not found at ${dbFilePath}. Please check that your DUCKDB_PATH environment variable is correctly set or that the default database exists.`,
16 |   );
17 | }
18 | 
19 | // Create DB instance
20 | const db = await DuckDBInstance.create(dbFilePath, {
21 |   access_mode: "READ_ONLY",
22 |   max_memory: "1GB",
23 |   threads: "4",
24 | });
25 | 
26 | // Load the geospatial extension
27 | const connection = await db.connect();
28 | await connection.run("INSTALL spatial;");
29 | await connection.run("LOAD spatial;");
30 | 
31 | export default db;
32 | 


--------------------------------------------------------------------------------
/.github/workflows/test_dbt.yaml:
--------------------------------------------------------------------------------
 1 | name: 🧪 Run DBT Tests
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches: [main]
 6 |     paths:
 7 |       - 'dbt_/**'
 8 | 
 9 | jobs:
10 |   test:
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |     - name: Checkout code
15 |       uses: actions/checkout@v3
16 | 
17 |     - name: Install a specific version of uv
18 |       uses: astral-sh/setup-uv@v5
19 |       with:
20 |         version: ">=0.4.0"
21 | 
22 |     - name: Install dependencies
23 |       run: uv sync
24 | 
25 |     - name: Download production database from Storage
26 |       run: |
27 |         uv run pipelines/run.py run download_database
28 | 
29 |     - name: Install dbt dependencies
30 |       run: |
31 |         cd dbt_
32 |         uv run dbt deps || { echo "dbt deps failed"; exit 1; }
33 | 
34 |     - name: Run dbt build
35 |       run: |
36 |         cd dbt_
37 |         uv run dbt build || { echo "dbt build failed"; exit 1; }
38 |         
39 |     - name: Run dbt docs generate
40 |       run: |
41 |         cd dbt_
42 |         uv run dbt docs generate || { echo "dbt docs generate failed"; exit 1; }
43 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__lien_cdreseau_refreneceprel.sql:
--------------------------------------------------------------------------------
 1 | with ranked as (
 2 |     select
 3 |         cdreseau,
 4 |         referenceprel,
 5 |         dateprel,
 6 |         heureprel,
 7 |         de_partition,
 8 |         -- Quand heureprel est null ou invalide, on choisit arbitrairement 09:00
 9 |         -- Examples:
10 |         -- referenceprel = '07700233713'
11 |         -- referenceprel = '02800116863'
12 |         COALESCE(
13 |             TRY_STRPTIME(
14 |                 dateprel || ' ' || REPLACE(heureprel, 'h', ':'),
15 |                 '%Y-%m-%d %H:%M'
16 |             ),
17 |             TRY_STRPTIME(
18 |                 dateprel || ' 09:00',
19 |                 '%Y-%m-%d %H:%M'
20 |             )
21 |         ) as datetimeprel,
22 |         ROW_NUMBER() over (
23 |             partition by cdreseau, referenceprel
24 |             order by
25 |                 dateprel,
26 |                 heureprel
27 |         ) as row_num
28 |     from
29 |         {{ ref('stg_edc__prevelevements') }}
30 | 
31 | )
32 | 
33 | select * exclude (row_num)
34 | from
35 |     ranked
36 | where
37 |     row_num = 1
38 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015-2024 Data4Good
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining
 4 | a copy of this software and associated documentation files (the
 5 | "Software"), to deal in the Software without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Software, and to
 8 | permit persons to whom the Software is furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/pipelines/tasks/client/core/https_to_duck_client.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | from pipelines.tasks.client.core.duckdb_client import DuckDBClient
 4 | from pipelines.tasks.client.core.https_client import HTTPSClient
 5 | from pipelines.tasks.config.common import (
 6 |     logger,
 7 | )
 8 | 
 9 | 
10 | class HTTPSToDuckDBClient(HTTPSClient, ABC):
11 |     def __init__(self, config, duckdb_client: DuckDBClient):
12 |         super().__init__(config["source"]["base_url"])
13 |         self.config = config
14 |         self.duckdb_client = duckdb_client
15 | 
16 |     def process_datasets(self):
17 |         logger.info(f"Processing {self.__class__.__name__} data")
18 |         self._download_data()
19 |         self._ingest_to_duckdb()
20 |         logger.info(f"Finishing processing {self.__class__.__name__} data")
21 | 
22 |     @abstractmethod
23 |     def _download_data(self):
24 |         """Download data - to be implemented by subclasses"""
25 |         pass
26 | 
27 |     @abstractmethod
28 |     def _ingest_to_duckdb(self):
29 |         """Ingest data to DuckDB - to be implemented by subclasses"""
30 |         pass
31 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__prelevements_uniques.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | prelevements_cdfirstreseauamont AS (
 3 |     SELECT DISTINCT
 4 |         referenceprel,
 5 |         dateprel,
 6 |         heureprel,
 7 |         conclusionprel,
 8 |         plvconformitebacterio,
 9 |         plvconformitechimique,
10 |         plvconformitereferencebact,
11 |         plvconformitereferencechim,
12 |         (CASE
13 |             WHEN cdreseauamont IS NULL THEN cdreseau
14 |             WHEN cdreseauamont IS NOT NULL THEN cdreseauamont
15 |         END) AS cdfirstreseauamont,
16 |         TRY_STRPTIME(
17 |             dateprel || ' ' || REPLACE(heureprel, 'h', ':'), '%Y-%m-%d %H:%M'
18 |         ) AS datetimeprel
19 |     FROM
20 |         {{ ref('stg_edc__prevelevements') }}
21 | ),
22 | 
23 | ranked AS (
24 |     SELECT
25 |         *,
26 |         ROW_NUMBER() OVER (
27 |             PARTITION BY referenceprel
28 |             ORDER BY
29 |                 dateprel,
30 |                 heureprel
31 |         ) AS row_num
32 |     FROM
33 |         prelevements_cdfirstreseauamont
34 | )
35 | 
36 | SELECT * EXCLUDE (row_num)
37 | FROM
38 |     ranked
39 | WHERE
40 |     row_num = 1
41 | 


--------------------------------------------------------------------------------
/dbt_/models/staging/edc/stg_edc__prevelevements.sql:
--------------------------------------------------------------------------------
 1 | SELECT
 2 |     cddept::VARCHAR(3) AS cddept,
 3 |     cdreseau::VARCHAR(9) AS cdreseau,
 4 |     inseecommuneprinc::VARCHAR(5) AS inseecommuneprinc,
 5 |     nomcommuneprinc::VARCHAR AS nomcommuneprinc,
 6 |     cdreseauamont::VARCHAR(9) AS cdreseauamont,
 7 |     nomreseauamont::VARCHAR AS nomreseauamont,
 8 |     REPLACE(pourcentdebit, ' %', '')::TINYINT AS pourcentdebit,
 9 |     referenceprel::VARCHAR(11) AS referenceprel,
10 |     dateprel::DATE AS dateprel,
11 |     heureprel::VARCHAR AS heureprel,
12 |     conclusionprel::VARCHAR AS conclusionprel,
13 |     ugelib::VARCHAR AS ugelib,
14 |     distrlib::VARCHAR AS distrlib,
15 |     moalib::VARCHAR AS moalib,
16 |     plvconformitebacterio::VARCHAR(1) AS plvconformitebacterio,
17 |     plvconformitechimique::VARCHAR(1) AS plvconformitechimique,
18 |     plvconformitereferencebact::VARCHAR(1) AS plvconformitereferencebact,
19 |     plvconformitereferencechim::VARCHAR(1) AS plvconformitereferencechim,
20 |     de_partition::SMALLINT AS de_partition,
21 |     de_ingestion_date::DATE AS de_ingestion_date,
22 |     de_dataset_datetime::VARCHAR AS de_dataset_datetime
23 | FROM {{ source('edc', 'edc_prelevements') }}
24 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__parametres_non_references.sql:
--------------------------------------------------------------------------------
 1 | -- Analyse des paramètres présents dans stg_edc__resultats mais absents des
 2 | -- références Générations Futures
 3 | 
 4 | WITH parametres_resultats AS (
 5 |     SELECT
 6 |         cdparametresiseeaux,
 7 |         STRING_AGG(DISTINCT cdparametre, ', ') AS cdparametre,
 8 |         STRING_AGG(DISTINCT libmajparametre, ', ') AS libmajparametre,
 9 |         STRING_AGG(DISTINCT libminparametre, ', ') AS libminparametre,
10 |         STRING_AGG(DISTINCT casparam, ', ') AS casparam,
11 |         COUNT(*) AS nb
12 |     FROM {{ ref('stg_edc__resultats') }}
13 |     WHERE cdparametresiseeaux IS NOT NULL
14 |     GROUP BY
15 |         cdparametresiseeaux
16 | ),
17 | 
18 | parametres_non_references AS (
19 |     SELECT pr.*
20 |     FROM parametres_resultats AS pr
21 |     LEFT JOIN {{ ref('int__valeurs_de_reference') }} AS vr
22 |         ON pr.cdparametresiseeaux = vr.cdparametresiseeaux
23 |     WHERE vr.cdparametresiseeaux IS NULL
24 | )
25 | 
26 | SELECT
27 |     cdparametresiseeaux,
28 |     cdparametre,
29 |     libmajparametre,
30 |     libminparametre,
31 |     casparam,
32 |     nb
33 | FROM parametres_non_references
34 | ORDER BY nb DESC, cdparametresiseeaux ASC
35 | 


--------------------------------------------------------------------------------
/pipelines/tasks/upload_database.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Upload database to S3 storage.
 3 | 
 4 | Args:
 5 |     - env (str): Environment to upload to ("dev" or "prod")
 6 | 
 7 | Examples:
 8 |     - upload_database --env dev  : Upload database to development environment
 9 |     - upload_database --env prod : Upload database to production environment
10 | """
11 | 
12 | from pipelines.config.config import get_s3_path
13 | from pipelines.tasks.config.common import DUCKDB_FILE
14 | from pipelines.utils.logger import get_logger
15 | from pipelines.utils.storage_client import ObjectStorageClient
16 | 
17 | logger = get_logger(__name__)
18 | 
19 | 
20 | def upload_database_to_storage(env):
21 |     """
22 |     Upload the database built locally to Storage Object depending on the environment
23 |     This requires setting the correct environment variables for the Scaleway credentials
24 |     """
25 |     s3 = ObjectStorageClient()
26 | 
27 |     db_path = DUCKDB_FILE  # Fichier local
28 |     s3_path = get_s3_path(env)  # Destination sur S3
29 | 
30 |     s3.upload_object(local_path=db_path, file_key=s3_path, public_read=True)
31 |     logger.info(f"✅ Base uploadée sur s3://{s3.bucket_name}/{s3_path}")
32 | 
33 | 
34 | def execute(env):
35 |     upload_database_to_storage(env)
36 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "pollution_eau"
 3 | version = "0.1.0"
 4 | description = "Add your description here"
 5 | readme = "README.md"
 6 | requires-python = ">=3.12"
 7 | dependencies = [
 8 |     "click>=8.1.8,<9",
 9 |     "duckdb==1.2.0",
10 |     "duckdb-engine==0.15.0",
11 |     "folium>=0.19.4",
12 |     "ipykernel>=6.29.5,<7",
13 |     "ipython>=8.31.0,<9",
14 |     "ipython-sql>=0.5.0,<1",
15 |     "jupysql>=0.10.17,<1",
16 |     "matplotlib>=3.10.0,<4",
17 |     "pandas>=2.2.3,<3",
18 |     "requests>=2.32.3,<3",
19 |     "boto3==1.34.11,<2",
20 |     "python-dotenv>=1.0.1,<2",
21 |     "ibis-framework[duckdb]==10.1.0",
22 |     "dbt-core>=1.9.2,<2",
23 |     "dbt-duckdb>=1.9.1,<2",
24 |     "tqdm>=4.67.1,<5",
25 |     "pre-commit>=4.1.0",
26 |     "ruff>=0.9.10",
27 |     "sqlfluff>=3.3.1,<4",
28 | ]
29 | 
30 | [dependency-groups]
31 | dev = [
32 |     "jupyter>=1.1.0,<2",
33 |     "pre-commit>=4.1.0,<5",
34 |     "pytest>=8.3.4,<9",
35 |     "pytest-cov>=6.0.0,<7"
36 | ]
37 | 
38 | [project.optional-dependencies]
39 | pmtiles = [
40 |   "tippecanoe>=2.72.0,<3"
41 | ]
42 | 
43 | [build-system]
44 | requires = ["hatchling"]
45 | build-backend = "hatchling.build"
46 | 
47 | [tool.hatch.build.targets.wheel]
48 | packages = ["pipelines"]
49 | 


--------------------------------------------------------------------------------
/pipelines/tasks/client/core/https_client.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from typing import Union
 3 | 
 4 | import requests
 5 | 
 6 | from pipelines.tasks.config.common import download_file_from_https, logger
 7 | 
 8 | 
 9 | class HTTPSClient:
10 |     def __init__(self, base_url: str):
11 |         self.base_url = base_url
12 | 
13 |     def download_file_from_https(self, path: str, filepath: Union[str, Path]):
14 |         """
15 |         Downloads a file from a https link to a local file.
16 |         :param path: The url path to download the file.
17 |         :param filepath: The path to the local file.
18 |         :return: Downloaded file filename.
19 |         """
20 |         url = self.base_url + path
21 |         return download_file_from_https(url, filepath)
22 | 
23 |     @staticmethod
24 |     def get_url_headers(url: str) -> dict:
25 |         """
26 |         Get url HTTP headers
27 |         :param url: static dataset url
28 |         :return: HTTP headers
29 |         """
30 |         try:
31 |             response = requests.head(url, timeout=5)
32 |             response.raise_for_status()
33 |             return dict(response.headers)
34 |         except requests.exceptions.RequestException as ex:
35 |             logger.error(f"Exception raised: {ex}")
36 |             return {}
37 | 


--------------------------------------------------------------------------------
/webapp/components/ui/switch.tsx:
--------------------------------------------------------------------------------
 1 | "use client";
 2 | 
 3 | import * as React from "react";
 4 | import * as SwitchPrimitives from "@radix-ui/react-switch";
 5 | 
 6 | import { cn } from "@/lib/utils";
 7 | 
 8 | const Switch = React.forwardRef<
 9 |   React.ElementRef<typeof SwitchPrimitives.Root>,
10 |   React.ComponentPropsWithoutRef<typeof SwitchPrimitives.Root>
11 | >(({ className, ...props }, ref) => (
12 |   <SwitchPrimitives.Root
13 |     className={cn(
14 |       "peer inline-flex h-5 w-9 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-input",
15 |       className,
16 |     )}
17 |     {...props}
18 |     ref={ref}
19 |   >
20 |     <SwitchPrimitives.Thumb
21 |       className={cn(
22 |         "pointer-events-none block h-4 w-4 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-4 data-[state=unchecked]:translate-x-0",
23 |       )}
24 |     />
25 |   </SwitchPrimitives.Root>
26 | ));
27 | Switch.displayName = SwitchPrimitives.Root.displayName;
28 | 
29 | export { Switch };
30 | 


--------------------------------------------------------------------------------
/webapp/components/ui/hover-card.tsx:
--------------------------------------------------------------------------------
 1 | "use client";
 2 | 
 3 | import * as React from "react";
 4 | import * as HoverCardPrimitive from "@radix-ui/react-hover-card";
 5 | 
 6 | import { cn } from "@/lib/utils";
 7 | 
 8 | const HoverCard = HoverCardPrimitive.Root;
 9 | 
10 | const HoverCardTrigger = HoverCardPrimitive.Trigger;
11 | 
12 | const HoverCardContent = React.forwardRef<
13 |   React.ElementRef<typeof HoverCardPrimitive.Content>,
14 |   React.ComponentPropsWithoutRef<typeof HoverCardPrimitive.Content>
15 | >(({ className, align = "center", sideOffset = 4, ...props }, ref) => (
16 |   <HoverCardPrimitive.Content
17 |     ref={ref}
18 |     align={align}
19 |     sideOffset={sideOffset}
20 |     className={cn(
21 |       "z-50 w-64 rounded-md border bg-popover p-4 text-popover-foreground shadow-md outline-none data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
22 |       className,
23 |     )}
24 |     {...props}
25 |   />
26 | ));
27 | HoverCardContent.displayName = HoverCardPrimitive.Content.displayName;
28 | 
29 | export { HoverCard, HoverCardTrigger, HoverCardContent };
30 | 


--------------------------------------------------------------------------------
/pipelines/tasks/config/config_pmtiles.py:
--------------------------------------------------------------------------------
 1 | """Configuration for DuckDB-based PMTiles generation."""
 2 | 
 3 | # Value columns to pivot for both data types
 4 | value_columns = [
 5 |     "resultat",
 6 |     "ratio",
 7 |     "date_dernier_prel",
 8 |     "nb_parametres",
 9 |     "nb_prelevements",
10 |     "nb_sup_valeur_sanitaire",
11 |     "parametres_detectes",
12 | ]
13 | 
14 | # Configuration for both commune and UDI data processing
15 | config_pmtiles: dict[str, dict[str, str | list[str] | None]] = {
16 |     "communes": {
17 |         "result_table": "web__resultats_communes",
18 |         "geom_table": "int__commune_geom",
19 |         "id_columns": ["commune_code_insee", "commune_nom"],
20 |         "result_id_column": "commune_code_insee",
21 |         "geom_id_column": "com_code",
22 |         "geom_name_column": "com_name",
23 |         "output_filename": "commune_data",
24 |         "layer_name": "data_communes",
25 |     },
26 |     "udi": {
27 |         "result_table": "web__resultats_udi",
28 |         "geom_table": "int__udi_geom",
29 |         "id_columns": ["cdreseau", "nomreseaux"],
30 |         "result_id_column": "cdreseau",
31 |         "geom_id_column": "code_udi",
32 |         "geom_name_column": None,  # UDI geom table doesn't have a name column
33 |         "output_filename": "udi_data",
34 |         "layer_name": "data_udi",
35 |     },
36 | }
37 | 


--------------------------------------------------------------------------------
/pipelines/tasks/client/commune_client.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | 
 4 | from pipelines.tasks.client.core.https_to_duck_client import HTTPSToDuckDBClient
 5 | from pipelines.tasks.config.common import (
 6 |     CACHE_FOLDER,
 7 |     logger,
 8 | )
 9 | 
10 | 
11 | class CommuneClient(HTTPSToDuckDBClient):
12 |     def __init__(self, config, duckdb_client):
13 |         super().__init__(config, duckdb_client)
14 | 
15 |     def _download_data(self):
16 |         """Process the COG datasets"""
17 |         logger.info("Launching processing of Insee communes")
18 | 
19 |         os.makedirs(CACHE_FOLDER, exist_ok=True)
20 |         self.download_file_from_https(
21 |             path=self.config["source"]["id"],
22 |             filepath=Path(CACHE_FOLDER, self.config["file"]["file_name"]),
23 |         )
24 | 
25 |     def _ingest_to_duckdb(self):
26 |         """Implement INSEE specific ingestion logic"""
27 |         self.duckdb_client.drop_tables([self.config["file"]["table_name"]])
28 |         self.duckdb_client.ingest_from_csv(
29 |             ingest_type="CREATE",
30 |             table_name=self.config["file"]["table_name"],
31 |             de_partition=self.config["source"]["datetime"][:4],
32 |             dataset_datetime=self.config["source"]["datetime"],
33 |             filepath=Path(CACHE_FOLDER, self.config["file"]["file_name"]),
34 |         )
35 | 


--------------------------------------------------------------------------------
/dbt_/models/staging/edc/val_traduite__docs.md:
--------------------------------------------------------------------------------
 1 | {% docs val_traduite_docs %}
 2 | Traduction au format numérique du résultat textuel d’une mesure Rqana par application automatisée de règles prédéfinies.
 3 | 
 4 | Principe de traduction :
 5 | 
 6 | | Résultat | Valeur traduite |                     Commentaire                    |
 7 | |:--------:|:---------------:|:--------------------------------------------------:|
 8 | |    XXX   |       XXX       |                                                    |
 9 | |    XXX   |       -XXX      |                                                    |
10 | |   <XXX   |        0        |                                                    |
11 | |   >XXX   |       XXX       |                                                    |
12 | |  TRACES  |        0        |    Entre seuil de quantification et de détection   |
13 | | INCOMPT. |       1,11      | Valeur trop élevée en microbiologie. Préférer >XXX |
14 | | PRESENCE |        1        |               Présence non quantifiée              |
15 | |    N.D   |        0        |                < seuil de détection                |
16 | | ILLISIBL |       NULL      |         Non interprétable en bactériologie         |
17 | |  <SEUIL  |        0        |                < seuil de détection                |
18 | |   N.M.   |       NULL      |                  Non fait, perdu.                  |
19 | {% enddocs %}
20 | 


--------------------------------------------------------------------------------
/pipelines/tasks/client/opendatasoft_client.py:
--------------------------------------------------------------------------------
 1 | # pipelines/tasks/client/opendatasoft_client.py
 2 | 
 3 | 
 4 | import os
 5 | from pathlib import Path
 6 | 
 7 | from pipelines.tasks.client.core.https_to_duck_client import HTTPSToDuckDBClient
 8 | from pipelines.tasks.config.common import (
 9 |     CACHE_FOLDER,
10 |     logger,
11 | )
12 | from pipelines.tasks.config.config_geojson import get_opendatasoft_config
13 | 
14 | 
15 | class OpenDataSoftClient(HTTPSToDuckDBClient):
16 |     def __init__(self, duckdb_client):
17 |         config = get_opendatasoft_config()
18 |         super().__init__(config, duckdb_client)
19 | 
20 |     def _download_data(self):
21 |         logger.info("Launching download_data from s3")
22 |         os.makedirs(CACHE_FOLDER, exist_ok=True)
23 |         self.download_file_from_https(
24 |             path=self.config["source"]["id"],
25 |             filepath=Path(CACHE_FOLDER, self.config["file"]["file_name"]),
26 |         )
27 | 
28 |     def _ingest_to_duckdb(self):
29 |         logger.info("DB ingest geojson data")
30 |         self.duckdb_client.drop_tables(table_names=[self.config["file"]["table_name"]])
31 |         self.duckdb_client.ingest_from_geojson(
32 |             table_name=self.config["file"]["table_name"],
33 |             filepath=Path(CACHE_FOLDER, self.config["file"]["file_name"]),
34 |         )
35 |         logger.info("✅ geojson file has been ingested in DB")
36 | 


--------------------------------------------------------------------------------
/pipelines/tasks/generate_pmtiles.py:
--------------------------------------------------------------------------------
 1 | """Generate PMTiles files using DuckDB for efficient data processing. Optional upload to S3.
 2 | 
 3 | This task generates two PMTiles files:
 4 | - commune_data.pmtiles: Contains commune-level water quality data with geometry
 5 | - udi_data.pmtiles: Contains UDI (water distribution unit) level data with geometry
 6 | 
 7 | The process uses DuckDB for efficient joining and pivoting of data.
 8 | 
 9 | Args:
10 |     - env (str): Environment to upload to ("dev" or "prod")
11 |     - upload (bool): Whether to upload the generated PMTiles files to S3 (default is False)
12 | """
13 | 
14 | from pipelines.tasks.client.core.duckdb_client import DuckDBClient
15 | from pipelines.tasks.client.pmtiles_client import PmtilesClient
16 | from pipelines.utils.logger import get_logger
17 | 
18 | logger = get_logger(__name__)
19 | 
20 | 
21 | def execute(env: str, upload: bool = False):
22 |     """
23 |     Execute PMTiles generation process using DuckDB.
24 | 
25 |     Args:
26 |         env: Environment to use ("dev" or "prod")
27 |     """
28 |     logger.info(f"Starting DuckDB-based PMTiles generation for environment: {env}")
29 | 
30 |     duckdb_client = DuckDBClient()
31 | 
32 |     try:
33 |         processor = PmtilesClient(duckdb_client)
34 |         processor.generate_pmtiles(env, upload=upload)
35 | 
36 |     finally:
37 |         # Always close the database connection
38 |         duckdb_client.close()
39 | 


--------------------------------------------------------------------------------
/webapp/components/ui/tooltip.tsx:
--------------------------------------------------------------------------------
 1 | "use client";
 2 | 
 3 | import * as React from "react";
 4 | import * as TooltipPrimitive from "@radix-ui/react-tooltip";
 5 | 
 6 | import { cn } from "@/lib/utils";
 7 | 
 8 | const TooltipProvider = TooltipPrimitive.Provider;
 9 | 
10 | const Tooltip = TooltipPrimitive.Root;
11 | 
12 | const TooltipTrigger = TooltipPrimitive.Trigger;
13 | 
14 | const TooltipContent = React.forwardRef<
15 |   React.ElementRef<typeof TooltipPrimitive.Content>,
16 |   React.ComponentPropsWithoutRef<typeof TooltipPrimitive.Content>
17 | >(({ className, sideOffset = 4, ...props }, ref) => (
18 |   <TooltipPrimitive.Portal>
19 |     <TooltipPrimitive.Content
20 |       ref={ref}
21 |       sideOffset={sideOffset}
22 |       className={cn(
23 |         "z-50 overflow-hidden rounded-md bg-primary px-3 py-1.5 text-xs text-primary-foreground animate-in fade-in-0 zoom-in-95 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 origin-[--radix-tooltip-content-transform-origin]",
24 |         className,
25 |       )}
26 |       {...props}
27 |     />
28 |   </TooltipPrimitive.Portal>
29 | ));
30 | TooltipContent.displayName = TooltipPrimitive.Content.displayName;
31 | 
32 | export { Tooltip, TooltipTrigger, TooltipContent, TooltipProvider };
33 | 


--------------------------------------------------------------------------------
/dbt_/models/staging/atlasante/_atlasante_models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: stg_atlasante_udi_2023
 5 |     description: "Unités de distribution (UDI) de la France métropolitaine"
 6 |     columns:
 7 |       - name: gid
 8 |         type: INTEGER
 9 |       - name: code_udi
10 |         type: VARCHAR
11 |       - name: ins_nom
12 |         type: VARCHAR
13 |       - name: uge_nom
14 |         type: VARCHAR
15 |       - name: udi_pop
16 |         type: VARCHAR
17 |       - name: geom
18 |         type: GEOMETRY
19 |       - name: ingestion_date
20 |         type: DATE
21 |   - name: stg_atlasante_udi_corse
22 |     description: "Unités de distribution (UDI) de la Corse"
23 |     columns:
24 |       - name: gid
25 |         type: INTEGER
26 |       - name: cn_udi
27 |         type: VARCHAR
28 |       - name: nom_udi
29 |         type: VARCHAR
30 |       - name: geom
31 |         type: GEOMETRY
32 |       - name: ingestion_date
33 |         type: DATE
34 |   - name: stg_atlasante_udi_2024
35 |     description: "Unités de distribution (UDI) de la France métropolitaine"
36 |     columns:
37 |       - name: gid
38 |         type: INTEGER
39 |       - name: code_udi
40 |         type: VARCHAR
41 |       - name: ins_nom
42 |         type: VARCHAR
43 |       - name: uge_nom
44 |         type: VARCHAR
45 |       - name: udi_pop
46 |         type: VARCHAR
47 |       - name: geom
48 |         type: GEOMETRY
49 |       - name: ingestion_date
50 |         type: DATE
51 | 


--------------------------------------------------------------------------------
/webapp/lib/property.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Parse a property name into its components
 3 |  * @param propertyName The full property name in format period_category_variable
 4 |  * @returns An object with the period, category, and variable
 5 |  */
 6 | export function parsePropertyName(propertyName: string): {
 7 |   period: string;
 8 |   category: string;
 9 |   variable: string;
10 | } | null {
11 |   // Handle null or empty values
12 |   if (!propertyName) {
13 |     return null;
14 |   }
15 | 
16 |   // Uses regex to match the pattern
17 |   const pattern = /^(bilan_annuel_\d{4}|dernier_prel)_([^_]+)_(.+)$/;
18 |   const match = propertyName.match(pattern);
19 | 
20 |   // If the property name doesn't match our expected format
21 |   if (!match) {
22 |     return null;
23 |   }
24 | 
25 |   // Extract components from regex match
26 |   const [, period, category, variable] = match;
27 | 
28 |   return {
29 |     period,
30 |     category,
31 |     variable,
32 |   };
33 | }
34 | 
35 | /**
36 |  * Get the full property name from components
37 |  * @param period The period (e.g., "bilan_annuel_2022", "dernier_prel")
38 |  * @param category The category (e.g., "pfas", "cvm")
39 |  * @param variable The variable (e.g., "resultat", "parametres_detectes")
40 |  * @returns The full property name
41 |  */
42 | export function getPropertyName(
43 |   period: string,
44 |   category: string,
45 |   variable: string,
46 | ): string {
47 |   return `${period}_${category}_${variable}`;
48 | }
49 | 


--------------------------------------------------------------------------------
/dbt_/dbt_project.yml:
--------------------------------------------------------------------------------
 1 | # Name your project! Project names should contain only lowercase characters
 2 | # and underscores. A good package name should reflect your organization's
 3 | # name or the intended use of these models
 4 | name: "dbt_"
 5 | version: "1.0.0"
 6 | 
 7 | # This setting configures which "profile" dbt uses for this project.
 8 | profile: "dbt_"
 9 | 
10 | # These configurations specify where dbt should look for different types of files.
11 | # The `model-paths` config, for example, states that models in this project can be
12 | # found in the "models/" directory. You probably won't need to change these!
13 | model-paths: ["models"]
14 | analysis-paths: ["analyses"]
15 | test-paths: ["tests"]
16 | seed-paths: ["seeds"]
17 | macro-paths: ["macros"]
18 | snapshot-paths: ["snapshots"]
19 | 
20 | clean-targets: # directories to be removed by `dbt clean`
21 |   - "target"
22 |   - "dbt_packages"
23 | 
24 | # Configuring models
25 | # Full documentation: https://docs.getdbt.com/docs/configuring-models
26 | 
27 | # In this example config, we tell dbt to build all models in the example/
28 | # directory as views. These settings can be overridden in the individual model
29 | # files using the `{{ config(...) }}` macro.
30 | models:
31 |   dbt_:
32 |     # Config indicated by + and applies to all files under models/example/
33 |     staging:
34 |       +materialized: view
35 |     intermediate:
36 |       +materialized: view
37 |     website:
38 |       +materialized: table
39 | 


--------------------------------------------------------------------------------
/webapp/components/ui/popover.tsx:
--------------------------------------------------------------------------------
 1 | "use client";
 2 | 
 3 | import * as React from "react";
 4 | import * as PopoverPrimitive from "@radix-ui/react-popover";
 5 | 
 6 | import { cn } from "@/lib/utils";
 7 | 
 8 | const Popover = PopoverPrimitive.Root;
 9 | 
10 | const PopoverTrigger = PopoverPrimitive.Trigger;
11 | 
12 | const PopoverAnchor = PopoverPrimitive.Anchor;
13 | 
14 | const PopoverContent = React.forwardRef<
15 |   React.ElementRef<typeof PopoverPrimitive.Content>,
16 |   React.ComponentPropsWithoutRef<typeof PopoverPrimitive.Content>
17 | >(({ className, align = "center", sideOffset = 4, ...props }, ref) => (
18 |   <PopoverPrimitive.Portal>
19 |     <PopoverPrimitive.Content
20 |       ref={ref}
21 |       align={align}
22 |       sideOffset={sideOffset}
23 |       className={cn(
24 |         "z-50 w-72 rounded-md border bg-popover p-4 text-popover-foreground shadow-md outline-none data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
25 |         className,
26 |       )}
27 |       {...props}
28 |     />
29 |   </PopoverPrimitive.Portal>
30 | ));
31 | PopoverContent.displayName = PopoverPrimitive.Content.displayName;
32 | 
33 | export { Popover, PopoverTrigger, PopoverContent, PopoverAnchor };
34 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/sqlfluff/sqlfluff
 3 |     rev: 3.3.1  # Vérifie la dernière version sur GitHub
 4 |     hooks:
 5 |       - id: sqlfluff-lint
 6 |         args: [ "--config", ".sqlfluff" ]  # Adapte au dialecte que tu utilises (bigquery, snowflake, etc.)
 7 |         files: dbt_/
 8 | 
 9 |       - id: sqlfluff-fix  # Optionnel, pour corriger automatiquement les erreurs
10 |         args: [ "--config", ".sqlfluff" ]
11 |         files: dbt_/
12 |   - repo: https://github.com/astral-sh/ruff-pre-commit
13 |     # Ruff version.
14 |     rev: v0.9.3
15 |     hooks:
16 |       # Run the linter.
17 |       - id: ruff
18 |         args: [ --fix ]
19 |       # Run the formatter.
20 |       - id: ruff-format
21 |   - repo: https://github.com/pre-commit/pre-commit-hooks
22 |     rev: v5.0.0
23 |     hooks:
24 |       - id: check-merge-conflict
25 |       - id: mixed-line-ending
26 |   #- repo: https://github.com/pycqa/bandit
27 |   #  rev: 1.7.4
28 |   #  hooks:
29 |   #  - id: bandit
30 |   #    exclude: tests/
31 | 
32 |   # This pre commit only work with poetry so we commented it as we work with uv
33 |   # - repo: https://github.com/Lucas-C/pre-commit-hooks-safety
34 |   #  rev: v1.3.1
35 |   #  hooks:
36 |   #    - id: python-safety-dependencies-check
37 | 
38 |   - repo: https://github.com/rbubley/mirrors-prettier
39 |     rev: v3.5.1
40 |     hooks:
41 |     - id: prettier
42 |       types_or: [markdown, javascript, jsx, ts, tsx, json]
43 |       files: 'webapp'
44 | 


--------------------------------------------------------------------------------
/pipelines/utils/utils.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from pathlib import Path
 3 | from urllib.parse import urlparse
 4 | 
 5 | import requests
 6 | 
 7 | 
 8 | logger = logging.getLogger(__name__)
 9 | 
10 | 
11 | def get_project_root() -> Path:
12 |     """
13 |     Returns project root folder when called from anywhere in the project
14 |     This is useful for specifying paths that are relative to the project root
15 |     e.g. `local_db_path = Path(get_project_root(), "database/data.duckdb")`
16 |     """
17 |     return Path(__file__).parent.parent.parent
18 | 
19 | 
20 | def get_url_headers(url: str) -> dict:
21 |     """
22 |     Get url HTTP headers
23 |     :param url: static dataset url
24 |     :return: HTTP headers
25 |     """
26 |     try:
27 |         response = requests.head(url, timeout=5)
28 |         response.raise_for_status()
29 |         return response.headers
30 |     except requests.exceptions.RequestException as ex:
31 |         logger.error(f"Exception raised: {ex}")
32 |         return {}
33 | 
34 | 
35 | def extract_dataset_datetime(url: str) -> str:
36 |     """
37 |     Extract the dataset datetime from dataset location url
38 |     which can be found in the static dataset url headers
39 |     :param url: static dataset url
40 |     :return: dataset datetime under format "YYYYMMDD-HHMMSS"
41 |     """
42 |     metadata = get_url_headers(url)
43 |     parsed_url = urlparse(metadata.get("location"))
44 |     path_parts = parsed_url.path.strip("/").split("/")
45 |     return path_parts[-2]
46 | 


--------------------------------------------------------------------------------
/pipelines/config/config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from dotenv import load_dotenv
 4 | 
 5 | from pipelines.utils.logger import get_logger
 6 | 
 7 | logger = get_logger(__name__)
 8 | 
 9 | current_dir = os.path.dirname(os.path.abspath(__file__))
10 | # Construct the path to the .env file
11 | dotenv_path = os.path.join(current_dir, ".env")
12 | 
13 | 
14 | def load_env_variables():
15 |     load_dotenv(dotenv_path)
16 | 
17 | 
18 | def get_environment(default="prod"):
19 |     env = os.getenv("ENV", default)
20 |     logger.info(f"Running on env {env}")
21 |     if env not in ["dev", "prod"]:
22 |         raise ValueError(f"Invalid environment: {env}. Must be 'dev' or 'prod'.")
23 |     return env
24 | 
25 | 
26 | def get_s3_path(env, filename="data.duckdb"):
27 |     return f"{env}/database/{filename}"
28 | 
29 | 
30 | def get_s3_udi_path(env, filename):
31 |     return f"{env}/UDI/{filename}"
32 | 
33 | 
34 | def get_s3_path_geojson(env, filename="new-georef-france-commune-prelevement.geojson"):
35 |     """Get the S3 path for GeoJSON file based on environment.
36 | 
37 |     Args:
38 |         env (str): Environment ("dev" or "prod")
39 | 
40 |     Returns:
41 |         str: S3 path for the GeoJSON file
42 |     """
43 |     if env not in ["dev", "prod"]:
44 |         raise ValueError("Environment must be 'dev' or 'prod'")
45 |     return f"{env}/geojson/{filename}"
46 | 
47 | 
48 | def get_s3_path_pmtiles(env, filename="georef-france-commune-prelevement.pmtiles"):
49 |     if env not in ["dev", "prod"]:
50 |         raise ValueError("Environment must be 'dev' or 'prod'")
51 |     return f"{env}/pmtiles/{filename}"
52 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/nitrate/int__resultats_nitrate_udi_annuel.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | nitrate_prels AS (
 3 |     -- Certains prélèvements ont plusieurs analyses pour la même substance
 4 |     -- Le SELECT DISTINCT ne dédoublonne pas ces cas là
 5 |     -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête
 6 |     SELECT DISTINCT
 7 |         de_partition AS annee,
 8 |         cdreseau,
 9 |         referenceprel,
10 |         datetimeprel,
11 |         valeur_sanitaire_1,
12 |         valtraduite
13 |     FROM
14 |         {{ ref('int__resultats_udi_communes') }}
15 |     WHERE
16 |         categorie = 'nitrate'
17 |         AND cdparametresiseeaux = 'NO3'
18 | )
19 | 
20 | SELECT
21 |     cdreseau,
22 |     annee,
23 |     'nitrate' AS categorie,
24 |     'bilan_annuel_' || annee AS periode,
25 |     count(
26 |         DISTINCT
27 |         CASE
28 |             WHEN
29 |                 valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
30 |                 THEN referenceprel
31 |         END
32 |     ) AS nb_depassements,
33 |     count(DISTINCT referenceprel) AS nb_prelevements,
34 |     (
35 |         count(
36 |             DISTINCT
37 |             CASE
38 |                 WHEN
39 |                     valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
40 |                     THEN referenceprel
41 |             END
42 |         )::float
43 |         /
44 |         count(DISTINCT referenceprel)::float
45 |     ) AS ratio,
46 |     to_json({
47 |         'NO3': max(valtraduite)
48 |     }) AS parametres_detectes,
49 |     max(datetimeprel) AS date_dernier_prel
50 | 
51 | FROM nitrate_prels
52 | 
53 | GROUP BY cdreseau, annee
54 | 


--------------------------------------------------------------------------------
/.github/workflows/test_pipelines.yaml:
--------------------------------------------------------------------------------
 1 | name: 🧪 Run Pipelines Tests
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches: [main]
 6 |     paths:
 7 |       - 'pipelines/**'
 8 | 
 9 | env:
10 |   SCW_ACCESS_KEY: ${{ secrets.SCW_ACCESS_KEY }}
11 |   SCW_SECRET_KEY: ${{ secrets.SCW_SECRET_KEY }}
12 | 
13 | jobs:
14 |   test:
15 |     runs-on: ubuntu-latest
16 | 
17 |     steps:
18 |     - name: Checkout code
19 |       uses: actions/checkout@v3
20 | 
21 |     - name: Check if SCW_ACCESS_KEY and SCW_SECRET_KEY are set
22 |       run: |
23 |         if [ -z "$SCW_ACCESS_KEY" ]; then
24 |           echo "SCW_ACCESS_KEY is not set, cannot run tests without access key"
25 |           exit 1
26 |         else
27 |           echo "SCW_ACCESS_KEY is properly set."
28 |         fi
29 | 
30 |         if [ -z "$SCW_SECRET_KEY" ]; then
31 |           echo "SCW_SECRET_KEY is not set, cannot run tests without secret key"
32 |           exit 1
33 |         else
34 |           echo "SCW_SECRET_KEY is properly set."
35 |         fi
36 | 
37 |     - name: Install a specific version of uv
38 |       uses: astral-sh/setup-uv@v5
39 |       with:
40 |         version: ">=0.4.0"
41 | 
42 |     - name: Install dependencies
43 |       run: |
44 |         uv sync
45 | 
46 |     - name: Run tests with coverage
47 |       run: uv run pytest -s --cov=. --cov-report=term-missing
48 | 
49 |     - name: test dbt
50 |       run: |
51 |         cd dbt_
52 |         uv run dbt deps
53 |         uv run dbt seed
54 |         uv run dbt run
55 | 
56 |     - name: test pmtiles generation
57 |       run: |
58 |         uv pip install .[pmtiles]
59 |         uv run pipelines/run.py run generate_pmtiles --env dev
60 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/pesticide/sub_active/int__resultats_sub_active_udi_annuel.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | sub_active_prels AS (
 3 |     SELECT DISTINCT
 4 |         de_partition AS annee,
 5 |         cdreseau,
 6 |         referenceprel,
 7 |         datetimeprel,
 8 |         cdparametresiseeaux,
 9 |         valtraduite,
10 |         limite_qualite,
11 |         valeur_sanitaire_1
12 |     FROM
13 |         {{ ref('int__resultats_udi_communes') }}
14 |     WHERE
15 |         categorie = 'pesticide'
16 |         AND
17 |         categorie_2 = 'sub_active'
18 | )
19 | 
20 | SELECT
21 |     cdreseau,
22 |     annee,
23 |     'sub_active' AS categorie,
24 |     'bilan_annuel_' || annee AS periode,
25 |     COUNT(
26 |         DISTINCT
27 |         CASE
28 |             WHEN
29 |                 valtraduite IS NOT NULL AND valtraduite > limite_qualite
30 |                 THEN referenceprel
31 |         END
32 |     ) AS nb_depassements,
33 |     COUNT(
34 |         DISTINCT
35 |         CASE
36 |             WHEN
37 |                 valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
38 |                 THEN referenceprel
39 |         END
40 |     ) AS nb_sup_valeur_sanitaire,
41 |     COUNT(DISTINCT referenceprel) AS nb_prelevements,
42 |     (
43 |         COUNT(
44 |             DISTINCT
45 |             CASE
46 |                 WHEN
47 |                     valtraduite IS NOT NULL AND valtraduite > limite_qualite
48 |                     THEN referenceprel
49 |             END
50 |         )::float
51 |         /
52 |         COUNT(DISTINCT referenceprel)::float
53 |     ) AS ratio_limite_qualite
54 | 
55 | FROM sub_active_prels
56 | 
57 | GROUP BY cdreseau, annee
58 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/cvm/int__resultats_cvm_udi_annuel.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | cvm_prels AS (
 3 |     -- Certains prélèvements ont plusieurs analyses pour la même substance
 4 |     -- C'est très rare pour les CVM (de l'ordre d'une dizaine de cas)
 5 |     -- Le SELECT DISTINCT ne dédoublonne pas ces cas là
 6 |     -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête
 7 |     SELECT DISTINCT
 8 |         de_partition AS annee,
 9 |         cdreseau,
10 |         referenceprel,
11 |         datetimeprel,
12 |         limite_qualite,
13 |         valtraduite
14 |     FROM
15 |         {{ ref('int__resultats_udi_communes') }}
16 |     WHERE
17 |         categorie = 'cvm'
18 | )
19 | 
20 | SELECT
21 |     cdreseau,
22 |     annee,
23 |     'cvm' AS categorie,
24 |     'bilan_annuel_' || annee AS periode,
25 |     count(
26 |         DISTINCT
27 |         CASE
28 |             WHEN
29 |                 valtraduite IS NOT NULL AND valtraduite > limite_qualite
30 |                 THEN referenceprel
31 |         END
32 |     ) AS nb_depassements,
33 |     count(DISTINCT referenceprel) AS nb_prelevements,
34 |     (
35 |         count(
36 |             DISTINCT
37 |             CASE
38 |                 WHEN
39 |                     valtraduite IS NOT NULL AND valtraduite > limite_qualite
40 |                     THEN referenceprel
41 |             END
42 |         )::float
43 |         /
44 |         count(DISTINCT referenceprel)::float
45 |     ) AS ratio_limite_qualite,
46 |     to_json({
47 |         'CLVYL': max(valtraduite)
48 |     }) AS parametres_detectes,
49 |     max(datetimeprel) AS date_dernier_prel
50 | 
51 | FROM cvm_prels
52 | 
53 | GROUP BY cdreseau, annee
54 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/nitrate/int__resultats_nitrate_commune_annuel.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | nitrate_prels AS (
 3 |     -- Certains prélèvements ont plusieurs analyses pour la même substance
 4 |     -- Le SELECT DISTINCT ne dédoublonne pas ces cas là
 5 |     -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête
 6 |     SELECT DISTINCT
 7 |         de_partition AS annee,
 8 |         inseecommune,
 9 |         referenceprel,
10 |         datetimeprel,
11 |         valeur_sanitaire_1,
12 |         valtraduite
13 |     FROM
14 |         {{ ref('int__resultats_udi_communes') }}
15 |     WHERE
16 |         categorie = 'nitrate'
17 |         AND cdparametresiseeaux = 'NO3'
18 | )
19 | 
20 | SELECT
21 |     inseecommune,
22 |     annee,
23 |     'nitrate' AS categorie,
24 |     'bilan_annuel_' || annee AS periode,
25 |     count(
26 |         DISTINCT
27 |         CASE
28 |             WHEN
29 |                 valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
30 |                 THEN referenceprel
31 |         END
32 |     ) AS nb_depassements,
33 |     count(DISTINCT referenceprel) AS nb_prelevements,
34 |     (
35 |         count(
36 |             DISTINCT
37 |             CASE
38 |                 WHEN
39 |                     valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
40 |                     THEN referenceprel
41 |             END
42 |         )::float
43 |         /
44 |         count(DISTINCT referenceprel)::float
45 |     ) AS ratio,
46 |     to_json({
47 |         'NO3': max(valtraduite)
48 |     }) AS parametres_detectes,
49 |     max(datetimeprel) AS date_dernier_prel
50 | 
51 | FROM nitrate_prels
52 | 
53 | GROUP BY inseecommune, annee
54 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/pesticide/sub_active/int__resultats_sub_active_commune_annuel.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | sub_active_prels AS (
 3 |     SELECT DISTINCT
 4 |         de_partition AS annee,
 5 |         inseecommune,
 6 |         referenceprel,
 7 |         datetimeprel,
 8 |         cdparametresiseeaux,
 9 |         valtraduite,
10 |         limite_qualite,
11 |         valeur_sanitaire_1
12 |     FROM
13 |         {{ ref('int__resultats_udi_communes') }}
14 |     WHERE
15 |         categorie = 'pesticide'
16 |         AND
17 |         categorie_2 = 'sub_active'
18 | )
19 | 
20 | SELECT
21 |     inseecommune,
22 |     annee,
23 |     'sub_active' AS categorie,
24 |     'bilan_annuel_' || annee AS periode,
25 |     COUNT(
26 |         DISTINCT
27 |         CASE
28 |             WHEN
29 |                 valtraduite IS NOT NULL AND valtraduite > limite_qualite
30 |                 THEN referenceprel
31 |         END
32 |     ) AS nb_depassements,
33 |     COUNT(
34 |         DISTINCT
35 |         CASE
36 |             WHEN
37 |                 valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
38 |                 THEN referenceprel
39 |         END
40 |     ) AS nb_sup_valeur_sanitaire,
41 |     COUNT(DISTINCT referenceprel) AS nb_prelevements,
42 |     (
43 |         COUNT(
44 |             DISTINCT
45 |             CASE
46 |                 WHEN
47 |                     valtraduite IS NOT NULL AND valtraduite > limite_qualite
48 |                     THEN referenceprel
49 |             END
50 |         )::float
51 |         /
52 |         COUNT(DISTINCT referenceprel)::float
53 |     ) AS ratio_limite_qualite
54 | 
55 | FROM sub_active_prels
56 | 
57 | GROUP BY inseecommune, annee
58 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/cvm/int__resultats_cvm_commune_annuel.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | cvm_prels AS (
 3 |     -- Certains prélèvements ont plusieurs analyses pour la même substance
 4 |     -- C'est très rare pour les CVM (de l'ordre d'une dizaine de cas)
 5 |     -- Le SELECT DISTINCT ne dédoublonne pas ces cas là
 6 |     -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête
 7 |     SELECT DISTINCT
 8 |         de_partition AS annee,
 9 |         inseecommune,
10 |         referenceprel,
11 |         datetimeprel,
12 |         limite_qualite,
13 |         valtraduite
14 |     FROM
15 |         {{ ref('int__resultats_udi_communes') }}
16 |     WHERE
17 |         categorie = 'cvm'
18 | )
19 | 
20 | SELECT
21 |     inseecommune,
22 |     annee,
23 |     'cvm' AS categorie,
24 |     'bilan_annuel_' || annee AS periode,
25 |     count(
26 |         DISTINCT
27 |         CASE
28 |             WHEN
29 |                 valtraduite IS NOT NULL AND valtraduite > limite_qualite
30 |                 THEN referenceprel
31 |         END
32 |     ) AS nb_depassements,
33 |     count(DISTINCT referenceprel) AS nb_prelevements,
34 |     (
35 |         count(
36 |             DISTINCT
37 |             CASE
38 |                 WHEN
39 |                     valtraduite IS NOT NULL AND valtraduite > limite_qualite
40 |                     THEN referenceprel
41 |             END
42 |         )::float
43 |         /
44 |         count(DISTINCT referenceprel)::float
45 |     ) AS ratio_limite_qualite,
46 |     to_json({
47 |         'CLVYL': max(valtraduite)
48 |     }) AS parametres_detectes,
49 |     max(datetimeprel) AS date_dernier_prel
50 | 
51 | FROM cvm_prels
52 | 
53 | GROUP BY inseecommune, annee
54 | 


--------------------------------------------------------------------------------
/webapp/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "13_pollution_eau",
 3 |   "version": "0.1.0",
 4 |   "private": true,
 5 |   "scripts": {
 6 |     "dev": "next dev --turbopack",
 7 |     "build": "next build",
 8 |     "start": "next start",
 9 |     "lint": "next lint"
10 |   },
11 |   "dependencies": {
12 |     "@duckdb/node-api": "^1.2.0-alpha.14",
13 |     "@radix-ui/react-dialog": "^1.1.15",
14 |     "@radix-ui/react-hover-card": "^1.1.15",
15 |     "@radix-ui/react-popover": "^1.1.15",
16 |     "@radix-ui/react-scroll-area": "^1.2.10",
17 |     "@radix-ui/react-select": "^2.2.6",
18 |     "@radix-ui/react-slot": "^1.2.3",
19 |     "@radix-ui/react-switch": "^1.2.6",
20 |     "@radix-ui/react-tooltip": "^1.2.8",
21 |     "class-variance-authority": "^0.7.1",
22 |     "clsx": "^2.1.1",
23 |     "cmdk": "^1.0.4",
24 |     "lucide-react": "^0.475.0",
25 |     "maplibre-gl": "^5.1.0",
26 |     "next": "^15.2.3",
27 |     "pmtiles": "^4.2.1",
28 |     "protomaps-themes-base": "^4.4.0",
29 |     "react": "^19.0.0",
30 |     "react-dom": "^19.0.0",
31 |     "react-map-gl": "^8.0.0",
32 |     "tailwind-merge": "^3.0.1",
33 |     "tailwindcss-animate": "^1.0.7"
34 |   },
35 |   "devDependencies": {
36 |     "@eslint/eslintrc": "^3",
37 |     "@types/node": "^20",
38 |     "@types/react": "^19",
39 |     "@types/react-dom": "^19",
40 |     "eslint": "^9",
41 |     "eslint-config-next": "15.1.7",
42 |     "eslint-config-prettier": "^10.0.1",
43 |     "postcss": "^8",
44 |     "prettier": "3.5.1",
45 |     "tailwindcss": "^3.4",
46 |     "typescript": "^5"
47 |   },
48 |   "resolutions": {
49 |     "react": "^19.0.0",
50 |     "react-dom": "^19.0.0",
51 |     "@types/react": "^19",
52 |     "@types/react-dom": "^19"
53 |   }
54 | }
55 | 


--------------------------------------------------------------------------------
/pipelines/tasks/config/config_geojson.py:
--------------------------------------------------------------------------------
 1 | def get_opendatasoft_config() -> dict:
 2 |     """Get OpenDataSoft configuration parameters.
 3 | 
 4 |     Returns:
 5 |         dict: Configuration parameters for OpenDataSoft client
 6 |     """
 7 | 
 8 |     return {
 9 |         "source": {
10 |             "base_url": "https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/",
11 |             "id": "georef-france-commune/exports/geojson",
12 |             "datetime": "20240220",
13 |         },
14 |         "file": {
15 |             "file_name": "georef-france-commune.geojson",
16 |             "table_name": "opendatasoft_communes",
17 |         },
18 |     }
19 | 
20 | 
21 | config_merge_geo = {
22 |     "communes": {
23 |         "result_table": "web__resultats_communes",
24 |         "geom_table": "stg_communes__opendatasoft_json",
25 |         "groupby_columns": ["commune_code_insee", "commune_nom"],
26 |         "result_join_column": "commune_code_insee",
27 |         "geom_join_column": "com_code",
28 |         "upload_file_name": "georef-france-communes-prelevement.geojson",
29 |     },
30 |     "udi": {
31 |         "result_table": "web__resultats_udi",
32 |         "geom_table": "stg_udi_json",
33 |         "groupby_columns": ["cdreseau", "nomreseaux"],
34 |         "result_join_column": "cdreseau",
35 |         "geom_join_column": "code_udi",
36 |         "upload_file_name": "georef-france-udi-prelevement.geojson",
37 |     },
38 | }
39 | 
40 | col_input = ["periode", "categorie"]
41 | 
42 | list_column_result = [
43 |     "resultat",
44 |     "ratio",
45 |     "date_dernier_prel",
46 |     "nb_parametres",
47 |     "nb_prelevements",
48 |     "nb_sup_valeur_sanitaire",
49 |     "parametres_detectes",
50 | ]
51 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/tous/int__resultats_tous_udi_dernier.sql:
--------------------------------------------------------------------------------
 1 | SELECT
 2 |     cdreseau,
 3 |     'tous' AS categorie,
 4 |     'dernier_prel' AS periode,
 5 |     MAX(date_dernier_prel) AS date_dernier_prel,
 6 |     SUM(nb_parametres) AS nb_parametres,
 7 |     CASE
 8 |         WHEN BOOL_OR(resultat IN (
 9 |             'sup_valeur_sanitaire',
10 |             'sup_valeur_sanitaire_2'
11 |         )) THEN 'sup_limite_sanitaire'
12 | 
13 |         WHEN BOOL_OR(resultat IN (
14 |             'cvm_sup_0_5',
15 |             'somme_20pfas_sup_0_1',
16 |             'sup_limite_qualite'
17 |         )) THEN 'sup_limite_qualite'
18 | 
19 |         WHEN BOOL_OR(resultat IN (
20 |             'inf_valeur_sanitaire',
21 |             'inf_limite_qualite',
22 |             -- 'inf_limites_sup_0_1',
23 |             --'sup_limite_indicative',
24 |             'inf_limites',
25 |             'somme_20pfas_inf_0_1_et_4pfas_sup_0_02',
26 |             'somme_20pfas_inf_0_1_et_4pfas_inf_0_02',
27 |             'sup_limite_qualite_2036',
28 |             'no3_inf_25',
29 |             'no3_inf_40'
30 | 
31 |         )) THEN 'quantifie'
32 | 
33 |         WHEN BOOL_AND(resultat IN (
34 |             'non_quantifie'
35 |         )) THEN 'non_quantifie'
36 | 
37 |         ELSE 'erreur'
38 |     END AS resultat
39 | 
40 | FROM {{ ref('int__union_resultats_udi') }}
41 | WHERE
42 |     periode = 'dernier_prel'
43 |     AND
44 |     categorie NOT IN (
45 |         'sub_active',
46 |         'metabolite',
47 |         'metabolite_esa_metolachlore',
48 |         'metabolite_chlorothalonil_r471811',
49 |         'metabolite_chloridazone_desphenyl',
50 |         'metabolite_chloridazone_methyl_desphenyl',
51 |         'metabolite_atrazine_desethyl'
52 |     )
53 | GROUP BY cdreseau
54 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/tous/int__resultats_tous_commune_dernier.sql:
--------------------------------------------------------------------------------
 1 | SELECT
 2 |     inseecommune,
 3 |     'tous' AS categorie,
 4 |     'dernier_prel' AS periode,
 5 |     MAX(date_dernier_prel) AS date_dernier_prel,
 6 |     SUM(nb_parametres) AS nb_parametres,
 7 |     CASE
 8 |         WHEN BOOL_OR(resultat IN (
 9 |             'sup_valeur_sanitaire',
10 |             'sup_valeur_sanitaire_2'
11 |         )) THEN 'sup_limite_sanitaire'
12 | 
13 |         WHEN BOOL_OR(resultat IN (
14 |             'cvm_sup_0_5',
15 |             'somme_20pfas_sup_0_1',
16 |             'sup_limite_qualite'
17 |         )) THEN 'sup_limite_qualite'
18 | 
19 |         WHEN BOOL_OR(resultat IN (
20 |             'inf_valeur_sanitaire',
21 |             'inf_limite_qualite',
22 |             -- 'inf_limites_sup_0_1',
23 |             --'sup_limite_indicative',
24 |             'inf_limites',
25 |             'somme_20pfas_inf_0_1_et_4pfas_sup_0_02',
26 |             'somme_20pfas_inf_0_1_et_4pfas_inf_0_02',
27 |             'sup_limite_qualite_2036',
28 |             'no3_inf_25',
29 |             'no3_inf_40'
30 | 
31 |         )) THEN 'quantifie'
32 | 
33 |         WHEN BOOL_AND(resultat IN (
34 |             'non_quantifie'
35 |         )) THEN 'non_quantifie'
36 | 
37 |         ELSE 'erreur'
38 |     END AS resultat
39 | 
40 | FROM {{ ref('int__union_resultats_commune') }}
41 | WHERE
42 |     periode = 'dernier_prel'
43 |     AND
44 |     categorie NOT IN (
45 |         'sub_active',
46 |         'metabolite',
47 |         'metabolite_esa_metolachlore',
48 |         'metabolite_chlorothalonil_r471811',
49 |         'metabolite_chloridazone_desphenyl',
50 |         'metabolite_chloridazone_methyl_desphenyl',
51 |         'metabolite_atrazine_desethyl'
52 |     )
53 | GROUP BY inseecommune
54 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/cvm/int__resultats_cvm_udi_dernier.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | last_pvl AS (
 3 |     SELECT
 4 |         cdreseau,
 5 |         categorie,
 6 |         cdparametresiseeaux,
 7 |         datetimeprel,
 8 |         limite_qualite,
 9 |         valtraduite,
10 |         ROW_NUMBER()
11 |             OVER (
12 |                 PARTITION BY cdreseau, cdparametresiseeaux
13 |                 ORDER BY datetimeprel DESC
14 |             )
15 |             AS row_number
16 |     FROM
17 |         {{ ref('int__resultats_udi_communes') }}
18 |     WHERE
19 |         categorie = 'cvm'
20 |         AND
21 |         -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement
22 |         datetimeprel >= DATE_TRUNC('day', (
23 |             SELECT MAX(sub.datetimeprel)
24 |             FROM {{ ref('int__resultats_udi_communes') }} AS sub
25 |         ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
26 | )
27 | 
28 | SELECT
29 |     last_pvl.cdreseau,
30 |     last_pvl.categorie,
31 |     'dernier_prel' AS periode,
32 |     last_pvl.datetimeprel AS date_dernier_prel,
33 |     1 AS nb_parametres,
34 |     CASE
35 |         WHEN
36 |             last_pvl.valtraduite = 0
37 |             OR last_pvl.valtraduite IS NULL
38 |             THEN 'non_quantifie'
39 |         WHEN
40 |             last_pvl.valtraduite > last_pvl.limite_qualite
41 |             THEN 'cvm_sup_0_5'
42 |         WHEN
43 |             last_pvl.valtraduite <= last_pvl.limite_qualite
44 |             THEN 'inf_limites'
45 |         ELSE 'erreur'
46 |     END AS resultat,
47 |     CASE
48 |         WHEN
49 |             last_pvl.valtraduite > 0
50 |             THEN TO_JSON(MAP([last_pvl.cdparametresiseeaux], [last_pvl.valtraduite]))
51 |         ELSE TO_JSON(MAP([], []))
52 |     END AS parametres_detectes
53 | FROM
54 |     last_pvl
55 | WHERE
56 |     last_pvl.row_number = 1
57 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/cvm/int__resultats_cvm_commune_dernier.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | last_pvl AS (
 3 |     SELECT
 4 |         inseecommune,
 5 |         categorie,
 6 |         cdparametresiseeaux,
 7 |         datetimeprel,
 8 |         valtraduite,
 9 |         limite_qualite,
10 |         ROW_NUMBER()
11 |             OVER (
12 |                 PARTITION BY inseecommune, cdparametresiseeaux
13 |                 ORDER BY datetimeprel DESC
14 |             )
15 |             AS row_number
16 |     FROM
17 |         {{ ref('int__resultats_udi_communes') }}
18 |     WHERE
19 |         categorie = 'cvm'
20 |         AND
21 |         -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement
22 |         datetimeprel >= DATE_TRUNC('day', (
23 |             SELECT MAX(sub.datetimeprel)
24 |             FROM {{ ref('int__resultats_udi_communes') }} AS sub
25 |         ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
26 | )
27 | 
28 | SELECT
29 |     last_pvl.inseecommune,
30 |     last_pvl.categorie,
31 |     'dernier_prel' AS periode,
32 |     last_pvl.datetimeprel AS date_dernier_prel,
33 |     1 AS nb_parametres,
34 |     CASE
35 |         WHEN
36 |             last_pvl.valtraduite = 0
37 |             OR last_pvl.valtraduite IS NULL
38 |             THEN 'non_quantifie'
39 |         WHEN
40 |             last_pvl.valtraduite > last_pvl.limite_qualite
41 |             THEN 'cvm_sup_0_5'
42 |         WHEN
43 |             last_pvl.valtraduite <= last_pvl.limite_qualite
44 |             THEN 'inf_limites'
45 |         ELSE 'erreur'
46 |     END AS resultat,
47 |     CASE
48 |         WHEN
49 |             last_pvl.valtraduite > 0
50 |             THEN TO_JSON(MAP([last_pvl.cdparametresiseeaux], [last_pvl.valtraduite]))
51 |         ELSE TO_JSON(MAP([], []))
52 |     END AS parametres_detectes
53 | FROM
54 |     last_pvl
55 | WHERE
56 |     last_pvl.row_number = 1
57 | 


--------------------------------------------------------------------------------
/webapp/components/ui/scroll-area.tsx:
--------------------------------------------------------------------------------
 1 | "use client";
 2 | 
 3 | import * as React from "react";
 4 | import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area";
 5 | 
 6 | import { cn } from "@/lib/utils";
 7 | 
 8 | const ScrollArea = React.forwardRef<
 9 |   React.ElementRef<typeof ScrollAreaPrimitive.Root>,
10 |   React.ComponentPropsWithoutRef<typeof ScrollAreaPrimitive.Root>
11 | >(({ className, children, ...props }, ref) => (
12 |   <ScrollAreaPrimitive.Root
13 |     ref={ref}
14 |     className={cn("relative overflow-hidden", className)}
15 |     {...props}
16 |   >
17 |     <ScrollAreaPrimitive.Viewport className="h-full w-full rounded-[inherit]">
18 |       {children}
19 |     </ScrollAreaPrimitive.Viewport>
20 |     <ScrollBar />
21 |     <ScrollAreaPrimitive.Corner />
22 |   </ScrollAreaPrimitive.Root>
23 | ));
24 | ScrollArea.displayName = ScrollAreaPrimitive.Root.displayName;
25 | 
26 | const ScrollBar = React.forwardRef<
27 |   React.ElementRef<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>,
28 |   React.ComponentPropsWithoutRef<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>
29 | >(({ className, orientation = "vertical", ...props }, ref) => (
30 |   <ScrollAreaPrimitive.ScrollAreaScrollbar
31 |     ref={ref}
32 |     orientation={orientation}
33 |     className={cn(
34 |       "flex touch-none select-none transition-colors",
35 |       orientation === "vertical" &&
36 |         "h-full w-2.5 border-l border-l-transparent p-[1px]",
37 |       orientation === "horizontal" &&
38 |         "h-2.5 flex-col border-t border-t-transparent p-[1px]",
39 |       className,
40 |     )}
41 |     {...props}
42 |   >
43 |     <ScrollAreaPrimitive.ScrollAreaThumb className="relative flex-1 rounded-full bg-border" />
44 |   </ScrollAreaPrimitive.ScrollAreaScrollbar>
45 | ));
46 | ScrollBar.displayName = ScrollAreaPrimitive.ScrollAreaScrollbar.displayName;
47 | 
48 | export { ScrollArea, ScrollBar };
49 | 


--------------------------------------------------------------------------------
/dbt_/tests/test__coverage_20pfas_4pfas_98pct.sql:
--------------------------------------------------------------------------------
 1 | -- Dans le calcul des résultats PFAS derniers prélèvements
 2 | -- (int__resultats_pfas_udi_dernier.sql), on présuppose que la plupart du temps
 3 | -- la somme des 20 PFAS (SPFAS) et la somme des 4 PFAS (PFOA, PFOS, PFNA,
 4 | -- PFHXS) sont bien présentes. Ce test permet de vérifier que pour au moins 98%
 5 | -- des couples cdreseau/referenceprel c'est le cas.
 6 | 
 7 | WITH yearly_pfas_results AS (
 8 |     SELECT
 9 |         cdreseau,
10 |         referenceprel,
11 |         -- Vérifie si la somme des 20 PFAS est disponible
12 |         COUNT(
13 |             DISTINCT CASE
14 |                 WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux
15 |             END
16 |         ) AS has_sum_20_pfas,
17 |         -- Vérifie si tous les 4 PFAS spécifiques sont disponibles
18 |         COUNT(
19 |             DISTINCT CASE
20 |                 WHEN
21 |                     cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')
22 |                     THEN cdparametresiseeaux
23 |             END
24 |         ) AS count_4_pfas
25 |     FROM {{ ref('int__resultats_udi_communes') }}
26 |     WHERE
27 |         categorie = 'pfas'
28 |         AND CURRENT_DATE - datetimeprel < INTERVAL 1 YEAR
29 |     GROUP BY cdreseau, referenceprel
30 | )
31 | 
32 | SELECT
33 | 
34 |     COUNT(*) AS total_aggregations,
35 |     -- Pourcentage d'agrégations avec la somme des 20 PFAS présente
36 |     ROUND(
37 |         (
38 |             SUM(CASE WHEN has_sum_20_pfas = 1 THEN 1 ELSE 0 END)
39 |             * 100.0
40 |             / COUNT(*)
41 |         ),
42 |         2
43 |     ) AS pct_with_sum_20_pfas,
44 |     -- Pourcentage d'agrégations avec tous les 4 PFAS spécifiques présents
45 |     ROUND(
46 |         (SUM(CASE WHEN count_4_pfas = 4 THEN 1 ELSE 0 END) * 100.0 / COUNT(*)),
47 |         2
48 |     ) AS pct_with_all_4_pfas
49 | FROM yearly_pfas_results
50 | 
51 | HAVING pct_with_sum_20_pfas < 98 OR pct_with_all_4_pfas < 98
52 | 


--------------------------------------------------------------------------------
/webapp/next.config.ts:
--------------------------------------------------------------------------------
 1 | import type { NextConfig } from "next";
 2 | 
 3 | const nextConfig: NextConfig = {
 4 |   serverExternalPackages: ["@duckdb/node-api"],
 5 |   eslint: {
 6 |     // Warning: This allows production builds to successfully complete even if
 7 |     // your project has ESLint errors.
 8 |     ignoreDuringBuilds: true,
 9 |   },
10 |   output: "standalone",
11 |   headers: async () => {
12 |     return [
13 |       {
14 |         source: "/pmtiles/:path*.pmtiles",
15 |         headers: [
16 |           {
17 |             key: "Cache-Control",
18 |             value: "public, max-age=120, s-maxage=60",
19 |           },
20 |           {
21 |             key: "Accept-Ranges",
22 |             value: "bytes",
23 |           },
24 |         ],
25 |       },
26 |       // {
27 |       //   source: "/_next/static/:path*",
28 |       //   headers: [
29 |       //     {
30 |       //       key: "Cache-Control",
31 |       //       value: "public, max-age=31536000, immutable",
32 |       //     },
33 |       //   ],
34 |       // },
35 |       {
36 |         source: "/embed",
37 |         headers: [
38 |           {
39 |             key: "Cache-Control",
40 |             value: "public, max-age=120, s-maxage=60",
41 |           },
42 |           {
43 |             key: "Content-Security-Policy",
44 |             value:
45 |               "frame-ancestors 'self' https://dansmoneau.fr https://*.dansmoneau.fr",
46 |           },
47 |         ],
48 |       },
49 |       {
50 |         source: "/embed-external",
51 |         headers: [
52 |           {
53 |             key: "Cache-Control",
54 |             value: "public, max-age=120, s-maxage=60",
55 |           },
56 |         ],
57 |       },
58 |     ];
59 |   },
60 |   rewrites: async () => {
61 |     return [
62 |       {
63 |         source: "/s3/:path*",
64 |         destination: "https://s3.fr-par.scw.cloud/pollution-eau-s3/:path*",
65 |       },
66 |     ];
67 |   },
68 | };
69 | 
70 | export default nextConfig;
71 | 


--------------------------------------------------------------------------------
/pipelines/notebooks/test_geojson_from_db.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "from pipelines.tasks.client.core.duckdb_client import DuckDBClient\n",
10 |     "\n",
11 |     "duckdb_client = DuckDBClient()"
12 |    ]
13 |   },
14 |   {
15 |    "cell_type": "code",
16 |    "execution_count": null,
17 |    "metadata": {},
18 |    "outputs": [],
19 |    "source": [
20 |     "from pipelines.tasks.client.geojson_processor import GeoJSONProcessor\n",
21 |     "\n",
22 |     "geojson_processor = GeoJSONProcessor(\"communes\", duckdb_client)\n",
23 |     "geojson_communes = geojson_processor.generate_geojson()"
24 |    ]
25 |   },
26 |   {
27 |    "cell_type": "code",
28 |    "execution_count": null,
29 |    "metadata": {},
30 |    "outputs": [],
31 |    "source": [
32 |     "from pipelines.tasks.client.geojson_processor import GeoJSONProcessor\n",
33 |     "\n",
34 |     "geojson_processor = GeoJSONProcessor(\"udi\", duckdb_client)\n",
35 |     "geojson_udi = geojson_processor.generate_geojson()"
36 |    ]
37 |   },
38 |   {
39 |    "cell_type": "code",
40 |    "execution_count": null,
41 |    "metadata": {},
42 |    "outputs": [],
43 |    "source": [
44 |     "geojson_udi"
45 |    ]
46 |   },
47 |   {
48 |    "cell_type": "code",
49 |    "execution_count": null,
50 |    "metadata": {},
51 |    "outputs": [],
52 |    "source": []
53 |   }
54 |  ],
55 |  "metadata": {
56 |   "kernelspec": {
57 |    "display_name": ".venv",
58 |    "language": "python",
59 |    "name": "python3"
60 |   },
61 |   "language_info": {
62 |    "codemirror_mode": {
63 |     "name": "ipython",
64 |     "version": 3
65 |    },
66 |    "file_extension": ".py",
67 |    "mimetype": "text/x-python",
68 |    "name": "python",
69 |    "nbconvert_exporter": "python",
70 |    "pygments_lexer": "ipython3",
71 |    "version": "3.12.7"
72 |   }
73 |  },
74 |  "nbformat": 4,
75 |  "nbformat_minor": 2
76 | }
77 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/sub_indus/int__resultats_sub_indus_udi_annuel.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | prels AS (
 3 |     -- Certains prélèvements ont plusieurs analyses pour la même substance
 4 |     -- Le SELECT DISTINCT ne dédoublonne pas ces cas là
 5 |     -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête
 6 |     SELECT DISTINCT
 7 |         de_partition AS annee,
 8 |         cdreseau,
 9 |         cdparametresiseeaux,
10 |         valeur_sanitaire_1,
11 |         referenceprel,
12 |         datetimeprel,
13 |         valtraduite
14 |     FROM
15 |         {{ ref('int__resultats_udi_communes') }}
16 |     WHERE
17 |         cdparametresiseeaux IN (
18 |             -- Les résultats pour le 1,4 dioxane sont ignorés pour l'instant
19 |             --'14DAN',
20 |             'PCLAT'
21 |         )
22 | )
23 | 
24 | SELECT
25 |     cdreseau,
26 |     annee,
27 |     CASE
28 |         WHEN cdparametresiseeaux = '14DAN' THEN 'sub_indus_14dioxane'
29 |         WHEN cdparametresiseeaux = 'PCLAT' THEN 'sub_indus_perchlorate'
30 |     END AS categorie,
31 |     'bilan_annuel_' || annee AS periode,
32 |     count(
33 |         DISTINCT
34 |         CASE
35 |             WHEN
36 |                 valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
37 |                 THEN referenceprel
38 |         END
39 |     ) AS nb_depassements,
40 |     count(DISTINCT referenceprel) AS nb_prelevements,
41 |     (
42 |         count(
43 |             DISTINCT
44 |             CASE
45 |                 WHEN
46 |                     valtraduite IS NOT NULL
47 |                     AND valtraduite > valeur_sanitaire_1
48 |                     THEN referenceprel
49 |             END
50 |         )::float
51 |         /
52 |         count(DISTINCT referenceprel)::float
53 |     ) AS ratio_limite_sanitaire,
54 |     json_object(
55 |         max(cdparametresiseeaux), max(valtraduite)
56 |     ) AS parametres_detectes,
57 |     date_trunc('day', max(datetimeprel)) AS date_dernier_prel
58 | 
59 | FROM prels
60 | 
61 | GROUP BY cdreseau, annee, categorie
62 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/nitrate/int__resultats_nitrate_udi_dernier.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | last_pvl AS (
 3 |     SELECT
 4 |         cdreseau,
 5 |         categorie,
 6 |         cdparametresiseeaux,
 7 |         valeur_sanitaire_1,
 8 |         datetimeprel,
 9 |         valtraduite,
10 |         ROW_NUMBER()
11 |             OVER (
12 |                 PARTITION BY cdreseau, cdparametresiseeaux
13 |                 ORDER BY datetimeprel DESC
14 |             )
15 |             AS row_number
16 |     FROM
17 |         {{ ref('int__resultats_udi_communes') }}
18 |     WHERE
19 |         categorie = 'nitrate'
20 |         AND
21 |         cdparametresiseeaux = 'NO3'
22 |         AND
23 |         -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement
24 |         datetimeprel >= DATE_TRUNC('day', (
25 |             SELECT MAX(sub.datetimeprel)
26 |             FROM {{ ref('int__resultats_udi_communes') }} AS sub
27 |         ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
28 |         AND
29 |         -- Cf cas cdreseau IN( '034005906') , referenceprel= 03400327764
30 |         valtraduite IS NOT NULL
31 | )
32 | 
33 | SELECT
34 |     cdreseau,
35 |     'dernier_prel' AS periode,
36 |     datetimeprel AS date_dernier_prel,
37 |     1 AS nb_parametres,
38 |     categorie,
39 |     CASE
40 |         WHEN
41 |             valtraduite > valeur_sanitaire_1
42 |             THEN 'sup_valeur_sanitaire'
43 |         WHEN
44 |             valtraduite <= 10
45 |             THEN 'non_quantifie'
46 |         WHEN
47 |             valtraduite <= 25
48 |             THEN 'no3_inf_25'
49 |         WHEN
50 |             valtraduite > 25 AND valtraduite <= 40
51 |             THEN 'no3_inf_40'
52 |         WHEN
53 |             valtraduite > 40 AND valtraduite <= valeur_sanitaire_1
54 |             THEN 'inf_valeur_sanitaire'
55 |         ELSE 'error'
56 |     END AS resultat,
57 |     JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite)
58 |         AS parametres_detectes
59 | FROM
60 |     last_pvl
61 | WHERE
62 |     row_number = 1
63 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/sub_indus/int__resultats_sub_indus_commune_annuel.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | prels AS (
 3 |     -- Certains prélèvements ont plusieurs analyses pour la même substance
 4 |     -- Le SELECT DISTINCT ne dédoublonne pas ces cas là
 5 |     -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête
 6 |     SELECT DISTINCT
 7 |         de_partition AS annee,
 8 |         inseecommune,
 9 |         cdparametresiseeaux,
10 |         valeur_sanitaire_1,
11 |         referenceprel,
12 |         datetimeprel,
13 |         valtraduite
14 |     FROM
15 |         {{ ref('int__resultats_udi_communes') }}
16 |     WHERE
17 |         cdparametresiseeaux IN (
18 |             -- Les résultats pour le 1,4 dioxane sont ignorés pour l'instant
19 |             --'14DAN',
20 |             'PCLAT'
21 |         )
22 | )
23 | 
24 | SELECT
25 |     inseecommune,
26 |     annee,
27 |     CASE
28 |         WHEN cdparametresiseeaux = '14DAN' THEN 'sub_indus_14dioxane'
29 |         WHEN cdparametresiseeaux = 'PCLAT' THEN 'sub_indus_perchlorate'
30 |     END AS categorie,
31 |     'bilan_annuel_' || annee AS periode,
32 |     count(
33 |         DISTINCT
34 |         CASE
35 |             WHEN
36 |                 valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1
37 |                 THEN referenceprel
38 |         END
39 |     ) AS nb_depassements,
40 |     count(DISTINCT referenceprel) AS nb_prelevements,
41 |     (
42 |         count(
43 |             DISTINCT
44 |             CASE
45 |                 WHEN
46 |                     valtraduite IS NOT NULL
47 |                     AND valtraduite > valeur_sanitaire_1
48 |                     THEN referenceprel
49 |             END
50 |         )::float
51 |         /
52 |         count(DISTINCT referenceprel)::float
53 |     ) AS ratio_limite_sanitaire,
54 |     json_object(
55 |         max(cdparametresiseeaux), max(valtraduite)
56 |     ) AS parametres_detectes,
57 |     date_trunc('day', max(datetimeprel)) AS date_dernier_prel
58 | 
59 | FROM prels
60 | 
61 | GROUP BY inseecommune, annee, categorie
62 | 


--------------------------------------------------------------------------------
/webapp/app/layout.tsx:
--------------------------------------------------------------------------------
 1 | import type { Metadata, Viewport } from "next";
 2 | import "./globals.css";
 3 | import Script from "next/script";
 4 | 
 5 | export const metadata: Metadata = {
 6 |   title: "Pollution de l'Eau Potable en France",
 7 |   description: "",
 8 |   robots: {
 9 |     index: false,
10 |     follow: false,
11 |   },
12 |   icons: {
13 |     icon: "/images/dfg.png",
14 |     shortcut: "/images/dfg.png",
15 |     apple: "/images/dfg.png",
16 |   },
17 | };
18 | 
19 | export const viewport: Viewport = {
20 |   width: "device-width",
21 |   initialScale: 1,
22 |   maximumScale: 1,
23 |   userScalable: false,
24 | };
25 | 
26 | export default function RootLayout({
27 |   children,
28 | }: Readonly<{
29 |   children: React.ReactNode;
30 | }>) {
31 |   return (
32 |     <html lang="en">
33 |       <body>
34 |         {/* Hard fix to prevent iframe scrolling: When the PollutionMapSearchBox Popover appears, it causes unwanted scrolling in the parent window containing the iframe. Despite attempts to find a more elegant solution, this override is the only reliable way to prevent this behavior when the app is embedded in an iframe. */}
35 |         <Script id="prevent-iframe-scroll" strategy="beforeInteractive">
36 |           {`
37 |             if (window.self !== window.top) {
38 |               // We're in an iframe
39 |               const originalScrollTo = window.scrollTo;
40 |               const originalScrollBy = window.scrollBy;
41 |               const originalScroll = window.scroll;
42 | 
43 |               // Override scroll functions to do nothing
44 |               window.scrollTo = function() {};
45 |               window.scrollBy = function() {};
46 |               window.scroll = function() {};
47 | 
48 |               // Prevent Element.scrollIntoView
49 |               if (Element.prototype.scrollIntoView) {
50 |                 Element.prototype.scrollIntoView = function() {};
51 |               }
52 |             }
53 |           `}
54 |         </Script>
55 |         {children}
56 |       </body>
57 |     </html>
58 |   );
59 | }
60 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/nitrate/int__resultats_nitrate_commune_dernier.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | last_pvl AS (
 3 |     SELECT
 4 |         inseecommune,
 5 |         categorie,
 6 |         cdparametresiseeaux,
 7 |         valeur_sanitaire_1,
 8 |         datetimeprel,
 9 |         valtraduite,
10 |         ROW_NUMBER()
11 |             OVER (
12 |                 PARTITION BY inseecommune, cdparametresiseeaux
13 |                 ORDER BY datetimeprel DESC
14 |             )
15 |             AS row_number
16 |     FROM
17 |         {{ ref('int__resultats_udi_communes') }}
18 |     WHERE
19 |         categorie = 'nitrate'
20 |         AND
21 |         cdparametresiseeaux = 'NO3'
22 |         AND
23 |         -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement
24 |         datetimeprel >= DATE_TRUNC('day', (
25 |             SELECT MAX(sub.datetimeprel)
26 |             FROM {{ ref('int__resultats_udi_communes') }} AS sub
27 |         ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
28 |         AND
29 |         -- Cf cas cdreseau IN( '034005906') , referenceprel= 03400327764
30 |         valtraduite IS NOT NULL
31 | )
32 | 
33 | SELECT
34 |     inseecommune,
35 |     'dernier_prel' AS periode,
36 |     datetimeprel AS date_dernier_prel,
37 |     1 AS nb_parametres,
38 |     categorie,
39 |     CASE
40 |         WHEN
41 |             valtraduite > valeur_sanitaire_1
42 |             THEN 'sup_valeur_sanitaire'
43 |         WHEN
44 |             valtraduite <= 10
45 |             THEN 'non_quantifie'
46 |         WHEN
47 |             valtraduite <= 25
48 |             THEN 'no3_inf_25'
49 |         WHEN
50 |             valtraduite > 25 AND valtraduite <= 40
51 |             THEN 'no3_inf_40'
52 |         WHEN
53 |             valtraduite > 40 AND valtraduite <= valeur_sanitaire_1
54 |             THEN 'inf_valeur_sanitaire'
55 |         ELSE 'error'
56 |     END AS resultat,
57 |     JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite)
58 |         AS parametres_detectes
59 | FROM
60 |     last_pvl
61 | WHERE
62 |     row_number = 1
63 | 


--------------------------------------------------------------------------------
/webapp/components/EmbedBanner.tsx:
--------------------------------------------------------------------------------
 1 | "use client";
 2 | 
 3 | import { ExternalLink, InfoIcon } from "lucide-react";
 4 | 
 5 | export default function EmbedBanner() {
 6 |   return (
 7 |     <div
 8 |       className="text-white p-4 z-50 shadow-lg"
 9 |       style={{ background: "#0b534b" }}
10 |     >
11 |       <div className="flex flex-col md:flex-row md:items-center md:justify-between gap-3 max-w-full">
12 |         <div className="flex items-center gap-2 flex-1">
13 |           <InfoIcon className="text-white flex-shrink-0" size={24} />
14 |           <div>
15 |             <div className="font-bold text-base md:text-lg">
16 |               Générations Futures et Data For Good ont créé Dans Mon Eau.
17 |             </div>
18 |             <div className="text-sm md:text-base opacity-90 mt-1">
19 |               Un outil pour connaître la qualité de votre eau du robinet.
20 |             </div>
21 |           </div>
22 |         </div>
23 |         <div className="flex items-center justify-center md:justify-end">
24 |           <a
25 |             href="https://dansmoneau.fr"
26 |             target="_blank"
27 |             rel="noopener"
28 |             className="inline-flex items-center gap-1.5 md:gap-2 bg-green-600 text-white px-3 py-1.5 md:px-4 md:py-2 rounded-lg text-sm md:text-base font-bold hover:bg-green-700 transition-all shadow-md border-2 border-green-600 pulse-animation whitespace-nowrap"
29 |           >
30 |             <ExternalLink size={14} className="md:w-4 md:h-4" />
31 |             <span>Découvrir dansmoneau.fr</span>
32 |           </a>
33 |         </div>
34 |       </div>
35 |       <style jsx>{`
36 |         .pulse-animation {
37 |           animation: pulse 2s infinite;
38 |         }
39 |         @keyframes pulse {
40 |           0% {
41 |             box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.7);
42 |           }
43 |           70% {
44 |             box-shadow: 0 0 0 10px rgba(34, 197, 94, 0);
45 |           }
46 |           100% {
47 |             box-shadow: 0 0 0 0 rgba(34, 197, 94, 0);
48 |           }
49 |         }
50 |       `}</style>
51 |     </div>
52 |   );
53 | }
54 | 


--------------------------------------------------------------------------------
/dbt_/tests/test_sub_active_results.sql:
--------------------------------------------------------------------------------
 1 | -- dernier udi
 2 | SELECT
 3 |     'dernier_prel' AS periode,
 4 |     cdreseau,
 5 |     resultat,
 6 |     0 AS ratio_limite_qualite,
 7 |     0 AS nb_sup_valeur_sanitaire
 8 | FROM
 9 |     {{ ref('int__resultats_sub_active_udi_dernier') }}
10 | WHERE
11 |     (
12 |         cdreseau = '051000769'
13 |         AND date_dernier_prel = TIMESTAMP '2025-03-31 13:58:00'
14 |         AND resultat != 'non_quantifie'
15 |     )
16 |     OR
17 |     (
18 |         cdreseau = '030000509'
19 |         AND date_dernier_prel = TIMESTAMP '2025-03-31 11:56:00'
20 |         AND resultat != 'inf_limite_qualite'
21 |     )
22 |     OR
23 |     (
24 |         cdreseau = '029000947'
25 |         AND date_dernier_prel = TIMESTAMP '2025-03-31 11:00:00'
26 |         AND resultat != 'sup_limite_qualite'
27 |     )
28 |     OR
29 |     (
30 |         cdreseau = '060001302'
31 |         AND date_dernier_prel = TIMESTAMP '2024-12-19 08:29:00'
32 |         AND resultat != 'sup_valeur_sanitaire'
33 |     )
34 | -- annuel udi
35 | UNION ALL
36 | SELECT
37 |     'bilan_annuel' AS periode,
38 |     cdreseau,
39 |     '' AS resultat,
40 |     ratio_limite_qualite,
41 |     nb_sup_valeur_sanitaire
42 | FROM
43 |     {{ ref('int__resultats_sub_active_udi_annuel') }}
44 | WHERE
45 |     (
46 |         cdreseau = '051000769'
47 |         AND annee = 2024
48 |         AND (
49 |             nb_prelevements != 6
50 |             OR nb_depassements != 3
51 |             OR nb_sup_valeur_sanitaire != 0
52 |             OR ratio_limite_qualite != 0.5
53 |         )
54 |     )
55 |     OR
56 |     (
57 |         cdreseau = '030000509'
58 |         AND annee = 2024
59 |         AND (
60 |             nb_prelevements != 7
61 |             OR nb_depassements != 0
62 |             OR nb_sup_valeur_sanitaire != 0
63 |             OR ratio_limite_qualite != 0
64 |         )
65 |     )
66 |     OR
67 |     (
68 |         cdreseau = '060001302'
69 |         AND annee = 2024
70 |         AND (
71 |             nb_prelevements != 4
72 |             OR nb_depassements != 1
73 |             OR nb_sup_valeur_sanitaire != 1
74 |             OR ratio_limite_qualite != 0.25
75 |         )
76 |     )
77 | 


--------------------------------------------------------------------------------
/webapp/components/ui/button.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react";
 2 | import { Slot } from "@radix-ui/react-slot";
 3 | import { cva, type VariantProps } from "class-variance-authority";
 4 | 
 5 | import { cn } from "@/lib/utils";
 6 | 
 7 | const buttonVariants = cva(
 8 |   "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0",
 9 |   {
10 |     variants: {
11 |       variant: {
12 |         default:
13 |           "bg-primary text-primary-foreground shadow hover:bg-primary/90",
14 |         destructive:
15 |           "bg-destructive text-destructive-foreground shadow-sm hover:bg-destructive/90",
16 |         outline:
17 |           "border border-input bg-background shadow-sm hover:bg-accent hover:text-accent-foreground",
18 |         secondary:
19 |           "bg-secondary text-secondary-foreground shadow-sm hover:bg-secondary/80",
20 |         ghost: "hover:bg-accent hover:text-accent-foreground",
21 |         link: "text-primary underline-offset-4 hover:underline",
22 |       },
23 |       size: {
24 |         default: "h-9 px-4 py-2",
25 |         sm: "h-8 rounded-md px-3 text-xs",
26 |         lg: "h-10 rounded-md px-8",
27 |         icon: "h-9 w-9",
28 |       },
29 |     },
30 |     defaultVariants: {
31 |       variant: "default",
32 |       size: "default",
33 |     },
34 |   },
35 | );
36 | 
37 | export interface ButtonProps
38 |   extends React.ButtonHTMLAttributes<HTMLButtonElement>,
39 |     VariantProps<typeof buttonVariants> {
40 |   asChild?: boolean;
41 | }
42 | 
43 | const Button = React.forwardRef<HTMLButtonElement, ButtonProps>(
44 |   ({ className, variant, size, asChild = false, ...props }, ref) => {
45 |     const Comp = asChild ? Slot : "button";
46 |     return (
47 |       <Comp
48 |         className={cn(buttonVariants({ variant, size, className }))}
49 |         ref={ref}
50 |         {...props}
51 |       />
52 |     );
53 |   },
54 | );
55 | Button.displayName = "Button";
56 | 
57 | export { Button, buttonVariants };
58 | 


--------------------------------------------------------------------------------
/dbt_/tests/test_tous_results.sql:
--------------------------------------------------------------------------------
 1 | -- dernier relevé
 2 | SELECT
 3 |     'dernier_prel' AS periode,
 4 |     cdreseau,
 5 |     categorie,
 6 |     resultat,
 7 |     null AS ratio,
 8 |     null AS nb_prelevements,
 9 |     null AS nb_sup_valeur_sanitaire
10 | FROM
11 |     {{ ref('int__resultats_tous_udi_dernier') }}
12 | WHERE
13 |     (
14 |         cdreseau = '001000598'
15 |         AND date_dernier_prel = '2025-03-26 10:59:00'
16 |         AND resultat != 'sup_limite_qualite'
17 |     )
18 |     OR
19 |     (
20 |         cdreseau = '049000506'
21 |         AND date_dernier_prel = '2025-07-08 08:30:00'
22 |         AND resultat != 'quantifie'
23 |     )
24 |     OR
25 |     (
26 |         cdreseau = '033000400'
27 |         AND date_dernier_prel = '2025-07-17 09:50:00'
28 |         AND resultat != 'non_quantifie'
29 |     )
30 |     OR
31 |     (
32 |         cdreseau = '088002246'
33 |         AND date_dernier_prel = '2025-04-22 08:11:00'
34 |         AND resultat != 'sup_limite_sanitaire'
35 |     )
36 | UNION ALL
37 | -- annuel
38 | SELECT
39 |     periode,
40 |     cdreseau,
41 |     categorie,
42 |     null AS resultat,
43 |     ratio,
44 |     nb_prelevements,
45 |     nb_sup_valeur_sanitaire
46 | FROM
47 |     {{ ref('int__resultats_tous_udi_annuel') }}
48 | WHERE
49 |     (
50 |         cdreseau = '054000780'
51 |         AND periode = 'bilan_annuel_2024'
52 |         AND (
53 |             nb_prelevements != 7
54 |             -- cvm : 1
55 |             -- metaux_lourds_as : 1
56 |             -- nitrate : 5
57 |             -- pesticide : 1
58 |             OR
59 |             ratio != 0
60 |             OR
61 |             nb_sup_valeur_sanitaire != 0
62 |         )
63 |     )
64 |     OR
65 |     (
66 |         cdreseau = '061000423'
67 |         AND periode = 'bilan_annuel_2024'
68 |         AND (
69 |             nb_prelevements != 27
70 |             -- cvm: 1
71 |             -- metaux_lourds_as: 1
72 |             -- nitrate: 14
73 |             -- pesticide: 12
74 |             OR
75 |             ratio <= 0.4
76 |             OR
77 |             ratio >= 0.5
78 |             -- ratio = 12/28 ~= 0.42
79 |             OR
80 |             nb_sup_valeur_sanitaire != 0
81 |         )
82 |     )
83 | 


--------------------------------------------------------------------------------
/webapp/components/ui/card.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react";
 2 | 
 3 | import { cn } from "@/lib/utils";
 4 | 
 5 | const Card = React.forwardRef<
 6 |   HTMLDivElement,
 7 |   React.HTMLAttributes<HTMLDivElement>
 8 | >(({ className, ...props }, ref) => (
 9 |   <div
10 |     ref={ref}
11 |     className={cn(
12 |       "rounded-xl border bg-card text-card-foreground shadow",
13 |       className,
14 |     )}
15 |     {...props}
16 |   />
17 | ));
18 | Card.displayName = "Card";
19 | 
20 | const CardHeader = React.forwardRef<
21 |   HTMLDivElement,
22 |   React.HTMLAttributes<HTMLDivElement>
23 | >(({ className, ...props }, ref) => (
24 |   <div
25 |     ref={ref}
26 |     className={cn("flex flex-col space-y-1.5 p-6", className)}
27 |     {...props}
28 |   />
29 | ));
30 | CardHeader.displayName = "CardHeader";
31 | 
32 | const CardTitle = React.forwardRef<
33 |   HTMLDivElement,
34 |   React.HTMLAttributes<HTMLDivElement>
35 | >(({ className, ...props }, ref) => (
36 |   <div
37 |     ref={ref}
38 |     className={cn("font-semibold leading-none tracking-tight", className)}
39 |     {...props}
40 |   />
41 | ));
42 | CardTitle.displayName = "CardTitle";
43 | 
44 | const CardDescription = React.forwardRef<
45 |   HTMLDivElement,
46 |   React.HTMLAttributes<HTMLDivElement>
47 | >(({ className, ...props }, ref) => (
48 |   <div
49 |     ref={ref}
50 |     className={cn("text-sm text-muted-foreground", className)}
51 |     {...props}
52 |   />
53 | ));
54 | CardDescription.displayName = "CardDescription";
55 | 
56 | const CardContent = React.forwardRef<
57 |   HTMLDivElement,
58 |   React.HTMLAttributes<HTMLDivElement>
59 | >(({ className, ...props }, ref) => (
60 |   <div ref={ref} className={cn("p-6 pt-0", className)} {...props} />
61 | ));
62 | CardContent.displayName = "CardContent";
63 | 
64 | const CardFooter = React.forwardRef<
65 |   HTMLDivElement,
66 |   React.HTMLAttributes<HTMLDivElement>
67 | >(({ className, ...props }, ref) => (
68 |   <div
69 |     ref={ref}
70 |     className={cn("flex items-center p-6 pt-0", className)}
71 |     {...props}
72 |   />
73 | ));
74 | CardFooter.displayName = "CardFooter";
75 | 
76 | export {
77 |   Card,
78 |   CardHeader,
79 |   CardFooter,
80 |   CardTitle,
81 |   CardDescription,
82 |   CardContent,
83 | };
84 | 


--------------------------------------------------------------------------------
/webapp/app/config.ts:
--------------------------------------------------------------------------------
 1 | import layers from "protomaps-themes-base";
 2 | 
 3 | export const MAPLIBRE_MAP = {
 4 |   protomaps: {
 5 |     // https://protomaps.com/api
 6 |     api_key: process.env.NEXT_PUBLIC_PROTOMAPS_API_KEY || "",
 7 |     maxzoom: 15,
 8 |     theme: "white", // unsure between "white" and "light"
 9 |     language: "fr",
10 |   },
11 |   initialViewState: {
12 |     longitude: 0.882755215151974,
13 |     latitude: 46.489410422633256,
14 |     zoom: 5.2,
15 |   },
16 |   countryBorderWidth: 2,
17 |   countryBorderColor: "#bdb8b8",
18 | };
19 | 
20 | // Default map style without layers (will be added dynamically)
21 | export const DEFAULT_MAP_STYLE: maplibregl.StyleSpecification = {
22 |   version: 8,
23 |   glyphs:
24 |     "https://protomaps.github.io/basemaps-assets/fonts/{fontstack}/{range}.pbf",
25 |   sprite: "https://protomaps.github.io/basemaps-assets/sprites/v4/light",
26 |   sources: {
27 |     protomaps: {
28 |       type: "vector",
29 |       maxzoom: MAPLIBRE_MAP.protomaps.maxzoom,
30 |       url: `https://api.protomaps.com/tiles/v4.json?key=${MAPLIBRE_MAP.protomaps.api_key}`,
31 |       attribution:
32 |         '<a href="https://osm.org/copyright" class="text-xs">© OpenStreetMap</a>',
33 |     },
34 |     communes: {
35 |       type: "vector",
36 |       url: "pmtiles:///pmtiles/commune_data.pmtiles",
37 |     },
38 |     udis: {
39 |       type: "vector",
40 |       url: "pmtiles:///pmtiles/udi_data.pmtiles",
41 |     },
42 |   },
43 |   layers: [], // layers will be added dynamically in the Map component
44 | } satisfies maplibregl.StyleSpecification;
45 | 
46 | // Helper function to get the default base layers
47 | export const getDefaultLayers = () => {
48 |   return [
49 |     ...layers(
50 |       "protomaps",
51 |       MAPLIBRE_MAP.protomaps.theme,
52 |       MAPLIBRE_MAP.protomaps.language,
53 |     ).filter((layer) => !["boundaries_country"].includes(layer.id)),
54 |     {
55 |       id: "boundaries_country",
56 |       type: "line",
57 |       source: "protomaps",
58 |       "source-layer": "boundaries",
59 |       filter: ["<=", "kind_detail", 2],
60 |       paint: {
61 |         "line-color": MAPLIBRE_MAP.countryBorderColor,
62 |         "line-width": MAPLIBRE_MAP.countryBorderWidth,
63 |       },
64 |     },
65 |   ];
66 | };
67 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/metaux_lourds/int__resultats_metaux_lourds_udi_annuel.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | metaux_lourds_prels AS (
 3 |     -- Certains prélèvements ont plusieurs analyses pour la même substance
 4 |     -- Le SELECT DISTINCT ne dédoublonne pas ces cas là
 5 |     -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête
 6 |     SELECT DISTINCT
 7 |         de_partition AS annee,
 8 |         cdreseau,
 9 |         cdparametresiseeaux,
10 |         valeur_sanitaire_1,
11 |         limite_qualite,
12 |         referenceprel,
13 |         datetimeprel,
14 |         valtraduite
15 |     FROM
16 |         {{ ref('int__resultats_udi_communes') }}
17 |     WHERE
18 |         cdparametresiseeaux IN ('PB', 'AS')
19 | )
20 | 
21 | SELECT
22 |     cdreseau,
23 |     annee,
24 |     CASE
25 |         WHEN cdparametresiseeaux = 'PB' THEN 'metaux_lourds_pb'
26 |         WHEN cdparametresiseeaux = 'AS' THEN 'metaux_lourds_as'
27 |     END AS categorie,
28 |     'bilan_annuel_' || annee AS periode,
29 |     count(
30 |         DISTINCT
31 |         CASE
32 |             WHEN
33 |                 cdparametresiseeaux = 'PB'
34 |                 AND valtraduite IS NOT NULL AND valtraduite >= limite_qualite
35 |                 THEN referenceprel
36 |             WHEN
37 |                 cdparametresiseeaux = 'AS'
38 |                 AND valtraduite IS NOT NULL
39 |                 AND valtraduite >= valeur_sanitaire_1
40 |                 THEN referenceprel
41 |         END
42 |     ) AS nb_depassements,
43 |     count(DISTINCT referenceprel) AS nb_prelevements,
44 |     (
45 |         count(
46 |             DISTINCT
47 |             CASE
48 |                 WHEN
49 |                     cdparametresiseeaux = 'PB'
50 |                     AND valtraduite IS NOT NULL
51 |                     AND valtraduite >= limite_qualite
52 |                     THEN referenceprel
53 |                 WHEN
54 |                     cdparametresiseeaux = 'AS'
55 |                     AND valtraduite IS NOT NULL
56 |                     AND valtraduite >= valeur_sanitaire_1
57 |                     THEN referenceprel
58 |             END
59 |         )::float
60 |         /
61 |         count(DISTINCT referenceprel)::float
62 |     ) AS ratio
63 | 
64 | FROM metaux_lourds_prels
65 | 
66 | GROUP BY cdreseau, annee, categorie
67 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/metaux_lourds/int__resultats_metaux_lourds_commune_annuel.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | metaux_lourds_prels AS (
 3 |     -- Certains prélèvements ont plusieurs analyses pour la même substance
 4 |     -- Le SELECT DISTINCT ne dédoublonne pas ces cas là
 5 |     -- Donc il n'y a pas d'unicité sur referenceprel dans cetre requête
 6 |     SELECT DISTINCT
 7 |         de_partition AS annee,
 8 |         inseecommune,
 9 |         cdparametresiseeaux,
10 |         valeur_sanitaire_1,
11 |         limite_qualite,
12 |         referenceprel,
13 |         datetimeprel,
14 |         valtraduite
15 |     FROM
16 |         {{ ref('int__resultats_udi_communes') }}
17 |     WHERE
18 |         cdparametresiseeaux IN ('PB', 'AS')
19 | )
20 | 
21 | SELECT
22 |     inseecommune,
23 |     annee,
24 |     CASE
25 |         WHEN cdparametresiseeaux = 'PB' THEN 'metaux_lourds_pb'
26 |         WHEN cdparametresiseeaux = 'AS' THEN 'metaux_lourds_as'
27 |     END AS categorie,
28 |     'bilan_annuel_' || annee AS periode,
29 |     count(
30 |         DISTINCT
31 |         CASE
32 |             WHEN
33 |                 cdparametresiseeaux = 'PB'
34 |                 AND valtraduite IS NOT NULL AND valtraduite >= limite_qualite
35 |                 THEN referenceprel
36 |             WHEN
37 |                 cdparametresiseeaux = 'AS'
38 |                 AND valtraduite IS NOT NULL
39 |                 AND valtraduite >= valeur_sanitaire_1
40 |                 THEN referenceprel
41 |         END
42 |     ) AS nb_depassements,
43 |     count(DISTINCT referenceprel) AS nb_prelevements,
44 |     (
45 |         count(
46 |             DISTINCT
47 |             CASE
48 |                 WHEN
49 |                     cdparametresiseeaux = 'PB'
50 |                     AND valtraduite IS NOT NULL
51 |                     AND valtraduite >= limite_qualite
52 |                     THEN referenceprel
53 |                 WHEN
54 |                     cdparametresiseeaux = 'AS'
55 |                     AND valtraduite IS NOT NULL
56 |                     AND valtraduite >= valeur_sanitaire_1
57 |                     THEN referenceprel
58 |             END
59 |         )::float
60 |         /
61 |         count(DISTINCT referenceprel)::float
62 |     ) AS ratio
63 | 
64 | FROM metaux_lourds_prels
65 | 
66 | GROUP BY inseecommune, annee, categorie
67 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/sub_indus/int__resultats_sub_indus_udi_dernier.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | last_pvl AS (
 3 |     SELECT
 4 |         cdreseau,
 5 |         categorie,
 6 |         cdparametresiseeaux,
 7 |         valeur_sanitaire_1,
 8 |         valeur_sanitaire_2,
 9 |         datetimeprel,
10 |         valtraduite,
11 |         ROW_NUMBER()
12 |             OVER (
13 |                 PARTITION BY cdreseau, cdparametresiseeaux
14 |                 ORDER BY datetimeprel DESC
15 |             )
16 |             AS row_number
17 |     FROM
18 |         {{ ref('int__resultats_udi_communes') }}
19 |     WHERE
20 |         cdparametresiseeaux IN (
21 |             -- Les résultats pour le 1,4 dioxane sont ignorés pour l'instant
22 |             --'14DAN',
23 |             'PCLAT'
24 |         )
25 |         AND
26 |         -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement
27 |         datetimeprel >= DATE_TRUNC('day', (
28 |             SELECT MAX(sub.datetimeprel)
29 |             FROM {{ ref('int__resultats_udi_communes') }} AS sub
30 |         ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
31 | )
32 | 
33 | SELECT
34 |     cdreseau,
35 |     'dernier_prel' AS periode,
36 |     datetimeprel AS date_dernier_prel,
37 |     1 AS nb_parametres,
38 |     CASE
39 |         WHEN cdparametresiseeaux = '14DAN' THEN 'sub_indus_14dioxane'
40 |         WHEN cdparametresiseeaux = 'PCLAT' THEN 'sub_indus_perchlorate'
41 |     END AS categorie,
42 |     CASE
43 |         WHEN
44 |             valtraduite = 0 OR valtraduite IS NULL
45 |             THEN 'non_quantifie'
46 |         WHEN
47 |             valtraduite > valeur_sanitaire_2
48 |             THEN 'sup_valeur_sanitaire_2'
49 |         WHEN
50 |             -- by construction, valeur_sanitaire_2 > valeur_sanitaire_1
51 |             -- so here the result is actually:
52 |             -- valeur_sanitaire_1 < valtraduite <= valeur_sanitaire_2
53 |             valtraduite > valeur_sanitaire_1
54 |             THEN 'sup_valeur_sanitaire'
55 |         WHEN
56 |             valtraduite <= valeur_sanitaire_1
57 |             THEN 'inf_valeur_sanitaire'
58 |         ELSE 'error'
59 |     END AS resultat,
60 |     JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite)
61 |         AS parametres_detectes
62 | FROM
63 |     last_pvl
64 | WHERE
65 |     row_number = 1
66 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/sub_indus/int__resultats_sub_indus_commune_dernier.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | last_pvl AS (
 3 |     SELECT
 4 |         inseecommune,
 5 |         categorie,
 6 |         cdparametresiseeaux,
 7 |         valeur_sanitaire_1,
 8 |         valeur_sanitaire_2,
 9 |         datetimeprel,
10 |         valtraduite,
11 |         ROW_NUMBER()
12 |             OVER (
13 |                 PARTITION BY inseecommune, cdparametresiseeaux
14 |                 ORDER BY datetimeprel DESC
15 |             )
16 |             AS row_number
17 |     FROM
18 |         {{ ref('int__resultats_udi_communes') }}
19 |     WHERE
20 |         cdparametresiseeaux IN (
21 |             -- Les résultats pour le 1,4 dioxane sont ignorés pour l'instant
22 |             --'14DAN',
23 |             'PCLAT'
24 |         )
25 |         AND
26 |         -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement
27 |         datetimeprel >= DATE_TRUNC('day', (
28 |             SELECT MAX(sub.datetimeprel)
29 |             FROM {{ ref('int__resultats_udi_communes') }} AS sub
30 |         ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
31 | )
32 | 
33 | SELECT
34 |     inseecommune,
35 |     'dernier_prel' AS periode,
36 |     datetimeprel AS date_dernier_prel,
37 |     1 AS nb_parametres,
38 |     CASE
39 |         WHEN cdparametresiseeaux = '14DAN' THEN 'sub_indus_14dioxane'
40 |         WHEN cdparametresiseeaux = 'PCLAT' THEN 'sub_indus_perchlorate'
41 |     END AS categorie,
42 |     CASE
43 |         WHEN
44 |             valtraduite = 0 OR valtraduite IS NULL
45 |             THEN 'non_quantifie'
46 |         WHEN
47 |             valtraduite > valeur_sanitaire_2
48 |             THEN 'sup_valeur_sanitaire_2'
49 |         WHEN
50 |             -- by construction, valeur_sanitaire_2 > valeur_sanitaire_1
51 |             -- so here the result is actually:
52 |             -- valeur_sanitaire_1 < valtraduite <= valeur_sanitaire_2
53 |             valtraduite > valeur_sanitaire_1
54 |             THEN 'sup_valeur_sanitaire'
55 |         WHEN
56 |             valtraduite <= valeur_sanitaire_1
57 |             THEN 'inf_valeur_sanitaire'
58 |         ELSE 'error'
59 |     END AS resultat,
60 |     JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite)
61 |         AS parametres_detectes
62 | FROM
63 |     last_pvl
64 | WHERE
65 |     row_number = 1
66 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/int__resultats_udi_communes.sql:
--------------------------------------------------------------------------------
 1 | WITH resultats AS (
 2 |     SELECT
 3 |         referenceprel,
 4 |         cdparametresiseeaux,
 5 |         de_partition,
 6 | 
 7 |         -- Correction de la colonne valtraduite qui contient les valeurs
 8 |         -- textuelles de rqana converties en valeurs numériques.
 9 |         -- Certaines valeurs textuelles telles que "Changement anormal", "OUI",
10 |         -- "PRESENCE" étaient converties en 1.
11 |         -- Ces valeurs sont corrigées en 0 car on veut les considérer comme
12 |         -- des valeurs non quantifiées.
13 |         -- Les valeurs purement numériques restent inchangées.
14 |         -- Exemples après correction :
15 |         --   'Changement anormal' → 0
16 |         --   'OUI' → 0
17 |         --   'PRESENCE' → 0
18 |         --   '1,0' → 1
19 |         --   '>1' → 1
20 |         CASE
21 |             WHEN valtraduite = 1 AND REGEXP_MATCHES(rqana, '[a-zA-Z]') THEN 0
22 |             ELSE valtraduite
23 |         END AS valtraduite
24 | 
25 |         --  On n'utilise plus limitequal des données d'origine
26 |         -- car on se base sur des valeurs fournies par Générations Futures
27 |         --
28 |         -- CAST(
29 |         --     REGEXP_EXTRACT(
30 |         --         REPLACE(limitequal, ',', '.'), '-?\d+(\.\d+)?'
31 |         --     ) AS FLOAT
32 |         -- ) AS limitequal_float,
33 |         -- REGEXP_EXTRACT(limitequal, '[a-zA-Zµg]+/?[a-zA-Z/L]+$') AS unite
34 |     FROM
35 |         {{ ref("stg_edc__resultats") }}
36 | ),
37 | 
38 | resultats_with_ref AS (
39 |     SELECT
40 |         resultats.*,
41 |         r.categorie_1 AS categorie,
42 |         r.categorie_2,
43 |         r.categorie_3,
44 |         r.limite_qualite,
45 |         r.limite_indicative,
46 |         r.valeur_sanitaire_1,
47 |         r.valeur_sanitaire_2
48 |     FROM
49 |         resultats
50 |     INNER JOIN
51 |         {{ ref("int__valeurs_de_reference") }} AS r
52 |         ON
53 |             resultats.cdparametresiseeaux = r.cdparametresiseeaux
54 | )
55 | 
56 | 
57 | SELECT
58 |     resultats_with_ref.*,
59 |     udi.cdreseau,
60 |     udi.inseecommune,
61 |     plv.datetimeprel
62 | FROM
63 |     resultats_with_ref
64 | INNER JOIN
65 |     {{ ref("int__lien_cdreseau_refreneceprel") }} AS plv
66 |     ON
67 |         resultats_with_ref.referenceprel = plv.referenceprel
68 |         AND
69 |         resultats_with_ref.de_partition = plv.de_partition
70 | 
71 | LEFT JOIN
72 |     {{ ref("int__lien_commune_cdreseau") }} AS udi
73 |     ON
74 |         plv.cdreseau = udi.cdreseau
75 |         AND plv.de_partition = udi.de_partition
76 | 


--------------------------------------------------------------------------------
/webapp/app/duckdb-example/page.tsx:
--------------------------------------------------------------------------------
 1 | import { fetchExample } from "../lib/data";
 2 | 
 3 | export default async function Page() {
 4 |   //using api route
 5 |   // try {
 6 | 
 7 |   //   const response = await fetch("http://localhost:3001/api/db-example", { cache: "no-store" })
 8 |   //   const results = response.json();
 9 |   // } catch (err) {
10 |   //    console.error("Error fetching DB status:", err)
11 |   // }
12 | 
13 |   // using directly the data layer
14 |   const reader = await fetchExample();
15 | 
16 |   return (
17 |     <div className="flex items-center justify-center min-h-screen bg-gray-100 p-4">
18 |       <div className="w-full max-w-5xl overflow-x-auto bg-white shadow-lg rounded-2xl p-6">
19 |         <table className="w-full border-collapse border border-gray-300">
20 |           <thead>
21 |             <tr className="bg-gray-200 text-gray-700">
22 |               <th className="border border-gray-300 px-4 py-2">Row</th>
23 |               {Array.from({ length: reader.columnCount }, (_, i) => (
24 |                 <th key={i} className="border border-gray-300 px-4 py-2">
25 |                   {reader.columnName(i)}
26 |                 </th>
27 |               ))}
28 |             </tr>
29 |           </thead>
30 |           <tbody>
31 |             {Object.entries(reader.getRows()).map(([key, value]) => (
32 |               <tr key={key} className="hover:bg-gray-100">
33 |                 <td className="border border-gray-300 px-4 py-2 font-semibold">
34 |                   {key}
35 |                 </td>
36 |                 {Array.from({ length: reader.columnCount }, (_, i) => (
37 |                   <td key={i} className="border border-gray-300 px-4 py-2">
38 |                     {/* Affichage par type - exemple avec des méthodes propre à certains types => pas d'erreur, le typage semble bon */}
39 |                     {/* {value[i] != null &&
40 |                       ((reader.columnType(i).typeId === DuckDBTypeId.VARCHAR &&
41 |                         String(value[i]).slice(0, 3)) ||
42 |                         (reader.columnType(i).typeId === DuckDBTypeId.BIGINT &&
43 |                           (value[i] as bigint) * BigInt(100000)) ||
44 |                         (reader.columnType(i).typeId === DuckDBTypeId.DOUBLE &&
45 |                           (value[i] as number)?.toExponential()))} */}
46 |                     {/* Affichage simple */}
47 |                     {String(value[i])}
48 |                   </td>
49 |                 ))}
50 |               </tr>
51 |             ))}
52 |           </tbody>
53 |         </table>
54 |       </div>
55 |     </div>
56 |   );
57 | }
58 | 


--------------------------------------------------------------------------------
/pipelines/tasks/config/common.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | from pathlib import Path
 4 | from typing import Union
 5 | from zipfile import ZipFile
 6 | 
 7 | import requests
 8 | from tqdm import tqdm
 9 | 
10 | from pipelines.utils.logger import get_logger
11 | 
12 | logger = get_logger(__name__)
13 | 
14 | ROOT_FOLDER = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
15 | DATABASE_FOLDER = os.path.join(ROOT_FOLDER, "database")
16 | DUCKDB_FILE = os.path.join(DATABASE_FOLDER, "data.duckdb")
17 | CACHE_FOLDER = os.path.join(ROOT_FOLDER, "database", "cache")
18 | 
19 | os.makedirs(CACHE_FOLDER, exist_ok=True)
20 | os.makedirs(DATABASE_FOLDER, exist_ok=True)
21 | 
22 | 
23 | # common style for the progressbar dans cli
24 | tqdm_common = {
25 |     "ncols": 100,
26 |     "bar_format": "{l_bar}{bar}| {n_fmt}/{total_fmt}",
27 |     "mininterval": 2.0,  # Log progress every 2 second
28 | }
29 | 
30 | 
31 | def clear_cache(recreate_folder: bool = True):
32 |     """Clear the cache folder."""
33 |     shutil.rmtree(CACHE_FOLDER)
34 |     if recreate_folder:
35 |         os.makedirs(CACHE_FOLDER, exist_ok=True)
36 | 
37 | 
38 | def download_file_from_https(url: str, filepath: Union[str, Path]):
39 |     """
40 |     Downloads a file from a https link to a local file.
41 |     :param url: The url where to download the file.
42 |     :param filepath: The path to the local file.
43 |     :return: Downloaded file filename.
44 |     """
45 |     logger.info(f"Downloading file from {url} to {filepath}")
46 |     response = requests.get(
47 |         url, stream=True, headers={"Accept-Encoding": "gzip, deflate"}
48 |     )
49 |     response.raise_for_status()
50 |     response_size = int(response.headers.get("content-length", 0))
51 |     filepath = Path(filepath)
52 |     with open(filepath, "wb") as f:
53 |         with tqdm(
54 |             total=response_size,
55 |             unit="B",
56 |             unit_scale=True,
57 |             desc=filepath.name,
58 |             **tqdm_common,
59 |         ) as pbar:
60 |             for chunk in response.iter_content(chunk_size=8192):
61 |                 f.write(chunk)
62 |                 pbar.update(len(chunk))
63 | 
64 |     return filepath.name
65 | 
66 | 
67 | def extract_file(zip_file, extract_folder):
68 |     with ZipFile(zip_file, "r") as zip_ref:
69 |         file_list = zip_ref.namelist()
70 |         with tqdm(
71 |             total=len(file_list), unit="file", desc="Extracting", **tqdm_common
72 |         ) as pbar:
73 |             for file in file_list:
74 |                 zip_ref.extract(file, extract_folder)  # Extract each file
75 |                 pbar.update(1)
76 |     return True
77 | 


--------------------------------------------------------------------------------
/pipelines/tasks/client/pmtiles_processor.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | from pathlib import Path
 3 | 
 4 | from pipelines.config.config import get_s3_path_pmtiles
 5 | from pipelines.tasks.config.config_geojson import (
 6 |     config_merge_geo,
 7 | )
 8 | from pipelines.utils.logger import get_logger
 9 | from pipelines.utils.storage_client import ObjectStorageClient
10 | 
11 | logger = get_logger(__name__)
12 | 
13 | types = config_merge_geo.keys()
14 | 
15 | 
16 | class PmtilesProcessor:
17 |     def __init__(self, type="communes"):
18 |         if type not in types:
19 |             raise Exception(f"type {type} must be in {types}")
20 |         self.upload_file_path = f"georef-france-{type}-prelevement.pmtiles"
21 | 
22 |     def convert_geojson_to_pmtiles(
23 |         self, geojson_file: str, pmtiles_file: str, layer="data_communes"
24 |     ):
25 |         """Convert a GeoJSON file to PMTiles format using Tippecanoe."""
26 |         # try:
27 |         # Construct the Tippecanoe command
28 |         command = [
29 |             "tippecanoe",
30 |             "-zg",  # Zoom levels
31 |             "-o",  # output
32 |             pmtiles_file,  # Output PMTiles file
33 |             "--layer",  # le nom de la couche dans les tuiles vectorielles
34 |             layer,
35 |             "--coalesce-densest-as-needed",
36 |             "--extend-zooms-if-still-dropping",
37 |             geojson_file,  # Input GeoJSON file
38 |         ]
39 |         # if file already exists then remove it
40 |         if Path(pmtiles_file).exists():
41 |             Path(pmtiles_file).unlink()
42 |         # Run the command
43 |         subprocess.run(command, check=True)
44 |         logger.info(f"Successfully converted '{geojson_file}' to '{pmtiles_file}'.")
45 | 
46 |         # except subprocess.CalledProcessError as e:
47 |         #     logger.error(f"Error during conversion: {e}")
48 |         # except Exception as e:
49 |         #     logger.error(f"An error occurred: {e}")
50 | 
51 |     def upload_pmtils_to_storage(self, env: str, pmtils_path: str):
52 |         """
53 |         Upload the Pmtiles file to Storage Object depending on the environment
54 |         This requires setting the correct environment variables for the Scaleway credentials
55 |         """
56 |         s3 = ObjectStorageClient()
57 |         s3_path = get_s3_path_pmtiles(env, self.upload_file_path)
58 | 
59 |         s3.upload_object(local_path=pmtils_path, file_key=s3_path, public_read=True)
60 |         logger.info(f"✅ pmtils uploaded to s3://{s3.bucket_name}/{s3_path}")
61 |         url = (
62 |             f"https://{s3.bucket_name}.{s3.endpoint_url.split('https://')[1]}/{s3_path}"
63 |         )
64 |         return url
65 | 


--------------------------------------------------------------------------------
/pipelines/tasks/generate_pmtiles_legacy.py:
--------------------------------------------------------------------------------
 1 | """Generate and upload merged new PMtiles file. LEGACY method.
 2 | For both UDI and communes data:
 3 | - Get geom data from duck db
 4 | - Get sample results from duckdb, merge with geom, convert to pmtiles and uploads the new Pmtiles to S3.
 5 | 
 6 | Args:
 7 |     - env (str): Environment to download from ("dev" or "prod")
 8 | """
 9 | 
10 | import json
11 | import os
12 | 
13 | from tasks.config.common import CACHE_FOLDER
14 | 
15 | from pipelines.tasks.client.core.duckdb_client import DuckDBClient
16 | from pipelines.tasks.client.geojson_processor import GeoJSONProcessor
17 | from pipelines.tasks.client.pmtiles_processor import PmtilesProcessor
18 | from pipelines.utils.logger import get_logger
19 | 
20 | logger = get_logger(__name__)
21 | 
22 | 
23 | def execute(env: str):
24 |     """
25 |     Execute GeoJSON generation and upload process.
26 | 
27 |     Args:
28 |         env: Environment to use ("dev" or "prod")
29 |     """
30 |     duckdb_client = DuckDBClient()
31 |     generate_pmtiles(env, "communes", duckdb_client)
32 |     generate_pmtiles(env, "udi", duckdb_client)
33 |     duckdb_client.close()
34 | 
35 | 
36 | def generate_pmtiles(env, type, duckdb_client):
37 |     logger.info(f"Starting {type} GeoJSON generation process")
38 | 
39 |     # Initialize clients
40 |     geojson_processor = GeoJSONProcessor(type, duckdb_client)
41 |     pmtiles_processor = PmtilesProcessor(type)
42 | 
43 |     # Process and merge data
44 |     logger.info(f"Merging GeoJSON with {type} results")
45 |     geojson_output_path = os.path.join(
46 |         CACHE_FOLDER, f"new-georef-france-{type}-prelevement.geojson"
47 |     )
48 |     geojson = geojson_processor.generate_geojson()
49 | 
50 |     with open(geojson_output_path, "w", encoding="utf-8") as f:
51 |         json.dump(geojson, f)
52 | 
53 |     logger.info(f"✅ GeoJSON processed and stored at: {geojson_output_path}")
54 | 
55 |     # logger.info("Uploading geojson to S3")
56 |     # url = geojson_processor.upload_geojson_to_storage(
57 |     #     env, file_path=geojson_output_path
58 |     # )
59 |     # logger.info(f"geojson in s3 pubic Url: {url}")
60 | 
61 |     logger.info("Convert new-GeoJSON to pmtiles")
62 |     pmtils_output_path = os.path.join(
63 |         CACHE_FOLDER, f"georef-france-{type}-prelevement.pmtiles"
64 |     )
65 |     pmtiles_processor.convert_geojson_to_pmtiles(
66 |         geojson_output_path, pmtils_output_path, f"data_{type}"
67 |     )
68 | 
69 |     logger.info("Uploading pmtiles to S3")
70 |     url = pmtiles_processor.upload_pmtils_to_storage(
71 |         env, pmtils_path=pmtils_output_path
72 |     )
73 |     logger.info(f"pmtiles in s3 pubic Url: {url}")
74 | 


--------------------------------------------------------------------------------
/dbt_/models/staging/communes/_communes_models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: stg_communes__cog
 5 |     description: "Liste des communes et leurs unités de distribution chargé depuis le site de l'insee https://www.insee.fr/fr/information/7766585"
 6 |     columns:
 7 |       - name: TYPECOM
 8 |         description: >
 9 |           Type de commune
10 |           COM 	Commune
11 |           COMA 	Commune associée
12 |           COMD 	Commune déléguée
13 |           ARM 	Arrondissement municipal
14 |       - name: COM
15 |         description: Code Commune
16 |       - name: REG
17 |         description: Code Region
18 |       - name: DEP
19 |         description: Code Departement
20 |       - name: CTCD
21 |         description: Code de la collectivité territoriale ayant les compétences départementales
22 |       - name: ARR
23 |         description: Code arrondissement
24 |       - name: TNCC
25 |         description: >
26 |           Type de nom en clair
27 |           0 	Pas d'article et le nom commence par une consonne sauf H muet 	charnière = DE
28 |           1 	Pas d'article et le nom commence par une voyelle ou un H muet 	charnière = D'
29 |           2 	Article = LE 	charnière = DU
30 |           3 	Article = LA 	charnière = DE LA
31 |           4 	Article = LES 	charnière = DES
32 |           5 	Article = L' 	charnière = DE L'
33 |           6 	Article = AUX 	charnière = DES
34 |           7 	Article = LAS 	charnière = DE LAS
35 |           8 	Article = LOS 	charnière = DE LOS
36 |       - name: NCC
37 |         description: Nom en clair (majuscules)
38 |       - name: NCCENR
39 |         description: Nom en clair (typographie riche)
40 |       - name: LIBELLE
41 |         description: Nom en clair (typographie riche) avec article
42 |       - name: CAN
43 |         description: Code canton. Pour les communes « multi-cantonales », code décliné de 99 à 90 (pseudo-canton) ou de 89 à 80 (communes nouvelles)
44 |       - name: COMPARENT
45 |         description: Code de la commune parente pour les arrondissements municipaux et les communes associées ou déléguées.
46 | 
47 |   - name: stg_communes__opendatasoft
48 |     description: Tracé des communes chargé depuis https://public.opendatasoft.com/explore/dataset/georef-france-commune/information
49 |     columns:
50 |       - name: com_code
51 |         description: "Code de la commune (extrait du champ com_code[1] de la source OpenDataSoft)"
52 |         type: VARCHAR
53 |       - name: com_name
54 |         description: "Nom de la commune (extrait du champ com_name[1] de la source OpenDataSoft)"
55 |         type: VARCHAR
56 |       - name: geom
57 |         description: "Géométrie de la commune"
58 |         type: GEOMETRY
59 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/pesticide/sub_active/int__resultats_sub_active_udi_dernier.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | last_pvl AS (
 3 |     SELECT DISTINCT
 4 |         cdreseau,
 5 |         categorie,
 6 |         cdparametresiseeaux,
 7 |         valtraduite,
 8 |         limite_qualite,
 9 |         valeur_sanitaire_1,
10 |         datetimeprel,
11 |         DENSE_RANK()
12 |             OVER (
13 |                 PARTITION BY cdreseau
14 |                 ORDER BY datetimeprel DESC
15 |             )
16 |             AS row_number
17 | 
18 |     FROM
19 |         {{ ref('int__resultats_udi_communes') }}
20 |     WHERE
21 |         categorie = 'pesticide'
22 |         AND
23 |         categorie_2 = 'sub_active'
24 |         AND
25 |         -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement
26 |         datetimeprel >= DATE_TRUNC('day', (
27 |             SELECT MAX(sub.datetimeprel)
28 |             FROM {{ ref('int__resultats_udi_communes') }} AS sub
29 |         ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
30 | ),
31 | 
32 | aggregated AS (
33 |     SELECT
34 |         cdreseau,
35 |         cdparametresiseeaux,
36 |         MAX(valtraduite) AS valtraduite,
37 |         MAX(limite_qualite) AS limite_qualite,
38 |         MAX(valeur_sanitaire_1) AS valeur_sanitaire_1,
39 |         MAX(datetimeprel) AS datetimeprel
40 |     FROM last_pvl
41 |     WHERE row_number = 1
42 |     GROUP BY cdreseau, cdparametresiseeaux
43 | )
44 | 
45 | SELECT
46 |     cdreseau,
47 |     'sub_active' AS categorie,
48 |     'dernier_prel' AS periode,
49 |     MAX(datetimeprel) AS date_dernier_prel,
50 |     COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,
51 |     CASE
52 |         WHEN BOOL_AND(valtraduite IS NULL OR valtraduite = 0) THEN 'non_quantifie'
53 |         WHEN
54 |             BOOL_OR(valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1)
55 |             THEN 'sup_valeur_sanitaire'
56 |         WHEN
57 |             BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_qualite)
58 |             THEN 'sup_limite_qualite'
59 |         WHEN
60 |             BOOL_OR(valtraduite IS NOT NULL AND valtraduite <= limite_qualite)
61 |             THEN 'inf_limite_qualite'
62 |         ELSE 'erreur'
63 |     END AS resultat,
64 |     TO_JSON(
65 |         MAP(
66 |             LIST(
67 |                 cdparametresiseeaux
68 |                 ORDER BY cdparametresiseeaux
69 |             ) FILTER (WHERE valtraduite > 0
70 |             ),
71 |             LIST(
72 |                 valtraduite
73 |                 ORDER BY cdparametresiseeaux
74 |             ) FILTER (WHERE valtraduite > 0
75 |             )
76 |         )
77 |     ) AS parametres_detectes
78 | 
79 | FROM aggregated
80 | GROUP BY cdreseau
81 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/pesticide/sub_active/int__resultats_sub_active_commune_dernier.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | last_pvl AS (
 3 |     SELECT DISTINCT
 4 |         inseecommune,
 5 |         categorie,
 6 |         cdparametresiseeaux,
 7 |         valtraduite,
 8 |         limite_qualite,
 9 |         valeur_sanitaire_1,
10 |         datetimeprel,
11 |         DENSE_RANK()
12 |             OVER (
13 |                 PARTITION BY inseecommune
14 |                 ORDER BY datetimeprel DESC
15 |             )
16 |             AS row_number
17 | 
18 |     FROM
19 |         {{ ref('int__resultats_udi_communes') }}
20 |     WHERE
21 |         categorie = 'pesticide'
22 |         AND
23 |         categorie_2 = 'sub_active'
24 |         AND
25 |         -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement
26 |         datetimeprel >= DATE_TRUNC('day', (
27 |             SELECT MAX(sub.datetimeprel)
28 |             FROM {{ ref('int__resultats_udi_communes') }} AS sub
29 |         ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
30 | ),
31 | 
32 | aggregated AS (
33 |     SELECT
34 |         inseecommune,
35 |         cdparametresiseeaux,
36 |         MAX(valtraduite) AS valtraduite,
37 |         MAX(limite_qualite) AS limite_qualite,
38 |         MAX(valeur_sanitaire_1) AS valeur_sanitaire_1,
39 |         MAX(datetimeprel) AS datetimeprel
40 |     FROM last_pvl
41 |     WHERE row_number = 1
42 |     GROUP BY inseecommune, cdparametresiseeaux
43 | )
44 | 
45 | SELECT
46 |     inseecommune,
47 |     'sub_active' AS categorie,
48 |     'dernier_prel' AS periode,
49 |     MAX(datetimeprel) AS date_dernier_prel,
50 |     COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,
51 |     CASE
52 |         WHEN BOOL_AND(valtraduite IS NULL OR valtraduite = 0) THEN 'non_quantifie'
53 |         WHEN
54 |             BOOL_OR(valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1)
55 |             THEN 'sup_valeur_sanitaire'
56 |         WHEN
57 |             BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_qualite)
58 |             THEN 'sup_limite_qualite'
59 |         WHEN
60 |             BOOL_OR(valtraduite IS NOT NULL AND valtraduite <= limite_qualite)
61 |             THEN 'inf_limite_qualite'
62 |         ELSE 'erreur'
63 |     END AS resultat,
64 |     TO_JSON(
65 |         MAP(
66 |             LIST(
67 |                 cdparametresiseeaux
68 |                 ORDER BY cdparametresiseeaux
69 |             ) FILTER (WHERE valtraduite > 0
70 |             ),
71 |             LIST(
72 |                 valtraduite
73 |                 ORDER BY cdparametresiseeaux
74 |             ) FILTER (WHERE valtraduite > 0
75 |             )
76 |         )
77 |     ) AS parametres_detectes
78 | 
79 | FROM aggregated
80 | GROUP BY inseecommune
81 | 


--------------------------------------------------------------------------------
/webapp/app/api/udi/find/route.tsx:
--------------------------------------------------------------------------------
 1 | // an api route fetching data
 2 | 
 3 | import db from "@/app/lib/duckdb";
 4 | import { NextRequest, NextResponse } from "next/server";
 5 | 
 6 | export async function GET(req: NextRequest) {
 7 |   // Set CORS headers to allow requests from any origin
 8 |   const corsHeaders = {
 9 |     "Access-Control-Allow-Origin": "*",
10 |     "Access-Control-Allow-Methods": "GET, OPTIONS",
11 |     "Access-Control-Allow-Headers": "Content-Type, Authorization",
12 |   };
13 | 
14 |   // Handle OPTIONS request for CORS preflight
15 |   if (req.method === "OPTIONS") {
16 |     return NextResponse.json({}, { headers: corsHeaders });
17 |   }
18 | 
19 |   const { searchParams } = new URL(req.url);
20 |   const lonParam = searchParams.get("lon");
21 |   const latParam = searchParams.get("lat");
22 | 
23 |   if (lonParam == null || latParam == null) {
24 |     return NextResponse.json(
25 |       { message: "Paramètres manquants: lon et lat sont requis" },
26 |       { status: 400, headers: corsHeaders },
27 |     );
28 |   }
29 |   const lon = parseFloat(lonParam);
30 |   const lat = parseFloat(latParam);
31 | 
32 |   if (
33 |     isNaN(lon) ||
34 |     isNaN(lat) ||
35 |     lon < -180 ||
36 |     lon > 180 ||
37 |     lat < -90 ||
38 |     lat > 90
39 |   ) {
40 |     return NextResponse.json(
41 |       { message: "Paramètres invalides" },
42 |       { status: 400, headers: corsHeaders },
43 |     );
44 |   }
45 | 
46 |   const connection = await db.connect();
47 |   try {
48 |     await connection.run("LOAD spatial;");
49 | 
50 |     const prepared = await connection.prepare(`
51 |       SELECT code_udi
52 |       FROM atlasante_udi
53 |       WHERE ST_Contains(geom, ST_GeomFromText($1::VARCHAR))
54 |       ORDER BY udi_pop DESC
55 |       LIMIT 1
56 |     `);
57 | 
58 |     const point = `POINT(${lon} ${lat})`;
59 |     prepared.bindVarchar(1, point);
60 | 
61 |     const result = await prepared.runAndReadAll();
62 | 
63 |     if (result.currentRowCount > 0) {
64 |       return NextResponse.json(
65 |         { id: result.getRowObjectsJson()[0]["code_udi"] },
66 |         { status: 200, headers: corsHeaders },
67 |       );
68 |     } else {
69 |       return NextResponse.json(
70 |         { message: "Aucune UDI ne correspond à ces coordonnées" },
71 |         { status: 404, headers: corsHeaders },
72 |       );
73 |     }
74 |   } catch (error) {
75 |     console.error("Erreur de base de données:", error);
76 |     return NextResponse.json(
77 |       {
78 |         message:
79 |           "Une erreur interne s'est produite. Veuillez réessayer ultérieurement.",
80 |       },
81 |       { status: 500, headers: corsHeaders },
82 |     );
83 |   } finally {
84 |     await connection.close();
85 |   }
86 | }
87 | 


--------------------------------------------------------------------------------
/webapp/tailwind.config.ts:
--------------------------------------------------------------------------------
 1 | import type { Config } from "tailwindcss";
 2 | import TailwindAnimate from "tailwindcss-animate";
 3 | 
 4 | export default {
 5 |   darkMode: ["class"],
 6 |   content: [
 7 |     "./pages/**/*.{js,ts,jsx,tsx,mdx}",
 8 |     "./components/**/*.{js,ts,jsx,tsx,mdx}",
 9 |     "./app/**/*.{js,ts,jsx,tsx,mdx}",
10 |   ],
11 |   theme: {
12 |     extend: {
13 |       colors: {
14 |         background: "hsl(var(--background))",
15 |         foreground: "hsl(var(--foreground))",
16 |         card: {
17 |           DEFAULT: "hsl(var(--card))",
18 |           foreground: "hsl(var(--card-foreground))",
19 |         },
20 |         popover: {
21 |           DEFAULT: "hsl(var(--popover))",
22 |           foreground: "hsl(var(--popover-foreground))",
23 |         },
24 |         primary: {
25 |           DEFAULT: "hsl(var(--primary))",
26 |           foreground: "hsl(var(--primary-foreground))",
27 |         },
28 |         secondary: {
29 |           DEFAULT: "hsl(var(--secondary))",
30 |           foreground: "hsl(var(--secondary-foreground))",
31 |         },
32 |         muted: {
33 |           DEFAULT: "hsl(var(--muted))",
34 |           foreground: "hsl(var(--muted-foreground))",
35 |         },
36 |         accent: {
37 |           DEFAULT: "hsl(var(--accent))",
38 |           foreground: "hsl(var(--accent-foreground))",
39 |         },
40 |         destructive: {
41 |           DEFAULT: "hsl(var(--destructive))",
42 |           foreground: "hsl(var(--destructive-foreground))",
43 |         },
44 |         border: "hsl(var(--border))",
45 |         input: "hsl(var(--input))",
46 |         ring: "hsl(var(--ring))",
47 |         chart: {
48 |           "1": "hsl(var(--chart-1))",
49 |           "2": "hsl(var(--chart-2))",
50 |           "3": "hsl(var(--chart-3))",
51 |           "4": "hsl(var(--chart-4))",
52 |           "5": "hsl(var(--chart-5))",
53 |         },
54 |         "custom-drom": "#22394e",
55 |       },
56 |       borderRadius: {
57 |         lg: "var(--radius)",
58 |         md: "calc(var(--radius) - 2px)",
59 |         sm: "calc(var(--radius) - 4px)",
60 |       },
61 |       fontFamily: {
62 |         sans: [
63 |           "Raleway",
64 |           "ui-sans-serif",
65 |           "system-ui",
66 |           "-apple-system",
67 |           "BlinkMacSystemFont",
68 |           "Segoe UI",
69 |           "Roboto",
70 |           "Helvetica Neue",
71 |           "Arial",
72 |           "Noto Sans",
73 |           "sans-serif",
74 |         ],
75 |         numbers: [
76 |           "ui-sans-serif",
77 |           "system-ui",
78 |           "-apple-system",
79 |           "BlinkMacSystemFont",
80 |           "Segoe UI",
81 |           "Roboto",
82 |           "Helvetica Neue",
83 |           "Arial",
84 |           "Noto Sans",
85 |           "sans-serif",
86 |         ],
87 |       },
88 |     },
89 |   },
90 |   plugins: [TailwindAnimate],
91 | } satisfies Config;
92 | 


--------------------------------------------------------------------------------
/dbt_/tests/test_cvm_results.sql:
--------------------------------------------------------------------------------
  1 | -- dernier relevé
  2 | SELECT
  3 |     'dernier relevé' AS periode,
  4 |     cdreseau,
  5 |     categorie,
  6 |     resultat,
  7 |     0 AS nb_depassements,
  8 |     0 AS nb_prelevements,
  9 |     0 AS ratio_limite_qualite
 10 | FROM
 11 |     {{ ref('int__resultats_cvm_udi_dernier') }}
 12 | WHERE
 13 |     (
 14 |         cdreseau = '976003489'
 15 |         AND categorie = 'cvm'
 16 |         AND date_dernier_prel = '2024-07-16 08:30:00'
 17 |         AND resultat != 'non_quantifie'
 18 |     )
 19 |     OR
 20 |     (
 21 |         cdreseau = '001000241'
 22 |         AND categorie = 'cvm'
 23 |         AND date_dernier_prel = '2024-12-31 14:00:00'
 24 |         AND resultat != 'non_quantifie'
 25 |     )
 26 |     OR
 27 |     (
 28 |         cdreseau = '087003637'
 29 |         AND categorie = 'cvm'
 30 |         AND date_dernier_prel = '2024-07-04 10:50:00'
 31 |         AND resultat != 'cvm_sup_0_5'
 32 |     )
 33 |     OR
 34 |     (
 35 |         cdreseau = '095004048'
 36 |         AND categorie = 'cvm'
 37 |         AND date_dernier_prel = '2024-07-23 08:26:00'
 38 |         AND resultat != 'inf_limites'
 39 |     )
 40 | UNION ALL
 41 | -- annuel
 42 | SELECT
 43 |     'annuel' AS periode,
 44 |     cdreseau,
 45 |     categorie,
 46 |     '' AS resultat,
 47 |     nb_depassements,
 48 |     nb_prelevements,
 49 |     ratio_limite_qualite
 50 | FROM
 51 |     {{ ref('int__resultats_cvm_udi_annuel') }}
 52 | WHERE
 53 |     (
 54 |         cdreseau = '001001073'
 55 |         AND categorie = 'cvm'
 56 |         AND annee = '2024'
 57 |         AND nb_depassements != 0
 58 |     )
 59 |     OR
 60 |     (
 61 |         cdreseau = '001001073'
 62 |         AND categorie = 'cvm'
 63 |         AND annee = '2024'
 64 |         AND ratio_limite_qualite != 0
 65 |     )
 66 |     OR
 67 |     (
 68 |         cdreseau = '001001073'
 69 |         AND categorie = 'cvm'
 70 |         AND annee = '2023'
 71 |         AND nb_depassements != 0
 72 |     )
 73 |     OR
 74 |     (
 75 |         cdreseau = '001001073'
 76 |         AND categorie = 'cvm'
 77 |         AND annee = '2022'
 78 |         AND nb_depassements != 0
 79 |     )
 80 |     OR
 81 |     (
 82 |         cdreseau = '007000088'
 83 |         AND categorie = 'cvm'
 84 |         AND annee IN ('2022', '2023', '2024')
 85 |         AND nb_depassements != 0
 86 |     )
 87 |     OR
 88 |     (
 89 |         cdreseau = '095004048'
 90 |         AND categorie = 'cvm'
 91 |         AND annee = '2024'
 92 |         AND nb_prelevements != 21
 93 |     )
 94 |     OR
 95 |     (
 96 |         cdreseau = '005001358'
 97 |         AND categorie = 'cvm'
 98 |         AND annee = '2022'
 99 |         AND nb_depassements != 2
100 |     )
101 |     OR
102 |     (
103 |         cdreseau = '032000209'
104 |         AND categorie = 'cvm'
105 |         AND annee = '2024'
106 |         AND (
107 |             ratio_limite_qualite != 0.25
108 |             OR
109 |             nb_prelevements != 4
110 |         )
111 |     )
112 | 


--------------------------------------------------------------------------------
/pipelines/tasks/client/uploaded_geojson_client.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | 
 4 | from pipelines.tasks.client.core.duckdb_client import DuckDBClient
 5 | from pipelines.tasks.config.common import (
 6 |     CACHE_FOLDER,
 7 |     logger,
 8 | )
 9 | from pipelines.utils.storage_client import ObjectStorageClient
10 | 
11 | 
12 | class UploadedGeoJSONClient:
13 |     """Client pour télécharger et ingérer plusieurs fichiers GeoJSON uploadés préalablement manuellement sur S3"""
14 | 
15 |     def __init__(self, config, duckdb_client: DuckDBClient):
16 |         self.config = config
17 |         self.duckdb_client = duckdb_client
18 |         self.storage_client = ObjectStorageClient()
19 | 
20 |         if "files" not in self.config:
21 |             raise ValueError(
22 |                 "Configuration must contain a 'files' list with the GeoJSON files to process"
23 |             )
24 | 
25 |         self.files_config = self.config["files"]
26 |         logger.info(
27 |             f"UploadedGeoJSONClient initialized with {len(self.files_config)} file(s)"
28 |         )
29 | 
30 |     def process_datasets(self):
31 |         logger.info(f"Processing {self.__class__.__name__} data")
32 |         self._download_data()
33 |         self._ingest_to_duckdb()
34 |         logger.info(f"Finishing processing {self.__class__.__name__} data")
35 | 
36 |     def _download_data(self):
37 |         os.makedirs(CACHE_FOLDER, exist_ok=True)
38 | 
39 |         for file_config in self.files_config:
40 |             s3_key = (
41 |                 f"{self.config['source'].get('prefix', 'upload')}/{file_config['path']}"
42 |             )
43 |             local_path = Path(CACHE_FOLDER, file_config["local_file_name"])
44 |             logger.info(f"Downloading {s3_key} to {local_path}")
45 |             self.storage_client.download_object(
46 |                 file_key=s3_key, local_path=str(local_path)
47 |             )
48 | 
49 |     def _ingest_to_duckdb(self):
50 |         logger.info(
51 |             f"Ingesting {len(self.files_config)} uploaded GeoJSON file(s) into DuckDB"
52 |         )
53 | 
54 |         # Collect all table names for dropping
55 |         table_names = [file_config["table_name"] for file_config in self.files_config]
56 |         self.duckdb_client.drop_tables(table_names=table_names)
57 | 
58 |         # Ingest each file
59 |         for file_config in self.files_config:
60 |             logger.info(
61 |                 f"Ingesting {file_config['local_file_name']} into table {file_config['table_name']}"
62 |             )
63 |             self.duckdb_client.ingest_from_geojson(
64 |                 table_name=file_config["table_name"],
65 |                 filepath=Path(CACHE_FOLDER, file_config["local_file_name"]),
66 |             )
67 |             logger.info(
68 |                 f"✅ {file_config['local_file_name']} has been ingested into table {file_config['table_name']}"
69 |             )
70 | 
71 |         logger.info("✅ All uploaded GeoJSON files have been ingested in DB")
72 | 


--------------------------------------------------------------------------------
/dbt_/tests/test_nitrates_results.sql:
--------------------------------------------------------------------------------
  1 | -- dernier relevé
  2 | SELECT
  3 |     'dernier relevé' AS periode,
  4 |     cdreseau,
  5 |     resultat,
  6 |     0 AS nb_depassements,
  7 |     0 AS nb_prelevements,
  8 |     0 AS ratio_depassements
  9 | FROM
 10 |     {{ ref('int__resultats_nitrate_udi_dernier') }}
 11 | WHERE
 12 |     (
 13 |         cdreseau = '001000003'
 14 |         AND date_dernier_prel = '2025-05-23 09:06:00'
 15 |         AND resultat != 'no3_inf_25'
 16 |     )
 17 |     OR
 18 |     (
 19 |         cdreseau = '037000175'
 20 |         AND date_dernier_prel = '2025-06-17 10:02:00'
 21 |         AND resultat != 'no3_inf_40'
 22 |     )
 23 |     OR
 24 |     (
 25 |         cdreseau = '002000060'
 26 |         AND date_dernier_prel = '2025-04-10 09:22:00'
 27 |         AND resultat != 'sup_valeur_sanitaire'
 28 |     )
 29 |     OR
 30 |     (
 31 |         cdreseau = '060001271'
 32 |         AND date_dernier_prel = '2025-04-09 13:44:00'
 33 |         AND resultat != 'inf_valeur_sanitaire'
 34 |     )
 35 |     OR
 36 |     (
 37 |         cdreseau = '973000028'
 38 |         AND date_dernier_prel = '2025-05-20 10:44:00'
 39 |         AND resultat != 'non_quantifie'
 40 |     )
 41 | UNION ALL
 42 | -- annuel
 43 | SELECT
 44 |     'annuel' AS periode,
 45 |     cdreseau,
 46 |     '' AS resultat,
 47 |     nb_depassements,
 48 |     nb_prelevements,
 49 |     ratio
 50 | FROM
 51 |     {{ ref('int__resultats_nitrate_udi_annuel') }}
 52 | WHERE
 53 |     (
 54 |         cdreseau = '092003070'
 55 |         AND annee = '2024'
 56 |         AND (
 57 |             nb_prelevements != 806
 58 |             OR nb_depassements != 0
 59 |             OR ratio != 0
 60 |         )
 61 |     )
 62 |     OR
 63 |     (
 64 |         cdreseau = '071001155'
 65 |         AND annee = '2023'
 66 |         AND (
 67 |             nb_prelevements != 1
 68 |             OR nb_depassements != 0
 69 |             OR ratio != 0
 70 |         )
 71 |     )
 72 |     OR
 73 |     (
 74 |         cdreseau = '036000670'
 75 |         AND annee = '2024'
 76 |         AND (
 77 |             nb_prelevements != 27
 78 |             OR nb_depassements != 25
 79 |             -- il y a 2 prélèvements = à 50 (= valeur_sanitaire_1)
 80 |             -- comme c'est un strict > dans la requête, on a 25 et pas 27
 81 |             OR ratio < 0.92
 82 |         )
 83 |     )
 84 |     OR
 85 |     (
 86 |         cdreseau = '089003503'
 87 |         AND annee = '2020'
 88 |         AND (
 89 |             nb_prelevements != 12
 90 |             OR nb_depassements != 3
 91 |             OR ratio != 0.25
 92 |         )
 93 |     )
 94 |     OR
 95 |     (
 96 |         cdreseau = '055000713'
 97 |         AND annee = '2023'
 98 |         AND (
 99 |             nb_prelevements != 4
100 |             OR nb_depassements != 0
101 |             OR ratio != 0
102 |         )
103 |     )
104 |     OR
105 |     (
106 |         cdreseau = '027000943'
107 |         AND annee = '2021'
108 |         AND (
109 |             nb_prelevements != 63
110 |             OR nb_depassements != 1
111 |             -- il y a 1 prélèvement = à 50 (= valeur_sanitaire_1)
112 |         )
113 |     )
114 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/metaux_lourds/int__resultats_metaux_lourds_udi_dernier.sql:
--------------------------------------------------------------------------------
 1 | -- Ici on ne garde que le dernier prélèvement
 2 | -- pour chaque UDI dans la dernière année
 3 | WITH metaux_lourds_dernier_prel AS (
 4 |     SELECT
 5 |         cdreseau,
 6 |         categorie,
 7 |         cdparametresiseeaux,
 8 |         limite_qualite,
 9 |         valeur_sanitaire_1,
10 |         valeur_sanitaire_2,
11 |         datetimeprel,
12 |         valtraduite,
13 |         ROW_NUMBER()
14 |             OVER (
15 |                 PARTITION BY cdreseau, cdparametresiseeaux
16 |                 ORDER BY datetimeprel DESC
17 |             )
18 |             AS row_number
19 |     FROM
20 |         {{ ref('int__resultats_udi_communes') }}
21 |     WHERE
22 |         cdparametresiseeaux IN ('PB', 'AS')
23 |         AND
24 |         -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement
25 |         datetimeprel >= DATE_TRUNC('day', (
26 |             SELECT MAX(sub.datetimeprel)
27 |             FROM {{ ref('int__resultats_udi_communes') }} AS sub
28 |         ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
29 | )
30 | 
31 | -- Ici on ne prend que le prélèvement le plus récent (avec row_number = 1)
32 | -- pour chaque type de métaux lourds
33 | SELECT
34 |     cdreseau,
35 |     datetimeprel AS date_dernier_prel,
36 |     'dernier_prel' AS periode,
37 |     1 AS nb_parametres,
38 |     CASE
39 |         WHEN
40 |             cdparametresiseeaux = 'PB'
41 |             THEN 'metaux_lourds_pb'
42 |         WHEN
43 |             cdparametresiseeaux = 'AS'
44 |             THEN 'metaux_lourds_as'
45 |     END AS categorie,
46 |     CASE
47 |         WHEN
48 |             -- Pas de distinction PB/AS car même résultat
49 |             valtraduite IS NULL
50 |             OR valtraduite = 0
51 |             THEN 'non_quantifie'
52 |         WHEN
53 |             cdparametresiseeaux = 'PB'
54 |             AND valtraduite >= limite_qualite
55 |             THEN 'sup_limite_qualite'
56 |         WHEN
57 |             -- 5 est la future limite de qualité appliquée
58 |             -- à partir de 2036
59 |             cdparametresiseeaux = 'PB'
60 |             AND valtraduite >= 5
61 |             AND valtraduite < limite_qualite
62 |             THEN 'sup_limite_qualite_2036'
63 |         WHEN
64 |             cdparametresiseeaux = 'PB'
65 |             AND valtraduite < 5
66 |             THEN 'inf_limite_qualite'
67 |         WHEN
68 |             cdparametresiseeaux = 'AS'
69 |             AND valtraduite >= valeur_sanitaire_1
70 |             THEN 'sup_valeur_sanitaire'
71 |         WHEN
72 |             cdparametresiseeaux = 'AS'
73 |             AND valtraduite >= limite_qualite
74 |             AND valtraduite < valeur_sanitaire_1
75 |             THEN 'sup_limite_qualite'
76 |         WHEN
77 |             cdparametresiseeaux = 'AS'
78 |             AND valtraduite < limite_qualite
79 |             THEN 'inf_limite_qualite'
80 |         ELSE 'erreur'
81 |     END AS resultat,
82 |     JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite)
83 |         AS parametres_detectes
84 | FROM
85 |     metaux_lourds_dernier_prel
86 | WHERE
87 |     row_number = 1
88 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/pesticide/metabolite/int__resultats_metabolite_udi_dernier.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | last_pvl AS (
 3 |     SELECT DISTINCT
 4 |         cdreseau,
 5 |         categorie,
 6 |         cdparametresiseeaux,
 7 |         valtraduite,
 8 |         limite_qualite,
 9 |         limite_indicative,
10 |         valeur_sanitaire_1,
11 |         datetimeprel,
12 |         DENSE_RANK()
13 |             OVER (
14 |                 PARTITION BY cdreseau
15 |                 ORDER BY datetimeprel DESC
16 |             )
17 |             AS row_number
18 | 
19 |     FROM
20 |         {{ ref('int__resultats_udi_communes') }}
21 |     WHERE
22 |         categorie = 'pesticide'
23 |         AND
24 |         categorie_2 = 'metabolite'
25 |         AND
26 |         -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement
27 |         datetimeprel >= DATE_TRUNC('day', (
28 |             SELECT MAX(sub.datetimeprel)
29 |             FROM {{ ref('int__resultats_udi_communes') }} AS sub
30 |         ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
31 | ),
32 | 
33 | aggregated AS (
34 |     SELECT
35 |         cdreseau,
36 |         cdparametresiseeaux,
37 |         MAX(valtraduite) AS valtraduite,
38 |         MAX(limite_qualite) AS limite_qualite,
39 |         MAX(limite_indicative) AS limite_indicative,
40 |         MAX(valeur_sanitaire_1) AS valeur_sanitaire_1,
41 |         MAX(datetimeprel) AS datetimeprel
42 |     FROM last_pvl
43 |     WHERE row_number = 1
44 |     GROUP BY cdreseau, cdparametresiseeaux
45 | )
46 | 
47 | SELECT
48 |     cdreseau,
49 |     'metabolite' AS categorie,
50 |     'dernier_prel' AS periode,
51 |     MAX(datetimeprel) AS date_dernier_prel,
52 |     COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,
53 |     CASE
54 |         WHEN BOOL_AND(valtraduite IS NULL OR valtraduite = 0) THEN 'non_quantifie'
55 |         WHEN
56 |             BOOL_OR(valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1)
57 |             THEN 'sup_valeur_sanitaire'
58 |         WHEN
59 |             BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_qualite)
60 |             THEN 'sup_limite_qualite'
61 |         WHEN
62 |             BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_indicative)
63 |             THEN 'sup_limite_indicative'
64 |         WHEN
65 |             BOOL_OR(
66 |                 valtraduite IS NOT NULL
67 |                 AND (limite_qualite IS NULL OR valtraduite <= limite_qualite)
68 |                 AND (limite_indicative IS NULL OR valtraduite <= limite_indicative)
69 |                 AND (limite_qualite IS NOT NULL OR limite_indicative IS NOT NULL)
70 |             )
71 |             THEN 'inf_limites'
72 |         ELSE 'erreur'
73 |     END AS resultat,
74 |     TO_JSON(
75 |         MAP(
76 |             LIST(
77 |                 cdparametresiseeaux
78 |                 ORDER BY cdparametresiseeaux
79 |             ) FILTER (WHERE valtraduite > 0
80 |             ),
81 |             LIST(
82 |                 valtraduite
83 |                 ORDER BY cdparametresiseeaux
84 |             ) FILTER (WHERE valtraduite > 0
85 |             )
86 |         )
87 |     ) AS parametres_detectes
88 | 
89 | FROM aggregated
90 | GROUP BY cdreseau
91 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/metaux_lourds/int__resultats_metaux_lourds_commune_dernier.sql:
--------------------------------------------------------------------------------
 1 | -- Ici on ne garde que le dernier prélèvement
 2 | -- pour chaque UDI dans la dernière année
 3 | WITH metaux_lourds_dernier_prel AS (
 4 |     SELECT
 5 |         inseecommune,
 6 |         categorie,
 7 |         cdparametresiseeaux,
 8 |         limite_qualite,
 9 |         valeur_sanitaire_1,
10 |         valeur_sanitaire_2,
11 |         datetimeprel,
12 |         valtraduite,
13 |         ROW_NUMBER()
14 |             OVER (
15 |                 PARTITION BY inseecommune, cdparametresiseeaux
16 |                 ORDER BY datetimeprel DESC
17 |             )
18 |             AS row_number
19 |     FROM
20 |         {{ ref('int__resultats_udi_communes') }}
21 |     WHERE
22 |         cdparametresiseeaux IN ('PB', 'AS')
23 |         AND
24 |         -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement
25 |         datetimeprel >= DATE_TRUNC('day', (
26 |             SELECT MAX(sub.datetimeprel)
27 |             FROM {{ ref('int__resultats_udi_communes') }} AS sub
28 |         ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
29 | )
30 | 
31 | -- Ici on ne prend que le prélèvement le plus récent (avec row_number = 1)
32 | -- pour chaque type de métaux lourds
33 | SELECT
34 |     inseecommune,
35 |     datetimeprel AS date_dernier_prel,
36 |     'dernier_prel' AS periode,
37 |     1 AS nb_parametres,
38 |     CASE
39 |         WHEN
40 |             cdparametresiseeaux = 'PB'
41 |             THEN 'metaux_lourds_pb'
42 |         WHEN
43 |             cdparametresiseeaux = 'AS'
44 |             THEN 'metaux_lourds_as'
45 |     END AS categorie,
46 |     CASE
47 |         WHEN
48 |             -- Pas de distinction PB/AS car même résultat
49 |             valtraduite IS NULL
50 |             OR valtraduite = 0
51 |             THEN 'non_quantifie'
52 |         WHEN
53 |             cdparametresiseeaux = 'PB'
54 |             AND valtraduite >= limite_qualite
55 |             THEN 'sup_limite_qualite'
56 |         WHEN
57 |             -- 5 est la future limite de qualité appliquée
58 |             -- à partir de 2036
59 |             cdparametresiseeaux = 'PB'
60 |             AND valtraduite >= 5
61 |             AND valtraduite < limite_qualite
62 |             THEN 'sup_limite_qualite_2036'
63 |         WHEN
64 |             cdparametresiseeaux = 'PB'
65 |             AND valtraduite < 5
66 |             THEN 'inf_limite_qualite'
67 |         WHEN
68 |             cdparametresiseeaux = 'AS'
69 |             AND valtraduite >= valeur_sanitaire_1
70 |             THEN 'sup_valeur_sanitaire'
71 |         WHEN
72 |             cdparametresiseeaux = 'AS'
73 |             AND valtraduite >= limite_qualite
74 |             AND valtraduite < valeur_sanitaire_1
75 |             THEN 'sup_limite_qualite'
76 |         WHEN
77 |             cdparametresiseeaux = 'AS'
78 |             AND valtraduite < limite_qualite
79 |             THEN 'inf_limite_qualite'
80 |         ELSE 'erreur'
81 |     END AS resultat,
82 |     JSON_OBJECT(CASE WHEN valtraduite > 0 THEN cdparametresiseeaux END, valtraduite)
83 |         AS parametres_detectes
84 | FROM
85 |     metaux_lourds_dernier_prel
86 | WHERE
87 |     row_number = 1
88 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/pesticide/metabolite/int__resultats_metabolite_commune_dernier.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | last_pvl AS (
 3 |     SELECT DISTINCT
 4 |         inseecommune,
 5 |         categorie,
 6 |         cdparametresiseeaux,
 7 |         valtraduite,
 8 |         limite_qualite,
 9 |         limite_indicative,
10 |         valeur_sanitaire_1,
11 |         datetimeprel,
12 |         DENSE_RANK()
13 |             OVER (
14 |                 PARTITION BY inseecommune
15 |                 ORDER BY datetimeprel DESC
16 |             )
17 |             AS row_number
18 | 
19 |     FROM
20 |         {{ ref('int__resultats_udi_communes') }}
21 |     WHERE
22 |         categorie = 'pesticide'
23 |         AND
24 |         categorie_2 = 'metabolite'
25 |         AND
26 |         -- On garde les prélèvements de moins d'un an à partir du dernier prélèvement
27 |         datetimeprel >= DATE_TRUNC('day', (
28 |             SELECT MAX(sub.datetimeprel)
29 |             FROM {{ ref('int__resultats_udi_communes') }} AS sub
30 |         ) - INTERVAL 1 YEAR) + INTERVAL 1 DAY
31 | ),
32 | 
33 | aggregated AS (
34 |     SELECT
35 |         inseecommune,
36 |         cdparametresiseeaux,
37 |         MAX(valtraduite) AS valtraduite,
38 |         MAX(limite_qualite) AS limite_qualite,
39 |         MAX(limite_indicative) AS limite_indicative,
40 |         MAX(valeur_sanitaire_1) AS valeur_sanitaire_1,
41 |         MAX(datetimeprel) AS datetimeprel
42 |     FROM last_pvl
43 |     WHERE row_number = 1
44 |     GROUP BY inseecommune, cdparametresiseeaux
45 | )
46 | 
47 | SELECT
48 |     inseecommune,
49 |     'metabolite' AS categorie,
50 |     'dernier_prel' AS periode,
51 |     MAX(datetimeprel) AS date_dernier_prel,
52 |     COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,
53 |     CASE
54 |         WHEN BOOL_AND(valtraduite IS NULL OR valtraduite = 0) THEN 'non_quantifie'
55 |         WHEN
56 |             BOOL_OR(valtraduite IS NOT NULL AND valtraduite > valeur_sanitaire_1)
57 |             THEN 'sup_valeur_sanitaire'
58 |         WHEN
59 |             BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_qualite)
60 |             THEN 'sup_limite_qualite'
61 |         WHEN
62 |             BOOL_OR(valtraduite IS NOT NULL AND valtraduite > limite_indicative)
63 |             THEN 'sup_limite_indicative'
64 |         WHEN
65 |             BOOL_OR(
66 |                 valtraduite IS NOT NULL
67 |                 AND (limite_qualite IS NULL OR valtraduite <= limite_qualite)
68 |                 AND (limite_indicative IS NULL OR valtraduite <= limite_indicative)
69 |                 AND (limite_qualite IS NOT NULL OR limite_indicative IS NOT NULL)
70 |             )
71 |             THEN 'inf_limites'
72 |         ELSE 'erreur'
73 |     END AS resultat,
74 |     TO_JSON(
75 |         MAP(
76 |             LIST(
77 |                 cdparametresiseeaux
78 |                 ORDER BY cdparametresiseeaux
79 |             ) FILTER (WHERE valtraduite > 0
80 |             ),
81 |             LIST(
82 |                 valtraduite
83 |                 ORDER BY cdparametresiseeaux
84 |             ) FILTER (WHERE valtraduite > 0
85 |             )
86 |         )
87 |     ) AS parametres_detectes
88 | 
89 | FROM aggregated
90 | GROUP BY inseecommune
91 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | pfas_prels AS (
 3 |     SELECT DISTINCT
 4 |         de_partition AS annee,
 5 |         cdreseau,
 6 |         referenceprel,
 7 |         datetimeprel,
 8 |         cdparametresiseeaux,
 9 |         limite_qualite,
10 |         valeur_sanitaire_1,
11 |         valtraduite
12 |     FROM
13 |         {{ ref('int__resultats_udi_communes') }}
14 |     WHERE
15 |         categorie = 'pfas'
16 | ),
17 | 
18 | -- 1 : Agrégation des résultats en une seule ligne par prélèvement / udi / année
19 | pfas_results_udi_agg AS (
20 |     SELECT
21 |         referenceprel,
22 |         cdreseau,
23 |         annee,
24 |         -- La somme des 20 PFAS est disponible comme un paramètre (SPFAS)
25 |         MAX(
26 |             CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END
27 |         ) AS sum_20_pfas,
28 |         COUNT(
29 |             DISTINCT CASE
30 |                 WHEN cdparametresiseeaux = 'SPFAS' THEN referenceprel
31 |             END
32 |         ) AS count_20_pfas,
33 |         -- On calcule une somme de 4 PFAS pour une limite recommandée par le
34 |         -- haut conseil de la santé public
35 |         SUM(
36 |             CASE
37 |                 WHEN
38 |                     cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')
39 |                     THEN valtraduite
40 |                 ELSE 0
41 |             END
42 |         ) AS sum_4_pfas,
43 |         -- On check si la somme des 20 PFAS est supérieure
44 |         -- à la limite reglementaire
45 |         MAX(
46 |             CASE
47 |                 WHEN
48 |                     cdparametresiseeaux = 'SPFAS'
49 |                     AND limite_qualite IS NOT NULL
50 |                     AND valtraduite IS NOT NULL
51 |                     AND valtraduite > limite_qualite
52 |                     THEN 1
53 |                 ELSE 0
54 |             END
55 |         ) AS sum_20_pfas_above_limit,
56 |         MAX(
57 |             CASE
58 |                 WHEN
59 |                     valeur_sanitaire_1 IS NOT NULL
60 |                     AND valtraduite IS NOT NULL
61 |                     AND valtraduite > valeur_sanitaire_1
62 |                     THEN 1
63 |                 ELSE 0
64 |             END
65 |         ) AS has_pfas_above_vs,
66 |         MAX(datetimeprel) AS max_datetimeprel
67 |     FROM pfas_prels
68 |     GROUP BY referenceprel, cdreseau, annee
69 |     -- On drop les très rares cas où il n'y a pas la somme des 20 PFAS
70 |     HAVING count_20_pfas = 1
71 | )
72 | 
73 | SELECT
74 |     cdreseau,
75 |     annee,
76 |     'pfas' AS categorie,
77 |     'bilan_annuel_' || annee AS periode,
78 |     COUNT(DISTINCT referenceprel) AS nb_prelevements,
79 |     ROUND((
80 |         SUM(CASE WHEN sum_20_pfas_above_limit = 1 THEN 1 ELSE 0 END)
81 |         /
82 |         COUNT(DISTINCT referenceprel)
83 |     ), 2) AS ratio_limite_qualite,
84 |     SUM(has_pfas_above_vs) AS nb_sup_valeur_sanitaire,
85 |     TO_JSON({
86 |         'SPFAS': MAX(sum_20_pfas),
87 |         'SUM_4_PFAS': MAX(sum_4_pfas)
88 |     }) AS parametres_detectes,
89 |     MAX(max_datetimeprel) AS date_dernier_prel
90 | 
91 | FROM pfas_results_udi_agg
92 | GROUP BY cdreseau, annee
93 | 


--------------------------------------------------------------------------------
/dbt_/tests/test_pfas_results.sql:
--------------------------------------------------------------------------------
  1 | -- dernier udi
  2 | SELECT
  3 |     'dernier_prel' AS periode,
  4 |     cdreseau,
  5 |     categorie,
  6 |     resultat,
  7 |     0 AS ratio_limite_qualite,
  8 |     0 AS nb_sup_valeur_sanitaire
  9 | FROM
 10 |     {{ ref('int__resultats_pfas_udi_dernier') }}
 11 | WHERE
 12 |     (
 13 |     -- test
 14 |     -- l'UDI 013001457 a un prélevement le 2025-02-19 09:58:00
 15 |     -- avec un dépassement de valeur sanitaire pour PFOS
 16 |         cdreseau = '013001457'
 17 |         AND date_dernier_prel = TIMESTAMP '2025-02-19 09:58:00'
 18 |         AND resultat != 'sup_valeur_sanitaire'
 19 |     )
 20 |     OR (
 21 |         cdreseau = '004001032'
 22 |         AND date_dernier_prel = TIMESTAMP '2025-02-28 12:33:00'
 23 |         AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02'
 24 |     )
 25 |     OR (
 26 |         cdreseau = '008000855'
 27 |         AND date_dernier_prel = TIMESTAMP '2025-02-27 09:24:00'
 28 |         AND resultat != 'sup_valeur_sanitaire'
 29 |     )
 30 |     OR
 31 |     (
 32 |         cdreseau = '00800107747'
 33 |         AND date_dernier_prel = '2025-02-27 09:24:00'
 34 |         AND resultat != 'sup_valeur_sanitaire'
 35 |     )
 36 |     OR
 37 |     (
 38 |         cdreseau = '011004114'
 39 |         AND date_dernier_prel = '2025-02-24 13:55:00'
 40 |         AND resultat != 'somme_20pfas_sup_0_1'
 41 |     )
 42 |     OR
 43 |     (
 44 |         cdreseau = '001000404'
 45 |         AND date_dernier_prel = '2024-11-29 08:08:00'
 46 |         AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_sup_0_02'
 47 |     )
 48 |     OR
 49 |     (
 50 |         cdreseau = '001000511'
 51 |         AND date_dernier_prel = '2024-11-28 09:58:00'
 52 |         AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02'
 53 |     )
 54 |     OR
 55 |     (
 56 |         cdreseau = '003000370'
 57 |         AND date_dernier_prel = TIMESTAMP '2025-02-18 08:45:00'
 58 |         AND resultat != 'non_quantifie'
 59 |     )
 60 | UNION ALL
 61 | -- annuel udi
 62 | SELECT
 63 |     'bilan_annuel' AS periode,
 64 |     cdreseau,
 65 |     categorie,
 66 |     '' AS resultat,
 67 |     ratio_limite_qualite,
 68 |     nb_sup_valeur_sanitaire
 69 | FROM
 70 |     {{ ref('int__resultats_pfas_udi_annuel') }}
 71 | WHERE
 72 |     (
 73 |         cdreseau = '001000356'
 74 |         AND annee = '2025'
 75 |         AND
 76 |         (
 77 |             ratio_limite_qualite != 0
 78 |             OR nb_sup_valeur_sanitaire != 0
 79 |         )
 80 |     )
 81 |     OR
 82 |     (
 83 |         cdreseau = '074000043'
 84 |         AND annee = '2022'
 85 |         AND (
 86 |             ratio_limite_qualite != 0.1
 87 |             OR nb_sup_valeur_sanitaire != 2
 88 |         )
 89 |     )
 90 |     OR
 91 |     (
 92 |         cdreseau = '030000200'
 93 |         AND annee = '2024'
 94 |         AND (
 95 |             nb_sup_valeur_sanitaire != 0
 96 |             OR ratio_limite_qualite != 0.25
 97 |         )
 98 |     )
 99 |     OR
100 |     (
101 |         cdreseau = '069000025'
102 |         AND annee IN ('2022', '2023', '2024')
103 |         AND (
104 |             nb_sup_valeur_sanitaire != 0
105 |             OR ratio_limite_qualite != 0
106 |         )
107 |     )
108 | 


--------------------------------------------------------------------------------
/dbt_/models/intermediate/pfas/int__resultats_pfas_commune_annuel.sql:
--------------------------------------------------------------------------------
 1 | WITH
 2 | pfas_prels AS (
 3 |     SELECT DISTINCT
 4 |         de_partition AS annee,
 5 |         inseecommune,
 6 |         referenceprel,
 7 |         datetimeprel,
 8 |         cdparametresiseeaux,
 9 |         limite_qualite,
10 |         valeur_sanitaire_1,
11 |         valtraduite
12 |     FROM
13 |         {{ ref('int__resultats_udi_communes') }}
14 |     WHERE
15 |         categorie = 'pfas'
16 | ),
17 | 
18 | -- 1 : Agrégation des résultats en une seule ligne par prélèvement / udi / année
19 | pfas_results_udi_agg AS (
20 |     SELECT
21 |         referenceprel,
22 |         inseecommune,
23 |         annee,
24 |         -- La somme des 20 PFAS est disponible comme un paramètre (SPFAS)
25 |         MAX(
26 |             CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END
27 |         ) AS sum_20_pfas,
28 |         COUNT(
29 |             DISTINCT CASE
30 |                 WHEN cdparametresiseeaux = 'SPFAS' THEN referenceprel
31 |             END
32 |         ) AS count_20_pfas,
33 |         -- On calcule une somme de 4 PFAS pour une limite recommandée par le
34 |         -- haut conseil de la santé public
35 |         SUM(
36 |             CASE
37 |                 WHEN
38 |                     cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')
39 |                     THEN valtraduite
40 |                 ELSE 0
41 |             END
42 |         ) AS sum_4_pfas,
43 |         -- On check si la somme des 20 PFAS est supérieure
44 |         -- à la limite reglementaire
45 |         MAX(
46 |             CASE
47 |                 WHEN
48 |                     cdparametresiseeaux = 'SPFAS'
49 |                     AND limite_qualite IS NOT NULL
50 |                     AND valtraduite IS NOT NULL
51 |                     AND valtraduite > limite_qualite
52 |                     THEN 1
53 |                 ELSE 0
54 |             END
55 |         ) AS sum_20_pfas_above_limit,
56 |         MAX(
57 |             CASE
58 |                 WHEN
59 |                     valeur_sanitaire_1 IS NOT NULL
60 |                     AND valtraduite IS NOT NULL
61 |                     AND valtraduite > valeur_sanitaire_1
62 |                     THEN 1
63 |                 ELSE 0
64 |             END
65 |         ) AS has_pfas_above_vs,
66 |         MAX(datetimeprel) AS max_datetimeprel
67 |     FROM pfas_prels
68 |     GROUP BY referenceprel, inseecommune, annee
69 |     -- On drop les très rares cas où il n'y a pas la somme des 20 PFAS
70 |     HAVING count_20_pfas = 1
71 | )
72 | 
73 | SELECT
74 |     inseecommune,
75 |     annee,
76 |     'pfas' AS categorie,
77 |     'bilan_annuel_' || annee AS periode,
78 |     COUNT(DISTINCT referenceprel) AS nb_prelevements,
79 |     ROUND((
80 |         SUM(CASE WHEN sum_20_pfas_above_limit = 1 THEN 1 ELSE 0 END)
81 |         /
82 |         COUNT(DISTINCT referenceprel)
83 |     ), 2) AS ratio_limite_qualite,
84 |     SUM(has_pfas_above_vs) AS nb_sup_valeur_sanitaire,
85 |     TO_JSON({
86 |         'SPFAS': MAX(sum_20_pfas),
87 |         'SUM_4_PFAS': MAX(sum_4_pfas)
88 |     }) AS parametres_detectes,
89 |     MAX(max_datetimeprel) AS date_dernier_prel
90 | 
91 | FROM pfas_results_udi_agg
92 | GROUP BY inseecommune, annee
93 | 


--------------------------------------------------------------------------------
/pipelines/notebooks/test_atlasante_udi.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import duckdb\n",
 10 |     "from pipelines.tasks.config.common import DUCKDB_FILE\n",
 11 |     "\n",
 12 |     "con = duckdb.connect(database=DUCKDB_FILE, read_only=True)\n",
 13 |     "# show all tables in DB\n",
 14 |     "con.sql(\"SHOW TABLES;\").show()"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": null,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "# describe atlasante_udi table\n",
 24 |     "df = con.sql(\"DESCRIBE atlasante_udi;\").df()\n",
 25 |     "print(df)"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "# install spatial extention for spactial functions\n",
 35 |     "con.sql(\"INSTALL spatial;\")\n",
 36 |     "# Load spatial extension\n",
 37 |     "con.sql(\"LOAD spatial;\")"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": null,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "# show same paris's UDI\n",
 47 |     "df = con.sql(\"Select * from atlasante_udi where uge_nom like '%EAU DE PARIS%'\").df()\n",
 48 |     "df.head()"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": null,
 54 |    "metadata": {},
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "# Paris North (Nord - near Saint-Denis):\n",
 58 |     "# latitudeN = 48.9358\n",
 59 |     "# longitudeN = 2.3538\n",
 60 |     "# Paris South\n",
 61 |     "# latitudeS = 48.8186\n",
 62 |     "# longitudeS = 2.3326\n",
 63 |     "# Paris West (Ouest - near Porte Maillot/Neuilly):\n",
 64 |     "# latitudeW = 48.8781\n",
 65 |     "# longitudeW = 2.2785\n",
 66 |     "# Central Paris (Centre - Notre-Dame):\n",
 67 |     "latitude = 48.8566\n",
 68 |     "longitude = 2.3522\n",
 69 |     "\n",
 70 |     "sql = f\"\"\"\n",
 71 |     "SELECT *\n",
 72 |     "FROM atlasante_udi\n",
 73 |     "WHERE ST_Contains(geom, ST_GeomFromText('POINT({longitude} {latitude})'));\n",
 74 |     "\"\"\"\n",
 75 |     "df = con.sql(sql).df()\n",
 76 |     "df.head()"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": null,
 82 |    "metadata": {},
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "con.close()"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": null,
 91 |    "metadata": {},
 92 |    "outputs": [],
 93 |    "source": []
 94 |   }
 95 |  ],
 96 |  "metadata": {
 97 |   "kernelspec": {
 98 |    "display_name": ".venv",
 99 |    "language": "python",
100 |    "name": "python3"
101 |   },
102 |   "language_info": {
103 |    "codemirror_mode": {
104 |     "name": "ipython",
105 |     "version": 3
106 |    },
107 |    "file_extension": ".py",
108 |    "mimetype": "text/x-python",
109 |    "name": "python",
110 |    "nbconvert_exporter": "python",
111 |    "pygments_lexer": "ipython3",
112 |    "version": "3.12.7"
113 |   }
114 |  },
115 |  "nbformat": 4,
116 |  "nbformat_minor": 2
117 | }
118 | 


--------------------------------------------------------------------------------
/dbt_/models/website/web__stats_udi.sql:
--------------------------------------------------------------------------------
  1 | WITH
  2 | -- Dernière mise à jour
  3 | derniere_maj AS (
  4 |     SELECT
  5 |         'derniere_mise_a_jour' AS stat_nom,
  6 |         NULL AS stat_chiffre,
  7 |         max(date_dernier_prel)::VARCHAR AS stat_texte
  8 |     FROM {{ ref('web__resultats_udi') }}
  9 |     WHERE periode = 'dernier_prel'
 10 | ),
 11 | 
 12 | -- Total UDIs
 13 | total_udis AS (
 14 |     SELECT
 15 |         'total_udis' AS stat_nom,
 16 |         NULL AS stat_texte,
 17 |         count(DISTINCT cdreseau) AS stat_chiffre
 18 |     FROM {{ ref('web__resultats_udi') }}
 19 |     WHERE periode = 'dernier_prel'
 20 | ),
 21 | 
 22 | -- Statistiques par catégorie pour dernier prélèvement
 23 | stats_dernier_prel AS (
 24 |     SELECT
 25 |         NULL AS stat_texte,
 26 |         'dernier_prel_' || categorie || '_' || coalesce(resultat, 'non_recherche')
 27 |             AS stat_nom,
 28 |         count(*) AS stat_chiffre
 29 |     FROM {{ ref('web__resultats_udi') }}
 30 |     WHERE
 31 |         periode = 'dernier_prel'
 32 | 
 33 |     GROUP BY categorie, resultat
 34 | 
 35 | ),
 36 | 
 37 | -- Statistiques par catégorie et année pour bilan annuel - ratios par intervalles
 38 | stats_bilan_annuel_ratio AS (
 39 |     SELECT
 40 |         NULL AS stat_texte,
 41 |         periode || '_' || categorie || '_ratio_'
 42 |         || CASE
 43 |             WHEN ratio = 0 THEN '0'
 44 |             WHEN ratio <= 0.25 THEN '0.25'
 45 |             WHEN ratio <= 0.5 THEN '0.5'
 46 |             WHEN ratio <= 0.75 THEN '0.75'
 47 |             WHEN ratio <= 1 THEN '1'
 48 |             ELSE 'erreur'
 49 |         END AS stat_nom,
 50 |         count(*) AS stat_chiffre
 51 |     FROM {{ ref('web__resultats_udi') }}
 52 |     WHERE
 53 |         periode LIKE 'bilan_annuel_%'
 54 |         AND ratio IS NOT NULL
 55 |     GROUP BY
 56 |         periode,
 57 |         categorie,
 58 |         CASE
 59 |             WHEN ratio = 0 THEN '0'
 60 |             WHEN ratio <= 0.25 THEN '0.25'
 61 |             WHEN ratio <= 0.5 THEN '0.5'
 62 |             WHEN ratio <= 0.75 THEN '0.75'
 63 |             WHEN ratio <= 1 THEN '1'
 64 |             ELSE 'erreur'
 65 |         END
 66 | ),
 67 | 
 68 | -- Statistiques par catégorie et année pour bilan annuel - non recherche (ratio null)
 69 | stats_bilan_annuel_non_recherche AS (
 70 |     SELECT
 71 |         NULL AS stat_texte,
 72 |         periode || '_' || categorie || '_non_recherche' AS stat_nom,
 73 |         count(*) AS stat_chiffre
 74 |     FROM {{ ref('web__resultats_udi') }}
 75 |     WHERE
 76 |         periode LIKE 'bilan_annuel_%'
 77 |         AND ratio IS NULL
 78 |     GROUP BY periode, categorie
 79 | )
 80 | 
 81 | -- Union de toutes les statistiques
 82 | SELECT
 83 |     stat_nom,
 84 |     stat_chiffre,
 85 |     stat_texte
 86 | FROM derniere_maj
 87 | UNION ALL
 88 | SELECT
 89 |     stat_nom,
 90 |     stat_chiffre,
 91 |     stat_texte
 92 | FROM total_udis
 93 | UNION ALL
 94 | SELECT
 95 |     stat_nom,
 96 |     stat_chiffre,
 97 |     stat_texte
 98 | FROM stats_dernier_prel
 99 | UNION ALL
100 | SELECT
101 |     stat_nom,
102 |     stat_chiffre,
103 |     stat_texte
104 | FROM stats_bilan_annuel_ratio
105 | UNION ALL
106 | SELECT
107 |     stat_nom,
108 |     stat_chiffre,
109 |     stat_texte
110 | FROM stats_bilan_annuel_non_recherche
111 | 


--------------------------------------------------------------------------------
/dbt_/seeds/schema.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | seeds:
 4 |   - name: references_generations_futures
 5 |     config:
 6 |       quote_char: '"'
 7 |       delimiter: ","
 8 |       full_refresh: true
 9 |     description: "Table de référence fournie par Générations Futures"
10 |     columns:
11 |       - name: cdparametresiseeaux
12 |         description: "Code SISE-Eaux (Système d'Information des services Santé-Environnement Eaux) du paramètre"
13 |       - name: cdparametre
14 |         description: "Code SANDRE (Service National d'Administration des Données et Référentiels sur l'Eau) du paramètre"
15 |       - name: libmajparametre
16 |         description: "Nom du paramètre en majuscule"
17 |         tests:
18 |           - dbt_expectations.expect_column_values_to_be_of_type:
19 |               column_type: VARCHAR
20 |       - name: libminparametre
21 |         description: "Nom du paramètre en minuscule"
22 |         tests:
23 |           - dbt_expectations.expect_column_values_to_be_of_type:
24 |               column_type: VARCHAR
25 |       - name: casparam
26 |         description: "Code CAS (Chemical Abstracts Service) de la substance chimique"
27 |       - name: categorie_1
28 |         description: "Catégorie du paramètre"
29 |         tests:
30 |           - not_null
31 |           - accepted_values:
32 |               values:
33 |                 - "pfas"
34 |                 - "cvm"
35 |                 - "nitrate"
36 |                 - "metaux_lourds"
37 |                 - "substances_indus"
38 |                 - "pesticide"
39 |       - name: categorie_2
40 |         description: "Sous-catégorie"
41 |       - name: categorie_3
42 |         description: "Détail de la sous-catégorie"
43 |       - name: limite_qualite
44 |         description: "Limite de qualité du paramètre"
45 |         tests:
46 |           - dbt_expectations.expect_column_values_to_be_of_type:
47 |               column_type: double
48 |       - name: limite_qualite_unite
49 |         description: "Unité de la limite de qualité"
50 |       - name: limite_qualite_commentaire
51 |         description: "Commentaire sur la limite de qualité"
52 |       - name: limite_indicative
53 |         description: "Limite indicative du paramètre"
54 |         tests:
55 |           - dbt_expectations.expect_column_values_to_be_of_type:
56 |               column_type: double
57 |       - name: limite_indicative_unite
58 |         description: "Unité de la limite indicative"
59 |       - name: valeur_sanitaire_1
60 |         description: "Valeur sanitaire"
61 |         tests:
62 |           - dbt_expectations.expect_column_values_to_be_of_type:
63 |               column_type: double
64 |       - name: valeur_sanitaire_1_unite
65 |         description: "Unité de la valeur sanitaire 1"
66 |       - name: valeur_sanitaire_1_commentaire
67 |         description: "Commentaire sur la valeur sanitaire 1"
68 |       - name: valeur_sanitaire_2
69 |         description: "Deuxième valeur sanitaire"
70 |         tests:
71 |           - dbt_expectations.expect_column_values_to_be_of_type:
72 |               column_type: integer
73 |       - name: valeur_sanitaire_2_unite
74 |         description: "Unité de la valeur sanitaire 2"
75 |       - name: valeur_sanitaire_2_commentaire
76 |         description: "Commentaire sur la valeur sanitaire 2"
77 |       - name: web_label
78 |         description: "Libellé utilisé dans le site web"
79 | 


--------------------------------------------------------------------------------
/pipelines/tasks/config/config_uploaded_geojson.py:
--------------------------------------------------------------------------------
 1 | """Configuration for uploaded GeoJSON files.
 2 | 
 3 | Cette configuration supporte plusieurs fichiers GeoJSON uploadés manuellement sur S3.
 4 | Pour ajouter un nouveau fichier, ajoutez simplement un dictionnaire dans la liste 'files'.
 5 | 
 6 | Format de chaque fichier:
 7 | - path: chemin relatif du fichier sur S3 (sera combiné avec prefix)
 8 | - table_name: nom de la table à créer dans DuckDB
 9 | - file_name: nom du fichier local à télécharger
10 | """
11 | 
12 | uploaded_geojson_config = {
13 |     "source": {
14 |         "prefix": "upload",  # Préfixe S3
15 |     },
16 |     "files": [
17 |         # {
18 |         #     # Cette première source contient le GeoJSON des UDIs de Atlasante issu des infofactures pour la métropole pour l'année 2023.
19 |         #     # Pour l'obtenir:
20 |         #     # - partir de la source suivante: https://catalogue.atlasante.fr/geonetwork/srv/fre/catalog.search#/metadata/1d02cd8b-137d-4360-b566-f6082a47ee32
21 |         #     # - cliquer sur "accès à la carte" (normalement on arrive sur cette URL: https://carto.atlasante.fr/1/ars_metropole_udi_infofactures.map)
22 |         #     # - à gauche de la carte, cliquer sur le bouton avec les trois "couches"
23 |         #     # - choisir le layer "Réseaux (UDI) - 2023" dans la liste (dans "Historique"), puis cliquer sur les trois points à droite, puis cliquer sur "Télécharger la donnée"
24 |         #     # - choisir format "GeoJSON" et projection "WGS84 - GPS (EPSG 4326)" puis cliquer sur "Exécution directe" pour télécharger le fichier
25 |         #     # - extraire le fichier "dgs_metropole_udi_infofactures_j.json" du zip téléchargé
26 |         #     # - renommer le fichier téléchargé et l'uploader dans le dossier approprié (cf `path` ci-dessous)
27 |         #     #
28 |         #     "path": "atlasante/udi_infofactures_2023.json",
29 |         #     "table_name": "atlasante_udi_2023",
30 |         #     "local_file_name": "udi_infofactures_2023.json",
31 |         # },
32 |         # {
33 |         #     # UDIs de la Corse
34 |         #     # Pour l'obtenir:
35 |         #     # - partir de la source suivante: https://catalogue.atlasante.fr/geonetwork/srv/fre/catalog.search#/metadata/67a6998e-15b2-4796-9584-c87af156f549
36 |         #     # - sur "Accès au téléchargement des données", cliquer sur "Télécharger"
37 |         #     # - choisir format "GeoJSON" et projection "WGS84 - GPS (EPSG 4326)" puis cliquer sur "Exécution directe" pour télécharger le fichier
38 |         #     # - extraire le fichier "ars_r94_udi_2018_z.json" du zip téléchargé
39 |         #     # - renommer le fichier téléchargé et l'uploader dans le dossier approprié (cf `path` ci-dessous)
40 |         #     #
41 |         #     "path": "atlasante/udi_corse.json",
42 |         #     "table_name": "atlasante_udi_corse",
43 |         #     "local_file_name": "udi_corse.json",
44 |         # },
45 |         {
46 |             # GeoJSON des UDIs de Atlasante issu des infofactures pour l'année 2024.
47 |             # S'obtient de la même manière que pour l'année 2023 (cf. commentaire dans le premier bloc).
48 |             # Concernant la couverture géographique, il s'agit de la métropole + Corse.
49 |             "path": "atlasante/udi_infofactures_2024.json",
50 |             "table_name": "atlasante_udi_2024",
51 |             "local_file_name": "udi_infofactures_2024.json",
52 |         }
53 |     ],
54 | }
55 | 


--------------------------------------------------------------------------------
/pipelines/tasks/download_pmtiles.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Download PMtiles files.
 3 | 
 4 | Args:
 5 |     - env (str): Environment to download from ("dev" or "prod")
 6 |     - use-boto3 (bool): Use boto3 library to download from S3 storage, instead of using public HTTPS URL (default: False)
 7 | 
 8 | Examples:
 9 |     - download_pmtiles --env prod : Download PMtiles from production environment
10 |     - download_pmtiles --env dev  : Download PMtiles from development environment
11 |     - download_pmtiles --use-boto3 : Download PMtiles from S3 storage
12 | """
13 | 
14 | import os
15 | from abc import ABC, abstractmethod
16 | 
17 | from pipelines.config.config import get_s3_path_pmtiles
18 | from pipelines.tasks.config.common import CACHE_FOLDER, download_file_from_https
19 | from pipelines.utils.logger import get_logger
20 | from pipelines.utils.storage_client import ObjectStorageClient
21 | 
22 | logger = get_logger(__name__)
23 | 
24 | 
25 | class PMtilesDownloadStrategy(ABC):
26 |     """Interface for GeoJSON download strategies."""
27 | 
28 |     def __init__(self):
29 |         super().__init__()
30 |         self.s3 = ObjectStorageClient()
31 | 
32 |     @abstractmethod
33 |     def download(self, env: str, local_path: str):
34 |         pass
35 | 
36 | 
37 | class Boto3DownloadStrategy(PMtilesDownloadStrategy):
38 |     """Strategy for downloading PMtiles from S3 storage using boto3."""
39 | 
40 |     def download(self, env: str, local_path: str):
41 |         logger.info(f"Downloading PMtiles from S3 in environment {env}")
42 |         remote_s3_path = get_s3_path_pmtiles(env)
43 |         self.s3.download_object(remote_s3_path, local_path)
44 |         logger.info(
45 |             f"✅ GeoJSON downloaded from s3://{self.s3.bucket_name}/{remote_s3_path}"
46 |         )
47 | 
48 | 
49 | class HTTPSDownloadStrategy(PMtilesDownloadStrategy):
50 |     """Strategy for downloading PMtiles via HTTPS."""
51 | 
52 |     def download(self, env: str, local_path: str):
53 |         logger.info("Downloading PMtiles via HTTPS")
54 |         remote_s3_path = get_s3_path_pmtiles(env)
55 |         url = f"https://{self.s3.bucket_name}.{self.s3.endpoint_url.split('https://')[1]}/{remote_s3_path}"
56 |         download_file_from_https(url=url, filepath=local_path)
57 |         logger.info(f"✅ GeoJSON downloaded via HTTPS: {url} -> {local_path}")
58 | 
59 | 
60 | class PMtilesDownloader:
61 |     """Manages the PMtiles download process."""
62 | 
63 |     def __init__(self, strategy: PMtilesDownloadStrategy, env: str):
64 |         self.strategy = strategy
65 |         self.local_geojson_path = os.path.join(
66 |             CACHE_FOLDER, "new-georef-france-commune-prelevement.pmtiles"
67 |         )
68 |         if env not in ("dev", "prod"):
69 |             raise ValueError("'env' must be 'dev' or 'prod'")
70 |         self.env = env
71 | 
72 |     def download(self):
73 |         self.strategy.download(self.env, self.local_geojson_path)
74 | 
75 | 
76 | def execute(env: str, use_boto3: bool = False):
77 |     """
78 |     Execute PMtiles download using the appropriate strategy.
79 | 
80 |     Args:
81 |         env (str): Environment to download from ("dev" or "prod")
82 |         use_boto3 (bool): Whether to use boto3 instead of HTTPS. Default is False.
83 |     """
84 |     strategy = Boto3DownloadStrategy() if use_boto3 else HTTPSDownloadStrategy()
85 |     downloader = PMtilesDownloader(strategy, env)
86 |     downloader.download()
87 | 


--------------------------------------------------------------------------------
/Dockerfile.unified:
--------------------------------------------------------------------------------
  1 | # Unified Dockerfile - Embeds database and pmtiles for atomic deployments
  2 | 
  3 | # Builder stage for compiling the application
  4 | # Note: debian bookworm is supported until 2028-06-30
  5 | FROM debian:bookworm-slim AS builder
  6 | 
  7 | # Define build argument for API key
  8 | ARG NEXT_PUBLIC_PROTOMAPS_API_KEY
  9 | 
 10 | # Install UV
 11 | COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
 12 | ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
 13 | ENV UV_PYTHON_INSTALL_DIR=/python
 14 | ENV UV_PYTHON_PREFERENCE=only-managed
 15 | ENV UV_NO_CACHE=1
 16 | RUN uv python install 3.12
 17 | 
 18 | # Install Node.js and other required dependencies
 19 | RUN apt-get update && apt-get install -y --no-install-recommends \
 20 |     nodejs \
 21 |     npm \
 22 |     ca-certificates \
 23 |     curl \
 24 |     && rm -rf /var/lib/apt/lists/*
 25 | 
 26 | # Set up Node.js environment
 27 | WORKDIR /app/webapp
 28 | COPY webapp/package.json webapp/package-lock.json /app/webapp/
 29 | RUN npm ci
 30 | 
 31 | # Set up Python environment with UV
 32 | WORKDIR /app
 33 | COPY README.md pyproject.toml uv.lock /app/
 34 | COPY pipelines /app/pipelines
 35 | RUN uv sync
 36 | 
 37 | # Copy pre-built database and pmtiles
 38 | COPY database/data.duckdb /app/database/data.duckdb
 39 | COPY database/cache/*.pmtiles /app/public/pmtiles/
 40 | 
 41 | # Create trimmed database for website
 42 | RUN uv run pipelines/run.py run trim_database_for_website --output-file=database/data_for_website.duckdb
 43 | 
 44 | # Copy next.js app and build it
 45 | WORKDIR /app/webapp
 46 | COPY webapp /app/webapp
 47 | ENV NEXT_TELEMETRY_DISABLED=1
 48 | ENV NODE_ENV=production
 49 | ENV NEXT_PUBLIC_PROTOMAPS_API_KEY=$NEXT_PUBLIC_PROTOMAPS_API_KEY
 50 | ENV DUCKDB_PATH="/app/database/data_for_website.duckdb"
 51 | RUN npm run build
 52 | 
 53 | 
 54 | 
 55 | # Runner stage - only contains the necessary runtime files
 56 | FROM debian:bookworm-slim AS runner
 57 | 
 58 | # Define build argument for API key
 59 | ARG NEXT_PUBLIC_PROTOMAPS_API_KEY
 60 | 
 61 | # Install Node.js (minimal dependencies for runtime)
 62 | RUN apt-get update && apt-get install -y --no-install-recommends \
 63 |     nodejs \
 64 |     ca-certificates \
 65 |     && rm -rf /var/lib/apt/lists/*
 66 | 
 67 | # Create non-root user
 68 | RUN addgroup --system --gid 1000 appgroup && \
 69 |     adduser --system --uid 1000 appuser
 70 | 
 71 | WORKDIR /app
 72 | 
 73 | # Create directories
 74 | RUN mkdir -p /app/database /app/public/pmtiles
 75 | RUN chown -R appuser:appgroup /app
 76 | 
 77 | # Copy webapp files
 78 | COPY --from=builder --chown=appuser:appgroup /app/webapp/.next/standalone /app
 79 | COPY --from=builder --chown=appuser:appgroup /app/webapp/.next/static /app/.next/static
 80 | COPY --from=builder --chown=appuser:appgroup /app/webapp/public /app/public
 81 | 
 82 | # Copy database and pmtiles
 83 | COPY --from=builder --chown=appuser:appgroup /app/database/data_for_website.duckdb /app/database/data_for_website.duckdb
 84 | COPY --from=builder --chown=appuser:appgroup /app/public/pmtiles/ /app/public/pmtiles/
 85 | 
 86 | # Set environment variables
 87 | ENV NODE_ENV=production
 88 | ENV NEXT_TELEMETRY_DISABLED=1
 89 | ENV PORT=8080
 90 | ENV HOSTNAME="0.0.0.0"
 91 | ENV NEXT_PUBLIC_PROTOMAPS_API_KEY=$NEXT_PUBLIC_PROTOMAPS_API_KEY
 92 | ENV DUCKDB_PATH="/app/database/data_for_website.duckdb"
 93 | ENV HOME="/app"
 94 | 
 95 | # Switch to non-root user
 96 | USER appuser
 97 | 
 98 | # Expose the port
 99 | EXPOSE 8080
100 | 
101 | # Start the application
102 | CMD ["node", "server.js"]


--------------------------------------------------------------------------------
/dbt_/models/website/web__resultats_udi.sql:
--------------------------------------------------------------------------------
  1 | WITH
  2 | periodes AS (
  3 |     SELECT unnest(ARRAY[
  4 |         'bilan_annuel_2020',
  5 |         'bilan_annuel_2021',
  6 |         'bilan_annuel_2022',
  7 |         'bilan_annuel_2023',
  8 |         'bilan_annuel_2024',
  9 |         'bilan_annuel_2025',
 10 |         'dernier_prel'
 11 |     ]) AS periode
 12 | ),
 13 | 
 14 | categories AS (
 15 |     SELECT unnest(ARRAY[
 16 |         'cvm',
 17 |         'pfas',
 18 |         'sub_indus_perchlorate',
 19 |         -- Les résultats pour le 1,4 dioxane sont ignorés pour l'instant
 20 |         --'sub_indus_14dioxane',
 21 |         'pesticide',
 22 |         'sub_active',
 23 |         'metabolite',
 24 |         'metabolite_esa_metolachlore',
 25 |         'metabolite_chlorothalonil_r471811',
 26 |         'metabolite_chloridazone_desphenyl',
 27 |         'metabolite_chloridazone_methyl_desphenyl',
 28 |         'metabolite_atrazine_desethyl',
 29 |         --'metaux_lourds_as',
 30 |         --'metaux_lourds_pb',
 31 |         'nitrate',
 32 |         'tous'
 33 |     ]) AS categorie
 34 | ),
 35 | 
 36 | udi AS (
 37 |     SELECT
 38 |         cdreseau,
 39 |         nomreseaux
 40 |     FROM
 41 |         {{ ref('int__udi') }}
 42 | ),
 43 | 
 44 | -- Cross join to ensure all combinations exist
 45 | udi_periodes_categories AS (
 46 |     SELECT
 47 |         u.cdreseau,
 48 |         u.nomreseaux,
 49 |         p.periode,
 50 |         categories.categorie
 51 |     FROM
 52 |         udi AS u
 53 |     CROSS JOIN
 54 |         periodes AS p
 55 |     CROSS JOIN
 56 |         categories
 57 | ),
 58 | 
 59 | -- Append results from 'tous' category (in another model to avoid circular dependency)
 60 | results AS (
 61 |     SELECT
 62 |         cdreseau,
 63 |         periode,
 64 |         categorie,
 65 |         resultat,
 66 |         ratio,
 67 |         date_dernier_prel,
 68 |         nb_parametres,
 69 |         nb_prelevements,
 70 |         nb_sup_valeur_sanitaire,
 71 |         parametres_detectes
 72 |     FROM {{ ref('int__union_resultats_udi') }}
 73 |     UNION ALL
 74 |     SELECT
 75 |         cdreseau,
 76 |         periode,
 77 |         categorie,
 78 |         null AS resultat,
 79 |         ratio,
 80 |         null AS date_dernier_prel,
 81 |         null AS nb_parametres,
 82 |         nb_prelevements,
 83 |         nb_sup_valeur_sanitaire,
 84 |         null AS parametres_detectes
 85 |     FROM {{ ref('int__resultats_tous_udi_annuel') }}
 86 |     UNION ALL
 87 |     SELECT
 88 |         cdreseau,
 89 |         periode,
 90 |         categorie,
 91 |         resultat,
 92 |         null AS ratio,
 93 |         date_dernier_prel,
 94 |         nb_parametres,
 95 |         null AS nb_prelevements,
 96 |         null AS nb_sup_valeur_sanitaire,
 97 |         null AS parametres_detectes
 98 |     FROM {{ ref('int__resultats_tous_udi_dernier') }}
 99 | )
100 | 
101 | -- Final output with all UDI-periodes-categories combinations
102 | SELECT
103 |     upc.cdreseau,
104 |     upc.nomreseaux,
105 |     upc.periode,
106 |     upc.categorie,
107 |     r.resultat,
108 |     r.ratio,
109 |     r.date_dernier_prel,
110 |     r.nb_parametres,
111 |     r.nb_prelevements,
112 |     r.nb_sup_valeur_sanitaire,
113 |     r.parametres_detectes
114 | FROM
115 |     udi_periodes_categories AS upc
116 | LEFT JOIN
117 |     results AS r
118 |     ON
119 |         upc.cdreseau = r.cdreseau
120 |         AND upc.periode = r.periode
121 |         AND upc.categorie = r.categorie
122 | ORDER BY
123 |     upc.cdreseau,
124 |     upc.periode,
125 |     r.categorie
126 | 


--------------------------------------------------------------------------------
/dbt_/models/website/web__resultats_communes.sql:
--------------------------------------------------------------------------------
  1 | WITH
  2 | periodes AS (
  3 |     SELECT unnest(ARRAY[
  4 |         'bilan_annuel_2020',
  5 |         'bilan_annuel_2021',
  6 |         'bilan_annuel_2022',
  7 |         'bilan_annuel_2023',
  8 |         'bilan_annuel_2024',
  9 |         'bilan_annuel_2025',
 10 |         'dernier_prel'
 11 |     ]) AS periode
 12 | ),
 13 | 
 14 | categories AS (
 15 |     SELECT unnest(ARRAY[
 16 |         'cvm',
 17 |         'pfas',
 18 |         'sub_indus_perchlorate',
 19 |         -- Les résultats pour le 1,4 dioxane sont ignorés pour l'instant
 20 |         --'sub_indus_14dioxane',
 21 |         'pesticide',
 22 |         'sub_active',
 23 |         'metabolite',
 24 |         'metabolite_esa_metolachlore',
 25 |         'metabolite_chlorothalonil_r471811',
 26 |         'metabolite_chloridazone_desphenyl',
 27 |         'metabolite_chloridazone_methyl_desphenyl',
 28 |         'metabolite_atrazine_desethyl',
 29 |         --'metaux_lourds_as',
 30 |         --'metaux_lourds_pb',
 31 |         'nitrate',
 32 |         'tous'
 33 |     ]) AS categorie
 34 | ),
 35 | 
 36 | cog_communes AS (
 37 |     SELECT
 38 |         com AS commune_code_insee,
 39 |         libelle AS commune_nom
 40 |     FROM {{ ref("stg_communes__cog") }}
 41 |     WHERE typecom = 'COM'
 42 | ),
 43 | 
 44 | -- Cross join to ensure all combinations exist
 45 | communes_periodes_categories AS (
 46 |     SELECT
 47 |         cog.commune_code_insee,
 48 |         cog.commune_nom,
 49 |         p.periode,
 50 |         categories.categorie
 51 |     FROM
 52 |         cog_communes AS cog
 53 |     CROSS JOIN
 54 |         periodes AS p
 55 |     CROSS JOIN
 56 |         categories
 57 | ),
 58 | 
 59 | -- Append results from 'tous' category (in another model to avoid circular dependency)
 60 | results AS (
 61 |     SELECT
 62 |         inseecommune,
 63 |         periode,
 64 |         categorie,
 65 |         resultat,
 66 |         ratio,
 67 |         date_dernier_prel,
 68 |         nb_parametres,
 69 |         nb_prelevements,
 70 |         nb_sup_valeur_sanitaire,
 71 |         parametres_detectes
 72 |     FROM {{ ref('int__union_resultats_commune') }}
 73 |     UNION ALL
 74 |     SELECT
 75 |         inseecommune,
 76 |         periode,
 77 |         categorie,
 78 |         null AS resultat,
 79 |         ratio,
 80 |         null AS date_dernier_prel,
 81 |         null AS nb_parametres,
 82 |         nb_prelevements,
 83 |         nb_sup_valeur_sanitaire,
 84 |         null AS parametres_detectes
 85 |     FROM {{ ref('int__resultats_tous_commune_annuel') }}
 86 |     UNION ALL
 87 |     SELECT
 88 |         inseecommune,
 89 |         periode,
 90 |         categorie,
 91 |         resultat,
 92 |         null AS ratio,
 93 |         date_dernier_prel,
 94 |         nb_parametres,
 95 |         null AS nb_prelevements,
 96 |         null AS nb_sup_valeur_sanitaire,
 97 |         null AS parametres_detectes
 98 |     FROM {{ ref('int__resultats_tous_commune_dernier') }}
 99 | )
100 | 
101 | -- Final output with all inseecommune-periodes-categories combinations
102 | SELECT
103 |     cpc.commune_code_insee,
104 |     cpc.commune_nom,
105 |     cpc.periode,
106 |     cpc.categorie,
107 |     r.resultat,
108 |     r.ratio,
109 |     r.date_dernier_prel,
110 |     r.nb_parametres,
111 |     r.nb_prelevements,
112 |     r.nb_sup_valeur_sanitaire,
113 |     r.parametres_detectes
114 | FROM
115 |     communes_periodes_categories AS cpc
116 | LEFT JOIN
117 |     results AS r
118 |     ON
119 |         cpc.commune_code_insee = r.inseecommune
120 |         AND cpc.periode = r.periode
121 |         AND cpc.categorie = r.categorie
122 | ORDER BY
123 |     cpc.commune_code_insee,
124 |     cpc.periode,
125 |     r.categorie
126 | 


--------------------------------------------------------------------------------
/pipelines/tasks/config/config_edc.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict
 2 | 
 3 | 
 4 | def get_edc_config() -> Dict:
 5 |     """
 6 |     Returns various configuration for processing the EDC (Eau distribuée par commune) datasets.
 7 |     The data comes from https://www.data.gouv.fr/fr/datasets/resultats-du-controle-sanitaire-de-leau-distribuee-commune-par-commune/
 8 |     For each year a dataset is downloadable on a URL like this (ex. 2024):
 9 |         https://www.data.gouv.fr/fr/datasets/r/84a67a3b-08a7-4001-98e6-231c74a98139
10 |     :return: A dict with the config used for processing.
11 |         The "source" part is related to the data.gouv datasource
12 |         The "files" part is related to the extracted files information and sql table names
13 |     """
14 | 
15 |     edc_config = {
16 |         "source": {
17 |             "base_url": "https://www.data.gouv.fr/fr/datasets/r/",
18 |             "available_years": [
19 |                 # "2016",
20 |                 # "2017",
21 |                 # "2018",
22 |                 # "2019", it was decided to use dataset from 2020
23 |                 "2020",
24 |                 "2021",
25 |                 "2022",
26 |                 "2023",
27 |                 "2024",
28 |                 "2025",
29 |             ],
30 |             "yearly_files_infos": {
31 |                 "2025": {
32 |                     "id": "7e38c236-dd3c-455e-a728-f0ecb84b1a7c",
33 |                     "zipfile": "dis-2025.zip",
34 |                 },
35 |                 "2024": {
36 |                     "id": "a631e486-c790-42d0-8368-6a42b1a3dc1d",
37 |                     "zipfile": "dis-2024.zip",
38 |                 },
39 |                 "2023": {
40 |                     "id": "c89dec4a-d985-447c-a102-75ba814c398e",
41 |                     "zipfile": "dis-2023.zip",
42 |                 },
43 |                 "2022": {
44 |                     "id": "a97b6074-c4dd-4ef2-8922-b0cf04dbff9a",
45 |                     "zipfile": "dis-2022.zip",
46 |                 },
47 |                 "2021": {
48 |                     "id": "d2b432cc-3761-44d3-8e66-48bc15300bb5",
49 |                     "zipfile": "dis-2021.zip",
50 |                 },
51 |                 "2020": {
52 |                     "id": "a6cb4fea-ef8c-47a5-acb3-14e49ccad801",
53 |                     "zipfile": "dis-2020.zip",
54 |                 },
55 |                 "2019": {
56 |                     "id": "861f2a7d-024c-4bf0-968b-9e3069d9de07",
57 |                     "zipfile": "dis-2019.zip",
58 |                 },
59 |                 "2018": {
60 |                     "id": "0513b3c0-dc18-468d-a969-b3508f079792",
61 |                     "zipfile": "dis-2018.zip",
62 |                 },
63 |                 "2017": {
64 |                     "id": "5785427b-3167-49fa-a581-aef835f0fb04",
65 |                     "zipfile": "dis-2017.zip",
66 |                 },
67 |                 "2016": {
68 |                     "id": "483c84dd-7912-483b-b96f-4fa5e1d8651f",
69 |                     "zipfile": "dis-2016.zip",
70 |                 },
71 |             },
72 |         },
73 |         "files": {
74 |             "communes": {
75 |                 "file_name_prefix": "DIS_COM_UDI_",
76 |                 "file_extension": ".txt",
77 |                 "table_name": "edc_communes",
78 |             },
79 |             "prelevements": {
80 |                 "file_name_prefix": "DIS_PLV_",
81 |                 "file_extension": ".txt",
82 |                 "table_name": "edc_prelevements",
83 |             },
84 |             "resultats": {
85 |                 "file_name_prefix": "DIS_RESULT_",
86 |                 "file_extension": ".txt",
87 |                 "table_name": "edc_resultats",
88 |             },
89 |         },
90 |     }
91 | 
92 |     return edc_config
93 | 


--------------------------------------------------------------------------------