├── .env.example ├── .gitignore ├── .vscode └── settings.json ├── Readme.md ├── data └── .gitkeep ├── data_stack_diagram.png ├── dbt-models ├── .gitignore ├── .user.yml ├── README.md ├── analyses │ └── .gitkeep ├── dbt_project.yml ├── macros │ └── .gitkeep ├── models │ ├── dim_families.sql │ ├── dim_family_members.sql │ ├── food_bank_report │ │ ├── food_bank_report.yml │ │ ├── quarterly_report │ │ │ ├── viz_food_bank_report__count_members__quarterly.sql │ │ │ ├── viz_food_bank_report__families__quarterly.sql │ │ │ ├── viz_food_bank_report__family_members__quarterly.sql │ │ │ ├── viz_food_bank_report__supply_sources__quarterly.sql │ │ │ ├── viz_food_bank_report__visits_volumes__quarterly.sql │ │ │ └── viz_food_bank_report__volumes__quarterly.sql │ │ └── yearly_report │ │ │ ├── viz_food_bank_report__families__yearly.sql │ │ │ ├── viz_food_bank_report__family_members__yearly.sql │ │ │ ├── viz_food_bank_report__visits_volumes__yearly.sql │ │ │ └── viz_food_bank_report__volumes__yearly.sql │ ├── intermediate │ │ ├── intermediate.yml │ │ ├── stg_families__family_members_combinations.sql │ │ ├── stg_operations__product_operations.sql │ │ ├── stg_products__distribution_metrics.sql │ │ ├── stg_products__stock_timeseries.sql │ │ ├── stg_supply_sources_consolidated.sql │ │ └── stg_visits_events__operations_combinations.sql │ ├── schema.yml │ ├── staging │ │ ├── staging.yml │ │ ├── stg_families.sql │ │ ├── stg_family_members.sql │ │ ├── stg_operations.sql │ │ ├── stg_products.sql │ │ ├── stg_supply_sources.sql │ │ └── stg_visit_events.sql │ ├── stg_families__visit_events.sql │ ├── stg_family_members__visit_events.sql │ ├── viz_operations__stock_entries_sources.sql │ └── viz_products__stocks_availability.sql ├── package-lock.yml ├── packages.yml ├── seeds │ ├── seed_bapif_collection_dates.csv │ ├── seed_collection_dates.csv │ ├── seed_grocery_shopping_dates.csv │ └── seeds.yml ├── snapshots │ └── .gitkeep └── tests │ └── .gitkeep └── etl-la-porte-bleue ├── .gitignore ├── README.md ├── handler.py ├── models.py ├── package-lock.json ├── package.json ├── requirements.txt └── serverless.yml /.env.example: -------------------------------------------------------------------------------- 1 | MONGO_DB=mongodb+srv://user:pwd@portebleue.e4e6b.mongodb.net/porteBleue?retryWrites=true&w=majority 2 | 3 | POSTGRES_DB=postgresql://user:pwd@ec2-xx-xx-xx-yyy.eu-west-1.compute.amazonaws.com:5432/dbname 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | etc/ 3 | 4 | # Created by https://www.toptal.com/developers/gitignore/api/python,venv 5 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,venv 6 | 7 | ### Python ### 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | cover/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | .pybuilder/ 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | # For a library or package, you might want to ignore these files since the code is 94 | # intended to run in multiple environments; otherwise, check them in: 95 | # .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # poetry 105 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 106 | # This is especially recommended for binary packages to ensure reproducibility, and is more 107 | # commonly ignored for libraries. 108 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 109 | #poetry.lock 110 | 111 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 112 | __pypackages__/ 113 | 114 | # Celery stuff 115 | celerybeat-schedule 116 | celerybeat.pid 117 | 118 | # SageMath parsed files 119 | *.sage.py 120 | 121 | # Environments 122 | .env 123 | .venv 124 | env/ 125 | venv/ 126 | ENV/ 127 | env.bak/ 128 | venv.bak/ 129 | 130 | # Spyder project settings 131 | .spyderproject 132 | .spyproject 133 | 134 | # Rope project settings 135 | .ropeproject 136 | 137 | # mkdocs documentation 138 | /site 139 | 140 | # mypy 141 | .mypy_cache/ 142 | .dmypy.json 143 | dmypy.json 144 | 145 | # Pyre type checker 146 | .pyre/ 147 | 148 | # pytype static type analyzer 149 | .pytype/ 150 | 151 | # Cython debug symbols 152 | cython_debug/ 153 | 154 | # PyCharm 155 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 156 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 157 | # and can be added to the global gitignore or merged into this file. For a more nuclear 158 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 159 | #.idea/ 160 | 161 | ### venv ### 162 | # Virtualenv 163 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ 164 | [Bb]in 165 | [Ii]nclude 166 | [Ll]ib 167 | [Ll]ib64 168 | [Ll]ocal 169 | [Ss]cripts 170 | pyvenv.cfg 171 | pip-selfcheck.json 172 | 173 | # End of https://www.toptal.com/developers/gitignore/api/python,venv 174 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.formatting.provider": "black" 3 | } 4 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Analytics La Porte Bleue 2 | > Modern Data Stack for a Charity 3 | 4 | Get started with a cost-effective modern data stack using this repo. 5 | Read more about the project in my blog post [here](). 6 | 7 | 8 | This repo helps bootstrap the infrastructure of a charity's modern data stack known as La Porte Bleue. 9 | ## Architecture 10 | 11 | ![](data_stack_diagram.png) 12 | 13 | 1. Ingestion: **Python** (PyMongo & SQLModel) 14 | 2. Orchestration: **Prefect** 15 | 3. Warehousing: **Heroku Postgres** 16 | 4. Transform: **dbt** 17 | 5. Business Intelligence: **Preset** 18 | -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Porte-Bleue/mongo-to-postgres-etl/2c0713eeba8d2638fa698e7f41f2d3c80da9bd8b/data/.gitkeep -------------------------------------------------------------------------------- /data_stack_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Porte-Bleue/mongo-to-postgres-etl/2c0713eeba8d2638fa698e7f41f2d3c80da9bd8b/data_stack_diagram.png -------------------------------------------------------------------------------- /dbt-models/.gitignore: -------------------------------------------------------------------------------- 1 | profiles.yml 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /dbt-models/.user.yml: -------------------------------------------------------------------------------- 1 | id: 11e92474-ec8b-46b3-809f-6b03af668ceb 2 | -------------------------------------------------------------------------------- /dbt-models/README.md: -------------------------------------------------------------------------------- 1 | Welcome to your new dbt project! 2 | 3 | ### Using the starter project 4 | 5 | ```sh 6 | export DBT_PROFILES_DIR=$(pwd) 7 | dbt run 8 | dbt test 9 | ``` 10 | 11 | ### Resources: 12 | - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) 13 | - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers 14 | - Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support 15 | - Find [dbt events](https://events.getdbt.com) near you 16 | - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices 17 | -------------------------------------------------------------------------------- /dbt-models/analyses/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Porte-Bleue/mongo-to-postgres-etl/2c0713eeba8d2638fa698e7f41f2d3c80da9bd8b/dbt-models/analyses/.gitkeep -------------------------------------------------------------------------------- /dbt-models/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'la_porte_bleue' 2 | version: '1.0.0' 3 | config-version: 2 4 | 5 | profile: 'la_porte_bleue' 6 | 7 | # Set the statement_timeout (in milliseconds) 8 | vars: 9 | statement_timeout: 600000 # 10 minutes in milliseconds 10 | 11 | # These configurations specify where dbt should look for different types of files. 12 | # The `model-paths` config, for example, states that models in this project can be 13 | # found in the "models/" directory. You probably won't need to change these! 14 | model-paths: ["models"] 15 | analysis-paths: ["analyses"] 16 | test-paths: ["tests"] 17 | seed-paths: ["seeds"] 18 | macro-paths: ["macros"] 19 | snapshot-paths: ["snapshots"] 20 | 21 | target-path: "target" # directory which will store compiled SQL files 22 | clean-targets: # directories to be removed by `dbt clean` 23 | - "target" 24 | - "dbt_packages" 25 | 26 | models: 27 | la_porte_bleue: 28 | +materialized: view 29 | staging: 30 | +materialized: view 31 | -------------------------------------------------------------------------------- /dbt-models/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Porte-Bleue/mongo-to-postgres-etl/2c0713eeba8d2638fa698e7f41f2d3c80da9bd8b/dbt-models/macros/.gitkeep -------------------------------------------------------------------------------- /dbt-models/models/dim_families.sql: -------------------------------------------------------------------------------- 1 | with families as ( 2 | 3 | select * from {{ ref('stg_families') }} 4 | 5 | ), 6 | 7 | family_visits_events as ( 8 | 9 | select * from {{ ref('stg_families__visit_events') }} 10 | 11 | ), 12 | 13 | families_lifetime_metrics as ( 14 | 15 | select 16 | family_id, 17 | min(visit_date) as first_visit_date, 18 | sum(number_of_operations) as lifetime_operation_count, 19 | count(visit_id) as lifetime_visit_count 20 | from family_visits_events 21 | group by 1 22 | 23 | ), 24 | 25 | final as ( 26 | 27 | select 28 | families.family_id, 29 | families.family_name, 30 | array_length(families.family_members_ids, 1) as number_of_family_members, 31 | families.city, 32 | families.housing_details, 33 | families.creation_date, 34 | families.update_date, 35 | families_lifetime_metrics.first_visit_date, 36 | families.last_visit_date, 37 | coalesce(families_lifetime_metrics.lifetime_visit_count, 0) as lifetime_visit_count, 38 | coalesce(families_lifetime_metrics.lifetime_operation_count, 0) as lifetime_operation_count 39 | from families 40 | left join families_lifetime_metrics 41 | on families.family_id = families_lifetime_metrics.family_id 42 | 43 | ) 44 | 45 | select * from final 46 | -------------------------------------------------------------------------------- /dbt-models/models/dim_family_members.sql: -------------------------------------------------------------------------------- 1 | with families__family_members_combinations as ( 2 | 3 | select * from {{ ref('stg_families__family_members_combinations') }} 4 | 5 | ), 6 | 7 | family_members as ( 8 | 9 | select * from {{ ref('stg_family_members') }} 10 | 11 | ), 12 | 13 | families as ( 14 | 15 | select * from {{ ref('stg_families') }} 16 | 17 | ), 18 | 19 | dim_families as ( 20 | 21 | select * from {{ ref('dim_families') }} 22 | 23 | ), 24 | 25 | family_members_families__joined as ( 26 | 27 | select 28 | family_members.family_member_id, 29 | family_members.first_name, 30 | families__family_members_combinations.family_id, 31 | families.family_name, 32 | families.city, 33 | family_members.gender, 34 | family_members.adult_or_child, 35 | family_members.birth_date, 36 | family_members.creation_date, 37 | family_members.update_date, 38 | dim_families.first_visit_date, 39 | families.last_visit_date, 40 | dim_families.lifetime_visit_count 41 | from family_members 42 | left join families__family_members_combinations 43 | on family_members.family_member_id = families__family_members_combinations.family_member_id 44 | left join families 45 | on families__family_members_combinations.family_id = families.family_id 46 | left join dim_families 47 | on families.family_id = dim_families.family_id 48 | 49 | ) 50 | 51 | select * from family_members_families__joined 52 | -------------------------------------------------------------------------------- /dbt-models/models/food_bank_report/food_bank_report.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | models: 5 | - name: viz_food_bank_report__families__quarterly 6 | description: > 7 | Quarterly report on the total number of families helped and visits 8 | over the period. 9 | 10 | - name: viz_food_bank_report__family_members__quarterly 11 | description: > 12 | Quarterly report on the number of people helped over the period 13 | by age range and gender. 14 | 15 | - name: viz_food_bank_report__volumes__quarterly 16 | description: > 17 | Quarterly report on the net weight distributed over the period, 18 | including food bank, purchases and donations in kilograms and tonnes. 19 | 20 | - name: viz_food_bank_report__visits_volumes__quarterly 21 | description: > 22 | Quarterly report combining visit metrics (number of distribution, visits etc) 23 | with volume metrics (net weight distributed) over the period. 24 | 25 | - name: viz_food_bank_report__families__yearly 26 | description: > 27 | Yearly report on the total number of families helped and visits 28 | over the period. 29 | 30 | - name: viz_food_bank_report__family_members__yearly 31 | description: > 32 | Yearly report on the number of people helped over the period 33 | by age range and gender. 34 | 35 | - name: viz_food_bank_report__volumes__yearly 36 | description: > 37 | Yearly report on the net weight distributed over the period, 38 | including food bank, purchases and donations in kilograms and tonnes. 39 | 40 | - name: viz_food_bank_report__visits_volumes__yearly 41 | description: > 42 | Yearly report combining visit metrics (number of distribution, visits etc) 43 | with volume metrics (net weight distributed) over the period. 44 | 45 | - name: viz_food_bank_report__count_members__quarterly 46 | description: > 47 | Quarterly report on the distribution of the number of family members 48 | among the unique number of families visiting over the period. 49 | 50 | - name: viz_food_bank_report__supply_sources__quarterly 51 | description: > 52 | Quarterly report including stock metrics related to product entries 53 | from the various supply sources. 54 | 55 | exposures: 56 | - name: food-bank-report 57 | description: >- 58 | La Porte Bleue is required to declare its activities to the food bank by means of 59 | a quarterly and yearly report containing key figures and information on the people 60 | helped as well as the volume of donations. 61 | type: dashboard 62 | maturity: high 63 | owner: 64 | name: Marie Lestavel 65 | email: marielestavel@gmail.com 66 | 67 | depends_on: 68 | - ref('viz_food_bank_report__families__quarterly') 69 | - ref('viz_food_bank_report__family_members__quarterly') 70 | - ref('viz_food_bank_report__volumes__quarterly') 71 | - ref('viz_food_bank_report__visits_volumes__quarterly') 72 | - ref('viz_food_bank_report__families__yearly') 73 | - ref('viz_food_bank_report__family_members__yearly') 74 | - ref('viz_food_bank_report__volumes__yearly') 75 | - ref('viz_food_bank_report__visits_volumes__yearly') 76 | - ref('viz_food_bank_report__count_members__quarterly') 77 | 78 | -------------------------------------------------------------------------------- /dbt-models/models/food_bank_report/quarterly_report/viz_food_bank_report__count_members__quarterly.sql: -------------------------------------------------------------------------------- 1 | with families__visit_events as ( 2 | 3 | select * from {{ ref('stg_families__visit_events') }} 4 | 5 | ), 6 | 7 | dim_families as ( 8 | 9 | select * from {{ ref('dim_families') }} 10 | 11 | ), 12 | 13 | family_members_by_quarter as ( 14 | 15 | select 16 | date_trunc('quarter', families__visit_events.visit_date)::date as quarter_at, 17 | families__visit_events.family_id, 18 | dim_families.number_of_family_members 19 | from families__visit_events 20 | left join dim_families 21 | on families__visit_events.family_id = dim_families.family_id 22 | group by 1,2,3 23 | 24 | ), 25 | 26 | grouped as ( 27 | 28 | select 29 | quarter_at, 30 | number_of_family_members, 31 | count(family_id) as family_count 32 | from family_members_by_quarter 33 | group by 1,2 34 | 35 | ) 36 | 37 | select 38 | * 39 | from grouped 40 | -------------------------------------------------------------------------------- /dbt-models/models/food_bank_report/quarterly_report/viz_food_bank_report__families__quarterly.sql: -------------------------------------------------------------------------------- 1 | with families_visit_events as ( 2 | 3 | select * from {{ ref('stg_families__visit_events') }} 4 | 5 | ), 6 | 7 | weekly_visits as ( 8 | 9 | select 10 | date_trunc('quarter', visit_date)::date as quarter_at, 11 | date_trunc('week', visit_date)::date as week_at, 12 | count(visit_id) as total_visits 13 | from families_visit_events 14 | group by 1,2 15 | ), 16 | 17 | max_weekly_visits_by_quarter as ( 18 | 19 | select 20 | quarter_at, 21 | max(total_visits) as max_weekly_visits 22 | from weekly_visits 23 | group by 1 24 | 25 | ), 26 | 27 | metrics_by_quarter as ( 28 | 29 | select 30 | date_trunc('quarter', visit_date)::date as quarter_at, 31 | count(distinct visit_date) as distribution_count, 32 | count(distinct family_id) as family_count, 33 | count(distinct visit_id) as total_visits, 34 | sum(number_of_operations) as total_product_distributed 35 | from families_visit_events 36 | group by 1 37 | 38 | ), 39 | 40 | joined as ( 41 | 42 | select 43 | metrics_by_quarter.*, 44 | max_weekly_visits 45 | from metrics_by_quarter 46 | left join max_weekly_visits_by_quarter 47 | on metrics_by_quarter.quarter_at = max_weekly_visits_by_quarter.quarter_at 48 | 49 | ) 50 | 51 | select * from joined 52 | -------------------------------------------------------------------------------- /dbt-models/models/food_bank_report/quarterly_report/viz_food_bank_report__family_members__quarterly.sql: -------------------------------------------------------------------------------- 1 | with family_members__visit_events as ( 2 | 3 | select * from {{ ref('stg_family_members__visit_events') }} 4 | 5 | ), 6 | 7 | calculate_age as ( 8 | 9 | select 10 | *, 11 | date_part('year', age(current_date, birth_date))::int as age 12 | from family_members__visit_events 13 | 14 | ), 15 | 16 | family_members_by_quarter as ( 17 | 18 | select 19 | date_trunc('quarter', visit_date)::date as quarter_at, 20 | family_id, 21 | family_member_id, 22 | gender, 23 | case 24 | when age >= 0 and age <= 3 then '0 to 3' 25 | when age >= 4 and age <= 14 then '4 to 14' 26 | when age >= 15 and age <= 25 then '15 to 25' 27 | when age >= 26 and age <= 64 then '26 to 64' 28 | when age >= 65 then '65+' 29 | end as age_range, 30 | count(family_member_id) as total_people_helped 31 | from calculate_age 32 | group by 1,2,3,4,5 33 | 34 | ), 35 | 36 | grouped as ( 37 | 38 | select 39 | quarter_at, 40 | gender, 41 | age_range, 42 | count(family_member_id) as unique_people_helped, 43 | sum(total_people_helped) as total_people_helped 44 | from family_members_by_quarter 45 | group by 1,2,3 46 | 47 | ) 48 | 49 | select 50 | * 51 | from grouped 52 | -------------------------------------------------------------------------------- /dbt-models/models/food_bank_report/quarterly_report/viz_food_bank_report__supply_sources__quarterly.sql: -------------------------------------------------------------------------------- 1 | with supply_source as ( 2 | 3 | select * from {{ ref('viz_operations__stock_entries_sources') }} 4 | 5 | ), 6 | 7 | visit_events as ( 8 | 9 | select * from {{ ref('stg_families__visit_events') }} 10 | 11 | ), 12 | 13 | quarters_calendar as ( 14 | 15 | select 16 | date_trunc('quarter', visit_date)::date as quarter_at 17 | from visit_events 18 | group by 1 19 | 20 | ), 21 | 22 | distinct_supply_source as ( 23 | 24 | select 25 | source_name 26 | from supply_source 27 | group by 1 28 | 29 | ), 30 | 31 | quarter_sources_cross_join as ( 32 | 33 | -- Create a cross join between quarters and distinct supply sources 34 | -- to ensure all combinations are represented, even if there is no data for a specific source in a quarter. 35 | 36 | select 37 | quarters_calendar.quarter_at, 38 | distinct_supply_source.source_name 39 | from quarters_calendar 40 | cross join distinct_supply_source 41 | 42 | ), 43 | 44 | supply_sources_aggregate as ( 45 | 46 | select 47 | date_trunc('quarter', date_at)::date as quarter_at, 48 | source_name, 49 | sum(batch_quantity) as batch_quantity, 50 | sum(monetary_value_eur) as monetary_value_eur 51 | from supply_source 52 | group by 1,2 53 | 54 | ), 55 | 56 | final as ( 57 | 58 | -- Create the final dataset that includes all quarters and supply sources, 59 | -- filling in missing data with 0 60 | 61 | select 62 | quarter_sources_cross_join.quarter_at, 63 | quarter_sources_cross_join.source_name, 64 | coalesce(supply_sources_aggregate.batch_quantity, 0::numeric) as batch_quantity, 65 | coalesce(supply_sources_aggregate.monetary_value_eur, 0) as monetary_value_eur 66 | from quarter_sources_cross_join 67 | left join supply_sources_aggregate 68 | on quarter_sources_cross_join.quarter_at = supply_sources_aggregate.quarter_at 69 | and quarter_sources_cross_join.source_name = supply_sources_aggregate.source_name 70 | 71 | ) 72 | 73 | select * from final 74 | -------------------------------------------------------------------------------- /dbt-models/models/food_bank_report/quarterly_report/viz_food_bank_report__visits_volumes__quarterly.sql: -------------------------------------------------------------------------------- 1 | with volumes as ( 2 | 3 | select * from {{ ref('viz_food_bank_report__volumes__quarterly') }} 4 | 5 | ), 6 | 7 | visits as ( 8 | 9 | select * from {{ ref('viz_food_bank_report__families__quarterly') }} 10 | 11 | ), 12 | 13 | joined as ( 14 | 15 | select 16 | visits.quarter_at, 17 | visits.distribution_count, 18 | visits.family_count, 19 | visits.total_visits, 20 | visits.total_product_distributed, 21 | volumes.volumes_distributed_kilos, 22 | volumes.total_monetary_value_eur 23 | from visits 24 | left join volumes 25 | on visits.quarter_at = volumes.quarter_at 26 | 27 | ) 28 | 29 | select * from joined 30 | -------------------------------------------------------------------------------- /dbt-models/models/food_bank_report/quarterly_report/viz_food_bank_report__volumes__quarterly.sql: -------------------------------------------------------------------------------- 1 | with product_operations as ( 2 | 3 | select * from {{ ref('stg_operations__product_operations') }} 4 | 5 | ), 6 | 7 | volumes_distributed_by_quarter as ( 8 | 9 | select 10 | date_trunc('quarter', date_at)::date as quarter_at, 11 | sum(quantity_in_kilo) as volumes_distributed_kilos, 12 | sum(quantity_in_kilo)/1000 as volumes_distributed_tonnes, 13 | sum(monetary_value_eur) as total_monetary_value_eur 14 | from product_operations 15 | where operation_type = 'out' 16 | and flow_type = 'family-order' 17 | group by 1 18 | 19 | ) 20 | 21 | select 22 | * 23 | from volumes_distributed_by_quarter 24 | -------------------------------------------------------------------------------- /dbt-models/models/food_bank_report/yearly_report/viz_food_bank_report__families__yearly.sql: -------------------------------------------------------------------------------- 1 | with family_members__visit_events as ( 2 | 3 | select * from {{ ref('stg_families__visit_events') }} 4 | 5 | ), 6 | 7 | metrics_yearly as ( 8 | 9 | select 10 | date_trunc('year', visit_date)::date as year_at, 11 | count(distinct visit_date) as distribution_count, 12 | count(distinct family_id) as family_count, 13 | count(distinct visit_id) as total_visits, 14 | sum(number_of_operations) as total_product_distributed 15 | from family_members__visit_events 16 | group by 1 17 | 18 | ) 19 | 20 | select * from metrics_yearly 21 | -------------------------------------------------------------------------------- /dbt-models/models/food_bank_report/yearly_report/viz_food_bank_report__family_members__yearly.sql: -------------------------------------------------------------------------------- 1 | with family_members__visit_events as ( 2 | 3 | select * from {{ ref('stg_family_members__visit_events') }} 4 | 5 | ), 6 | 7 | last_day_year as ( 8 | 9 | select 10 | *, 11 | (date_trunc('year', visit_date)::date + interval '1 year')::date-1 as visit_last_day_of_year 12 | from family_members__visit_events 13 | 14 | ), 15 | 16 | calculate_age as ( 17 | 18 | select 19 | *, 20 | date_part('year', age(visit_last_day_of_year, birth_date))::int as age 21 | from last_day_year 22 | 23 | ), 24 | 25 | family_members_yearly as ( 26 | 27 | select 28 | date_trunc('year', visit_date)::date as year_at, 29 | family_id, 30 | family_member_id, 31 | gender, 32 | case 33 | when age >= 0 and age <= 3 then '0 to 3' 34 | when age >= 4 and age <= 14 then '4 to 14' 35 | when age >= 15 and age <= 25 then '15 to 25' 36 | when age >= 26 and age <= 64 then '26 to 64' 37 | when age >= 65 then '65+' 38 | end as age_range, 39 | count(family_member_id) as total_people_helped 40 | from calculate_age 41 | group by 1,2,3,4,5 42 | 43 | ), 44 | 45 | grouped as ( 46 | 47 | select 48 | year_at, 49 | gender, 50 | age_range, 51 | count(family_member_id) as unique_people_helped, 52 | sum(total_people_helped) as total_people_helped 53 | from family_members_yearly 54 | group by 1,2,3 55 | 56 | ) 57 | 58 | select 59 | * 60 | from grouped 61 | -------------------------------------------------------------------------------- /dbt-models/models/food_bank_report/yearly_report/viz_food_bank_report__visits_volumes__yearly.sql: -------------------------------------------------------------------------------- 1 | with volumes as ( 2 | 3 | select * from {{ ref('viz_food_bank_report__volumes__yearly') }} 4 | 5 | ), 6 | 7 | visits as ( 8 | 9 | select * from {{ ref('viz_food_bank_report__families__yearly') }} 10 | 11 | ), 12 | 13 | joined as ( 14 | 15 | select 16 | visits.year_at, 17 | visits.distribution_count, 18 | visits.family_count, 19 | visits.total_visits, 20 | visits.total_product_distributed, 21 | volumes.volumes_distributed_kilos, 22 | volumes.total_monetary_value_eur 23 | from visits 24 | left join volumes 25 | on visits.year_at = volumes.year_at 26 | 27 | ) 28 | 29 | select * from joined 30 | -------------------------------------------------------------------------------- /dbt-models/models/food_bank_report/yearly_report/viz_food_bank_report__volumes__yearly.sql: -------------------------------------------------------------------------------- 1 | with product_operations as ( 2 | 3 | select * from {{ ref('stg_operations__product_operations') }} 4 | 5 | ), 6 | 7 | volumes_distributed_yearly as ( 8 | 9 | select 10 | date_trunc('year', date_at)::date as year_at, 11 | sum(quantity_in_kilo) as volumes_distributed_kilos, 12 | sum(quantity_in_kilo)/1000 as volumes_distributed_tonnes, 13 | sum(monetary_value_eur) as total_monetary_value_eur 14 | from product_operations 15 | where operation_type = 'out' 16 | and flow_type = 'family-order' 17 | group by 1 18 | 19 | ) 20 | 21 | select 22 | * 23 | from volumes_distributed_yearly 24 | -------------------------------------------------------------------------------- /dbt-models/models/intermediate/intermediate.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | 4 | models: 5 | - name: stg_families__family_members_combinations 6 | description: One to many model between family and family members 7 | columns: 8 | - name: family_member_id 9 | description: The unique identifier for a family member 10 | tests: 11 | - unique 12 | - not_null 13 | - name: family_id 14 | description: The unique identifier for a family 15 | 16 | - name: stg_visits_events__operations_combinations 17 | description: One to many model between visit and operations 18 | columns: 19 | - name: operation_id 20 | description: The unique identifier for an operation 21 | tests: 22 | - unique 23 | - not_null 24 | - name: visit_id 25 | description: The unique identifier for a family visit 26 | 27 | - name: stg_operations__product_operations 28 | description: > 29 | Incremental model that aggregates operations data to a product-level 30 | staging level on a daily basis. 31 | columns: 32 | - name: product_operations_id 33 | description: The unique identifier for this table 34 | tests: 35 | - unique 36 | - not_null 37 | - name: date_at 38 | description: The date when the operation occurred 39 | - name: product_id 40 | description: The unique identifier for a product 41 | - name: product_name 42 | description: The name of the product 43 | - name: unit_of_measure 44 | description: > 45 | Unit of measure for a given product 46 | eg: 'Paquet' for 'Biscuits' 47 | - name: units_per_batch 48 | description: > 49 | Number of product units given per batch 50 | - name: visit_id 51 | description: The unique identifier for a family visit 52 | - name: operation_type 53 | description: The type of stock operation, either `in` or `out` 54 | - name: flow_type 55 | description: The type of flow 56 | - name: quantity_in_unit 57 | description: The quantity of product units associated with the operation 58 | - name: quantity_in_kilo 59 | description: The corresponding weight, in kilograms, of the quantity of product units associated with the operation 60 | - name: batch_quantity 61 | description: > 62 | The corresponding quantity of product units in product batch. 63 | Eg: If diapers are given in batches of 10 to families, 64 | then 10 unit entries correspond to 1 product batch stock entry. 65 | - name: monetary_value_eur 66 | description: > 67 | The corresponding monetary value of the products associated with the operation 68 | calculated based on the average price of a product unit. 69 | 70 | - name: stg_products__stock_timeseries 71 | description: > 72 | Incremental model containing a daily snapshot of stock to date for each product 73 | columns: 74 | - name: product_stock_id 75 | description: The unique identifier for this table 76 | tests: 77 | - unique 78 | - not_null 79 | - name: date_at 80 | description: The date of the snapshot 81 | - name: product_id 82 | description: The unique identifier for a product 83 | - name: product_name 84 | description: The name of the product 85 | - name: stock_at_date 86 | description: The stock to date of the product 87 | 88 | - name: stg_products__distribution_metrics 89 | description: > 90 | Model containing moving average metrics for product distributions based on the last 4 distributions 91 | columns: 92 | - name: date_at 93 | description: The distribution date 94 | - name: product_id 95 | description: The unique identifier for a product 96 | - name: product_name 97 | description: The name of the product 98 | - name: quantity_distributed_moving_average_last_4_distributions 99 | description: the average quantity of products distributed per distribution based on the last 4 distributions 100 | - name: visits_moving_average_last_4_distributions 101 | description: the average number of visits per distribution based on the last 4 distributions 102 | - name: average_quantity_per_family 103 | description: the average amount distributed per family over a distribution based on the last 4 distributions 104 | -------------------------------------------------------------------------------- /dbt-models/models/intermediate/stg_families__family_members_combinations.sql: -------------------------------------------------------------------------------- 1 | with families as ( 2 | 3 | select * from {{ ref('stg_families') }} 4 | 5 | ), 6 | 7 | unnest_family_members as ( 8 | 9 | select 10 | family_id, 11 | unnest(family_members_ids) as family_member_id 12 | from families 13 | 14 | ) 15 | 16 | select * from unnest_family_members 17 | -------------------------------------------------------------------------------- /dbt-models/models/intermediate/stg_operations__product_operations.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config({ 3 | "materialized": "incremental", 4 | "dist": "auto", 5 | "unique_key": "product_operations_id", 6 | "sort": 'date_at' 7 | }) 8 | }} 9 | 10 | with last_run as ( 11 | 12 | select 13 | max(date_at) as max_date_at 14 | from {{ this }} 15 | 16 | ), 17 | 18 | operations as ( 19 | 20 | select 21 | product_id, 22 | operation_type, 23 | flow_type, 24 | quantity, 25 | creation_date 26 | from {{ ref('stg_operations') }} 27 | {% if is_incremental() %} 28 | where (creation_date >= (select max_date_at from last_run) 29 | or updated_at > created_at) 30 | {% endif %} 31 | ), 32 | 33 | products as ( 34 | 35 | select 36 | product_id, 37 | product_name, 38 | unit_of_measure, 39 | units_per_batch, 40 | product_weight_kg, 41 | price_per_unit_eur 42 | from {{ ref('stg_products') }} 43 | 44 | ), 45 | 46 | product_aggregate as ( 47 | 48 | select 49 | operations.creation_date as date_at, 50 | operations.product_id, 51 | products.product_name, 52 | products.unit_of_measure, 53 | products.units_per_batch, 54 | products.product_weight_kg, 55 | products.price_per_unit_eur, 56 | operations.operation_type, 57 | operations.flow_type, 58 | sum(operations.quantity) as quantity_in_unit, 59 | sum(operations.quantity) * coalesce(products.product_weight_kg, 0) as quantity_in_kilo, 60 | sum(operations.quantity)/max(products.units_per_batch)::numeric as batch_quantity, 61 | sum(operations.quantity) * products.price_per_unit_eur as monetary_value_eur 62 | from operations 63 | inner join products 64 | on operations.product_id = products.product_id 65 | group by 1,2,3,4,5,6,7,8,9 66 | 67 | ) 68 | 69 | select 70 | {{ 71 | dbt_utils.surrogate_key([ 72 | 'date_at', 73 | 'product_id', 74 | 'operation_type', 75 | 'flow_type' 76 | ]) 77 | }} as product_operations_id, 78 | * 79 | from product_aggregate 80 | -------------------------------------------------------------------------------- /dbt-models/models/intermediate/stg_products__distribution_metrics.sql: -------------------------------------------------------------------------------- 1 | with product_operations as ( 2 | 3 | select * from {{ ref('stg_operations__product_operations') }} 4 | 5 | ), 6 | 7 | visits as ( 8 | 9 | select * from {{ ref('stg_families__visit_events') }} 10 | 11 | ), 12 | 13 | products_distributed_by_distribution as ( 14 | 15 | select 16 | date_at, 17 | product_id, 18 | product_name, 19 | sum(batch_quantity) as batch_quantity 20 | from product_operations 21 | where operation_type = 'out' 22 | and flow_type = 'family-order' 23 | group by 1,2,3 24 | 25 | ), 26 | 27 | visits_by_distribution as ( 28 | 29 | select 30 | visit_date as date_at, 31 | count(distinct family_id) as visit_count 32 | from visits 33 | group by 1 34 | 35 | ), 36 | 37 | visits_moving_average as ( 38 | 39 | select 40 | date_at, 41 | avg(visit_count) over (order by date_at rows between 3 preceding and current row) as visits_moving_average_last_4_distributions 42 | from visits_by_distribution 43 | 44 | ), 45 | 46 | joined as ( 47 | 48 | select 49 | products_distributed_by_distribution.date_at, 50 | products_distributed_by_distribution.product_id, 51 | products_distributed_by_distribution.product_name, 52 | products_distributed_by_distribution.batch_quantity, 53 | visits_moving_average.visits_moving_average_last_4_distributions 54 | from products_distributed_by_distribution 55 | inner join visits_moving_average 56 | on products_distributed_by_distribution.date_at = visits_moving_average.date_at 57 | 58 | ), 59 | 60 | products_grouped as ( 61 | 62 | select 63 | date_at, 64 | product_id, 65 | product_name, 66 | visits_moving_average_last_4_distributions, 67 | avg(sum(batch_quantity)) over (partition by product_id order by date_at rows between 3 preceding and current row) as quantity_distributed_moving_average_last_4_distributions 68 | from joined 69 | group by 1,2,3,4 70 | 71 | ), 72 | 73 | final as ( 74 | 75 | select 76 | date_at, 77 | product_id, 78 | product_name, 79 | quantity_distributed_moving_average_last_4_distributions, 80 | visits_moving_average_last_4_distributions, 81 | quantity_distributed_moving_average_last_4_distributions/visits_moving_average_last_4_distributions::numeric as average_quantity_per_family 82 | from products_grouped 83 | 84 | ) 85 | 86 | select * from final 87 | -------------------------------------------------------------------------------- /dbt-models/models/intermediate/stg_products__stock_timeseries.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config({ 3 | "materialized": "incremental", 4 | "dist": "auto", 5 | "unique_key": "product_stock_id", 6 | "sort": 'date_at' 7 | }) 8 | }} 9 | 10 | with products as ( 11 | 12 | select * from {{ ref('stg_products') }} 13 | 14 | ), 15 | 16 | daily_stock as ( 17 | 18 | select 19 | current_date as date_at, 20 | product_id, 21 | product_name, 22 | current_stock as stock_at_date 23 | from products 24 | 25 | ) 26 | 27 | select 28 | {{ 29 | dbt_utils.surrogate_key([ 30 | 'date_at', 31 | 'product_id' 32 | ]) 33 | }} as product_stock_id, 34 | * 35 | from daily_stock 36 | -------------------------------------------------------------------------------- /dbt-models/models/intermediate/stg_supply_sources_consolidated.sql: -------------------------------------------------------------------------------- 1 | with stg_supply_source as ( 2 | 3 | select 4 | date_at, 5 | source_name 6 | from {{ ref('stg_supply_sources') }} 7 | group by 1,2 8 | 9 | ), 10 | 11 | seed_sources as ( 12 | 13 | select 14 | *, 15 | 'Collectes Supermarché' as source_name 16 | from {{ ref('seed_collection_dates') }} 17 | union 18 | select 19 | *, 20 | 'Pleins Courses' as source_name 21 | from {{ ref('seed_grocery_shopping_dates') }} 22 | union 23 | select 24 | *, 25 | 'Enlèvements BAPIF' as source_name 26 | from {{ ref('seed_bapif_collection_dates') }} 27 | 28 | ), 29 | 30 | union_seed_supply_table as ( 31 | 32 | select 33 | * 34 | from seed_sources 35 | union all 36 | select 37 | * 38 | from stg_supply_source 39 | 40 | ) 41 | 42 | select * from union_seed_supply_table 43 | -------------------------------------------------------------------------------- /dbt-models/models/intermediate/stg_visits_events__operations_combinations.sql: -------------------------------------------------------------------------------- 1 | with visit_events as ( 2 | 3 | select * from {{ ref('stg_visit_events') }} 4 | 5 | ), 6 | 7 | unnest_operations as ( 8 | 9 | select 10 | visit_id, 11 | unnest(operation_ids::varchar[]) as operation_id 12 | from visit_events 13 | 14 | ) 15 | 16 | select * from unnest_operations 17 | -------------------------------------------------------------------------------- /dbt-models/models/schema.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | models: 5 | - name: stg_families__visit_events 6 | description: One to many relationship model between a family and visit events 7 | columns: 8 | - name: visit_id 9 | description: The unique identifier for a family visit 10 | - name: visit_date 11 | description: The date when the family visited 12 | - name: family_id 13 | description: The unique identifier for a family 14 | - name: family_name 15 | description: The family name 16 | - name: city 17 | description: The city of residence of the family 18 | - name: housing_details 19 | description: Information on the family's residence (if any) 20 | - name: is_latest_visit 21 | description: Whether the visit event is the latest or not 22 | - name: is_first_visit 23 | description: Whether the visit event is the family's very first visit 24 | - name: family_type 25 | description: Whether the family is new or returning 26 | - name: number_of_operations 27 | description: the number of operations executed during the visit 28 | 29 | - name: dim_families 30 | description: This dimension model contains basic information about a family 31 | columns: 32 | - name: family_id 33 | description: The unique identifier for a family 34 | tests: 35 | - unique 36 | - name: family_name 37 | description: The family name 38 | - name: number_of_family_members 39 | description: The number of family members 40 | tests: 41 | - not_null 42 | - name: city 43 | description: the city of residence of the family 44 | - name: housing_details 45 | description: Information on the family's residence (if any) 46 | - name: creation_date 47 | description: The date when the family member was created 48 | - name: update_date 49 | description: The date when the family member was updated 50 | - name: first_visit_date 51 | description: The date when the family first visited 52 | - name: last_visit_date 53 | description: The date when the family last visited 54 | - name: lifetime_visit_count 55 | description: The total number of visits by the family since they first visited 56 | - name: lifetime_operation_count 57 | description: The total number of operations executed for the family across all visits 58 | 59 | - name: stg_family_members__visit_events 60 | description: Many to many relationships between visit events and family members 61 | columns: 62 | - name: visit_id 63 | description: The unique identifier for a family visit 64 | - name: visit_date 65 | description: The date when the family visited 66 | - name: family_id 67 | description: The unique identifier for a family 68 | - name: family_name 69 | description: The family name 70 | - name: city 71 | description: the city of residence of the family member 72 | - name: family_member_id 73 | description: The unique identifier for a family member 74 | - name: first_name 75 | description: The first name of the family member 76 | - name: gender 77 | description: The gender of the family member 78 | - name: adult_or_child 79 | description: whether the family member is an adult or a child 80 | - name: birth_date 81 | description: The birth date of the family member 82 | - name: is_first_visit 83 | description: Whether the visit event is the very first visit or not 84 | - name: is_latest_visit 85 | description: Whether the visit event is the latest or not 86 | 87 | - name: dim_family_members 88 | description: This dimension model contains basic information about a family member 89 | columns: 90 | - name: family_member_id 91 | description: The unique identifier for a family member 92 | tests: 93 | - unique 94 | - not_null 95 | - name: first_name 96 | description: The first name of the family member 97 | - name: family_id 98 | description: The unique identifier for a family 99 | tests: 100 | - not_null 101 | - name: family_name 102 | description: The family name 103 | tests: 104 | - not_null 105 | - name: city 106 | description: the city of residence of the family 107 | tests: 108 | - not_null 109 | - name: gender 110 | description: The gender of the family member 111 | - name: adult_or_child 112 | description: whether the family member is an adult or a child 113 | - name: birth_date 114 | description: The birth date of the family member 115 | - name: creation_date 116 | description: The date when the family member was created 117 | - name: update_date 118 | description: The date when the family member was updated 119 | - name: first_visit_date 120 | description: The date when the member's family first visited 121 | - name: last_visit_date 122 | description: The date when the member's family last visited 123 | - name: lifetime_visit_count 124 | description: The total number of visits of the member's family since they first visited 125 | 126 | - name: viz_operations__stock_entries_sources 127 | description: Model containing stock entries metrics by sources at the product-level. 128 | columns: 129 | - name: date_at 130 | description: The date when the operation occurred 131 | - name: source_name 132 | description: > 133 | The acquisition channel for this operation. 134 | Either 'Pleins Courses', 'Collectes Supermarché' or 'Enlèvements BAPIF' 135 | - name: product_name 136 | description: The name of the product 137 | - name: unit_of_measure 138 | description: > 139 | Unit of measure for a given product 140 | eg: 'Paquet' for 'Biscuits' 141 | - name: units_per_batch 142 | description: Number of product units given per batch 143 | - name: date_latest_collection 144 | description: The date of the lastest collection 145 | - name: unit_entries 146 | description: The quantity of product units entered in stock 147 | - name: batch_quantity 148 | description: > 149 | The corresponding quantity of product units in product batch. 150 | Eg: If diapers are given in batches of 10 to families, 151 | then 10 unit entries correspond to 1 product batch stock entry. 152 | - name: monetary_value_eur 153 | description: > 154 | The corresponding monetary value in euro of the products brought into stock 155 | calculated based on the average price of a product unit. 156 | 157 | - name: viz_products__stocks_availability 158 | description: > 159 | Vizualiation model containing moving average metrics for product distributions based on the last 4 distributions 160 | columns: 161 | - name: date_at 162 | description: The distribution date 163 | - name: product_id 164 | description: The unique identifier for a product 165 | - name: product_name 166 | description: The name of the product 167 | - name: quantity_distributed_moving_average_last_4_distributions 168 | description: the average quantity of products distributed per distribution based on the last 4 distributions 169 | - name: visits_moving_average_last_4_distributions 170 | description: the average number of visits per distribution based on the last 4 distributions 171 | - name: average_quantity_per_family 172 | description: the average amount distributed per family over a distribution based on the last 4 distributions 173 | - name: stock_at_date 174 | description: The stock to date of the product 175 | - name: count_distributions_available 176 | description: > 177 | The theoretical number of distributions where the product would be available in stock 178 | based on the stock to date and the average quantity distributed per distribution 179 | - name: date_latest_distribution 180 | description: The date of the lastest distribution 181 | -------------------------------------------------------------------------------- /dbt-models/models/staging/staging.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | sources: 5 | - name: public 6 | schema: public 7 | 8 | tables: 9 | - name: families 10 | - name: family_members 11 | - name: products 12 | - name: visit_events 13 | - name: operations 14 | - name: collects 15 | 16 | models: 17 | - name: stg_families 18 | columns: 19 | - name: family_id 20 | description: The unique identifier for a family 21 | tests: 22 | - unique 23 | - not_null 24 | - name: family_name 25 | description: The family name 26 | tests: 27 | - not_null 28 | - name: family_members_ids 29 | description: Array of IDs of each family member 30 | tests: 31 | - not_null 32 | - name: city 33 | description: the city of residence of the family 34 | tests: 35 | - not_null 36 | - name: housing_details 37 | description: Information on the family's residence (if any) 38 | tests: 39 | - accepted_values: 40 | values: 41 | ['Propre Logement', 42 | 'SDF', 43 | 'Hôtel du 115', 44 | 'Autres'] 45 | - name: created_at 46 | description: Timestamp when the family was created 47 | tests: 48 | - not_null 49 | - name: creation_date 50 | description: The date when the family was created 51 | tests: 52 | - not_null 53 | - name: updated_at 54 | description: Timestamp when the family was updated 55 | tests: 56 | - not_null 57 | - name: update_date 58 | description: The date when the family was updated 59 | tests: 60 | - not_null 61 | - name: last_visit_at 62 | description: Timestamp when the family last visited 63 | tests: 64 | - not_null 65 | - name: last_visit_date 66 | description: The date when the family last visited 67 | tests: 68 | - not_null 69 | 70 | - name: stg_family_members 71 | columns: 72 | - name: family_member_id 73 | description: The unique identifier for a family member 74 | tests: 75 | - unique 76 | - not_null 77 | - name: first_name 78 | description: The first name of the family member 79 | tests: 80 | - not_null 81 | - name: gender 82 | description: The gender of the family member 83 | tests: 84 | - not_null 85 | - name: adult_or_child 86 | description: whether the family member is an adult or a child 87 | tests: 88 | - not_null 89 | - name: birth_date 90 | description: The birth date of the family member 91 | tests: 92 | - not_null 93 | - name: created_at 94 | description: The date when the family member was created 95 | tests: 96 | - not_null 97 | - name: creation_date 98 | description: The date when the family member was created 99 | tests: 100 | - not_null 101 | - name: updated_at 102 | description: Timestamp when the family member was updated 103 | tests: 104 | - not_null 105 | - name: update_date 106 | description: The date when the family member was updated 107 | tests: 108 | - not_null 109 | 110 | - name: stg_products 111 | columns: 112 | - name: product_id 113 | description: The unique identifier for a product 114 | tests: 115 | - unique 116 | - not_null 117 | - name: product_name 118 | description: The name of the product 119 | tests: 120 | - not_null 121 | - name: unit_of_measure 122 | description: > 123 | Unit of measure for a given product 124 | eg: 'Paquet' for 'Biscuits' 125 | tests: 126 | - not_null 127 | - name: units_per_batch 128 | description: Number of product units given per batch 129 | tests: 130 | - not_null 131 | - name: created_at 132 | description: Timestamp when the product was created 133 | tests: 134 | - not_null 135 | - name: creation_date 136 | description: The date when the product was created 137 | tests: 138 | - not_null 139 | - name: updated_at 140 | description: Timestamp when the product was updated 141 | tests: 142 | - not_null 143 | - name: update_date 144 | description: The date when the product was updated 145 | tests: 146 | - not_null 147 | - name: category_id 148 | description: The identifier of the category to which the product belongs to 149 | tests: 150 | - not_null 151 | - name: current_stock 152 | description: The current stock of the product 153 | tests: 154 | - not_null 155 | - name: product_weight_kg 156 | description: The theoretical average weight of a product unit 157 | tests: 158 | - not_null 159 | - name: price_per_unit_eur 160 | description: The theoretical average price of a product unit in euro 161 | tests: 162 | - not_null 163 | 164 | - name: stg_visit_events 165 | columns: 166 | - name: visit_id 167 | description: The unique identifier for a family visit 168 | tests: 169 | - unique 170 | - not_null 171 | - name: operation_ids 172 | description: Array of IDs of each operation performed during the visit 173 | tests: 174 | - not_null 175 | - name: family_id 176 | description: The unique identifier for a family 177 | tests: 178 | - not_null 179 | - name: visit_at 180 | description: Timestamp when the family visited 181 | tests: 182 | - not_null 183 | - name: visit_date 184 | description: The date when the family visited 185 | tests: 186 | - not_null 187 | 188 | - name: stg_operations 189 | columns: 190 | - name: operation_id 191 | description: The unique identifier for an operation 192 | tests: 193 | - unique 194 | - not_null 195 | - name: operation_type 196 | description: The type of stock operation, either `in` or `out` 197 | tests: 198 | - not_null 199 | - accepted_values: 200 | values: ['in', 'out'] 201 | - name: quantity 202 | description: The quantity of product units associated with the operation 203 | tests: 204 | - not_null 205 | - name: product_id 206 | description: The unique identifier for a product 207 | tests: 208 | - not_null 209 | - name: flow_type 210 | description: The type of flow 211 | tests: 212 | - not_null 213 | - name: created_at 214 | description: Timestamp when the operation was created 215 | tests: 216 | - not_null 217 | - name: creation_date 218 | description: The date when the operation was created 219 | tests: 220 | - not_null 221 | - name: updated_at 222 | description: Timestamp when the product was updated 223 | tests: 224 | - not_null 225 | - name: update_date 226 | description: The date when the product was updated 227 | tests: 228 | - not_null 229 | 230 | - name: stg_supply_sources 231 | columns: 232 | - name: supply_id 233 | description: The unique identifier for a supply entry 234 | tests: 235 | - unique 236 | - not_null 237 | - name: source_name 238 | description: The name of the source, either `Collectes Supermarché`, `Pleins Courses` or `Enlèvements BAPIF` 239 | tests: 240 | - not_null 241 | - accepted_values: 242 | values: ['Collectes Supermarché', 'Pleins Courses', 'Enlèvements BAPIF'] 243 | - name: date_at 244 | description: The date when the collect occurred 245 | tests: 246 | - not_null 247 | - name: title 248 | description: The title of the supply entry 249 | tests: 250 | - not_null 251 | - name: created_at 252 | description: Timestamp when the supply entry was created 253 | tests: 254 | - not_null 255 | - name: updated_at 256 | description: Timestamp when the supply entry was updated 257 | tests: 258 | - not_null 259 | - name: created_by 260 | description: identifier of the member having completed the stock entry into the app 261 | tests: 262 | - not_null 263 | - name: updated_by 264 | description: identifier of the member having updated the stock entry into the app 265 | tests: 266 | - not_null 267 | -------------------------------------------------------------------------------- /dbt-models/models/staging/stg_families.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('public', 'families') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | family_id, 11 | name as family_name, 12 | family_members_ids::varchar[] as family_members_ids, 13 | nullif(city, '') as city, 14 | nullif(housing_details, '') as housing_details, 15 | created_at, 16 | created_at::date as creation_date, 17 | updated_at, 18 | updated_at::date as update_date, 19 | last_visit_at, 20 | last_visit_at::date as last_visit_date 21 | from source 22 | 23 | ) 24 | 25 | select * from renamed 26 | -------------------------------------------------------------------------------- /dbt-models/models/staging/stg_family_members.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('public', 'family_members') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | family_member_id, 11 | nullif(surname, '') as first_name, 12 | gender, 13 | adult_or_child, 14 | cast(to_date(birth_date, 'DD/MM/YYYY') as date) as birth_date, 15 | created_at, 16 | created_at::date as creation_date, 17 | updated_at, 18 | updated_at::date as update_date 19 | from source 20 | 21 | ) 22 | 23 | select * from renamed 24 | -------------------------------------------------------------------------------- /dbt-models/models/staging/stg_operations.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('public', 'operations') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | operation_id, 11 | operation_type, 12 | quantity, 13 | product_id, 14 | from_flow as flow_type, 15 | created_at, 16 | created_at::date as creation_date, 17 | updated_at, 18 | updated_at::date as update_date 19 | from source 20 | 21 | ) 22 | 23 | select * from renamed 24 | -------------------------------------------------------------------------------- /dbt-models/models/staging/stg_products.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('public', 'products') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | product_id, 11 | name as product_name, 12 | unit_of_measure, 13 | quantity_for_one_foodstuff as units_per_batch, 14 | created_at, 15 | created_at::date as creation_date, 16 | updated_at, 17 | updated_at::date as update_date, 18 | category as category_id, 19 | current_stock, 20 | nullif(weight_in_kg, 'NaN')::numeric as product_weight_kg, 21 | price_per_unit as price_per_unit_eur 22 | from source 23 | 24 | ) 25 | 26 | select * from renamed 27 | -------------------------------------------------------------------------------- /dbt-models/models/staging/stg_supply_sources.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('public', 'collects') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | collect_id as supply_id, 11 | case 12 | when collect_type = 'collect' then 'Collectes Supermarché' 13 | when collect_type = 'shopping' then 'Pleins Courses' 14 | when collect_type like '%BAPIF%' then 'Enlèvements BAPIF' 15 | else collect_type 16 | end as source_name, 17 | date_at::date, 18 | title, 19 | created_at, 20 | updated_at, 21 | created_by, 22 | updated_by 23 | from source 24 | 25 | ) 26 | 27 | select * from renamed 28 | -------------------------------------------------------------------------------- /dbt-models/models/staging/stg_visit_events.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from {{ source('public', 'visit_events') }} 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | visit_id, 11 | operation_ids::varchar[] as operation_ids, 12 | family_id, 13 | created_at as visit_at, 14 | created_at::date as visit_date 15 | from source 16 | 17 | ) 18 | 19 | select * from renamed 20 | -------------------------------------------------------------------------------- /dbt-models/models/stg_families__visit_events.sql: -------------------------------------------------------------------------------- 1 | with visit_events as ( 2 | 3 | select * from {{ ref('stg_visit_events') }} 4 | 5 | ), 6 | 7 | families as ( 8 | 9 | select * from {{ ref('stg_families') }} 10 | 11 | ), 12 | 13 | first_visit_date as ( 14 | 15 | select 16 | family_id, 17 | min(visit_date) as first_visit_date 18 | from visit_events 19 | group by 1 20 | 21 | ), 22 | 23 | visit_events_families__joined as ( 24 | 25 | select 26 | visit_events.visit_id, 27 | visit_events.visit_date, 28 | visit_events.family_id, 29 | families.family_name, 30 | families.city, 31 | families.housing_details, 32 | visit_events.visit_date = families.last_visit_date as is_latest_visit, 33 | visit_events.visit_date = first_visit_date.first_visit_date as is_first_visit, 34 | case when visit_events.visit_date = first_visit_date.first_visit_date then 'new' else 'returning' end as family_type, 35 | array_length(visit_events.operation_ids, 1) as number_of_operations 36 | from visit_events 37 | left join families 38 | on visit_events.family_id = families.family_id 39 | left join first_visit_date 40 | on visit_events.family_id = first_visit_date.family_id 41 | 42 | ) 43 | 44 | select * from visit_events_families__joined 45 | -------------------------------------------------------------------------------- /dbt-models/models/stg_family_members__visit_events.sql: -------------------------------------------------------------------------------- 1 | with visit_events as ( 2 | 3 | select * from {{ ref('stg_visit_events') }} 4 | 5 | ), 6 | 7 | dim_family_members as ( 8 | 9 | select * from {{ ref('dim_family_members') }} 10 | 11 | ), 12 | 13 | visit_events_family_members__joined as ( 14 | 15 | select 16 | visit_events.visit_id, 17 | visit_events.visit_date, 18 | visit_events.family_id, 19 | dim_family_members.family_name, 20 | dim_family_members.city, 21 | dim_family_members.family_member_id, 22 | dim_family_members.first_name, 23 | dim_family_members.gender, 24 | dim_family_members.adult_or_child, 25 | dim_family_members.birth_date, 26 | visit_events.visit_date = dim_family_members.first_visit_date as is_first_visit, 27 | visit_events.visit_date = dim_family_members.last_visit_date as is_latest_visit 28 | from visit_events 29 | left join dim_family_members 30 | on visit_events.family_id = dim_family_members.family_id 31 | 32 | ) 33 | 34 | select * from visit_events_family_members__joined 35 | -------------------------------------------------------------------------------- /dbt-models/models/viz_operations__stock_entries_sources.sql: -------------------------------------------------------------------------------- 1 | with product_operations as ( 2 | 3 | select * from {{ ref('stg_operations__product_operations') }} 4 | 5 | ), 6 | 7 | dates_per_sources as ( 8 | 9 | select 10 | * 11 | from {{ ref('stg_supply_sources_consolidated') }} 12 | 13 | ), 14 | 15 | operations_dates_joined as ( 16 | 17 | select 18 | product_operations.date_at, 19 | dates_per_sources.source_name, 20 | product_operations.product_name, 21 | product_operations.unit_of_measure, 22 | product_operations.units_per_batch, 23 | max(product_operations.date_at) over (partition by source_name) as date_latest_collection, 24 | sum(product_operations.quantity_in_unit) as unit_entries, 25 | sum(product_operations.batch_quantity) as batch_quantity, 26 | sum(product_operations.monetary_value_eur) as monetary_value_eur 27 | from product_operations 28 | inner join dates_per_sources 29 | on product_operations.date_at = dates_per_sources.date_at 30 | where operation_type = 'in' 31 | and flow_type = 'inventory' 32 | group by 1,2,3,4,5 33 | 34 | ) 35 | 36 | select * from operations_dates_joined 37 | -------------------------------------------------------------------------------- /dbt-models/models/viz_products__stocks_availability.sql: -------------------------------------------------------------------------------- 1 | with distribution_metrics as ( 2 | 3 | select * from {{ ref('stg_products__distribution_metrics') }} 4 | 5 | ), 6 | 7 | stocks as ( 8 | 9 | select * from {{ ref('stg_products__stock_timeseries') }} 10 | 11 | ), 12 | 13 | joined as ( 14 | 15 | select 16 | distribution_metrics.date_at, 17 | distribution_metrics.product_id, 18 | distribution_metrics.product_name, 19 | distribution_metrics.quantity_distributed_moving_average_last_4_distributions, 20 | distribution_metrics.visits_moving_average_last_4_distributions, 21 | distribution_metrics.average_quantity_per_family, 22 | stocks.stock_at_date, 23 | -- To improve the model, `visits_moving_average_last_4_distributions` should be replaced by a prediction of the upcoming number of visits 24 | round(case when average_quantity_per_family*visits_moving_average_last_4_distributions > 0 then stock_at_date/(average_quantity_per_family*visits_moving_average_last_4_distributions) else 0 end, 2) as count_distributions_available, 25 | max(distribution_metrics.date_at) over () as date_latest_distribution 26 | from distribution_metrics 27 | inner join stocks 28 | on distribution_metrics.date_at = stocks.date_at 29 | and distribution_metrics.product_id = stocks.product_id 30 | 31 | ) 32 | 33 | select * from joined 34 | -------------------------------------------------------------------------------- /dbt-models/package-lock.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - git: https://{{env_var('DBT_ENV_SECRET_GIT_CREDENTIAL')}}@github.com/dbt-labs/dbt-utils.git 3 | revision: ac072a3c4b78d43a1c013e7de8b8fa6e290b544e 4 | sha1_hash: d240f89ddc69e50cee964d41a7728683c29731e8 5 | -------------------------------------------------------------------------------- /dbt-models/packages.yml: -------------------------------------------------------------------------------- 1 | # add dependencies. these will get pulled during the `dbt deps` process. 2 | --- 3 | packages: 4 | - git: "https://{{env_var('DBT_ENV_SECRET_GIT_CREDENTIAL')}}@github.com/dbt-labs/dbt-utils.git" # git HTTPS URL 5 | revision: 0.8.6 6 | -------------------------------------------------------------------------------- /dbt-models/seeds/seed_bapif_collection_dates.csv: -------------------------------------------------------------------------------- 1 | date_at 2 | 2023-06-30 3 | 2023-06-02 4 | 2023-03-22 5 | -------------------------------------------------------------------------------- /dbt-models/seeds/seed_collection_dates.csv: -------------------------------------------------------------------------------- 1 | date_at 2 | 2023-05-13 3 | 2023-04-15 4 | 2023-03-11 5 | 2023-01-14 6 | 2022-11-26 7 | 2022-06-04 8 | 2022-05-15 9 | 2022-05-14 10 | 2022-05-13 11 | 2022-04-23 12 | 2022-01-08 13 | 2022-02-05 14 | 2022-03-12 15 | -------------------------------------------------------------------------------- /dbt-models/seeds/seed_grocery_shopping_dates.csv: -------------------------------------------------------------------------------- 1 | date_at 2 | 2023-06-22 3 | 2023-06-14 4 | 2023-05-24 5 | 2023-05-12 6 | 2023-04-05 7 | 2023-03-30 8 | 2023-03-21 9 | 2023-03-10 10 | 2023-03-06 11 | -------------------------------------------------------------------------------- /dbt-models/seeds/seeds.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | 4 | seeds: 5 | - name: seed_collection_dates 6 | description: > 7 | This table contains the dates of past and future product collections 8 | columns: 9 | - name: date_at 10 | description: Date of the collection 11 | 12 | - name: seed_grocery_shopping_dates 13 | description: > 14 | This table contains past and future dates when La Porte Bleue team 15 | went grocery shopping for products to distribute 16 | columns: 17 | - name: date_at 18 | description: Shopping date 19 | -------------------------------------------------------------------------------- /dbt-models/snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Porte-Bleue/mongo-to-postgres-etl/2c0713eeba8d2638fa698e7f41f2d3c80da9bd8b/dbt-models/snapshots/.gitkeep -------------------------------------------------------------------------------- /dbt-models/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Porte-Bleue/mongo-to-postgres-etl/2c0713eeba8d2638fa698e7f41f2d3c80da9bd8b/dbt-models/tests/.gitkeep -------------------------------------------------------------------------------- /etl-la-porte-bleue/.gitignore: -------------------------------------------------------------------------------- 1 | # Distribution / packaging 2 | .Python 3 | env/ 4 | build/ 5 | develop-eggs/ 6 | dist/ 7 | downloads/ 8 | eggs/ 9 | .eggs/ 10 | lib/ 11 | lib64/ 12 | parts/ 13 | sdist/ 14 | var/ 15 | *.egg-info/ 16 | .installed.cfg 17 | *.egg 18 | 19 | # Serverless directories 20 | .serverless 21 | 22 | node_modules/ 23 | 24 | serverless.env.yml 25 | -------------------------------------------------------------------------------- /etl-la-porte-bleue/README.md: -------------------------------------------------------------------------------- 1 | 13 | 14 | 15 | # Deploy ETL Python Script running on AWS Lambda using the Serverless Framework 16 | 17 | ## Deployment Guide 18 | 19 | 1. Install Serverless 20 | 21 | ``` 22 | npm install -g serverless 23 | ``` 24 | 25 | 2. Install `serverless-python-requirements` 26 | 27 | To include third-party dependencies, we need to use a plugin called `serverless-python-requirements`. You can set it up by running the following command: 28 | 29 | ``` 30 | serverless plugin install -n serverless-python-requirements 31 | ``` 32 | 33 | 3. Define necessaery environment variables 34 | 35 | Append MongoDB & PostgreSQL databases' connection string into a file called serverless.env.yml, like this 36 | $ cat serverless.env.yml 37 | ``` 38 | MONGO_DB: 39 | POSTGRES_DB: 40 | ``` 41 | 42 | 4. Deploy the function 43 | 44 | ``` 45 | $ serverless deploy 46 | ``` 47 | 48 | After running deploy, you should see output similar to: 49 | 50 | ```bash 51 | Deploying etl-la-porte-bleue to stage dev (us-east-1, "la-porte-bleue" provider) 52 | 53 | ✔ Service deployed to stack etl-la-porte-bleue-dev (112s) 54 | 55 | functions: 56 | main: etl-la-porte-bleue-dev-main (12 MB) 57 | ``` 58 | 59 | ### Invocation 60 | 61 | After successful deployment, you can invoke the deployed function by using the following command: 62 | 63 | ```bash 64 | serverless invoke --function main 65 | ``` 66 | 67 | Which should result in response similar to the following: 68 | 69 | ``` 70 | extracting collection: products 71 | 72 | loading table: products 73 | 74 | extracting collection: families 75 | 76 | loading table: families 77 | 78 | extracting collection: family_members 79 | 80 | loading table: family_members 81 | 82 | extracting collection: visit_events 83 | 84 | loading table: visit_events 85 | 86 | extracting collection: operations 87 | 88 | loading table: operations 89 | ``` 90 | -------------------------------------------------------------------------------- /etl-la-porte-bleue/handler.py: -------------------------------------------------------------------------------- 1 | import os 2 | from bson.json_util import dumps 3 | import json 4 | from datetime import datetime, timedelta 5 | 6 | import pymongo 7 | import sqlalchemy 8 | from sqlmodel import Session, SQLModel, create_engine 9 | from models import Products, Families, FamilyMembers, VisitEvents, Operations, Collects 10 | 11 | orm_classes = { 12 | "products": Products, 13 | "families": Families, 14 | "family_members": FamilyMembers, 15 | "visit_events": VisitEvents, 16 | "operations": Operations, 17 | "collects": Collects, 18 | } 19 | 20 | def extract(mongodb_secret: str, collection_name: str): 21 | """Extract data from MongoDB to local JSONLine file. 22 | 23 | Ref: https://realpython.com/introduction-to-mongodb-and-python/ 24 | """ 25 | client = pymongo.MongoClient(mongodb_secret) # Connect to MongoDB instance 26 | db = client.porteBleue # Access `porteBleue` database 27 | collection = db[collection_name] # db.getCollection("products") 28 | 29 | with open(f"/tmp/{collection_name}.jsonl", "w") as fh: 30 | if collection_name == 'operations': 31 | # Filter operations from last 6 months 32 | six_months_ago = datetime.today() - timedelta(days=180) 33 | cursor = collection.find({"createdAt": {"$gte": six_months_ago}}) 34 | else: 35 | cursor = collection.find() 36 | 37 | fh.writelines([dumps(doc) + "\n" for doc in cursor]) 38 | 39 | 40 | def load(postgre_secret: str, table_name: str): 41 | """Load data from local JSONLine file to PostgresSQL.""" 42 | 43 | engine = create_engine( 44 | postgre_secret, 45 | connect_args={ 46 | "sslmode": "require", 47 | "gssencmode": "disable", 48 | "connect_timeout": 30, 49 | "application_name": "etl_production" 50 | } 51 | ) # Create engine to connect to Postgre database 52 | Class = orm_classes[table_name] 53 | connection = engine.connect() # Connect to the database 54 | 55 | # Set the statement_timeout (in milliseconds) using an SQL query 56 | statement_timeout = 300000 # 5 minutes in milliseconds 57 | query_statement_timeout = sqlalchemy.text( 58 | f"SET statement_timeout = {statement_timeout};" 59 | ) 60 | connection.execute(query_statement_timeout) # Set the statement_timeout 61 | 62 | # Create the tables if they don't exist 63 | SQLModel.metadata.create_all(engine) 64 | 65 | # Truncate the table and its dependent objects (to remove all data but keep structure) 66 | truncate_query = sqlalchemy.text( 67 | f"TRUNCATE TABLE {table_name} CASCADE;" 68 | ) 69 | connection.execute(truncate_query) 70 | connection.commit() 71 | 72 | with open(f"/tmp/{table_name}.jsonl", "r") as fh: 73 | objects = [Class(id=i, **json.loads(p)) for i, p in enumerate(fh.readlines())] 74 | 75 | # TODO: instead of writing all objects, only do an incremental write 76 | with Session(engine) as session: 77 | # print("Objects to be added to the session:", objects) 78 | session.add_all(objects) 79 | session.commit() 80 | session.close() 81 | 82 | def main(event, context): 83 | if not os.path.exists('/tmp'): 84 | os.mkdtemp('/tmp') 85 | 86 | # Load credentials 87 | mongodb_secret = os.environ.get("MONGO_DB") 88 | postgre_secret = os.environ.get("POSTGRES_DB") 89 | 90 | tables_to_extract = [ 91 | "products", 92 | "families", 93 | "family_members", 94 | "visit_events", 95 | "operations", 96 | "collects", 97 | ] 98 | 99 | for t in tables_to_extract: 100 | print(f"extracting collection: {t}") 101 | extract(mongodb_secret, collection_name=t) 102 | print(f"loading table: {t}") 103 | load(postgre_secret, table_name=t) 104 | 105 | 106 | if __name__ == "__main__": 107 | main() 108 | -------------------------------------------------------------------------------- /etl-la-porte-bleue/models.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | from pydantic import validator 3 | from sqlalchemy import ARRAY, String 4 | from sqlmodel import Field, SQLModel 5 | from datetime import datetime 6 | 7 | 8 | 9 | def to_camel(name): 10 | return "".join( 11 | x.capitalize() if i > 0 else x for i, x in enumerate(name.split("_")) 12 | ) 13 | 14 | class Products(SQLModel, table=True): 15 | product_id: str = Field(default=None, primary_key=True, alias='_id') 16 | name: str 17 | unit_of_measure: str 18 | quantity_for_one_foodstuff: int 19 | created_at: datetime 20 | updated_at: datetime 21 | created_by: str = Field(alias='created_by') 22 | category: str 23 | cupboard: str 24 | updated_by: str = Field(alias='updated_by') 25 | current_stock: int 26 | weight_in_kg: Optional[float] 27 | price_per_unit: Optional[float] 28 | 29 | # operations: List["Operations"] = Relationship(back_populates="product") 30 | 31 | @validator("product_id", "created_by", "category", "cupboard", "updated_by", pre=True) 32 | def unnest_id(cls, v): 33 | return v["$oid"] 34 | 35 | @validator("created_at", "updated_at", pre=True) 36 | def unnest_date(cls, v): 37 | return v["$date"] 38 | 39 | class Config: 40 | alias_generator = to_camel 41 | populate_by_name = True 42 | 43 | class Families(SQLModel, table=True): 44 | family_id: str = Field(default=None, primary_key=True, alias='_id') 45 | family_members_ids: List[str] = Field(sa_type=ARRAY(String), default=[], alias='familyMembers') 46 | name: str 47 | city: str 48 | housing_details: Optional[str] 49 | created_at: datetime 50 | updated_at: datetime 51 | additional_info: Optional[str] 52 | last_visit_at: Optional[datetime] 53 | know_association_by: Optional[str] 54 | certificate_of_residence_provided_at: Optional[str] # To Do: cast to date 55 | 56 | @validator("family_id", pre=True) 57 | def unnest_id(cls, v): 58 | return v["$oid"] 59 | 60 | @validator("family_members_ids", pre=True) 61 | def unnest_family_members(cls, v): 62 | return [fm["$oid"] for fm in v] 63 | 64 | @validator("created_at", "updated_at", "last_visit_at", pre=True) 65 | def unnest_date(cls, v): 66 | return v["$date"] 67 | 68 | 69 | class Config: 70 | alias_generator = to_camel 71 | populate_by_name = True 72 | 73 | class FamilyMembers(SQLModel, table=True): 74 | 75 | __tablename__: str = "family_members" 76 | 77 | family_member_id: str = Field(default=None, primary_key=True, alias='_id') 78 | gender: str 79 | adult_or_child: Optional[str] 80 | surname: Optional[str] 81 | birth_date: Optional[str] = Field(alias='birthday') 82 | created_at: Optional[datetime] 83 | updated_at: Optional[datetime] 84 | 85 | @validator("family_member_id", pre=True) 86 | def unnest_id(cls, v): 87 | return v["$oid"] 88 | 89 | @validator("created_at", "updated_at", pre=True) 90 | def unnest_date(cls, v): 91 | return v["$date"] 92 | 93 | class Config: 94 | alias_generator = to_camel 95 | populate_by_name = True 96 | 97 | class VisitEvents(SQLModel, table=True): 98 | 99 | __tablename__: str = "visit_events" 100 | 101 | visit_id: str = Field(default=None, primary_key=True, alias='_id') 102 | operation_ids: List[str] = Field(sa_type=ARRAY(String), default=[], alias='operations') 103 | family_id: str = Field(alias='family') 104 | created_at: datetime 105 | updated_at: datetime 106 | 107 | @validator("visit_id", "family_id", pre=True) 108 | def unnest_id(cls, v): 109 | return v["$oid"] 110 | 111 | @validator("operation_ids", pre=True) 112 | def unnest_family_members(cls, v): 113 | return [fm["$oid"] for fm in v] 114 | 115 | @validator("created_at", "updated_at", pre=True) 116 | def unnest_date(cls, v): 117 | return v["$date"] 118 | 119 | class Config: 120 | alias_generator = to_camel 121 | populate_by_name = True 122 | 123 | class Operations(SQLModel, table=True): 124 | operation_id: str = Field(default=None, primary_key=True, alias='_id') 125 | operation_type: str = Field(alias='type') 126 | quantity: int 127 | product_id: str = Field(alias='product') 128 | # = Field(default=None, foreign_key="products.product_id") 129 | created_at: datetime 130 | updated_at: datetime 131 | from_flow: Optional[str] 132 | 133 | # product: Optional[Products] = Relationship(back_populates="operations") 134 | 135 | @validator("operation_id", "product_id", pre=True) 136 | def unnest_id(cls, v): 137 | return v["$oid"] 138 | 139 | @validator("created_at", "updated_at", pre=True) 140 | def unnest_date(cls, v): 141 | return v["$date"] 142 | 143 | class Config: 144 | alias_generator = to_camel 145 | populate_by_name = True 146 | 147 | class Collects(SQLModel, table=True): 148 | collect_id: str = Field(default=None, primary_key=True, alias='_id') 149 | collect_type: str = Field(alias='type') 150 | date_at: str = Field(alias='Date') 151 | title: str = Field(alias='Titre') 152 | created_at: datetime 153 | updated_at: datetime 154 | created_by: str = Field(alias='created_by') 155 | updated_by: str = Field(alias='updated_by') 156 | 157 | @validator("collect_id", "created_by", "updated_by", pre=True) 158 | def unnest_id(cls, v): 159 | return v["$oid"] 160 | 161 | @validator("created_at", "updated_at", pre=True) 162 | def unnest_date(cls, v): 163 | return v["$date"] 164 | 165 | class Config: 166 | alias_generator = to_camel 167 | populate_by_name = True 168 | -------------------------------------------------------------------------------- /etl-la-porte-bleue/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "devDependencies": { 3 | "serverless": "^3.36.0", 4 | "serverless-python-requirements": "^6.1.2" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /etl-la-porte-bleue/requirements.txt: -------------------------------------------------------------------------------- 1 | bson 2 | pymongo==3.11.2 3 | dnspython 4 | sqlmodel 5 | aws-psycopg2 6 | sqlalchemy 7 | pydantic==1.10.14 8 | pydantic-core 9 | python-dotenv -------------------------------------------------------------------------------- /etl-la-porte-bleue/serverless.yml: -------------------------------------------------------------------------------- 1 | org: mialesta 2 | app: etl-la-porte-bleue 3 | service: etl-la-porte-bleue 4 | 5 | frameworkVersion: '3' 6 | 7 | plugins: 8 | - serverless-python-requirements 9 | 10 | provider: 11 | name: aws 12 | # TODO: python3.7 ? 13 | runtime: python3.9 14 | environment: 15 | MONGO_DB: ${env:MONGO_DB} 16 | POSTGRES_DB: ${env:POSTGRES_DB} 17 | # TODO: iam role? 18 | # iam: 19 | # role: 20 | # managedPolicies: 21 | # - "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" 22 | 23 | functions: 24 | main: 25 | handler: handler.main 26 | timeout: 360 27 | events: 28 | # Invoke Lambda function everyday at 1pm CET 29 | - schedule: cron(0 11 * * ? *) 30 | --------------------------------------------------------------------------------