├── data └── .gitkeep ├── .dockerignore ├── requirements-dev.txt ├── docs ├── Proposta tirocinio.pdf └── Proposta tirocinio.tex ├── .gitlab-ci.yml ├── .pre-commit-config.yaml ├── Dockerfile ├── src ├── scraper │ ├── tests │ │ ├── data │ │ │ ├── train-stop_10911.json │ │ │ ├── train-stop_24955.json │ │ │ ├── station_S01608.json │ │ │ ├── train-stop_52.json │ │ │ ├── station_S01700.json │ │ │ ├── train-stop_22662.json │ │ │ ├── train-stop_10860.json │ │ │ ├── train-stop_3073.json │ │ │ ├── train-stop_555.json │ │ │ └── train-stops_2647.json │ │ ├── __init__.py │ │ ├── test_api.py │ │ ├── test_train_stop.py │ │ ├── test_station.py │ │ └── test_train.py │ ├── __init__.py │ ├── exceptions.py │ ├── main.py │ ├── api.py │ ├── station.py │ └── train_stop.py ├── analysis │ ├── __init__.py │ ├── assets │ │ ├── markers │ │ │ ├── trenord_reg.svg │ │ │ ├── obb_ec.svg │ │ │ ├── other.svg │ │ │ ├── trenitalia_fb.svg │ │ │ ├── trenitalia_ic.svg │ │ │ ├── trenitalia_icn.svg │ │ │ ├── trenitalia_reg.svg │ │ │ ├── tper_reg.svg │ │ │ ├── trenitalia_ec.svg │ │ │ ├── trenitalia_fa.svg │ │ │ └── trenitalia_fr.svg │ │ └── templates │ │ │ ├── stats_chart.html │ │ │ └── marker_legend.html │ ├── groupby.py │ ├── filter.py │ ├── timetable.py │ ├── load_data.py │ ├── stat.py │ ├── main.py │ └── trajectories_map.py ├── __init__.py ├── types.py ├── utils.py ├── const.py ├── station_extractor.py └── train_extractor.py ├── CONTRIBUTING.md ├── requirements.txt ├── .github └── workflows │ └── docker-build.yml ├── main.py ├── .gitignore └── README.md /data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | data/* 2 | venv/* 3 | -------------------------------------------------------------------------------- /requirements-dev.txt: 
-------------------------------------------------------------------------------- 1 | pre-commit==3.1.1 2 | pytest==7.2.2 3 | black==23.1.0 4 | isort==5.12.0 5 | -------------------------------------------------------------------------------- /docs/Proposta tirocinio.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarcoBuster/railway-opendata/HEAD/docs/Proposta tirocinio.pdf -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | default: 2 | image: python:3.11 3 | 4 | before_script: 5 | - pip install -r requirements.txt 6 | - pip install -r requirements-dev.txt 7 | 8 | stages: 9 | - test 10 | - lint 11 | 12 | pytest: 13 | stage: test 14 | script: 15 | - pytest src/ 16 | when: always 17 | 18 | black-linter: 19 | stage: lint 20 | script: 21 | - black --check src/ 22 | when: always 23 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v3.2.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-yaml 8 | 9 | - repo: https://github.com/psf/black 10 | rev: 23.1.0 11 | hooks: 12 | - id: black 13 | 14 | - repo: https://github.com/pycqa/isort 15 | rev: 5.12.0 16 | hooks: 17 | - id: isort 18 | name: isort (python) 19 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.13 2 | 3 | LABEL org.opencontainers.image.source=https://github.com/MarcoBuster/railway-opendata 4 | LABEL org.opencontainers.image.description="Italian railway opendata scraper and analyzer" 5 | LABEL 
org.opencontainers.image.licenses=GPL-2.0-or-later 6 | 7 | WORKDIR /app 8 | 9 | COPY requirements.txt . 10 | RUN pip install -r requirements.txt 11 | 12 | VOLUME /app/data 13 | ENV PYTHONHASHSEED=0 14 | 15 | COPY . . 16 | 17 | ENTRYPOINT ["python", "main.py"] 18 | -------------------------------------------------------------------------------- /src/scraper/tests/data/train-stop_10911.json: -------------------------------------------------------------------------------- 1 | { 2 | "arr_time": "01:35:00", 3 | "station": { 4 | "station_id": "S09999", 5 | "station_ori_name": "BRESCIA" 6 | }, 7 | "type": "D", 8 | "is_journey": true, 9 | "actual_data": { 10 | "actual_station_mir": "S09999", 11 | "actual_station_name": "BRESCIA", 12 | "actual_train_id": "110911", 13 | "actual_type": "D", 14 | "arr_delay_actual": 1, 15 | "arr_actual_time": "01:36:00" 16 | }, 17 | "cancelled": false, 18 | "platform": "3", 19 | "pass_count": 23, 20 | "date": "20230325" 21 | } 22 | -------------------------------------------------------------------------------- /src/scraper/tests/data/train-stop_24955.json: -------------------------------------------------------------------------------- 1 | { 2 | "dep_time": "14:35:00", 3 | "station": { 4 | "station_id": "S01933", 5 | "station_ori_name": "SARONNO" 6 | }, 7 | "type": "O", 8 | "is_journey": true, 9 | "actual_data": { 10 | "actual_station_mir": "S01933", 11 | "actual_station_name": "SARONNO", 12 | "actual_train_id": "124955", 13 | "actual_type": "O", 14 | "dep_actual_time": "14:37:34", 15 | "dep_delay_actual": 2 16 | }, 17 | "cancelled": false, 18 | "platform": "7", 19 | "pass_count": 1, 20 | "date": "20230325" 21 | } 22 | -------------------------------------------------------------------------------- /src/scraper/tests/data/station_S01608.json: -------------------------------------------------------------------------------- 1 | { 2 | "codReg": 1, 3 | "tipoStazione": 3, 4 | "dettZoomStaz": [], 5 | "pstaz": [], 6 | "mappaCitta": { 7 | 
"urlImagePinpoint": "", 8 | "urlImageBaloon": "" 9 | }, 10 | "codiceStazione": "S01608", 11 | "codStazione": "S01608", 12 | "lat": 45.577162, 13 | "lon": 9.606652, 14 | "latMappaCitta": 0, 15 | "lonMappaCitta": 0, 16 | "localita": { 17 | "nomeLungo": "ARCENE", 18 | "nomeBreve": "Arcene", 19 | "label": "", 20 | "id": "S01608" 21 | }, 22 | "esterno": false, 23 | "offsetX": 0, 24 | "offsetY": 0, 25 | "nomeCitta": "A" 26 | } 27 | -------------------------------------------------------------------------------- /src/scraper/tests/data/train-stop_52.json: -------------------------------------------------------------------------------- 1 | { 2 | "arr_time": "14:10:30", 3 | "dep_time": "14:11:30", 4 | "station": { 5 | "station_id": "S01739", 6 | "station_ori_name": "VARESE CASBENO" 7 | }, 8 | "type": "F", 9 | "is_journey": true, 10 | "actual_data": { 11 | "actual_station_mir": "S01739", 12 | "actual_station_name": "VARESE CASBENO", 13 | "actual_train_id": "1900052", 14 | "actual_type": "F", 15 | "dep_actual_time": "14:17:43", 16 | "arr_delay_actual": 5, 17 | "arr_actual_time": "14:15:42", 18 | "dep_delay_actual": 6 19 | }, 20 | "cancelled": false, 21 | "platform": "2", 22 | "pass_count": 9, 23 | "date": "20230325" 24 | } 25 | -------------------------------------------------------------------------------- /src/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 
8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | -------------------------------------------------------------------------------- /src/scraper/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 
8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | 17 | 18 | from src import scraper 19 | -------------------------------------------------------------------------------- /src/types.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 
# Type alias for values decoded from JSON documents.
# NOTE(review): t.Union[t.Any, X] collapses to plain Any for type checkers,
# so this alias is effectively documentation-only — confirm intent before
# tightening it to a stricter recursive JSON type.
JSONType = t.Union[t.Any, t.Dict[t.Any, t.Any]]
Due to the inability to redistribute scraped train data (see [Licensing](#licensing) section), there are tests ([pytest](https://pytest.org)) only for the scraping module: run them with
"zoomStopRange": 9, 9 | "pinpointVisibile": true, 10 | "pinpointVisible": true, 11 | "labelVisibile": true, 12 | "labelVisible": true, 13 | "codiceRegione": null 14 | }, 15 | { 16 | "codiceStazione": "S01700", 17 | "zoomStartRange": 10, 18 | "zoomStopRange": 11, 19 | "pinpointVisibile": true, 20 | "pinpointVisible": true, 21 | "labelVisibile": true, 22 | "labelVisible": true, 23 | "codiceRegione": null 24 | } 25 | ], 26 | "pstaz": [ 27 | 28 | ], 29 | "mappaCitta": { 30 | "urlImagePinpoint": "", 31 | "urlImageBaloon": "" 32 | }, 33 | "codiceStazione": "S01700", 34 | "codStazione": "S01700", 35 | "lat": 45.486347, 36 | "lon": 9.204528, 37 | "latMappaCitta": 0, 38 | "lonMappaCitta": 0, 39 | "localita": { 40 | "nomeLungo": "MILANO CENTRALE", 41 | "nomeBreve": "Milano Centrale", 42 | "label": "Milano", 43 | "id": "S01700" 44 | }, 45 | "esterno": false, 46 | "offsetX": 0, 47 | "offsetY": 0, 48 | "nomeCitta": "Milano" 49 | } 50 | -------------------------------------------------------------------------------- /src/scraper/tests/data/train-stop_22662.json: -------------------------------------------------------------------------------- 1 | { 2 | "orientamento": null, 3 | "kcNumTreno": null, 4 | "stazione": "TREVIGLIO", 5 | "id": "S01708", 6 | "listaCorrispondenze": null, 7 | "programmata": 1678639800000, 8 | "programmataZero": null, 9 | "effettiva": null, 10 | "ritardo": 0, 11 | "partenzaTeoricaZero": null, 12 | "arrivoTeoricoZero": null, 13 | "partenza_teorica": null, 14 | "arrivo_teorico": 1678639800000, 15 | "isNextChanged": false, 16 | "partenzaReale": null, 17 | "arrivoReale": null, 18 | "ritardoPartenza": 0, 19 | "ritardoArrivo": 0, 20 | "progressivo": 7, 21 | "binarioEffettivoArrivoCodice": null, 22 | "binarioEffettivoArrivoTipo": null, 23 | "binarioEffettivoArrivoDescrizione": null, 24 | "binarioProgrammatoArrivoCodice": null, 25 | "binarioProgrammatoArrivoDescrizione": "2 TR Ovest", 26 | "binarioEffettivoPartenzaCodice": null, 27 | "binarioEffettivoPartenzaTipo": 
null, 28 | "binarioEffettivoPartenzaDescrizione": null, 29 | "binarioProgrammatoPartenzaCodice": null, 30 | "binarioProgrammatoPartenzaDescrizione": null, 31 | "tipoFermata": "A", 32 | "visualizzaPrevista": true, 33 | "nextChanged": false, 34 | "nextTrattaType": 2, 35 | "actualFermataType": 0, 36 | "materiale_label": null 37 | } 38 | -------------------------------------------------------------------------------- /src/scraper/tests/data/train-stop_10860.json: -------------------------------------------------------------------------------- 1 | { 2 | "orientamento": null, 3 | "kcNumTreno": null, 4 | "stazione": "PIACENZA", 5 | "id": "S05000", 6 | "listaCorrispondenze": null, 7 | "programmata": 1678608420000, 8 | "programmataZero": null, 9 | "effettiva": 1678608450000, 10 | "ritardo": 1, 11 | "partenzaTeoricaZero": null, 12 | "arrivoTeoricoZero": null, 13 | "partenza_teorica": 1678608420000, 14 | "arrivo_teorico": null, 15 | "isNextChanged": false, 16 | "partenzaReale": 1678608450000, 17 | "arrivoReale": null, 18 | "ritardoPartenza": 1, 19 | "ritardoArrivo": 0, 20 | "progressivo": 1, 21 | "binarioEffettivoArrivoCodice": "0", 22 | "binarioEffettivoArrivoTipo": "0", 23 | "binarioEffettivoArrivoDescrizione": "5", 24 | "binarioProgrammatoArrivoCodice": null, 25 | "binarioProgrammatoArrivoDescrizione": null, 26 | "binarioEffettivoPartenzaCodice": "0", 27 | "binarioEffettivoPartenzaTipo": "0", 28 | "binarioEffettivoPartenzaDescrizione": "5", 29 | "binarioProgrammatoPartenzaCodice": null, 30 | "binarioProgrammatoPartenzaDescrizione": "5", 31 | "tipoFermata": "P", 32 | "visualizzaPrevista": true, 33 | "nextChanged": false, 34 | "nextTrattaType": 0, 35 | "actualFermataType": 1, 36 | "materiale_label": null 37 | } 38 | -------------------------------------------------------------------------------- /src/scraper/tests/data/train-stop_3073.json: -------------------------------------------------------------------------------- 1 | { 2 | "orientamento": null, 3 | "kcNumTreno": null, 
4 | "stazione": "ARQUATA SCRIVIA", 5 | "id": "S04207", 6 | "listaCorrispondenze": null, 7 | "programmata": 1678639380000, 8 | "programmataZero": null, 9 | "effettiva": null, 10 | "ritardo": 0, 11 | "partenzaTeoricaZero": null, 12 | "arrivoTeoricoZero": null, 13 | "partenza_teorica": 1678639440000, 14 | "arrivo_teorico": 1678639380000, 15 | "isNextChanged": false, 16 | "partenzaReale": null, 17 | "arrivoReale": null, 18 | "ritardoPartenza": 0, 19 | "ritardoArrivo": 0, 20 | "progressivo": 17, 21 | "binarioEffettivoArrivoCodice": null, 22 | "binarioEffettivoArrivoTipo": null, 23 | "binarioEffettivoArrivoDescrizione": null, 24 | "binarioProgrammatoArrivoCodice": null, 25 | "binarioProgrammatoArrivoDescrizione": "5", 26 | "binarioEffettivoPartenzaCodice": null, 27 | "binarioEffettivoPartenzaTipo": null, 28 | "binarioEffettivoPartenzaDescrizione": null, 29 | "binarioProgrammatoPartenzaCodice": null, 30 | "binarioProgrammatoPartenzaDescrizione": "5", 31 | "tipoFermata": "F", 32 | "visualizzaPrevista": true, 33 | "nextChanged": false, 34 | "nextTrattaType": 2, 35 | "actualFermataType": 0, 36 | "materiale_label": null 37 | } 38 | -------------------------------------------------------------------------------- /src/scraper/tests/data/train-stop_555.json: -------------------------------------------------------------------------------- 1 | { 2 | "orientamento": null, 3 | "kcNumTreno": null, 4 | "stazione": "LATINA", 5 | "id": "S08608", 6 | "listaCorrispondenze": null, 7 | "programmata": 1678629480000, 8 | "programmataZero": null, 9 | "effettiva": 1678629690000, 10 | "ritardo": 4, 11 | "partenzaTeoricaZero": null, 12 | "arrivoTeoricoZero": null, 13 | "partenza_teorica": 1678629600000, 14 | "arrivo_teorico": 1678629480000, 15 | "isNextChanged": false, 16 | "partenzaReale": 1678629810000, 17 | "arrivoReale": 1678629690000, 18 | "ritardoPartenza": 4, 19 | "ritardoArrivo": 4, 20 | "progressivo": 7, 21 | "binarioEffettivoArrivoCodice": "0", 22 | "binarioEffettivoArrivoTipo": "0", 23 
| "binarioEffettivoArrivoDescrizione": "2", 24 | "binarioProgrammatoArrivoCodice": null, 25 | "binarioProgrammatoArrivoDescrizione": null, 26 | "binarioEffettivoPartenzaCodice": "0", 27 | "binarioEffettivoPartenzaTipo": "0", 28 | "binarioEffettivoPartenzaDescrizione": "2", 29 | "binarioProgrammatoPartenzaCodice": null, 30 | "binarioProgrammatoPartenzaDescrizione": null, 31 | "tipoFermata": "F", 32 | "visualizzaPrevista": true, 33 | "nextChanged": false, 34 | "nextTrattaType": 0, 35 | "actualFermataType": 1, 36 | "materiale_label": null 37 | } 38 | -------------------------------------------------------------------------------- /.github/workflows/docker-build.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [ master ] 4 | workflow_dispatch: 5 | 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | 10 | permissions: 11 | packages: write 12 | 13 | steps: 14 | - name: Checkout repository 15 | uses: actions/checkout@v2 16 | with: 17 | fetch-depth: 10 18 | 19 | - name: Set up QEMU 20 | uses: docker/setup-qemu-action@v2 21 | 22 | - name: Set up Docker Buildx 23 | uses: docker/setup-buildx-action@v2 24 | 25 | - name: Login to ghcr registry 26 | uses: docker/login-action@v1 27 | with: 28 | registry: ghcr.io 29 | username: ${{ github.actor }} 30 | password: ${{ secrets.GITHUB_TOKEN }} 31 | 32 | - name: Prepare version info 33 | run: | 34 | echo "LATEST_COMMIT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV 35 | 36 | - name: Write version file 37 | run: echo "${{ env.LATEST_COMMIT_SHA }}" > version.txt 38 | 39 | - name: Build and push Docker images 40 | uses: docker/build-push-action@v4 41 | with: 42 | context: . 
class BadRequestException(Exception):
    """Raised when a request to the ViaggiaTreno API fails."""

    def __init__(
        self, url: str, status_code: int, response: str, *args: object
    ) -> None:
        """Create a BadRequestException.

        Args:
            url (str): the request URL
            status_code (int): the response status code
            response (str): the response data
        """
        super().__init__(*args)
        # Keep the failing request's details for callers that want to
        # log or inspect them.
        self.url = url
        self.status_code = status_code
        self.response = response


class IncompleteTrenordStopDataException(Exception):
    """Raised when Trenord stop data lacks fields required by the scraper."""

    def __init__(self, *args: object) -> None:
        super().__init__(*args)
def _arg_or_default(args: argparse.Namespace, field: str, default: t.Any) -> t.Any:
    """Return ``args.<field>`` if present and truthy, otherwise ``default``."""
    value = getattr(args, field, None)
    return value if value else default


def parse_input_format_output_args(
    args: argparse.Namespace,
) -> t.Tuple[Path, Path, str]:
    """Extract (input file, output file, format) from parsed CLI arguments.

    Args:
        args (argparse.Namespace): parsed CLI arguments; must provide
            ``pickle_file`` and may provide ``format`` (default ``"csv"``)
            and ``output_file``.

    Returns:
        t.Tuple[Path, Path, str]: the input path, output path and format.
    """
    input_f: Path = Path(args.pickle_file)
    fmt: str = _arg_or_default(args, "format", "csv")
    # Default output path: same location, extension swapped for the format.
    # Path.with_suffix avoids the old str.replace("pickle", fmt) bug, which
    # rewrote EVERY occurrence of "pickle" in the name and corrupted file
    # names such as "pickle_stats.pickle". The fallback preserves the old
    # behaviour for inputs without a ".pickle" extension.
    if input_f.suffix == ".pickle":
        default_output: Path = input_f.with_suffix(f".{fmt}")
    else:
        default_output = input_f.parents[0] / input_f.name.replace("pickle", fmt)
    output_f: Path = Path(_arg_or_default(args, "output_file", default_output))
    return input_f, output_f, fmt
def train_hash(df: pd.DataFrame) -> DataFrameGroupBy:
    """Group the dataframe by the train hash."""
    return df.groupby("train_hash")


def client_code(df: pd.DataFrame) -> DataFrameGroupBy:
    """Group the dataframe by the client code, dropping 'OTHER' rows."""
    known = df[df["client_code"] != "OTHER"]
    return known.groupby("client_code")


def weekday(df: pd.DataFrame) -> DataFrameGroupBy:
    """Group the dataframe by the (departure) weekday."""
    # Localized day names (see LOCALE in src.const) become the group keys.
    df["weekday"] = df["day"].dt.day_name(locale=LOCALE)
    return df.groupby("weekday")


def agg_last(df_grouped: DataFrameGroupBy) -> pd.DataFrame:
    """Aggregate each group by taking its last row."""
    return df_grouped.last()


def agg_mean(df_grouped: DataFrameGroupBy) -> pd.DataFrame:
    """Aggregate each group by taking its mean."""
    return df_grouped.mean()
# Tests for the ViaggiaTreno API wrapper.
# NOTE(review): these tests call the live ViaggiaTreno service — they need
# network access and may fail if the upstream API is unavailable.


def test_bad_request():
    """An invalid endpoint/argument pair must raise BadRequestException."""
    with pytest.raises(BadRequestException):
        ViaggiaTrenoAPI._raw_request("invalid", "method")


def test_ok_request():
    """A well-formed request returns the raw response body as a string."""
    # "regione" for Milano Centrale (S01700) is region code 1
    # (matches codReg in tests/data/station_S01700.json).
    response: str = ViaggiaTrenoAPI._raw_request("regione", "S01700")
    assert type(response) == str
    assert response == "1"


@pytest.mark.parametrize(
    "kind, station_code",
    itertools.product(
        # Every combination of board kind (departures/arrivals) and station.
        ("partenze", "arrivi"),
        [
            "S01700",
            "S08409",
            "S09218",
            "S01608",
        ],
    ),
)
def test_station_departures_or_arrivals(kind: str, station_code: str):
    """Station boards parse into Train objects with the required fields set."""
    response: t.List[Train] = ViaggiaTrenoAPI._station_departures_or_arrivals(
        kind, station_code
    )
    for train in response:
        assert type(train) == Train
        assert train.number is not None
        assert train.origin is not None
47 | 48 | 49 | -------------------------------------------------------------------------------- /src/analysis/assets/markers/trenitalia_fb.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 12 | 14 | 16 | 20 | 21 | 22 | 25 | 31 | 37 | FB 47 | 48 | 49 | -------------------------------------------------------------------------------- /src/analysis/assets/markers/trenitalia_ic.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 12 | 14 | 16 | 20 | 21 | 22 | 25 | 31 | 37 | IC 47 | 48 | 49 | -------------------------------------------------------------------------------- /src/analysis/assets/markers/trenitalia_icn.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 12 | 14 | 16 | 20 | 21 | 22 | 25 | 31 | 37 | ICN 47 | 48 | 49 | -------------------------------------------------------------------------------- /src/const.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | 17 | 18 | from enum import Enum 19 | 20 | from dateutil import tz 21 | 22 | # Global timezone used in all datetime calls. 
23 | TIMEZONE = tz.gettz("Europe/Rome") 24 | TIMEZONE_GMT = tz.gettz("GMT") 25 | 26 | # Intra-day split hour 27 | INTRADAY_SPLIT_HOUR: int = 4 28 | 29 | # Pandas locale 30 | LOCALE: str = "it_IT.utf-8" 31 | 32 | # Italian weekdays - see 'LOCALE' 33 | WEEKDAYS = { 34 | "Lunedì": 1, # Monday 35 | "Martedì": 2, # Tuesday 36 | "Mercoledì": 3, # Wednesday 37 | "Giovedì": 4, # Thursday 38 | "Venerdì": 5, # Friday 39 | "Sabato": 6, # Saturday 40 | "Domenica": 7, # Sunday 41 | } 42 | 43 | # Railway company palette 44 | RAILWAY_COMPANIES_PALETTE = { 45 | "TRENITALIA_REG": "#fa1b0f", 46 | "TRENORD": "#298044", 47 | "TPER": "#d014fa", 48 | "TRENITALIA_AV": "#c2152e", 49 | "TRENITALIA_IC": "#1b48f2", 50 | "OBB": "#464644", 51 | "OTHER": "#858585", 52 | } 53 | 54 | 55 | class RailwayCompany(Enum): 56 | """Italian railway companies codes.""" 57 | 58 | TRENITALIA_AV = 1 59 | TRENITALIA_REG = 2 60 | TRENITALIA_IC = 4 61 | TPER = 18 62 | TRENORD = 63 63 | OBB = 64 64 | OTHER = -1 65 | 66 | @classmethod 67 | def from_code(cls, code: int) -> str: 68 | try: 69 | instance: "RailwayCompany" = cls(code) 70 | except ValueError: 71 | instance: "RailwayCompany" = cls.OTHER 72 | return instance.name 73 | -------------------------------------------------------------------------------- /src/analysis/filter.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | 17 | 18 | from datetime import datetime 19 | 20 | import pandas as pd 21 | 22 | 23 | def date_filter( 24 | df: pd.DataFrame, start_date: datetime | None, end_date: datetime | None 25 | ) -> pd.DataFrame: 26 | """Filter dataframe by date (day). 27 | 28 | Args: 29 | df (pd.DataFrame): the considered dataframe 30 | start_date (datetime | None): the start date 31 | end_date (datetime | None): the end date 32 | 33 | Returns: 34 | pd.DataFrame: the filtered dataframe 35 | """ 36 | if isinstance(start_date, datetime): 37 | start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0) 38 | df = df.loc[df.day >= start_date] 39 | if isinstance(end_date, datetime): 40 | end_date = end_date.replace(hour=0, minute=0, second=0, microsecond=0) 41 | df = df.loc[df.day <= end_date] 42 | return df 43 | 44 | 45 | def railway_company_filter( 46 | df: pd.DataFrame, railway_companies: str | None 47 | ) -> pd.DataFrame: 48 | """Filter dataframe by the railway company. 49 | 50 | Args: 51 | df (pd.DataFrame): the considered dataframe 52 | client_codes (str | None): a comma-separated list of client names 53 | 54 | Returns: 55 | pd.DataFrame: the filtered dataframe 56 | """ 57 | if not railway_companies or len(railway_companies) < 1: 58 | return df 59 | 60 | code_list: list[str] = [ 61 | s.strip().lower() for s in railway_companies.strip().split(",") if len(s) > 0 62 | ] 63 | return df.loc[df.client_code.str.lower().isin(code_list)] 64 | 65 | 66 | def railway_lines_filter(df: pd.DataFrame, lines: str | None): 67 | """Filter dataframe by the railway line. 
68 | 69 | Args: 70 | df (pd.DataFrame): the considered dataframe 71 | line (str | None): a comma-separated list of railway lines 72 | 73 | Returns: 74 | pd.DataFrame: the filtered dataframe 75 | """ 76 | if not lines or len(lines) < 1: 77 | return df 78 | 79 | line_list: list[str] = [ 80 | l.strip().upper() for l in lines.strip().split(",") if len(l) > 0 81 | ] 82 | return df.loc[df.line.isin(line_list)] 83 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 
16 | 17 | 18 | import argparse 19 | import logging 20 | import os 21 | import sys 22 | 23 | import src.analysis.main as analysis 24 | import src.scraper.main as scraper 25 | from src import station_extractor, train_extractor 26 | 27 | parser = argparse.ArgumentParser( 28 | prog="train-scraper", 29 | ) 30 | subparsers = parser.add_subparsers(dest="subcommand", required=True) 31 | parser.add_argument("-d", "--debug", action="store_true", help="activate debug logs") 32 | 33 | scraper_p = subparsers.add_parser( 34 | "scraper", 35 | help="station and train data scraper", 36 | ) 37 | 38 | train_extractor.register_args( 39 | subparsers.add_parser( 40 | "train-extractor", 41 | help="convert scraped train data", 42 | ) 43 | ) 44 | station_extractor.register_args( 45 | subparsers.add_parser( 46 | "station-extractor", 47 | help="convert scraped station data", 48 | ) 49 | ) 50 | analysis.register_args( 51 | subparsers.add_parser( 52 | "analyze", 53 | help="data analyzer and visualizer", 54 | ) 55 | ) 56 | 57 | 58 | def main(): 59 | print( 60 | "railway-opendata, Copyright (C) 2023 Marco Aceti" 61 | "\nrailway-opendata comes with ABSOLUTELY NO WARRANTY; " 62 | "for details read the LICENSE." 63 | ) 64 | print() 65 | 66 | hashseed: str | None = os.getenv("PYTHONHASHSEED") 67 | if not hashseed or hashseed != "0": 68 | logging.critical( 69 | "Hash seed randomization is not disabled. " 70 | "Please disable it by setting the PYTHONHASHSEED=0 environment variable." 
71 | ) 72 | sys.exit(1) 73 | 74 | args: argparse.Namespace = parser.parse_args() 75 | 76 | logging.basicConfig( 77 | stream=sys.stdout, 78 | format="[%(asctime)s - %(levelname)s] %(message)s", 79 | level=logging.INFO if not args.debug else logging.DEBUG, 80 | ) 81 | 82 | if args.subcommand == "scraper": 83 | scraper.main() 84 | 85 | if args.subcommand == "train-extractor": 86 | train_extractor.main(args) 87 | 88 | if args.subcommand == "station-extractor": 89 | station_extractor.main(args) 90 | 91 | if args.subcommand == "analyze": 92 | analysis.main(args) 93 | 94 | 95 | if __name__ == "__main__": 96 | main() 97 | -------------------------------------------------------------------------------- /src/analysis/assets/markers/trenitalia_reg.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 16 | 36 | 38 | 41 | 45 | 46 | 47 | 52 | 69 | RE 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/analysis/assets/markers/tper_reg.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 39 | 41 | 44 | 48 | 49 | 50 | 55 | 73 | RE 85 | 86 | 87 | -------------------------------------------------------------------------------- /src/scraper/tests/test_train_stop.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | 17 | 18 | import json 19 | import pathlib 20 | from datetime import datetime 21 | 22 | import pytest 23 | 24 | from src import types 25 | from src.scraper.train_stop import TrainStop, TrainStopTime 26 | 27 | DATA_DIR = pathlib.Path("src/scraper/tests/data") 28 | 29 | 30 | t1 = datetime(year=2023, month=1, day=1, hour=12, minute=00, second=0) 31 | t2 = datetime(year=2023, month=1, day=1, hour=12, minute=5, second=30) 32 | t3 = datetime(year=2023, month=1, day=1, hour=12, minute=6, second=0) 33 | 34 | 35 | @pytest.mark.parametrize( 36 | "expected, actual, passed, delay", 37 | [ 38 | (t1, t2, True, 5.5), 39 | (t1, None, False, None), 40 | (t3, t2, True, -0.5), 41 | (t3, t1, True, -6), 42 | ], 43 | ) 44 | def test_stop_time( 45 | expected: datetime, actual: datetime | None, passed: bool, delay: int | None 46 | ): 47 | stop_time: TrainStopTime = TrainStopTime(expected=expected, actual=actual) 48 | assert stop_time.passed() == passed 49 | assert stop_time.delay() == delay 50 | 51 | 52 | def test_stop_time_assumption(): 53 | with pytest.raises(AssertionError): 54 | TrainStopTime(None, actual=t1) # type: ignore 55 | 56 | 57 | @pytest.mark.parametrize( 58 | "data_file, expected_repr", 59 | [ 60 | ("train-stop_10860.json", "@ (P) Piacenza 09:07 ~ 09:07 +0.5m [5 ~ 5]"), 61 | ("train-stop_3073.json", "@ (F) Arquata Scrivia 17:43 --> 17:44 [5]"), 62 | ( 63 | "train-stop_555.json", 64 | "@ (F) Latina 14:58 ~ 15:01 +3.5m --> 15:00 ~ 15:03 +3.5m [? 
~ 2]", 65 | ), 66 | ("train-stop_22662.json", "@ (A) Treviglio 17:50 [2 TR Ovest]"), 67 | ], 68 | ) 69 | def test_stop_repr(data_file, expected_repr): 70 | with open(DATA_DIR / data_file, "r") as f: 71 | data: types.JSONType = json.load(f) 72 | 73 | stop: TrainStop = TrainStop._from_raw_data(stop_data=data) 74 | assert repr(stop) == expected_repr 75 | 76 | 77 | @pytest.mark.parametrize( 78 | "data_file, expected_repr", 79 | [ 80 | ("train-stop_24955.json", "@ (P) Saronno 14:35 ~ 14:37 +2.6m [7]"), 81 | ( 82 | "train-stop_52.json", 83 | "@ (F) Varese Casbeno 14:10 ~ 14:15 +5.2m --> 14:11 ~ 14:17 +6.2m [2]", 84 | ), 85 | ("train-stop_10911.json", "@ (A) Brescia 01:35 ~ 01:36 +1.0m [3]"), 86 | ], 87 | ) 88 | def test_stop_trenord(data_file, expected_repr): 89 | with open(DATA_DIR / data_file, "r") as f: 90 | data: types.JSONType = json.load(f) 91 | 92 | stop: TrainStop | None = TrainStop._from_trenord_raw_data( 93 | stop_data=data, day=datetime.now().date() 94 | ) 95 | assert repr(stop) == expected_repr 96 | -------------------------------------------------------------------------------- /src/scraper/tests/data/train-stops_2647.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "dep_time": "23:25:00", 4 | "station": { 5 | "station_id": "S01700", 6 | "station_ori_name": "MILANO CENTRALE" 7 | }, 8 | "type": "O", 9 | "is_journey": true, 10 | "cancelled": false, 11 | "actual_data": { 12 | "dep_actual_time": "23:27:00" 13 | } 14 | }, 15 | { 16 | "arr_time": "23:31:00", 17 | "dep_time": "23:33:00", 18 | "station": { 19 | "station_id": "S01701", 20 | "station_ori_name": "MILANO LAMBRATE" 21 | }, 22 | "type": "F", 23 | "is_journey": true, 24 | "cancelled": false, 25 | "actual_data": { 26 | "arr_actual_time": "23:33:00", 27 | "dep_actual_time": "23:35:00" 28 | } 29 | }, 30 | { 31 | "arr_time": "23:39:00", 32 | "dep_time": "23:40:00", 33 | "station": { 34 | "station_id": "S01703", 35 | "station_ori_name": "PIOLTELLO LIMITO" 36 
| }, 37 | "type": "F", 38 | "is_journey": true, 39 | "cancelled": false, 40 | "actual_data": { 41 | "arr_actual_time": "23:40:00", 42 | "dep_actual_time": "23:43:00" 43 | } 44 | }, 45 | { 46 | "arr_time": "23:54:00", 47 | "dep_time": "23:56:00", 48 | "station": { 49 | "station_id": "S01708", 50 | "station_ori_name": "TREVIGLIO" 51 | }, 52 | "type": "F", 53 | "is_journey": true, 54 | "cancelled": false, 55 | "actual_data": { 56 | "arr_actual_time": "23:54:00", 57 | "dep_actual_time": "23:56:00" 58 | } 59 | }, 60 | { 61 | "arr_time": "00:04:00", 62 | "dep_time": "00:05:00", 63 | "station": { 64 | "station_id": "S01711", 65 | "station_ori_name": "ROMANO" 66 | }, 67 | "type": "F", 68 | "is_journey": true, 69 | "cancelled": false, 70 | "actual_data": { 71 | "arr_actual_time": "00:03:00", 72 | "dep_actual_time": "00:05:00" 73 | } 74 | }, 75 | { 76 | "arr_time": "00:13:00", 77 | "dep_time": "00:14:00", 78 | "station": { 79 | "station_id": "S01713", 80 | "station_ori_name": "CHIARI" 81 | }, 82 | "type": "F", 83 | "is_journey": true, 84 | "cancelled": false, 85 | "actual_data": { 86 | "dep_actual_time": "00:15:00" 87 | } 88 | }, 89 | { 90 | "arr_time": "00:19:00", 91 | "dep_time": "00:20:00", 92 | "station": { 93 | "station_id": "S01714", 94 | "station_ori_name": "ROVATO" 95 | }, 96 | "type": "F", 97 | "is_journey": true, 98 | "cancelled": false, 99 | "actual_data": { 100 | "arr_actual_time": "00:18:00", 101 | "dep_actual_time": "00:20:00" 102 | } 103 | }, 104 | { 105 | "arr_time": "00:31:00", 106 | "dep_time": "00:33:00", 107 | "station": { 108 | "station_id": "S09999", 109 | "station_ori_name": "BRESCIA" 110 | }, 111 | "type": "F", 112 | "is_journey": true, 113 | "cancelled": false, 114 | "actual_data": { 115 | "arr_actual_time": "00:29:00", 116 | "dep_actual_time": "00:33:00" 117 | } 118 | }, 119 | { 120 | "arr_time": "00:48:00", 121 | "dep_time": "00:49:00", 122 | "station": { 123 | "station_id": "S02084", 124 | "station_ori_name": "DESENZANO DEL GARDA-SIRMIONE" 125 | 
}, 126 | "type": "F", 127 | "is_journey": true, 128 | "cancelled": false, 129 | "actual_data": { 130 | "arr_actual_time": "00:48:00", 131 | "dep_actual_time": "00:50:00" 132 | } 133 | }, 134 | { 135 | "arr_time": "00:57:00", 136 | "dep_time": "00:58:00", 137 | "station": { 138 | "station_id": "S02088", 139 | "station_ori_name": "PESCHIERA DEL GARDA" 140 | }, 141 | "type": "F", 142 | "is_journey": true, 143 | "cancelled": false, 144 | "actual_data": { 145 | "arr_actual_time": "00:56:00", 146 | "dep_actual_time": "00:58:00" 147 | } 148 | }, 149 | { 150 | "arr_time": "01:17:00", 151 | "station": { 152 | "station_id": "S02430", 153 | "station_ori_name": "VERONA PORTA NUOVA" 154 | }, 155 | "type": "D", 156 | "is_journey": true, 157 | "cancelled": false, 158 | "actual_data": { 159 | "arr_actual_time": "01:17:00" 160 | } 161 | } 162 | ] 163 | -------------------------------------------------------------------------------- /src/analysis/assets/markers/trenitalia_ec.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 39 | 41 | 44 | 48 | 49 | 50 | 55 | 73 | 91 | EC 104 | 105 | 106 | -------------------------------------------------------------------------------- /src/analysis/assets/markers/trenitalia_fa.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 39 | 41 | 44 | 48 | 49 | 50 | 55 | 73 | 91 | FA 104 | 105 | 106 | -------------------------------------------------------------------------------- /src/analysis/assets/markers/trenitalia_fr.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 39 | 41 | 44 | 48 | 49 | 50 | 55 | 73 | 91 | FR 104 | 105 | 106 | -------------------------------------------------------------------------------- /src/station_extractor.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # 
Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | 17 | 18 | import argparse 19 | import csv 20 | import pickle 21 | from pathlib import Path 22 | 23 | from geojson import Feature, FeatureCollection, Point 24 | 25 | from src.scraper.station import Station 26 | from src.utils import parse_input_format_output_args 27 | 28 | 29 | def load_file(file: Path) -> dict[str, Station]: 30 | """Load a station data pickle file and return it. 31 | 32 | Args: 33 | file (Path): the file to load 34 | 35 | Returns: 36 | dict[str, Station]: the station data contained in the file 37 | """ 38 | with open(file, "rb") as f: 39 | data: dict[str, Station] = pickle.load(f) 40 | 41 | return data 42 | 43 | 44 | def to_csv(data: dict[str, Station], output_file: Path) -> None: 45 | """Convert to CSV station data, one row per station. 
46 | 47 | Args: 48 | data (dict[int, Station]): the data to convert 49 | output_file (Path): the file to write 50 | """ 51 | FIELDS: tuple = ( 52 | "code", 53 | "region", 54 | "long_name", 55 | "short_name", 56 | "latitude", 57 | "longitude", 58 | ) 59 | 60 | csvfile = open(output_file, "w+", newline="") 61 | writer = csv.writer( 62 | csvfile, 63 | delimiter=",", 64 | quotechar="|", 65 | quoting=csv.QUOTE_MINIMAL, 66 | ) 67 | writer.writerow(FIELDS) 68 | 69 | for station_c in data: 70 | station: Station = data[station_c] 71 | writer.writerow( 72 | ( 73 | station.code, 74 | station.region_code, 75 | station.name, 76 | station.short_name if hasattr(station, "short_name") else None, 77 | station.position[0] if station.position else None, 78 | station.position[1] if station.position else None, 79 | ) 80 | ) 81 | csvfile.close() 82 | 83 | 84 | def to_geojson(data: dict[str, Station], output_file: Path) -> None: 85 | feature_list: list[Feature] = list() 86 | 87 | for station_c in data: 88 | station: Station = data[station_c] 89 | if not station.position: 90 | continue 91 | 92 | feature: Feature = Feature( 93 | geometry=Point((station.position[1], station.position[0])), 94 | properties={ 95 | "code": station.code, 96 | "name": station.name, 97 | "short_name": station.short_name 98 | if hasattr(station, "short_name") 99 | else None, 100 | "region": station.region_code, 101 | }, 102 | ) 103 | feature_list.append(feature) 104 | 105 | collection: FeatureCollection = FeatureCollection(feature_list) 106 | with open(output_file, "w+") as f: 107 | f.write(str(collection)) 108 | 109 | 110 | def register_args(parser: argparse.ArgumentParser): 111 | parser.add_argument( 112 | "pickle_file", 113 | help=".pickle file to parse", 114 | metavar="PICKLE_FILE", 115 | ) 116 | parser.add_argument( 117 | "-f", 118 | default="csv", 119 | choices=["csv", "geojson"], 120 | help="output file format", 121 | dest="format", 122 | ) 123 | parser.add_argument( 124 | "-o", 125 | help="output file 
name", 126 | metavar="OUTPUT_FILE", 127 | dest="output_file", 128 | ) 129 | 130 | 131 | def main(args: argparse.Namespace): 132 | input_f, output_f, format = parse_input_format_output_args(args) 133 | 134 | data: dict[str, Station] = load_file(input_f) 135 | 136 | if format == "csv": 137 | to_csv(data, output_f) 138 | 139 | if format == "geojson": 140 | to_geojson(data, output_f) 141 | -------------------------------------------------------------------------------- /src/scraper/tests/test_station.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 
16 | 17 | 18 | import json 19 | import pathlib 20 | import typing as t 21 | 22 | import pytest 23 | 24 | from src import types 25 | from src.scraper import BadRequestException 26 | from src.scraper.station import Station 27 | 28 | DATA_DIR = pathlib.Path("src/scraper/tests/data") 29 | 30 | 31 | @pytest.mark.parametrize( 32 | "station_file, expected", 33 | [ 34 | ( 35 | "station_S01700.json", 36 | { 37 | "code": "S01700", 38 | "region_code": 1, 39 | "name": "Milano Centrale", 40 | "short_name": "Milano Centrale", 41 | "position": (45.486347, 9.204528), 42 | }, 43 | ), 44 | ( 45 | "station_S01608.json", 46 | { 47 | "code": "S01608", 48 | "region_code": 1, 49 | "name": "Arcene", 50 | "short_name": "Arcene", 51 | "position": (45.577162, 9.606652), 52 | }, 53 | ), 54 | ], 55 | ) 56 | def test_init(station_file: str, expected: dict): 57 | with open(DATA_DIR / station_file, "r") as f: 58 | data: types.JSONType = json.load(f) 59 | 60 | station = Station._from_raw(data) 61 | assert station.code == expected["code"] 62 | assert station.region_code == expected["region_code"] 63 | assert station.name == expected["name"] 64 | assert station.short_name == expected["short_name"] 65 | assert station.position == expected["position"] 66 | 67 | 68 | @pytest.mark.parametrize("region_code", range(0, 22 + 1)) 69 | def test_assumptions(region_code): 70 | """For each station returned by the API, we assume there is no None field.""" 71 | response: t.List[Station] = Station.by_region(region_code) 72 | for station in response: 73 | assert station.code is not None 74 | assert station.name is not None 75 | assert station.short_name is not None 76 | assert station.position is not None 77 | 78 | 79 | @pytest.mark.parametrize( 80 | "station_code, station_name", 81 | [ 82 | ("S01700", "Milano Centrale"), 83 | ("S08409", "Roma Termini"), 84 | ("S09218", "Napoli Centrale"), 85 | ("S01608", "Arcene"), 86 | ], 87 | ) 88 | def test_by_code(station_code, station_name): 89 | station: Station = 
Station.by_code(station_code) 90 | assert station.code == station_code 91 | assert station.name == station_name 92 | 93 | 94 | @pytest.mark.parametrize( 95 | "station_code, region_code", 96 | [ 97 | ("S01700", 1), # Milano Centrale 98 | ("S08409", 5), # Roma Termini 99 | ("S09218", 18), # Napoli Centrale 100 | ("S01608", 1), # Arcene 101 | ], 102 | ) 103 | def test_station_region_code(station_code, region_code): 104 | response: int = Station._region_code(station_code) 105 | assert type(response) == int 106 | assert response == region_code 107 | 108 | 109 | def test_station_region_code_invalid(): 110 | with pytest.raises(BadRequestException): 111 | Station._region_code("S00000") 112 | 113 | 114 | @pytest.mark.parametrize("region_code", range(0, 22 + 1)) 115 | def test_by_region(region_code): 116 | response: t.List[Station] = Station.by_region(region_code) 117 | for station in response: 118 | assert type(station) == Station 119 | try: 120 | assert station.region_code == region_code 121 | except AssertionError: 122 | # Recheck with the *actually* correct _region_code: 123 | # sometimes the 'elencoStazioni' call can be misleading. 124 | assert station.region_code == Station._region_code(station.code) 125 | 126 | 127 | def test_hash(): 128 | milan: Station = Station.by_code("S01700") 129 | rome: Station = Station.by_code("S08409") 130 | assert hash(milan) != hash(rome) 131 | -------------------------------------------------------------------------------- /src/scraper/tests/test_train.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 
8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | 17 | 18 | import itertools 19 | import json 20 | import pathlib 21 | import typing as t 22 | from datetime import date, datetime 23 | 24 | import pytest 25 | 26 | from src import types 27 | from src.scraper.station import Station 28 | from src.scraper.train import Train 29 | from src.scraper.train_stop import TrainStop, TrainStopTime 30 | 31 | DATA_DIR = pathlib.Path("src/scraper/tests/data") 32 | 33 | 34 | @pytest.mark.parametrize( 35 | "kind, station_code", 36 | itertools.product( 37 | ("partenze", "arrivi"), 38 | [ 39 | "S01700", 40 | "S08409", 41 | "S09218", 42 | "S01608", 43 | "N00001", 44 | "N00005", 45 | ], 46 | ), 47 | ) 48 | def test_fetch(kind, station_code): 49 | station: Station = Station.by_code(station_code) 50 | trains: t.List[Train] = ( 51 | station.departures() if kind == "partenze" else station.arrivals() 52 | ) 53 | for train in trains: 54 | train.fetch() 55 | if ( 56 | not train.departed 57 | and not train._phantom 58 | and not train._trenord_phantom 59 | and not train.cancelled 60 | ): 61 | assert not train.arrived() 62 | 63 | 64 | def test_unfetched_repr_1(): 65 | milan: Station = Station.by_code("S01700") 66 | train: Train = Train(10911, milan, datetime.now().date()) 67 | assert repr(train) == "Treno [?] ??? 10911 : Milano Centrale [S01700@1] -> ???" 
68 | 69 | 70 | def test_unfetched_repr_2(): 71 | train: Train = Train._from_station_departures_arrivals( 72 | { 73 | "numeroTreno": 10911, 74 | "codOrigine": "S01700", 75 | "categoriaDescrizione": "REG", 76 | "dataPartenzaTreno": 1678662000000, 77 | "codiceCliente": 1, 78 | "nonPartito": False, 79 | "provvedimento": 0, 80 | "compImgCambiNumerazione": "", 81 | } 82 | ) 83 | assert repr(train) == "Treno [D] REG 10911 : Milano Centrale [S01700@1] -> ???" 84 | 85 | 86 | def test_hash(): 87 | milan: Station = Station.by_code("S01700") 88 | trains: list[Train] = milan.departures() 89 | if not trains: 90 | return 91 | assert hash(trains[0]) is not None 92 | 93 | 94 | def test_fix_intraday_datetimes(): 95 | milan: Station = Station.by_code("S01700") 96 | mock_train: Train = Train(2647, milan, date(year=2023, month=3, day=25)) 97 | 98 | mock_train.category = "REG" 99 | mock_train.destination = Station.by_code("S02430") 100 | mock_train._phantom = False 101 | mock_train._trenord_phantom = False 102 | mock_train.cancelled = False 103 | mock_train._fetched = datetime.now() 104 | 105 | with open(DATA_DIR / "train-stops_2647.json") as f: 106 | stops: list[types.JSONType] = json.load(f) 107 | 108 | mock_train.stops = list() 109 | for stop in stops: 110 | fetched_stop = TrainStop._from_trenord_raw_data( 111 | stop, day=mock_train.departing_date 112 | ) 113 | if fetched_stop: 114 | mock_train.stops.append(fetched_stop) 115 | 116 | assert len(mock_train.stops) == 11 117 | 118 | mock_train._fix_intraday_datetimes() 119 | 120 | for i, stop in enumerate(mock_train.stops): 121 | expected_day = 25 if i < 4 else 26 122 | 123 | if i != 0: 124 | assert isinstance(stop.arrival, TrainStopTime) 125 | assert stop.arrival.expected.day == expected_day 126 | if isinstance(stop.arrival.actual, datetime): 127 | assert stop.arrival.actual.day == expected_day 128 | 129 | if i != len(mock_train.stops) - 1: 130 | assert isinstance(stop.departure, TrainStopTime) 131 | assert stop.departure.expected.day 
== expected_day 132 | if isinstance(stop.departure.actual, datetime): 133 | assert stop.departure.actual.day == expected_day 134 | -------------------------------------------------------------------------------- /src/analysis/timetable.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | 17 | 18 | import matplotlib.dates as mdates 19 | import matplotlib.pyplot as plt 20 | import pandas as pd 21 | import timple 22 | 23 | from src.const import TIMEZONE, TIMEZONE_GMT 24 | 25 | 26 | def same_line(df: pd.DataFrame) -> bool: 27 | """Check if the trains in the provided DataFrame are ALL on the same line 28 | 29 | Args: 30 | df (pd.DataFrame): the trains to check 31 | 32 | Return: 33 | bool: True if the trains are all on the same line, False otherwise 34 | """ 35 | return df.line.nunique() <= 1 36 | 37 | 38 | def timetable_train(train: pd.DataFrame, expected: bool = False, collapse: bool = True): 39 | """Generate a timetable graph of a train 40 | 41 | Args: 42 | train (pd.DataFrame): the train stop data to consider 43 | expected (bool, optional): determines whatever to consider the 'expected' or 'actual' arrival/departure times. Defaults to False. 
44 | collapse (bool, optional): determines whatever to _collapse_ the times in the graph, relative to the first. Defaults to True. 45 | """ 46 | 47 | if collapse: 48 | train.value -= train.value.min() 49 | 50 | train_f = train.loc[ 51 | train.variable.str.endswith("expected" if expected else "actual") 52 | ] 53 | plt.plot( 54 | train_f.value, 55 | train_f.long_name, 56 | "ko" if expected else "o", 57 | linestyle="-" if expected else "--", 58 | linewidth=3 if expected else 2, 59 | label=f"{train.iloc[0].category} {train.iloc[0].number}" 60 | if not expected 61 | else "expected", 62 | zorder=10 if expected else 5, 63 | ) 64 | 65 | 66 | def timetable_graph(trains: pd.DataFrame, st: pd.DataFrame, collapse: bool = True): 67 | """Generate a timetable graph of trains in a line. 68 | 69 | Args: 70 | trains (pd.DataFrame): the train stop data to consider 71 | st (pd.DataFrame): the station data 72 | collapse (bool, optional): determines whatever to _collapse_ the times in the graph, relative to the first. Defaults to True. 
73 | """ 74 | tmpl = timple.Timple() 75 | tmpl.enable() 76 | 77 | trains_j = ( 78 | trains.sort_values(by="stop_number") 79 | .join(st, on="stop_station_code") 80 | .reset_index(drop=True) 81 | ) 82 | trains_m = ( 83 | pd.melt( 84 | trains_j, 85 | id_vars=[ 86 | "long_name", 87 | "stop_number", 88 | "train_hash", 89 | "category", 90 | "number", 91 | "origin", 92 | ], 93 | value_vars=[ 94 | "departure_expected", 95 | "departure_actual", 96 | "arrival_expected", 97 | "arrival_actual", 98 | ], 99 | ) 100 | .sort_values(["stop_number", "variable"]) 101 | .dropna() 102 | ) 103 | 104 | # expected 105 | if collapse: 106 | for origin in trains_m.origin.unique(): 107 | train = list(trains_m.loc[trains_m.origin == origin].groupby("train_hash"))[0][1] # fmt: skip 108 | timetable_train(train, True) 109 | 110 | # actual 111 | for _, train in trains_m.groupby("train_hash"): 112 | timetable_train(train, False, collapse) 113 | 114 | # get station names for proper title 115 | st_names: pd.DataFrame = st.drop( 116 | ["region", "latitude", "longitude", "short_name"], 117 | axis=1, 118 | ) 119 | line: pd.DataFrame = ( 120 | trains.join(st_names, on="origin") 121 | .rename({"long_name": "station_a"}, axis=1) 122 | .join(st_names, on="destination") 123 | .rename({"long_name": "station_b"}, axis=1) 124 | )[["station_a", "station_b", "stop_number"]].agg( 125 | { 126 | "station_a": lambda s: s.iloc[0], 127 | "station_b": lambda s: s.iloc[0], 128 | "stop_number": lambda n: max(n) + 1, 129 | } 130 | ) 131 | 132 | plt.title(f"{line.station_a} ↔ {line.station_b} [{line.stop_number} stops]") 133 | start_day, end_day = trains.day.min().date(), trains.day.max().date() 134 | plt.title(f"{start_day} => {end_day}", loc="left") 135 | 136 | plt.ylabel("Station") 137 | plt.xlabel("Time") 138 | 139 | ax = plt.gca() 140 | ax.invert_yaxis() 141 | ax.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M", TIMEZONE if not collapse else TIMEZONE_GMT)) # type: ignore 142 | 143 | plt.show() 144 | 
-------------------------------------------------------------------------------- /src/analysis/load_data.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | 17 | 18 | from datetime import datetime 19 | from pathlib import Path 20 | 21 | import numpy as np 22 | import pandas as pd 23 | 24 | from src.const import RailwayCompany 25 | 26 | 27 | def read_train_csv(file: Path) -> pd.DataFrame: 28 | """Load train CSV to a pandas dataframe 29 | 30 | Args: 31 | file (Path): the train CSV file path 32 | 33 | Returns: 34 | pd.DataFrame: the loaded dataframe 35 | """ 36 | 37 | df: pd.DataFrame = pd.read_csv(file) 38 | 39 | @np.vectorize 40 | def _parse_dt(_string: str | None) -> datetime | None: 41 | try: 42 | if not isinstance(_string, str): 43 | return None 44 | return datetime.fromisoformat(_string) 45 | except ValueError: 46 | return None 47 | 48 | # Parse datetimes 49 | for dt_field in [ 50 | "arrival_expected", 51 | "arrival_actual", 52 | "departure_expected", 53 | "departure_actual", 54 | ]: 55 | df[dt_field] = ( 56 | df[dt_field] 57 | .apply(_parse_dt) 58 | .astype("object") 59 | .where(df[dt_field].notnull(), None) 60 | ) 61 | 62 | df.day = pd.to_datetime(df.day) 63 | 64 | # Map 
client codes 65 | df.client_code = df.client_code.apply(RailwayCompany.from_code) # type: ignore 66 | 67 | # Exclude phantom data 68 | df = df.loc[(df.phantom == False) & (df.trenord_phantom == False)].drop( 69 | ["phantom", "trenord_phantom"], axis=1 70 | ) 71 | 72 | # Fix incorrect origin and destination 73 | df["origin"] = (df.groupby("train_hash").transform("first"))["stop_station_code"] 74 | df["destination"] = df.groupby("train_hash").transform("last")["stop_station_code"] 75 | 76 | return df 77 | 78 | 79 | def read_station_csv(file: Path) -> pd.DataFrame: 80 | """Load station CSV to a pandas dataframe 81 | 82 | Args: 83 | file (Path): the station CSV file path 84 | 85 | Returns: 86 | pd.DataFrame: the loaded dataframe 87 | """ 88 | 89 | st: pd.DataFrame = pd.read_csv(file, index_col="code") 90 | 91 | # Some stations (like 'Brescia') have MULTIPLE codes, 92 | # but only one associated row has useful (non-NaN) information. 93 | for idx, station in st.iterrows(): 94 | # Search other stations with the same name 95 | other: pd.DataFrame = st.loc[st.long_name == station.long_name] 96 | if len(other) == 1: 97 | continue 98 | 99 | # If 'this' station has useful information, don't perform any actions 100 | if not np.isnan(station.latitude) and not np.isnan(station.longitude): 101 | continue 102 | 103 | # If present, select the 'oracle' station with information 104 | other = other.loc[~np.isnan(other.latitude)] 105 | if len(other) == 0: 106 | continue 107 | oracle = other.iloc[0] 108 | 109 | # Fill missing information using the oracle data 110 | st.loc[st.index == idx, ["short_name", "latitude", "longitude"]] = ( # type: ignore 111 | oracle.short_name, 112 | oracle.latitude, 113 | oracle.longitude, 114 | ) 115 | 116 | return st 117 | 118 | 119 | def tag_lines(df: pd.DataFrame, stations: pd.DataFrame) -> pd.DataFrame: 120 | """Add 'railway line' information to the 'trains' dataframe. 
121 | 122 | Args: 123 | trains (pd.DataFrame): the considered dataframe 124 | stations (pd.DataFrame): the station data 125 | 126 | Returns: 127 | pd.DataFrame: the tagged dataframe 128 | 129 | Notes: 130 | Two trains (t_1, t_2) are considered of the same 'railway line' iff: 131 | - t_1.railway_company == t_2.railway_company; 132 | - t_1.origin == t_2.origin and t_1.destination == t_2.destination or viceversa; 133 | - t_1.stop_set == t_2.stop_set (*). 134 | 135 | (*): can be simplified in t_1.stop_count == t_2.stop_count. 136 | 137 | The above definition is just a convenient approximation. 138 | More precise considerations can only be made on a case-by-case basis. 139 | """ 140 | 141 | df = df.sort_values(["train_hash", "stop_number"]) 142 | df["stop_set"] = df.groupby("train_hash").stop_station_code.transform( 143 | lambda stops: hash(frozenset(stops.unique())) 144 | ) 145 | df["track"] = df.apply( 146 | lambda r: (r.origin + "_" + r.destination) 147 | if r.origin > r.destination 148 | else (r.destination + "_" + r.origin), 149 | axis=1, 150 | ) 151 | df["line"] = df.apply( 152 | lambda r: f"{r.client_code}_{r.track}_{r.stop_set}", 153 | axis=1, 154 | ) 155 | return df 156 | -------------------------------------------------------------------------------- /src/scraper/main.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .


import itertools
import logging
import os
import pathlib
import pickle
import subprocess
import sys
import typing as t
from datetime import date, datetime, timedelta

import sentry_sdk
from tqdm import tqdm

from src.const import TIMEZONE
from src.scraper.station import Station
from src.scraper.train import Train

# Root directory for pickled scrape output (one subdirectory per day).
DATA_DIR = pathlib.Path("data/")


def get_git_revision_short_hash() -> str:
    """Return the current git short commit hash.

    Falls back to the contents of version.txt (for environments without
    a git checkout, e.g. Docker images), then to "unknown".
    """
    try:
        return (
            subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
            .decode("ascii")
            .strip()
        )
    except subprocess.CalledProcessError:
        try:
            with open("version.txt", "r") as f:
                return f.read().strip()
        except FileNotFoundError:
            return "unknown"


def load_dataset(file_path: pathlib.Path) -> dict[t.Any, t.Any]:
    """Load a pickled dict from file_path, or an empty dict if the file is missing."""
    try:
        with open(file_path, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        return dict()


def save_dataset(file_path: pathlib.Path, dataset: dict[t.Any, t.Any]) -> None:
    """Pickle dataset to file_path, overwriting any previous content."""
    with open(file_path, "wb") as f:
        pickle.dump(dataset, f)


def main() -> None:
    """Scraper entry point: fetch all stations and train runs for 'today'.

    Exits with status 1 unless PYTHONHASHSEED=0: datasets are keyed by
    hash(train) and persisted across runs, so hash randomization would
    make previously saved keys unmatchable.
    """
    hashseed = os.getenv("PYTHONHASHSEED")
    if not hashseed or hashseed != "0":
        logging.critical(
            "Hash seed randomization is not disabled. "
            "Please disable it by setting PYTHONHASHSEED=0 environment variable"
        )
        sys.exit(1)

    # Optional Sentry error reporting, enabled by the SENTRY_DSN env var.
    sentry_dsn = os.getenv("SENTRY_DSN")
    if sentry_dsn is not None:
        sentry_sdk.init(
            dsn=sentry_dsn,
            release=get_git_revision_short_hash(),
            traces_sample_rate=1.0,
        )
        logging.info("Activated sentry error reporting")

    # Today + ~3 hours: runs started shortly after midnight are attributed
    # to the previous calendar day.
    today: date = (datetime.now(tz=TIMEZONE) - timedelta(hours=3)).date()
    today_path: pathlib.Path = DATA_DIR / today.strftime("%Y-%m-%d")
    try:
        os.mkdir(today_path.absolute())
    except FileExistsError:
        pass

    # Resume from previous runs of the same day, if any.
    station_cache: dict[str, Station] = load_dataset(DATA_DIR / "stations.pickle")
    fetched_trains: dict[int, Train] = load_dataset(today_path / "trains.pickle")
    unfetched_trains: dict[int, Train] = load_dataset(today_path / "unfetched.pickle")

    fetched_old_n = len(fetched_trains)
    unfetched_old_n = len(unfetched_trains)
    logging.info(
        f"Loaded {fetched_old_n} already fetched and {unfetched_old_n} unfetched trains"
    )

    # Initialize Station cache
    if len(station_cache) != 0:
        Station._cache = station_cache
        logging.info(f"Initialized station cache with {len(station_cache)} elements")

    # Fetch stations (region codes 1..22)
    stations: set[Station] = set(
        itertools.chain.from_iterable([Station.by_region(r) for r in range(1, 23)])
    )
    logging.info(f"Retrieved {len(stations)} stations")

    # Try to fetch unfetched trains: retry trains that had not yet arrived
    # (or failed) in a previous run.
    logging.info(
        f"Starting fetching {len(unfetched_trains)} previously unfetched trains"
    )
    _fetched_trains_delete_later: list[int] = list()
    for unfetched_train_hash in tqdm(unfetched_trains):
        train = unfetched_trains[unfetched_train_hash]
        try:
            train.fetch()
        except Exception as e:
            # Best-effort: log and keep the train in the unfetched set.
            logging.exception(e, exc_info=True)
            continue

        # A train is final once it arrived (or was flagged as phantom data).
        if train._phantom or train.arrived():
            fetched_trains[unfetched_train_hash] = train
            logging.debug(f"Saved previously unfetched {train.category} {train.number}")

            # It is not possible to delete dict keys in-place
            _fetched_trains_delete_later.append(unfetched_train_hash)

    for to_delete in _fetched_trains_delete_later:
        del unfetched_trains[to_delete]

    logging.info("Starting fetching departures from all stations")
    for station in tqdm(stations):
        logging.debug(f"Processing {station}")

        departing: list[Train] = station.departures()
        for train in departing:
            # Skip trains already seen in either dataset.
            if hash(train) in fetched_trains or hash(train) in unfetched_trains:
                continue

            try:
                train.fetch()
            except Exception as e:
                logging.exception(e, exc_info=True)
                continue

            if train._phantom or train.arrived():
                fetched_trains[hash(train)] = train
                logging.debug(f"Saved {train.category} {train.number}")
            else:
                # Not arrived yet: retry in a later run.
                unfetched_trains[hash(train)] = train

    logging.info(f"Retrieved {len(fetched_trains) - fetched_old_n} new trains")
    logging.info(
        f"Unfetched trains: {len(unfetched_trains)} "
        f"({(len(unfetched_trains) - unfetched_old_n):+d})"
    )

    # Persist all three datasets for the next run.
    save_dataset(DATA_DIR / "stations.pickle", Station._cache)
    save_dataset(today_path / "trains.pickle", fetched_trains)
    save_dataset(today_path / "unfetched.pickle", unfetched_trains)

    logging.info(f"Trains saved today: {len(fetched_trains)}")
    logging.info(f"Station cache size: {len(Station._cache)}")
--------------------------------------------------------------------------------
/src/scraper/api.py:
--------------------------------------------------------------------------------
# railway-opendata: scrape and analyze italian railway data
# Copyright (C) 2023 Marco Aceti
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU
General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | 17 | 18 | import json 19 | import typing as t 20 | from datetime import datetime 21 | 22 | import requests 23 | from requests.adapters import HTTPAdapter, Retry 24 | 25 | import src.scraper.train as tr 26 | from src import types 27 | from src.const import TIMEZONE, TIMEZONE_GMT 28 | from src.scraper.exceptions import BadRequestException 29 | 30 | 31 | class ViaggiaTrenoAPI: 32 | BASE_URL: str = "http://www.viaggiatreno.it/infomobilita/resteasy/viaggiatreno/" 33 | 34 | # Initialize requests session with auto-retry and exponential backoff 35 | _session: requests.Session = requests.Session() 36 | _session.mount( 37 | "http://", 38 | HTTPAdapter( 39 | max_retries=Retry( 40 | total=10, 41 | read=5, 42 | status=10, 43 | status_forcelist=[403, 500, 502, 503, 504], 44 | backoff_factor=0.2, 45 | ) 46 | ), 47 | ) 48 | 49 | @classmethod 50 | def _raw_request(cls, method: str, *parameters: t.Any) -> str: 51 | """Perform a HTTP request to ViaggiaTreno API and return a raw string, 52 | if the request has been successful. 
53 | 54 | Args: 55 | method (str): the method to be called 56 | parameters (tuple[str]): a list of parameters 57 | 58 | Raises: 59 | BadRequestException: if the response is not ok 60 | 61 | Returns: 62 | str: the raw response from the API 63 | """ 64 | response: requests.Response = cls._session.get( 65 | f"{ViaggiaTrenoAPI.BASE_URL}{method}/" 66 | f"{'/'.join(map(lambda p: str(p), parameters))}" 67 | ) 68 | 69 | if response.status_code != 200 or "Error" in response.text: 70 | raise BadRequestException( 71 | url=response.url, 72 | status_code=response.status_code, 73 | response=response.text, 74 | ) 75 | 76 | return response.text 77 | 78 | @staticmethod 79 | def _decode_json(string: str) -> types.JSONType: 80 | """Decode a JSON string. 81 | 82 | Args: 83 | string (str): the string to decode 84 | 85 | Returns: 86 | types.JSONType: the decoded JSON value 87 | """ 88 | return json.loads(string) 89 | 90 | @staticmethod 91 | def _to_datetime(time: int | None) -> datetime | None: 92 | """Convert a UNIX timestamp with milliseconds to datetime. 93 | If None is passed, None is returned. 94 | 95 | Args: 96 | time (int | None): the UNIX timestamp to convert 97 | 98 | Returns: 99 | datetime | None: the resulting datetime object 100 | """ 101 | if not time: 102 | return None 103 | 104 | return datetime.fromtimestamp(time / 1000, tz=TIMEZONE) 105 | 106 | @staticmethod 107 | def _station_departures_or_arrivals( 108 | kind: str, station_code: str 109 | ) -> t.List["tr.Train"]: 110 | """Helper function to Station.departures and Station.arrivals methods. 
111 | 112 | Args: 113 | kind (str): either 'partenze' (departures) or 'arrivi' (arrivals) 114 | station_code (str): the code of the considered station 115 | 116 | Returns: 117 | t.List[Train]: a list of trains departing o arriving to the station 118 | """ 119 | assert kind in ["partenze", "arrivi"] 120 | 121 | now: str = datetime.now(tz=TIMEZONE_GMT).strftime("%a %b %d %Y %H:%M:%S %Z%z") 122 | raw_trains: str = ViaggiaTrenoAPI._raw_request(kind, station_code, now) 123 | trains: types.JSONType = ViaggiaTrenoAPI._decode_json(raw_trains) 124 | return list( 125 | map( 126 | lambda t: tr.Train._from_station_departures_arrivals(t), 127 | trains, 128 | ) 129 | ) 130 | 131 | 132 | class TrenordAPI: 133 | BASE_URL: str = "https://admin.trenord.it/store-management-api/mia/" 134 | 135 | TRENORD_CLIENT_CODE: int = 63 136 | 137 | # Initialize requests session with auto-retry and exponential backoff 138 | _session: requests.Session = requests.Session() 139 | _session.mount( 140 | "https://", 141 | HTTPAdapter( 142 | max_retries=Retry( 143 | total=10, 144 | read=5, 145 | status=5, 146 | status_forcelist=[403, 500, 502, 503, 504], 147 | backoff_factor=0.2, 148 | ) 149 | ), 150 | ) 151 | 152 | @classmethod 153 | def _raw_request(cls, method: str, *parameters: t.Any) -> str: 154 | """Perform a HTTP request to Trenord API and return a raw string, 155 | if the request has been successful. 
156 | 157 | Args: 158 | method (str): the method to be called 159 | parameters (tuple[str]): a list of parameters 160 | 161 | Raises: 162 | BadRequestException: if the response is not ok 163 | 164 | Returns: 165 | str: the raw response from the API 166 | """ 167 | 168 | response: requests.Response = cls._session.get( 169 | f"{TrenordAPI.BASE_URL}{method}/" 170 | f"{'/'.join(map(lambda p: str(p), parameters))}" 171 | ) 172 | 173 | if response.status_code != 200 or "Error" in response.text: 174 | raise BadRequestException( 175 | url=response.url, 176 | status_code=response.status_code, 177 | response=response.text, 178 | ) 179 | 180 | return response.text 181 | -------------------------------------------------------------------------------- /src/analysis/stat.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 


import argparse
import webbrowser
from tempfile import NamedTemporaryFile

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from itables import to_html_datatable
from pandas.core.groupby.generic import DataFrameGroupBy

from src.const import RAILWAY_COMPANIES_PALETTE, WEEKDAYS


def describe(df: pd.DataFrame | DataFrameGroupBy) -> None:
    """Call pandas.DataFrame.describe() on the main numeric columns and print it."""
    print(
        df[["stop_number", "arrival_delay", "departure_delay", "crowding"]].describe()
    )


def prepare_mpl(df: pd.DataFrame, args: argparse.Namespace) -> None:
    """Prepare matplotlib params (figure size, theme, titles).

    Only applies to the figure-based stats; other stats are a no-op.
    """
    if args.stat not in [
        "delay_boxplot",
        "day_train_count",
    ]:
        return

    mpl.rcParams["figure.figsize"] = (12, 12 * 5 / 7)
    sns.set_theme(style="whitegrid", palette="pastel")

    # Center title: the stat name; left title: the covered date range.
    plt.title(args.stat)

    start_day, end_day = df.day.min().date(), df.day.max().date()
    plt.title(f"{start_day} => {end_day}", loc="left")

    # Right title: grouping/aggregation summary, when grouping is requested.
    if args.group_by != "none":
        grouped_str = f" grouped by {args.group_by}"
        if args.agg_func == "none":
            grouped_str += ", unaggregated"
        else:
            grouped_str += f", aggr. with '{args.agg_func}' func"
        plt.title(grouped_str, loc="right")


def delay_boxplot(df: pd.DataFrame | DataFrameGroupBy) -> None:
    """Show a seaborn boxplot of departure and arrival delays.

    Accepts either a plain dataframe (one box per delay variable) or a
    grouped one (boxes split per group, hued by delay variable).
    """

    if isinstance(df, DataFrameGroupBy):
        # Name of the column the data was grouped by (e.g. 'weekday').
        grouped_by: str = df.any().index.name
        group_melt = pd.DataFrame()

        grouped: list = list(df)  # type: ignore

        # Re-order fields
        if grouped_by == "weekday":
            # Calendar order rather than alphabetical.
            grouped.sort(key=lambda t: WEEKDAYS[t[0]])
        elif grouped_by == "client_code":
            # Largest companies first.
            grouped.sort(key=lambda g: len(g[1]), reverse=True)

        # Melt each group so arrival/departure delay become a single
        # 'variable'/'value' pair, then stack all groups together.
        for group in grouped:  # type: ignore
            melt = pd.melt(
                group[1],
                id_vars=[
                    col
                    for col in df.obj.columns
                    if col
                    not in [
                        "arrival_delay",
                        "departure_delay",
                    ]
                ],
                value_name="value",
            )
            group_melt = pd.concat([group_melt, melt])

        ax = sns.boxplot(
            group_melt[[grouped_by, "variable", "value"]],
            x=grouped_by,
            y="value",
            hue="variable",
            showfliers=False,
        )
        ax.set(xlabel=grouped_by, ylabel="Delay (minutes)")

    elif isinstance(df, pd.DataFrame):
        ax = sns.boxplot(
            df[["arrival_delay", "departure_delay"]],
            showfliers=False,
        )
        ax.set(xlabel="Variable", ylabel="Delay (minutes)")


def day_train_count(df: pd.DataFrame | DataFrameGroupBy) -> None:
    """Show a seaborn barplot of unique train count, grouped by day."""

    if isinstance(df, DataFrameGroupBy):
        grouped_by: str = df.any().index.name

        palette: None | dict[str, str] = None
        hue_order: None | list[str] = None

        if grouped_by == "client_code":
            # Fixed per-company colors, companies ordered by train count.
            palette = RAILWAY_COMPANIES_PALETTE
            hue_order = (
                df.train_hash.nunique().sort_values(ascending=False).index.to_list()
            )

        # Re-group the underlying frame by (day, group) and count uniques.
        grouped = df.obj.groupby(["day", grouped_by]).nunique().reset_index()
        grouped["day"] = grouped["day"].apply(lambda d: d.date().isoformat())

        ax = sns.barplot(
            data=grouped,
            x="day",
            y="train_hash",
            hue=grouped_by,
            palette=palette,
            hue_order=hue_order,
        )

    elif isinstance(df, pd.DataFrame):
        grouped = df.groupby("day").nunique().reset_index()
        grouped["day"] = grouped["day"].apply(lambda d: d.date().isoformat())

        ax = sns.barplot(
            data=grouped,
            x="day",
            y="train_hash",
        )

    ax.set(xlabel="Day", ylabel="Train count")
    plt.xticks(rotation=45)


def detect_lines(df: pd.DataFrame, st: pd.DataFrame) -> None:
    """Show a interactive table with the detected (by tag_lines) railway lines.

    Renders the table to a temporary HTML file and opens it in a browser.
    """

    st_names: pd.DataFrame = st.drop(
        ["region", "latitude", "longitude", "short_name"],
        axis=1,
    )
    # Resolve origin/destination codes to station names, then aggregate
    # per detected line: endpoints, unique train count, stop count
    # (+1: stop_number appears 0-based).
    lines: pd.DataFrame = (
        (
            df.join(st_names, on="origin")
            .rename({"long_name": "station_a"}, axis=1)
            .join(st_names, on="destination")
            .rename({"long_name": "station_b"}, axis=1)
        )[["line", "station_a", "station_b", "train_hash", "stop_number"]]
        .groupby("line")
        .agg(
            {
                "station_a": "first",
                "station_b": "first",
                "train_hash": "nunique",
                "stop_number": lambda g: max(g) + 1,
            }
        )
        .rename({"train_hash": "train_count"}, axis=1)
        .sort_values(by="train_count", ascending=False)
        .reset_index()
    )
    html: str = to_html_datatable(
        lines,
        caption="Detected railway lines",
        lengthMenu=[20, 50, 100],
        order=[3, "desc"],
        maxBytes=2**17,
    )

    # delete=False: the browser opens the file asynchronously, so it must
    # survive this function returning.
    outfile = NamedTemporaryFile(delete=False, suffix=".html")
    outfile.write(html.encode("utf-8"))
    webbrowser.open(outfile.name)
--------------------------------------------------------------------------------
/src/analysis/assets/templates/stats_chart.html:
--------------------------------------------------------------------------------
18 | 19 | {% macro html(this, kwargs) %} 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 46 | 47 | 48 | 49 |
50 |
51 |
52 | 53 |
54 |
55 |
56 | 57 | 159 | 160 | 161 | 162 | {% endmacro %} 163 | -------------------------------------------------------------------------------- /src/analysis/main.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | 17 | 18 | import argparse 19 | import logging 20 | import pathlib 21 | import warnings 22 | from datetime import datetime 23 | 24 | import matplotlib.pyplot as plt 25 | import pandas as pd 26 | from dateparser import parse 27 | from joblib import Parallel, delayed 28 | from pandas.core.groupby.generic import DataFrameGroupBy 29 | 30 | from src.analysis import groupby, stat, timetable, trajectories_map 31 | from src.analysis.filter import * 32 | from src.analysis.load_data import read_station_csv, read_train_csv, tag_lines 33 | 34 | 35 | def register_args(parser: argparse.ArgumentParser): 36 | parser.add_argument( 37 | "--start-date", 38 | help="the start date in a 'dateparser'-friendly format", 39 | ) 40 | parser.add_argument( 41 | "--end-date", 42 | help="the end date in a 'dateparser'-friendly format", 43 | ) 44 | parser.add_argument( 45 | "--railway-companies", 46 | help="comma-separated list of railway companies to include. 
If not set, all companies will be included.", 47 | dest="client_codes", 48 | ) 49 | parser.add_argument( 50 | "--railway-lines", 51 | help=( 52 | "comma-separated list of railway lines to include. " 53 | "If not set, all lines will be include. " 54 | "Use --stat detect_lines to see available lines." 55 | ), 56 | dest="railway_lines", 57 | ) 58 | parser.add_argument( 59 | "--group-by", 60 | help="group by stops by a value", 61 | choices=( 62 | "none", 63 | "train_hash", 64 | "client_code", 65 | "weekday", 66 | ), 67 | default="none", 68 | ) 69 | parser.add_argument( 70 | "--agg-func", 71 | help="group by aggregation function", 72 | choices=( 73 | "none", 74 | "mean", 75 | "last", 76 | ), 77 | default="none", 78 | ) 79 | parser.add_argument( 80 | "--stat", 81 | help="the stat to calculate", 82 | choices=( 83 | "describe", 84 | "delay_boxplot", 85 | "day_train_count", 86 | "trajectories_map", 87 | "detect_lines", 88 | "timetable", 89 | ), 90 | default="describe", 91 | ) 92 | parser.add_argument( 93 | "--save-fig", 94 | metavar="FILENAME", 95 | help="save the output figure to a file if using delay_boxplot or day_train_count stats. If not specified, use pyplot.show()", 96 | default=None, 97 | ) 98 | parser.add_argument( 99 | "--timetable-collapse", 100 | help="collapse the train stop times in the graph, relative to the first (only for 'timetable' stat). 
Defaults to False", 101 | action=argparse.BooleanOptionalAction, 102 | default=False, 103 | ) 104 | parser.add_argument( 105 | "station_csv", 106 | help="exported station CSV", 107 | ) 108 | parser.add_argument( 109 | "trains_csv", 110 | nargs="+", 111 | help="exported train CSV", 112 | ) 113 | 114 | 115 | @delayed 116 | def _load_train_dataset(train_csv: str) -> pd.DataFrame: 117 | path = pathlib.Path(train_csv) 118 | train_df: pd.DataFrame = read_train_csv(pathlib.Path(train_csv)) 119 | logging.debug(f"Loaded {len(train_df)} data points @ {path}") 120 | return train_df 121 | 122 | 123 | def main(args: argparse.Namespace): 124 | with warnings.catch_warnings(): 125 | warnings.simplefilter("ignore") 126 | 127 | start_date: datetime | None = parse(args.start_date if args.start_date else "") 128 | if args.start_date and not start_date: 129 | raise argparse.ArgumentTypeError("invalid start_date") 130 | 131 | end_date: datetime | None = parse(args.end_date if args.end_date else "") 132 | if args.end_date and not end_date: 133 | raise argparse.ArgumentTypeError("invalid end_date") 134 | 135 | railway_companies: str | None = args.client_codes 136 | railway_lines: str | None = args.railway_lines 137 | 138 | # Load dataset 139 | df: pd.DataFrame | DataFrameGroupBy = pd.DataFrame() 140 | logging.info("Loading datasets...") 141 | 142 | for train_df in Parallel(n_jobs=-1, verbose=5)( 143 | _load_train_dataset(train_csv) for train_csv in args.trains_csv # type: ignore 144 | ): 145 | df = pd.concat([df, train_df], axis=0) 146 | 147 | df.reset_index(drop=True, inplace=True) 148 | 149 | stations: pd.DataFrame = read_station_csv(args.station_csv) 150 | original_length: int = len(df) 151 | 152 | # Tag lines 153 | df = tag_lines(df, stations) 154 | 155 | # Apply filters 156 | df = date_filter(df, start_date, end_date) 157 | df = railway_company_filter(df, railway_companies) 158 | df = railway_lines_filter(df, railway_lines) 159 | logging.info(f"Loaded {len(df)} data points 
({original_length} before filtering)") 160 | 161 | # Prepare graphics 162 | stat.prepare_mpl(df, args) 163 | 164 | if args.group_by != "none": 165 | df_grouped: DataFrameGroupBy | None = None 166 | 167 | if args.group_by == "train_hash": 168 | df_grouped = groupby.train_hash(df) 169 | elif args.group_by == "client_code": 170 | df_grouped = groupby.client_code(df) 171 | elif args.group_by == "weekday": 172 | df_grouped = groupby.weekday(df) 173 | 174 | assert df_grouped is not None 175 | 176 | if args.agg_func == "last": 177 | df = df_grouped.last() 178 | elif args.agg_func == "mean": 179 | df = df_grouped.mean(numeric_only=True) 180 | elif args.agg_func == "none": 181 | df = df_grouped 182 | 183 | if args.stat in [ 184 | "trajectories_map", 185 | "detect_lines", 186 | "timetable", 187 | ] and not isinstance(df, pd.DataFrame): 188 | raise ValueError(f"can't use {args.stat} with unaggregated data") 189 | 190 | if args.stat == "describe": 191 | stat.describe(df) 192 | elif args.stat == "delay_boxplot": 193 | stat.delay_boxplot(df) 194 | elif args.stat == "day_train_count": 195 | stat.day_train_count(df) 196 | elif args.stat == "trajectories_map": 197 | trajectories_map.build_map(stations, df) 198 | elif args.stat == "detect_lines": 199 | stat.detect_lines(df, stations) 200 | elif args.stat == "timetable": 201 | if not timetable.same_line(df): 202 | raise ValueError( 203 | f"can't use timetable if --railway-lines filter is not used" 204 | ) 205 | timetable.timetable_graph(df, stations, args.timetable_collapse) 206 | 207 | # Visualizations only 208 | if args.stat in ["delay_boxplot", "day_train_count", "timetable"]: 209 | plt.tight_layout() 210 | if args.save_fig: 211 | plt.savefig(args.save_fig) 212 | else: 213 | plt.show() 214 | -------------------------------------------------------------------------------- /src/train_extractor.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian 
# railway-opendata: scrape and analyze italian railway data
# Copyright (C) 2023 Marco Aceti
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.


import argparse
import csv
import hashlib
import pickle
from datetime import date, datetime, timedelta
from pathlib import Path

from src.const import TIMEZONE
from src.scraper.train import Train
from src.scraper.train_stop import TrainStopTime
from src.utils import parse_input_format_output_args


def load_file(file: Path) -> dict[int, Train]:
    """Load a train data pickle file, sanitize it, and return it.

    Args:
        file (Path): the file to load

    Returns:
        dict[int, Train]: the train data contained in the file

    Notes:
        Before commit 48966dfab25553650e3d743a4ecc77db02c4b30,
        departure and arrival timestamp dates of Trenord trains
        were all 1900-01-01.
        This function fixes such incorrect dates.
    """
    with open(file, "rb") as f:
        # NOTE(review): pickle.load is unsafe on untrusted input; only load
        # files produced by this project's scraper.
        data: dict[int, Train] = pickle.load(f)

    def _fix_datetime(train: Train, dt: datetime | None) -> datetime | None:
        """Rebase clearly-wrong (< year 2000) timestamps on the departing date."""
        if isinstance(dt, datetime) and dt.year < 2000:
            dep_date: date = train.departing_date
            dt = dt.replace(
                year=dep_date.year,
                month=dep_date.month,
                day=dep_date.day,
                tzinfo=TIMEZONE,
            )
        return dt

    def _detect_crazy_time_difference(train: Train, time: TrainStopTime) -> None:
        """Mark a train as phantom if the difference between expected and
        actual times in a stop is greater than one day.

        Example:
            REG Train 17907 operated by TPER. S05311 stop on 2023-03-30.
            arrival_expected 2025-08-30 17:33:00+02:00
            arrival_actual 2023-03-30 17:34:30+02:00
            arrival_delay -1438.5
        """
        if not time.actual or not time.expected:
            return

        # BUGFIX: the previous check `abs((actual - expected).days) > 1` was
        # asymmetric because timedelta.days floors towards -inf: a +25h
        # difference has .days == 1 and was NOT flagged, while -25h has
        # .days == -2 and was. Comparing the timedelta magnitude directly
        # flags any difference greater than one day, as documented.
        if abs(time.actual - time.expected) > timedelta(days=1):
            train._phantom = True

    for train_h in data:
        train: Train = data[train_h]

        for stop in train.stops if isinstance(train.stops, list) else []:
            # NOTE(review): phantom detection runs BEFORE the 1900-01-01 date
            # fix, so Trenord trains with unfixed dates may also be flagged;
            # order preserved from the original logic — confirm if intended.
            if isinstance(stop.arrival, TrainStopTime):
                _detect_crazy_time_difference(train, stop.arrival)
                stop.arrival.actual = _fix_datetime(train, stop.arrival.actual)
                stop.arrival.expected = _fix_datetime(train, stop.arrival.expected)  # type: ignore
            if isinstance(stop.departure, TrainStopTime):
                _detect_crazy_time_difference(train, stop.departure)
                stop.departure.actual = _fix_datetime(train, stop.departure.actual)
                stop.departure.expected = _fix_datetime(train, stop.departure.expected)  # type: ignore

        if train.client_code == 63:  # 63 == Trenord client code
            train._fix_intraday_datetimes()

    return data


def to_csv(data: dict[int, Train], output_file: Path) -> None:
    """Convert train data to CSV, one row per stop.

    Args:
        data (dict[int, Train]): the data to convert
        output_file (Path): the file to write
    """
    FIELDS: tuple = (
        "train_hash",
        "number",
        "day",
        "origin",
        "destination",
        "category",
        "client_code",
        "phantom",
        "trenord_phantom",
        "cancelled",
        "stop_number",
        "stop_station_code",
        "stop_type",
        "platform",
        "arrival_expected",
        "arrival_actual",
        "arrival_delay",
        "departure_expected",
        "departure_actual",
        "departure_delay",
        "crowding",
    )

    # Context manager ensures the file is closed even if a row write raises
    # (the original left the handle open on error).
    with open(output_file, "w+", newline="") as csvfile:
        writer = csv.writer(
            csvfile,
            delimiter=",",
            quotechar="|",
            quoting=csv.QUOTE_MINIMAL,
        )
        writer.writerow(FIELDS)

        for train_h in data:
            train: Train = data[train_h]

            stops = train.stops if isinstance(train.stops, list) else []
            for i, stop in enumerate(stops):
                writer.writerow(
                    (
                        # Stable, printable identifier derived from the train key.
                        hashlib.md5(str(train_h).encode("ascii")).hexdigest(),
                        train.number,
                        train.departing_date.isoformat(),
                        train.origin.code,
                        train.destination.code if train.destination else None,
                        train.category,
                        train.client_code,
                        train._phantom,
                        # Older pickles may predate these optional attributes.
                        getattr(train, "_trenord_phantom", False),
                        train.cancelled,
                        i,
                        stop.station.code,
                        stop.stop_type.value,
                        stop.platform_actual or stop.platform_expected,
                        stop.arrival.expected.isoformat()
                        if stop.arrival and stop.arrival.expected
                        else None,
                        stop.arrival.actual.isoformat()
                        if stop.arrival and stop.arrival.actual
                        else None,
                        stop.arrival.delay() if stop.arrival else None,
                        stop.departure.expected.isoformat()
                        if stop.departure and stop.departure.expected
                        else None,
                        stop.departure.actual.isoformat()
                        if stop.departure and stop.departure.actual
                        else None,
                        stop.departure.delay() if stop.departure else None,
                        getattr(train, "crowding", None),
                    )
                )


def register_args(parser: argparse.ArgumentParser) -> None:
    """Register the train-extractor CLI arguments on the given parser."""
    parser.add_argument(
        "pickle_file",
        help=".pickle file to parse",
        metavar="PICKLE_FILE",
    )
    parser.add_argument(
        "-f",
        default="csv",
        choices=[
            "csv",
        ],
        help="output file format",
        dest="format",
    )
    parser.add_argument(
        "-o",
        help="output file name",
        metavar="OUTPUT_FILE",
        dest="output_file",
    )


def main(args: argparse.Namespace) -> None:
    """Entry point: load a pickle dump and export it in the requested format.

    Args:
        args (argparse.Namespace): parsed CLI arguments (see register_args)
    """
    # Renamed local from `format` to avoid shadowing the builtin.
    input_f, output_f, out_format = parse_input_format_output_args(args)

    data: dict[int, Train] = load_file(input_f)
    if out_format == "csv":
        to_csv(data, output_f)
import logging
import typing as t

import src.scraper.api as api
import src.scraper.train as tr
from src import types
from src.scraper.exceptions import BadRequestException


class Station:
    """A ViaggiaTreno station.

    Attributes:
        code (str): the station code, used in API calls (e.g. S01700)
        region_code (int): the code of the region where the station is located
        name (str | None): the station name (e.g. Milano Centrale)
        short_name (str | None): a shortened version of the name (e.g. Milano C.le)
        position (Tuple[float, float] | None): the latitude and longitude of the station

    Other attributes:
        _phantom (bool): if True, the details of the station can't be fetched
    """

    # Process-wide cache keyed by station code, shared by all constructors.
    _cache: dict[str, "Station"] = dict()

    def __init__(
        self,
        code: str,
        region_code: int,
        name: str | None,
        short_name: str | None = None,
        position: t.Tuple[float, float] | None = None,
    ) -> None:
        """Initialize a new station.

        Args:
            code (str): the station code, used in API calls (e.g. S01700)
            region_code (int): the code of the region where the station is located
            name (str | None): the station name (e.g. Milano Centrale)
            short_name (str | None, optional): a shortened version of the name
                (e.g. Milano C.le)
            position (Tuple[float, float] | None, optional): the latitude and
                longitude of the station
        """
        self.code: str = code
        self.region_code: int = region_code
        self.name: str | None = None
        if name:
            self.name: str | None = name.title().strip()
            # NOTE(review): when short_name is missing, the raw (untitled)
            # `name` is used as fallback — preserved from the original.
            self.short_name: str | None = (
                short_name.title().strip() if short_name else name
            )
        self.position: t.Tuple[float, float] | None = position

        # Fixed `== None` comparison: identity check is the correct idiom.
        self._phantom: bool = self.name is None

    @classmethod
    def _from_raw(cls, raw_data: dict) -> "Station":
        """Initialize a new station from raw API data, or use the class cache.

        Args:
            raw_data (dict): raw data returned by the API

        Returns:
            Station: the (possibly cached) station
        """
        station_code = raw_data["codStazione"]

        if station_code not in cls._cache:
            cls._cache[station_code] = cls(
                code=station_code,
                region_code=raw_data["codReg"],
                name=raw_data["localita"]["nomeLungo"],
                short_name=raw_data["localita"]["nomeBreve"],
                position=(raw_data["lat"], raw_data["lon"]),
            )
        else:
            cached: Station = cls._cache[station_code]

            # codReg can have multiple values depending on the request.
            # If an inequality is detected, settle the correct region_code once for all.
            if raw_data["codReg"] != cached.region_code:
                logging.warning(
                    f"Provided region code for {station_code} is different from the cached one"
                )
                cached.region_code = Station._region_code(station_code)

        return cls._cache[station_code]

    def __repr__(self) -> str:
        return f"{self.name} [{self.code}@{self.region_code}]"

    @classmethod
    def by_code(cls, station_code: str) -> "Station":
        """Retrieve a station by its code, or use cache.

        Args:
            station_code (str): the station code

        Returns:
            Station: a station corresponding to the passed station code
        """
        if station_code not in cls._cache:
            try:
                region_code: int = cls._region_code(station_code)
            except BadRequestException as e:
                if e.status_code != 204:
                    # Bare raise preserves the original traceback.
                    raise

                # 204 No Content: region unknown, 0 is the "unknown" sentinel.
                region_code: int = 0

            try:
                response: str = api.ViaggiaTrenoAPI._raw_request(
                    "dettaglioStazione", station_code, region_code
                )
                raw_data: types.JSONType = api.ViaggiaTrenoAPI._decode_json(response)
                cls._cache[station_code] = cls._from_raw(raw_data)
            except BadRequestException as e:
                if e.status_code != 204:
                    raise

                # Details can't be fetched: cache a phantom placeholder so we
                # don't re-query the API for this code.
                cls._cache[station_code] = cls(
                    code=station_code,
                    region_code=region_code,
                    name=None,
                )

        return cls._cache[station_code]

    @staticmethod
    def _region_code(station_code: str) -> int:
        """Retrieve the region code of a given station (by its code).

        Args:
            station_code (str): the code of the station to check

        Raises:
            BadRequestException: if the response is not ok

        Returns:
            int: the region code of the given station
        """
        region_code = api.ViaggiaTrenoAPI._raw_request("regione", station_code)
        return int(region_code)

    @classmethod
    def by_region(cls, region_code: int) -> t.List["Station"]:
        """Retrieve the list of train stations of a given region.

        Args:
            region_code (int): the code of the region to query

        Returns:
            t.List[Station]: a list of train stations
        """
        raw_stations: str = api.ViaggiaTrenoAPI._raw_request(
            "elencoStazioni", region_code
        )
        stations: types.JSONType = api.ViaggiaTrenoAPI._decode_json(raw_stations)
        # tipoStazione == 4 marks placeholder entries — filter them out.
        return list(
            map(
                lambda s: cls._from_raw(s),
                filter(lambda s: s["tipoStazione"] != 4, stations),
            )
        )

    def departures(self) -> t.List["tr.Train"]:
        """Retrieve the departures of this train station.

        Returns:
            t.List[Train]: a list of trains departing from the station
        """
        return api.ViaggiaTrenoAPI._station_departures_or_arrivals(
            "partenze", self.code
        )

    def arrivals(self) -> t.List["tr.Train"]:
        """Retrieve the arrivals of this train station.

        Returns:
            t.List[Train]: a list of trains arriving at the station
        """
        return api.ViaggiaTrenoAPI._station_departures_or_arrivals("arrivi", self.code)

    def __hash__(self) -> int:
        # NOTE(review): hashing by name means all phantom stations (name is
        # None) collide, and no __eq__ is defined — preserved as-is because
        # the project relies on deterministic hash() (PYTHONHASHSEED=0).
        return hash(self.name)
67 |
68 |
69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 82 | 88 | 94 | 95 | 96 | 97 | 103 | 109 | 115 | 116 | 117 | 118 | 124 | 130 | 131 | 132 | 133 | 134 | 140 | 146 | 147 | 148 | 149 |

Regional

77 |
78 | 79 | Trenitalia 80 |
81 |
83 |
84 | 85 | Trenord 86 |
87 |
89 |
90 | 91 | TPER 92 |
93 |

High speed

98 |
99 | 100 | Frecciarossa 101 |
102 |
104 |
105 | 106 | Frecciargento 107 |
108 |
110 |
111 | 112 | Frecciabianca 113 |
114 |

Long haul

119 |
120 | 121 | Intercity 122 |
123 |
125 |
126 | 127 | IC Notte 128 |
129 |

International

135 |
136 | 137 | Eurocity 138 |
139 |
141 |
142 | 143 | OBB 144 |
145 |
150 |
151 |
152 |
153 | 154 | 155 | 156 | {% endmacro %} 157 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### VSCodium 2 | .vscode/* 3 | !.vscode/settings.json 4 | !.vscode/tasks.json 5 | !.vscode/launch.json 6 | !.vscode/extensions.json 7 | !.vscode/*.code-snippets 8 | 9 | # Local History for Visual Studio Code 10 | .history/ 11 | 12 | # Built Visual Studio Code Extensions 13 | *.vsix 14 | 15 | ### Python 16 | # Byte-compiled / optimized / DLL files 17 | __pycache__/ 18 | *.py[cod] 19 | *$py.class 20 | 21 | # C extensions 22 | *.so 23 | 24 | # Distribution / packaging 25 | .Python 26 | build/ 27 | develop-eggs/ 28 | dist/ 29 | downloads/ 30 | eggs/ 31 | .eggs/ 32 | lib/ 33 | lib64/ 34 | parts/ 35 | sdist/ 36 | var/ 37 | wheels/ 38 | share/python-wheels/ 39 | *.egg-info/ 40 | .installed.cfg 41 | *.egg 42 | MANIFEST 43 | 44 | # PyInstaller 45 | # Usually these files are written by a python script from a template 46 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .nox/ 58 | .coverage 59 | .coverage.* 60 | .cache 61 | nosetests.xml 62 | coverage.xml 63 | *.cover 64 | *.py,cover 65 | .hypothesis/ 66 | .pytest_cache/ 67 | cover/ 68 | 69 | # Translations 70 | *.mo 71 | *.pot 72 | 73 | # Django stuff: 74 | *.log 75 | local_settings.py 76 | db.sqlite3 77 | db.sqlite3-journal 78 | 79 | # Flask stuff: 80 | instance/ 81 | .webassets-cache 82 | 83 | # Scrapy stuff: 84 | .scrapy 85 | 86 | # Sphinx documentation 87 | docs/_build/ 88 | 89 | # PyBuilder 90 | .pybuilder/ 91 | target/ 92 | 93 | # Jupyter Notebook 94 | .ipynb_checkpoints 95 | 96 | # IPython 97 | profile_default/ 98 | ipython_config.py 99 | 100 | # pyenv 101 | # For a library or package, you might want to ignore these files since the code is 102 | # intended to run in multiple environments; otherwise, check them in: 103 | # .python-version 104 | 105 | # pipenv 106 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 107 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 108 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 109 | # install all needed dependencies. 110 | #Pipfile.lock 111 | 112 | # poetry 113 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 114 | # This is especially recommended for binary packages to ensure reproducibility, and is more 115 | # commonly ignored for libraries. 116 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 117 | #poetry.lock 118 | 119 | # pdm 120 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
121 | #pdm.lock 122 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 123 | # in version control. 124 | # https://pdm.fming.dev/#use-with-ide 125 | .pdm.toml 126 | 127 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 128 | __pypackages__/ 129 | 130 | # Celery stuff 131 | celerybeat-schedule 132 | celerybeat.pid 133 | 134 | # SageMath parsed files 135 | *.sage.py 136 | 137 | # Environments 138 | .env 139 | .venv 140 | env/ 141 | venv/ 142 | ENV/ 143 | env.bak/ 144 | venv.bak/ 145 | 146 | # Spyder project settings 147 | .spyderproject 148 | .spyproject 149 | 150 | # Rope project settings 151 | .ropeproject 152 | 153 | # mkdocs documentation 154 | /site 155 | 156 | # mypy 157 | .mypy_cache/ 158 | .dmypy.json 159 | dmypy.json 160 | 161 | # Pyre type checker 162 | .pyre/ 163 | 164 | # pytype static type analyzer 165 | .pytype/ 166 | 167 | # Cython debug symbols 168 | cython_debug/ 169 | 170 | # PyCharm 171 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 172 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 173 | # and can be added to the global gitignore or merged into this file. For a more nuclear 174 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
175 | #.idea/ 176 | 177 | ### Rust 178 | # Generated by Cargo 179 | # will have compiled files and executables 180 | debug/ 181 | target/ 182 | 183 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 184 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 185 | Cargo.lock 186 | 187 | # These are backup files generated by rustfmt 188 | **/*.rs.bk 189 | 190 | # MSVC Windows builds of rustc generate these, which store debugging information 191 | *.pdb 192 | 193 | ### TeX 194 | ## Core latex/pdflatex auxiliary files: 195 | *.aux 196 | *.lof 197 | *.log 198 | *.lot 199 | *.fls 200 | *.out 201 | *.toc 202 | *.fmt 203 | *.fot 204 | *.cb 205 | *.cb2 206 | .*.lb 207 | 208 | ## Intermediate documents: 209 | *.dvi 210 | *.xdv 211 | *-converted-to.* 212 | # these rules might exclude image files for figures etc. 213 | # *.ps 214 | # *.eps 215 | # *.pdf 216 | 217 | ## Generated if empty string is given at "Please type another file name for output:" 218 | .pdf 219 | 220 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 221 | *.bbl 222 | *.bcf 223 | *.blg 224 | *-blx.aux 225 | *-blx.bib 226 | *.run.xml 227 | 228 | ## Build tool auxiliary files: 229 | *.fdb_latexmk 230 | *.synctex 231 | *.synctex(busy) 232 | *.synctex.gz 233 | *.synctex.gz(busy) 234 | *.pdfsync 235 | 236 | ## Build tool directories for auxiliary files 237 | # latexrun 238 | latex.out/ 239 | 240 | ## Auxiliary and intermediate files from other packages: 241 | # algorithms 242 | *.alg 243 | *.loa 244 | 245 | # achemso 246 | acs-*.bib 247 | 248 | # amsthm 249 | *.thm 250 | 251 | # beamer 252 | *.nav 253 | *.pre 254 | *.snm 255 | *.vrb 256 | 257 | # changes 258 | *.soc 259 | 260 | # comment 261 | *.cut 262 | 263 | # cprotect 264 | *.cpt 265 | 266 | # elsarticle (documentclass of Elsevier journals) 267 | *.spl 268 | 269 | # endnotes 270 | *.ent 271 | 272 | # fixme 273 | *.lox 274 | 275 | # feynmf/feynmp 276 | *.mf 277 | *.mp 278 | 
*.t[1-9] 279 | *.t[1-9][0-9] 280 | *.tfm 281 | 282 | #(r)(e)ledmac/(r)(e)ledpar 283 | *.end 284 | *.?end 285 | *.[1-9] 286 | *.[1-9][0-9] 287 | *.[1-9][0-9][0-9] 288 | *.[1-9]R 289 | *.[1-9][0-9]R 290 | *.[1-9][0-9][0-9]R 291 | *.eledsec[1-9] 292 | *.eledsec[1-9]R 293 | *.eledsec[1-9][0-9] 294 | *.eledsec[1-9][0-9]R 295 | *.eledsec[1-9][0-9][0-9] 296 | *.eledsec[1-9][0-9][0-9]R 297 | 298 | # glossaries 299 | *.acn 300 | *.acr 301 | *.glg 302 | *.glo 303 | *.gls 304 | *.glsdefs 305 | *.lzo 306 | *.lzs 307 | *.slg 308 | *.slo 309 | *.sls 310 | 311 | # uncomment this for glossaries-extra (will ignore makeindex's style files!) 312 | # *.ist 313 | 314 | # gnuplot 315 | *.gnuplot 316 | *.table 317 | 318 | # gnuplottex 319 | *-gnuplottex-* 320 | 321 | # gregoriotex 322 | *.gaux 323 | *.glog 324 | *.gtex 325 | 326 | # htlatex 327 | *.4ct 328 | *.4tc 329 | *.idv 330 | *.lg 331 | *.trc 332 | *.xref 333 | 334 | # hyperref 335 | *.brf 336 | 337 | # knitr 338 | *-concordance.tex 339 | # TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files 340 | # *.tikz 341 | *-tikzDictionary 342 | 343 | # listings 344 | *.lol 345 | 346 | # luatexja-ruby 347 | *.ltjruby 348 | 349 | # makeidx 350 | *.idx 351 | *.ilg 352 | *.ind 353 | 354 | # minitoc 355 | *.maf 356 | *.mlf 357 | *.mlt 358 | *.mtc[0-9]* 359 | *.slf[0-9]* 360 | *.slt[0-9]* 361 | *.stc[0-9]* 362 | 363 | # minted 364 | _minted* 365 | *.pyg 366 | 367 | # morewrites 368 | *.mw 369 | 370 | # newpax 371 | *.newpax 372 | 373 | # nomencl 374 | *.nlg 375 | *.nlo 376 | *.nls 377 | 378 | # pax 379 | *.pax 380 | 381 | # pdfpcnotes 382 | *.pdfpc 383 | 384 | # sagetex 385 | *.sagetex.sage 386 | *.sagetex.py 387 | *.sagetex.scmd 388 | 389 | # scrwfile 390 | *.wrt 391 | 392 | # svg 393 | svg-inkscape/ 394 | 395 | # sympy 396 | *.sout 397 | *.sympy 398 | sympy-plots-for-*.tex/ 399 | 400 | # pdfcomment 401 | *.upa 402 | *.upb 403 | 404 | # pythontex 405 | *.pytxcode 406 | pythontex-files-*/ 407 | 408 | # 
tcolorbox 409 | *.listing 410 | 411 | # thmtools 412 | *.loe 413 | 414 | # TikZ & PGF 415 | *.dpth 416 | *.md5 417 | *.auxlock 418 | 419 | # titletoc 420 | *.ptc 421 | 422 | # todonotes 423 | *.tdo 424 | 425 | # vhistory 426 | *.hst 427 | *.ver 428 | 429 | # easy-todo 430 | *.lod 431 | 432 | # xcolor 433 | *.xcp 434 | 435 | # xmpincl 436 | *.xmpi 437 | 438 | # xindy 439 | *.xdy 440 | 441 | # xypic precompiled matrices and outlines 442 | *.xyc 443 | *.xyd 444 | 445 | # endfloat 446 | *.ttt 447 | *.fff 448 | 449 | # Latexian 450 | TSWLatexianTemp* 451 | 452 | ## Editors: 453 | # WinEdt 454 | *.bak 455 | *.sav 456 | 457 | # Texpad 458 | .texpadtmp 459 | 460 | # LyX 461 | *.lyx~ 462 | 463 | # Kile 464 | *.backup 465 | 466 | # gummi 467 | .*.swp 468 | 469 | # KBibTeX 470 | *~[0-9]* 471 | 472 | # TeXnicCenter 473 | *.tps 474 | 475 | # auto folder when using emacs and auctex 476 | ./auto/* 477 | *.el 478 | 479 | # expex forward references with \gathertags 480 | *-tags.tex 481 | 482 | # standalone packages 483 | *.sta 484 | 485 | # Makeindex log files 486 | *.lpz 487 | 488 | # xwatermark package 489 | *.xwm 490 | 491 | # REVTeX puts footnotes in the bibliography by default, unless the nofootinbib 492 | # option is specified. Footnotes are the stored in a file with suffix Notes.bib. 493 | # Uncomment the next line to have this generated file ignored. 
494 | #*Notes.bib 495 | 496 | # Emacs .gitignore 497 | *~ 498 | \#*\# 499 | /.emacs.desktop 500 | /.emacs.desktop.lock 501 | *.elc 502 | auto-save-list 503 | tramp 504 | .\#* 505 | 506 | # Org-mode 507 | .org-id-locations 508 | *_archive 509 | 510 | # flymake-mode 511 | *_flymake.* 512 | 513 | # eshell files 514 | /eshell/history 515 | /eshell/lastdir 516 | 517 | # elpa packages 518 | /elpa/ 519 | 520 | # reftex files 521 | *.rel 522 | 523 | # AUCTeX auto folder 524 | /auto/ 525 | 526 | # cask packages 527 | .cask/ 528 | dist/ 529 | 530 | # Flycheck 531 | flycheck_*.el 532 | 533 | # server auth directory 534 | /server/ 535 | 536 | # projectiles files 537 | .projectile 538 | 539 | # directory configuration 540 | .dir-locals.el 541 | 542 | # network security 543 | /network-security.data 544 | 545 | ### Custom 546 | data/* 547 | !.gitkeep 548 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RailScrape (railway-opendata) 2 | 3 | In Italy there are no available official **Open Data** about the _performance_ (delays, cancellations, ...) of the **italian public rail transport**. 4 | This project offers a tool which allows anyone to gather it and run some stats and visualizations. 5 | 6 | ## Architecture 7 | 8 | ```mermaid 9 | flowchart TB 10 | 11 | S[Scraper] --> |Downloads data| D("ViaggiaTreno and Trenord APIs") 12 | S -->|Produces| P[(Daily .pickle dumps)] 13 | E[Extractor] -->|Reads| P 14 | E[Extractor] -->|Produces| C[(Daily .CSV dumps)] 15 | A2["(BYOD Analyzer)"] -.->|Reads| C 16 | A[Analyzer] -->|Reads| C 17 | A[Analyzer] -->|Produces| K(Stats, visualizations, etc...) 18 | ``` 19 | 20 | The application is composed by multiple modules, accessible via CLI: 21 | - **`scraper`**: unattended script to incrementally download and preserve the current status of the italian railway network. If run constantly (e.g. 
~every hour using `cron`) all trains will be captured and saved in `data/%Y-%m-%d/trains.pickle`. 22 | - **`train-extractor`** and **`station-extractor`**: converts raw scraped data to usable `.csv` files; 23 | - **`analyze`** : shows reproducible stats and visualizations. 24 | 25 | ## Running 26 | 27 | The project is written in Python and it uses modern typing annotations, so **Python >= 3.11** is needed. 28 | 29 | ### Using Docker (easy) 30 | 31 | A [Dockerfile](./Dockerfile) is available to avoid installing the dependencies manually. 32 | You can use the automatically updated [ghcr.io/marcobuster/railway-opendata:latest](https://github.com/MarcoBuster/railway-opendata/pkgs/container/railway-opendata) 33 | Docker image if you want the latest version available on the master branch. 34 | 35 | For instance, the following command will start the scraper on your machine. 36 | 37 | ```bash 38 | $ docker run -v ./data:/app/data ghcr.io/marcobuster/railway-opendata:latest scraper 39 | ``` 40 | 41 | ### Using virtual envs 42 | 43 | > ⚠️ __WARNING__: this project currently uses the builtin `hash(...)` function to quickly index objects. 44 | > To ensure reproducibility between runs, you need to disable Python's **hash seed randomization** by setting the `PYTHONHASHSEED=0` environment variable. 45 | > If you fail to do so, the software will refuse to start. 46 | 47 | ```bash 48 | $ export PYTHONHASHSEED=0 49 | $ virtualenv venv 50 | $ source ./venv/bin/activate 51 | $ pip install -r requirements.txt 52 | $ python main.py ... 53 | ``` 54 | 55 | ## Example usages 56 | 57 | - __Start the scraper__. For continuos data collection, it should be run every ~hour. 58 | 59 | `$ python main.py scraper` 60 | 61 | - __Extract train data__ from a pickle file and save it in CSV. 62 | 63 | `$ python main.py train-extractor -o data/2023/04-29/trains.csv data/2023-04-29/trains.pickle` 64 | 65 | - __Extract station data__ from a pickle file and save it in GeoJSON. 
66 | 67 | `$ python main.py station-extractor -f geojson data/stations.pickle` 68 | 69 | - __Describe a dataset__ and filter observation by date. 70 | 71 | `$ python main.py analyze --start-date 2023-05-01 --end-date today data/stations.csv data/2023-05-*/trains.csv --stat describe` 72 | 73 | - __Show delay stats__ of the last stop. 74 | 75 | `$ python main.py analyze --group-by train_hash --agg-func last [..]/stations.csv [..]/trains.csv --stat delay_box_plot` 76 | 77 | - __Show daily train count__ grouped by railway companies. 78 | 79 | `$ python main.py analyze --group-by client_code [..]/stations.csv [..]/trains.csv --stat day_train_count` 80 | 81 | - __Display an interactive map__ and open it in the web browser. 82 | 83 | `$ python main.py analyze [..]/stations.csv [..]/trains.csv --stat trajectories_map` 84 | 85 | - __Display a timetable graph__. 86 | 87 | `$ python main.py analyze [..]/stations.csv [..]/trains.csv --stat timetable --timetable-collapse` 88 | 89 | ## Fields 90 | 91 | ### Stations CSV 92 | 93 | | Column | Data type | Description | Notes | 94 | |--------|-----------|-------------|-------| 95 | | `code` | String | Station code | This field is not actually unique. One station can have multiple codes | 96 | | `region` | Integer | Region code | If zero, unknown. Used in API calls | 97 | | `long_name` | String | Station long name | | 98 | | `short_name` | String | Station short name | Can be empty | 99 | | `latitude` | Float | Station latitude | Can be empty | 100 | | `longitude` | Float | Station longitude | Can be empty | 101 | 102 | ### Trains CSV 103 | In the extracted trains CSV, each line is a _train stop_ (not station nor train). 104 | Many fields are actually duplicated. 
105 | 106 | | Column | Data type | Description | Notes | 107 | |--------|-----------|-------------|-------| 108 | | `train_hash` | MD5 hash | Unique identifier for a particular train | | 109 | | `number` | Integer | Train number | Can't be used to uniquely identify a train[^train_number_unique] | 110 | | `day` | Date | Train departing date | | 111 | | `origin` | Station (code) | Train absolute origin | | 112 | | `category` | String | Train Category | See table[^categories] | 113 | | `destination` | Station (code) | Train final destination | | 114 | | `client_code` | Integer | Railway company | See table[^client_codes] | 115 | | `phantom` | Boolean | True if train was only partially fetched | Trains with this flag should be safely ignored | 116 | | `trenord_phantom` | Boolean | True if the train was only partially fetched using Trenord APIs | Trains with this flag should be safely ignored[^trenord_phantom] | 117 | | `cancelled` | Boolean | True if the train is marked as cancelled | Not all cancelled trains are marked as cancelled: for more accuracy, you should always check `stop_type` | 118 | | `stop_number` | Integer | Stop progressive number (starting at 0) | | 119 | | `stop_station_code` | Station (code) | Stop station code | | 120 | | `stop_type` | Char | Stop type | `P` if first, `F` if intermediate, `A` if last, `C` if cancelled | 121 | | `platform` | String | Stop platform | Can be empty | 122 | | `arrival_expected` | ISO 8601 | Stop expected arrival time | Can be empty | 123 | | `arrival_actual` | ISO 8601 | Stop actual arriving time | Can be empty | 124 | | `arrival_delay` | Integer | Stop arriving delay in minutes | Is empty if `arrival_expected` or `arrival_actual` are both empty | 125 | | `departure_expected` | ISO 8601 | Stop expected departing time | Can be empty | 126 | | `departure_actual` | ISO 8601 | Stop actual departing time | Can be empty | 127 | | `departure_delay` | Integer | Stop departing delay in minutes | Is empty if `departing_expected` 
or `departing_actual` are both empty | 128 | | `crowding` | Integer | Train crowding in percentage | Reported by Trenord | 129 | 130 | [^train_number_unique]: In Italy, two different trains can share the same number. A train is only uniquely identified by the triple (number, origin, day). 131 | 132 | [^categories]: Known categories are listed below. 133 | 134 | | Category | Description | 135 | |----------|-------------| 136 | | REG | Regional trains | 137 | | MET | Metropolitan trains | 138 | | FR | Frecciarossa (red arrow) | 139 | | IC | Intercity | 140 | | ICN | Intercity Night | 141 | | EC | Eurocity | 142 | | FB | Frecciabianca (white arrow) | 143 | | FA | Frecciargento (silver arrow) | 144 | | EN | EuroNight | 145 | | EC ER | Eurocity | 146 | 147 | [^client_codes]: Known client codes are listed below. 148 | 149 | | Client code | Railway company | 150 | |-------------|-----------------| 151 | | 1 | TRENITALIA_AV | 152 | | 2 | TRENITALIA_REG | 153 | | 4 | TRENITALIA_IC | 154 | | 18 | TPER | 155 | | 63 | TRENORD | 156 | | 64 | OBB | 157 | 158 | [^trenord_phantom]: This flag is activated when a train is seen on ViaggiaTreno APIs and marked as Trenord's but it can't be fetched on Trenord's APIs. 159 | 160 | ## Contributing 161 | 162 | See [CONTRIBUTING.md](CONTRIBUTING.md). 163 | 164 | ## Notes and caveats 165 | 166 | ### Data completeness and correctness 167 | 168 | The [ViaggiaTreno](https://viaggiatreno.it) APIs are [known](https://medium.com/@albigiu/trenitalia-shock-non-crederete-mai-a-queste-api-painful-14433096502c) to be **buggy** and **unreliable**. 169 | As stated before, many fields (like `departure_expected` and `arrival_expected`) are not always guaranteed to be present and some concepts are counter-intuitive (a train number is not an unique identifier nor are station codes). 
170 | 171 | ViaggiaTreno is the main _source of truth_ for many final user applications (like [Trenìt!](https://play.google.com/store/apps/details?id=eu.baroncelli.oraritrenitalia) or [Orario Treni](https://play.google.com/store/apps/details?id=org.paoloconte.treni_lite)) and is itself linked on the Trenitalia official website. 172 | For instance, if the API does not return information for a train stop, no other application will display it: the data simply does not exists online. 173 | The scraper always tries to save as much data as possible (___"best effort"___) even when is probably incomplete; in those cases, proper flags (like `phantom` and `trenord_phantom`) are activated so the developer can choose for themselves. 174 | 175 | ### Licensing 176 | 177 | Copyright (c) 2023 Marco Aceti. Some rights reserved (see [LICENSE](./LICENSE)). 178 | 179 | Terms and conditions of the ViaggiaTreno web portal state that copying is prohibited (except for personal use) as **all rights for the content are reserved** to the original owner (Trenitalia or Gruppo FS). 180 | In July 2019 Trenitalia sued Trenìt for using train data in its app, but [partially lost](https://www.wired.it/lifestyle/mobilita/2019/09/06/trenitalia-tornata-online-trenit/). 181 | I think data about the performance of __public__ transport should be __open__ as well, but I'm not a lawyer and I'm not willing to risk lawsuits redistributing data; if someone wants to, the tool is now available. 182 | 183 | BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 184 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 185 | THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. 
186 | SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 187 | 188 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 189 | -------------------------------------------------------------------------------- /src/scraper/train_stop.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 
16 | 17 | 18 | import typing as t 19 | from datetime import date, datetime 20 | from enum import Enum 21 | 22 | import src.scraper.api as api 23 | import src.scraper.station as st 24 | from src.scraper.exceptions import IncompleteTrenordStopDataException 25 | 26 | 27 | class TrainStopType(Enum): 28 | """A train stop type.""" 29 | 30 | FIRST = "P" 31 | STOP = "F" 32 | LAST = "A" 33 | CANCELLED = "C" 34 | 35 | 36 | class TrainStopTime: 37 | """Helper class to handle arrival and departures times. 38 | 39 | Attributes: 40 | expected (datetime): expected departing or arrival time 41 | actual (datetime | None): actual departing or arrival time 42 | """ 43 | 44 | def __init__(self, expected: datetime, actual: datetime | None) -> None: 45 | """Initialize a new TrainStopTime object. 46 | 47 | Args: 48 | expected (datetime): expected departing or arrival time 49 | actual (datetime | None): actual departing or arrival time 50 | """ 51 | assert expected is not None 52 | 53 | self.expected: datetime = expected 54 | self.actual: datetime | None = actual 55 | 56 | def passed(self) -> bool: 57 | """Return if the train actually arrived or departed from the station. 58 | 59 | Returns: 60 | bool: True if the actual time is not None 61 | """ 62 | return self.actual is not None 63 | 64 | def delay(self) -> float | None: 65 | """Return the delay in minutes. 66 | 67 | Returns: 68 | int | None: delay in minutes, None if not .passed(). 
69 | """ 70 | if not self.passed(): 71 | return None 72 | 73 | assert isinstance(self.actual, datetime) 74 | assert isinstance(self.expected, datetime) 75 | 76 | if self.actual >= self.expected: 77 | return (self.actual - self.expected).seconds / 60 78 | else: 79 | return -(self.expected - self.actual).seconds / 60 80 | 81 | def __repr__(self) -> str: 82 | hm = lambda d: d.strftime("%H:%M") 83 | 84 | ret: str = hm(self.expected) 85 | if not self.passed(): 86 | return ret 87 | 88 | ret += f" ~ {hm(self.actual)}" 89 | if self.delay() == 0: 90 | return ret 91 | 92 | delay: float | None = self.delay() 93 | assert isinstance(delay, float) 94 | 95 | sign: str = "+" if delay > 0 else "-" 96 | ret += f" {sign}{round(abs(delay), 1)}m" 97 | 98 | return ret 99 | 100 | 101 | class TrainStop: 102 | """A ViaggiaTreno train stop. 103 | 104 | Attributes: 105 | station (st.Station): the station the train is stopping by 106 | stop_type (TrainStopType): the type of stop (first, last, stop) 107 | platform_expected (str | None): expected platform 108 | platform_actual (str | None): actual platform 109 | arrival (TrainStopTime | None): arrival time, can be None if it's the first stop 110 | departure (TrainStopTime | None): departure time, can be None if it's the last stop 111 | """ 112 | 113 | def __init__( 114 | self, 115 | station: st.Station, 116 | stop_type: TrainStopType, 117 | platform_expected: str | None, 118 | platform_actual: str | None, 119 | arrival_expected: datetime | None, 120 | arrival_actual: datetime | None, 121 | departure_expected: datetime | None, 122 | departure_actual: datetime | None, 123 | ) -> None: 124 | """Initialize a new TrainStop object. 
class TrainStop:
    """A ViaggiaTreno train stop.

    Attributes:
        station (st.Station): the station the train is stopping by
        stop_type (TrainStopType): the type of stop (first, last, stop)
        platform_expected (str | None): expected platform
        platform_actual (str | None): actual platform
        arrival (TrainStopTime | None): arrival time, can be None if it's the first stop
        departure (TrainStopTime | None): departure time, can be None if it's the last stop
    """

    def __init__(
        self,
        station: st.Station,
        stop_type: TrainStopType,
        platform_expected: str | None,
        platform_actual: str | None,
        arrival_expected: datetime | None,
        arrival_actual: datetime | None,
        departure_expected: datetime | None,
        departure_actual: datetime | None,
    ) -> None:
        """Initialize a new TrainStop object.

        Args:
            station (st.Station): the station the train is stopping by
            stop_type (TrainStopType): the type of stop (first, last, stop)
            platform_expected (str | None): expected platform
            platform_actual (str | None): actual platform
            arrival_expected (datetime | None): expected arrival time
            arrival_actual (datetime | None): actual arrival time
            departure_expected (datetime | None): expected departure time
            departure_actual (datetime | None): actual departure time
        """
        self.station: st.Station = station
        self.stop_type: TrainStopType = stop_type

        self.platform_expected: str | None = platform_expected
        self.platform_actual: str | None = platform_actual

        self.arrival: TrainStopTime | None = None
        self.departure: TrainStopTime | None = None

        # Cancelled stops carry no usable timing information.
        if self.stop_type == TrainStopType.CANCELLED:
            return

        # First stops have no arrival, last stops have no departure.
        if self.stop_type != TrainStopType.FIRST:
            assert isinstance(arrival_expected, datetime)
            self.arrival = TrainStopTime(arrival_expected, arrival_actual)

        if self.stop_type != TrainStopType.LAST:
            assert isinstance(departure_expected, datetime)
            self.departure = TrainStopTime(departure_expected, departure_actual)

    @classmethod
    def _from_raw_data(cls, stop_data: dict) -> "TrainStop":
        """Initialize a new train stop from the data processed by Train.fetch()

        Args:
            stop_data (dict): the data to initialize the class with

        Returns:
            TrainStop: a constructed TrainStop object
        """
        station = st.Station.by_code(stop_data["id"])
        if station._phantom:
            # Unknown station: recover at least its name from the raw data.
            station.name = stop_data["stazione"].title().strip()

        stop_type: TrainStopType
        if stop_data["tipoFermata"] == "P":
            stop_type = TrainStopType.FIRST
        elif stop_data["tipoFermata"] == "A":
            stop_type = TrainStopType.LAST
        elif stop_data["tipoFermata"] == "F":
            stop_type = TrainStopType.STOP
        else:
            stop_type = TrainStopType.CANCELLED

        _to_dt = api.ViaggiaTrenoAPI._to_datetime

        return cls(
            station=station,
            stop_type=stop_type,
            platform_expected=(
                stop_data["binarioProgrammatoArrivoDescrizione"]
                or stop_data["binarioProgrammatoPartenzaDescrizione"]
            ),
            platform_actual=(
                stop_data["binarioEffettivoArrivoDescrizione"]
                or stop_data["binarioEffettivoPartenzaDescrizione"]
            ),
            arrival_expected=_to_dt(stop_data["arrivo_teorico"]),
            arrival_actual=_to_dt(stop_data["arrivoReale"]),
            departure_expected=_to_dt(stop_data["partenza_teorica"]),
            departure_actual=_to_dt(stop_data["partenzaReale"]),
        )

    @classmethod
    def _from_trenord_raw_data(
        cls, stop_data: dict, day: date
    ) -> t.Union["TrainStop", None]:
        """Initialize a new train stop from data processed by Train.trenord_fetch()

        Args:
            stop_data (dict): the data to initialize the class with
            day (date): the date of the train, used to parse datetimes

        Returns:
            TrainStop | None: a constructed TrainStop object,
            or None if there isn't actual data

        Raises:
            IncompleteTrenordStopDataException: if no station code can be found
        """

        def _hhmmss_to_dt(hhmmss: str | None) -> datetime | None:
            """Parse and return a Trenord time string into a datetime object.

            Args:
                hhmmss (str | None): the string to parse

            Returns:
                datetime | None: the parsed datetime object.
            """
            if not hhmmss:
                return None

            # Trenord only reports the time of day: combine it with `day`.
            return datetime.strptime(hhmmss, "%H:%M:%S").replace(
                year=day.year,
                month=day.month,
                day=day.day,
                tzinfo=api.TIMEZONE,
            )

        if not stop_data["actual_data"]:
            return None

        station_code: str | None = (
            stop_data["station"].get("station_id")
            or stop_data["actual_data"]["actual_station_mir"]
        )
        # Validate explicitly: the previous `assert` would have been
        # stripped under `python -O`, letting invalid codes through.
        if not isinstance(station_code, str) or len(station_code) == 0:
            raise IncompleteTrenordStopDataException

        station = st.Station.by_code(station_code)
        if station._phantom and stop_data.get("station", {}).get("station_ori_name"):
            station.name = stop_data["station"]["station_ori_name"].title().strip()

        stop_type: TrainStopType
        stop_type_raw = (
            stop_data["actual_data"].get("actual_type", None) or stop_data["type"]
        )
        if stop_type_raw == "O":
            stop_type = TrainStopType.FIRST
        elif stop_type_raw == "F":
            stop_type = TrainStopType.STOP
        elif stop_type_raw == "D":
            stop_type = TrainStopType.LAST
        else:
            stop_type = TrainStopType.CANCELLED

        # A cancelled flag overrides whatever type was reported.
        if stop_data["cancelled"]:
            stop_type = TrainStopType.CANCELLED

        return cls(
            station=station,
            stop_type=stop_type,
            platform_expected=stop_data.get("platform", None),
            platform_actual=None,
            arrival_expected=_hhmmss_to_dt(stop_data.get("arr_time")),
            arrival_actual=_hhmmss_to_dt(
                stop_data["actual_data"].get("arr_actual_time")
            ),
            departure_expected=_hhmmss_to_dt(stop_data.get("dep_time")),
            departure_actual=_hhmmss_to_dt(
                stop_data["actual_data"].get("dep_actual_time")
            ),
        )

    def __repr__(self) -> str:
        """Return a human-readable '@ (type) station times [platform]' string."""
        ret = f"@ ({self.stop_type.value}) {self.station.name} "
        if self.stop_type == TrainStopType.FIRST:
            ret += f"{self.departure}"
        elif self.stop_type == TrainStopType.LAST:
            ret += f"{self.arrival}"
        else:
            ret += f"{self.arrival} --> {self.departure}"

        platform_exp: str = self.platform_expected if self.platform_expected else "?"

        if self.platform_actual:
            return ret + f" [{platform_exp} ~ {self.platform_actual}]"
        else:
            return ret + f" [{platform_exp}]"
TrainStopType.LAST: 285 | ret += f"{self.arrival}" 286 | else: 287 | ret += f"{self.arrival} --> {self.departure}" 288 | 289 | platform_exp: str = self.platform_expected if self.platform_expected else "?" 290 | 291 | if self.platform_actual: 292 | return ret + f" [{platform_exp} ~ {self.platform_actual}]" 293 | else: 294 | return ret + f" [{platform_exp}]" 295 | -------------------------------------------------------------------------------- /docs/Proposta tirocinio.tex: -------------------------------------------------------------------------------- 1 | \documentclass[italian,11pt,a4paper,final]{article} 2 | \usepackage[a4paper, 3 | bindingoffset=0.2in, 4 | left=1in, 5 | right=1in, 6 | top=1in, 7 | bottom=1in, 8 | footskip=.25in]{geometry} 9 | \usepackage[utf8]{inputenc} 10 | \usepackage[T1]{fontenc} 11 | \usepackage{hyperref} 12 | \usepackage{babel} 13 | \date{2 marzo 2023} 14 | 15 | \newcommand{\hochkomma}{$^{,\,}$} 16 | 17 | \author{Marco Aceti} 18 | \title{ 19 | Open Data e trasporto ferroviario \\ 20 | \textit{\small{Proposta di tirocinio interno}} 21 | } 22 | 23 | \begin{document} 24 | \maketitle 25 | 26 | \begin{abstract} 27 | In Italia non esistono Open Data sulle performance del trasporto pubblico ferroviario: le metriche definite nei contratti di servizio tra gli enti locali committenti e le imprese ferroviarie sono insufficienti e spesso inaccessibili. 28 | La proposta di tirocinio si articola sull'idea di preservare i dati istantanei della circolazione ferroviaria dalla piattaforma ViaggiaTreno per produrre Open Data storici, \textit{machine-readable} e di qualità. 29 | Infine, si propone un'analisi dei dati raccolti a fini statistici e di verifica. 30 | \end{abstract} 31 | 32 | \section{Stato dell'arte} 33 | In Italia, il servizio di trasporto pubblico è operato da aziende\footnote{\url{https://it.wikipedia.org/wiki/Aziende_di_trasporto_pubblico_italiane}} private o partecipate. 
34 | Sul territorio nazionale sono autorizzate\footnote{\url{https://www.mit.gov.it/documentazione/elenco-imprese-ferroviarie-titolari-di-licenza-1}} una ventina di \textit{Imprese Ferroviarie} (IF) adibite al trasporto passeggeri aventi in essere numerosi \textit{Contratti di Servizio} (CdS) con gli enti locali (tipicamente le Regioni). 35 | La qualità del servizio è misurata da \textbf{metriche di performance} stabilite nei CdS e comunicate agli enti dalle IF. 36 | 37 | \subsection{Esempio: il servizio ferroviario lombardo} 38 | In Lombardia, Trenord S.r.l.\ definisce\footnote{\url{https://www.regione.lombardia.it/wps/wcm/connect/7144d5b9-7e3c-4e44-82ad-30a1652e2642/Contratto+Trenord+con+firme.pdf} -- Allegato 11} un \textit{indice di puntualità entro i 5 minuti} che considera il \textit{``numero di corse circolanti giunte puntuali o con ritardo fino a 5 minuti''}, ma esclude i \textit{``ritardi maturati per cause esterne''} o \textit{``per lavori''}. 39 | La Regione pubblica mensilmente un rapporto sulla puntualità dei treni\footnote{\url{https://www.regione.lombardia.it/wps/wcm/connect/4eae62eb-dfcf-4446-82ea-72dbfdfb2c4a/Puntualit\%C3\%A0.pdf}} in formato PDF, ma con diverse criticità: 40 | \begin{itemize} 41 | \item vengono considerati solo i ritardi in arrivo alla destinazione finale, escludendo quindi le stazioni intermedie; 42 | \item i dati forniti non sono granulari ma \textit{brutalmente} aggregati per mese; 43 | \item sono escluse le \textit{cause esterne} e le \textit{circostanze occasionali}: gli indici di puntualità effettivi non sono pubblicati; 44 | \item i rapporti non rispettano neanche una \textit{stella} dei livelli definiti da Tim Berners-Lee per valutare gli Open Data: non è nemmeno presente una licenza d'uso. 
45 | \end{itemize} 46 | 47 | C'è da considerare inoltre che Trenord (società tra l'altro partecipata al 50\% da Regione Lombardia stessa) comunica al committente gli indici già calcolati, senza che quest'ultimo abbia modo di verificarli. 48 | 49 | Infine, non tutti gli enti committenti pubblicano rapporti sulla qualità del servizio: per esempio, la Regione Campania prevede nel suo CdS\footnote{ 50 | \url{https://www.regione.campania.it/assets/documents/contratto-di-servizio-tpl-ferro.pdf} \\ 51 | sez.\ \textit{``Penali e forme di mitigazione delle stesse''} -- Allegato 7 52 | } con Trenitalia S.p.A.\ la fornitura di indici simili per il calcolo di penali e mitigazioni, 53 | ma non è reperibile nessun documento che li attesti. \\ 54 | 55 | \subsection{Open Data storici} 56 | In conclusione, non esistono attualmente Open Data {storici}, completi, strutturati e \textit{machine-readable} sul servizio di trasporto ferroviario in Italia. 57 | Gli indici di puntualità (e affidabilità) definiti nei CdS possono essere utili agli enti committenti per calcolare penali o comparare offerte di mercato, ma i Cittadini Digitali meritano una \textbf{maggiore trasparenza} per poter verificare autonomamente lo stato reale del \textit{Sistema Ferrovia}. 58 | 59 | \section{Rilevazioni istantanee} 60 | Nella sezione precedente si è discusso di \textbf{dati storici}; la situazione è molto più rosea per i \textbf{dati in tempo reale}. 61 | Esistono innumerevoli siti web e applicazioni, ufficiali e non, che mostrano lo stato attuale di un treno in viaggio. 62 | L'app \textit{Orario Treni}\footnote{\url{https://www.orariotreniapp.it/}} di Paolo Conte, per esempio, presenta con un'interfaccia molto semplice e intuitiva la possibilità di cercare treni per itinerario e numero, visualizzare arrivi e partenze di una stazione e consultare l'\textit{andamento istantaneo} di un treno. 
63 | Quest'ultimo è composto da informazioni come gli orari programmati ed \textit{effettivi} di partenza e arrivo ad ogni fermata intermedia, ritardo cumulato fino a quel momento e luogo di ultimo rilevamento (non necessariamente corrispondente ad una fermata). \\ 64 | 65 | L'idea fondante della proposta in oggetto è sfruttare la ghiotta quantità di dati offerta dalle rilevazioni istantanee nel corso del tempo per produrre Open Data storici. 66 | 67 | \subsection{ViaggiaTreno} 68 | Il Gruppo Ferrovie dello Stato Italiane (\textit{holding} di diverse società\footnote{\url{https://it.wikipedia.org/wiki/Ferrovie_dello_Stato_Italiane}} come Trenitalia, RFI, ANAS, ...) permette ai viaggiatori di trovare soluzioni di viaggio e visualizzare l'andamento di una corsa tramite la piattaforma web ViaggiaTreno\footnote{\url{http://www.viaggiatreno.it/infomobilita/index.jsp}}, similmente all'app \textit{Orario Treni}. 69 | Si può infatti speculare che quest'ultima utilizzi proprio ViaggiaTreno come fonte dei dati. 70 | 71 | \subsubsection{API} 72 | Il \textit{motore} dell'interfaccia web di ViaggiaTreno è un insieme di API ``REST'' non ufficialmente documentate e di scarsa qualità\footnote{\url{https://medium.com/@albigiu/trenitalia-shock-non-crederete-mai-a-queste-api-painful-14433096502c}}. 73 | In rete sono presenti diversi tentativi di documentazione, mantenuti dalla community open source\footnote{\url{https://github.com/sabas/trenitalia}}\hochkomma\footnote{\url{https://github.com/roughconsensusandrunningcode/TrainMonitor/wiki/API-del-sistema-Viaggiatreno}}\hochkomma\footnote{\url{https://github.com/Razorphyn/Informazioni-Treni-Italiani}}. 74 | 75 | \subsubsection{Copyright e licenza d'uso} 76 | Le \textit{note legali} riportate sul portale ViaggiaTreno sono abbastanza aggressive. 77 | \begin{quote} 78 | \textit{I contenuti, la grafica e le immagini sono soggetti a Copyright. 
\textbf{Ogni diritto sui contenuti} (a titolo esemplificativo e non esaustivo: l’architettura del servizio, i testi, le immagini grafiche e fotografiche, ecc.) \textbf{è riservato ai sensi della normativa vigente}. I contenuti di ViaggiaTreno non possono, neppure in parte, essere copiati, riprodotti, trasferiti, caricati, pubblicati o distribuiti in qualsiasi modo senza il preventivo consenso scritto della società Trenitalia S.p.A.. È possibile scaricare i contenuti nel proprio computer e/o stampare estratti \textbf{unicamente per utilizzo personale} di carattere informativo. \textbf{Qualsiasi forma di link al sito www.ViaggiaTreno.it deve essere preventivamente autorizzata}\footnote{L'autore di questo documento si dichiara reo del \textit{reato di linking non autorizzato}} e non deve recare danno all'immagine e alle attività di Trenitalia S.p.A.. è vietato il c.d.\ deep linking ossia l'utilizzo, su siti di soggetti terzi, di parti del Servizio Internet o, comunque, il collegamento diretto alle pagine senza passare per la home page del Servizio Internet. \textbf{L'eventuale inosservanza delle presenti disposizioni}, salvo esplicita autorizzazione scritta, \textbf{sarà perseguita} nelle competenti sedi giudiziarie civili e penali.} 79 | \end{quote} 80 | Il Gruppo Ferrovie dello Stato Italiane vieta formalmente ai soggetti non autorizzati l'utilizzo di ViaggiaTreno per fini diversi dal mero uso personale, riservando tutti i diritti sui contenuti. 81 | Nel 2019, l'applicazione Trenìt!\ è stata costretta\footnote{\url{https://www.startmag.it/smartcity/perche-trenitalia-ha-tamponato-lapp-trenit-per-il-momento/}} a interrompere il servizio in seguito a un processo giudiziario iniziato da Trenitalia, che contestava il riutilizzo dei dati sulla circolazione ferroviaria presenti su ViaggiaTreno. 
82 | Il giudice nella sua sentenza\footnote{\url{https://www.startmag.it/innovazione/trenit-trenitalia/}} ha invece stabilito che \textit{``la banca dati degli orari dei treni e i prezzi di questi, non è protetta da diritto d’autore''} e quindi Trenìt!\ li può utilizzare. \\ 83 | 84 | Ritengo quindi che non ci siano reali limiti legali nell'utilizzo della piattaforma ViaggiaTreno e in particolare delle sue API per i fini della proposta in oggetto. 85 | 86 | \subsection{Avvisi Trenord sulla circolazione} 87 | 88 | Trenord, oltre alla tracciabilità dei suoi treni in ViaggiaTreno, offre anche un servizio di avviso delle criticità di tutte le linee (simile all'InfoMobilità di Trenitalia). 89 | Gli avvisi sono rilasciati da esseri umani, ma hanno un formato simile. Di seguito ne sono riportati alcuni della linea \textit{Verona-Brescia-Milano}\footnote{\url{https://www.trenord.it/linee-e-orari/circolazione/le-nostre-linee/brescia-treviglio-milano/?code=R4}}. 90 | 91 | \begin{quote} 92 | \textbf{Criticità} --- 01/03/2023 06:24 93 | 94 | \texttt{Aggiornamento: 95 | Il treno 10913 (MILANO GRECO PIRELLI 05:52 - BRESCIA 07:12) sta viaggiando con un ritardo di 30 minuti perché è stato necessario prolungare i controlli tecnici che precedono la partenza del treno.} 96 | \end{quote} 97 | 98 | \begin{quote} 99 | \textbf{Criticità} --- 01/03/2023 10:07\nopagebreak 100 | 101 | \texttt{Il treno 2624 (VERONA PORTA NUOVA 09:43 - MILANO CENTRALE 11:35) viaggia con 12 minuti di ritardo in seguito alla sosta prolungata di un altro treno della linea.} 102 | \end{quote} 103 | 104 | \section{Proposta operativa} 105 | La proposta si articola in tre fasi. 106 | 107 | \subsection{Indagine esplorativa} 108 | Come concordato a voce nello scorso colloquio, in questa fase potrei indagare più a fondo sullo stato degli Open Data nel trasporto ferroviario in Italia e negli altri Paesi europei. 109 | Progetti simili potrebbero influenzare positivamente scelte come la granularità e il formato dei dati. 
Al fine di avere dati significativi nella fase successiva, è importante iniziare il prima possibile l'attività di raccolta dati.
129 | 130 | \end{document} 131 | -------------------------------------------------------------------------------- /src/analysis/trajectories_map.py: -------------------------------------------------------------------------------- 1 | # railway-opendata: scrape and analyze italian railway data 2 | # Copyright (C) 2023 Marco Aceti 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | 17 | 18 | import itertools 19 | import logging 20 | import pathlib 21 | import typing as t 22 | import webbrowser 23 | from collections import defaultdict 24 | from datetime import datetime, timedelta 25 | from tempfile import NamedTemporaryFile 26 | 27 | import folium 28 | import folium.plugins 29 | import numpy as np 30 | import pandas as pd 31 | from branca.colormap import LinearColormap 32 | from branca.element import MacroElement, Template 33 | from colour import Color 34 | from joblib import Parallel, delayed 35 | 36 | # The 'length' (in minutes) of a frame 37 | WINDOW_SIZE: int = 2 38 | assert WINDOW_SIZE > 0 39 | 40 | # Minimum line weight 41 | MIN_WEIGHT: int = 4 42 | assert MIN_WEIGHT > 0 43 | 44 | # Safe values used in sanity checks 45 | MIN_YEAR: int = datetime.now().year - 50 46 | MAX_YEAR: int = datetime.now().year + 10 47 | 48 | # Folium map initialization arguments 49 | MAP_KWARGS: dict = { 50 | "location": (41.890, 12.492), 51 | "zoom_start": 7, 52 | "attr": "OSM", 53 | 
# Assets path (marker icons)
ASSETS_PATH = pathlib.Path("./src/analysis/assets/").resolve()

# Delay color range: (lower_bound, color)
_color_map: list[tuple[float, Color]] = [
    (-5, Color("#34ebc0")),
    (0, Color("green")),
    (10, Color("orange")),
    (30, Color("red")),
    (120, Color("black")),
]

# Statically populate the COLORS dict: every integer minute between two
# consecutive bounds gets an interpolated shade; unknown keys fall back
# to gray via the defaultdict factory.
COLORS: dict[int | float, Color] = defaultdict(lambda: Color("gray"))
for (low_bound, low_color), (high_bound, high_color) in zip(_color_map, _color_map[1:]):
    minute_range = range(round(low_bound), round(high_bound) + 1)
    gradient = low_color.range_to(high_color, len(minute_range))
    for minute, shade in zip(minute_range, gradient):
        COLORS[minute] = shade


def fill_time(start: datetime, end: datetime) -> t.Generator[datetime, None, None]:
    """Generate a consecutive list of times between the 'start' and 'end' period.

    Args:
        start (datetime): start time
        end (datetime): end time

    Returns:
        Generator[datetime, None, None]: the generated datetimes,
        WINDOW_SIZE minutes apart (both endpoints included)
    """
    step = timedelta(minutes=WINDOW_SIZE)

    # Degenerate interval: widen it by one window so that
    # at least two timestamps are produced.
    if start == end:
        start -= step

    current = start
    while current <= end:
        yield current
        current += step


def icon_marker(railway_company: str, category: str) -> str:
    """Select a proper marker (from the src/analysis/assets/markers/ directory)
    by railway_company and category.

    Args:
        railway_company (str): a railway company
        category (str): a category

    Returns:
        str: filename of the proper marker
    """
    # Normalize: metropolitan trains use the regional marker,
    # "EC FR" is displayed as a plain Eurocity.
    normalized = category.replace("MET", "REG").replace("EC FR", "EC")
    company = railway_company.lower()

    trenitalia_known = ("EC", "FA", "FB", "FR", "IC", "ICN", "REG")
    if company.startswith("trenitalia") and normalized in trenitalia_known:
        return f"trenitalia_{normalized.lower()}.svg"

    if company in ("trenord", "tper") and normalized == "REG":
        return f"{company}_reg.svg"

    if company == "obb" and normalized == "EC":
        return "obb_ec.svg"

    return "other.svg"
@delayed
def train_stop_geojson(st: pd.DataFrame, train: pd.DataFrame) -> list[dict]:
    """Generate a list of GeoJSON formatted data for train stops.

    Args:
        st (pd.DataFrame): global station data
        train (pd.DataFrame): the train stop data

    Returns:
        list[dict]: GeoJSON formatted dictionaries representing
        the train _geographic trajectory_.
    """

    def _locate(station_code) -> pd.Series:
        """Return the first station row matching station_code that has
        coordinates; raises IndexError when there is none."""
        return st.loc[
            (st.index == station_code) & ~st.latitude.isna() & ~st.longitude.isna()
        ].iloc[0]

    features: list[dict] = []
    train = train.sort_values(by="stop_number")

    # Walk the ordered stops pairwise: each pair is one trajectory segment.
    for idx in range(1, len(train)):
        prev_stop = train.iloc[idx - 1]
        curr_stop = train.iloc[idx]

        try:
            origin = _locate(prev_stop.stop_station_code)
            destination = _locate(curr_stop.stop_station_code)
        except IndexError:
            # The station location can't be retrieved
            continue

        segment_start: datetime | None = (
            prev_stop.departure_actual or prev_stop.departure_expected
        )
        segment_end: datetime | None = (
            curr_stop.arrival_actual or curr_stop.arrival_expected
        )
        delay: float = (
            round(prev_stop.departure_delay)
            if not np.isnan(prev_stop.departure_delay)
            else np.nan
        )

        # Sanity check: _time must be not null
        if not segment_start or not segment_end:
            continue

        # Sanity check: a train should arrive in a given station after
        # it departs from the previous one
        if not segment_end >= segment_start:
            continue

        # Sanity check: sometimes the API returns insane year values
        if segment_end.year > MAX_YEAR or segment_start.year < MIN_YEAR:
            continue

        # Tooltip pop up display.
        # NOTE(review): the arrival figure next to the destination reads
        # prev_stop.arrival_delay — possibly curr_stop was intended;
        # preserved as-is. TODO confirm.
        tooltip: str = (
            f"{curr_stop.client_code}{curr_stop.category} {curr_stop.number}"
            f"{origin.long_name} "
            f"{f'({round(prev_stop.departure_delay, 1):+g} min)' if not np.isnan(prev_stop.departure_delay) else ''}"
            f" → "
            f"{destination.long_name} "
            f"{f' ({round(prev_stop.arrival_delay, 1):+g} min)' if not np.isnan(prev_stop.arrival_delay) else ''}"
        )

        # Hoist loop invariants: color and weight do not change per frame.
        segment_color = COLORS[delay].get_hex()
        segment_weight = (
            int(curr_stop.crowding / 10)
            if not np.isnan(curr_stop.crowding) and curr_stop.crowding > MIN_WEIGHT * 10
            else MIN_WEIGHT
        )
        marker_url = str(
            ASSETS_PATH
            / "markers"
            / icon_marker(curr_stop.client_code, curr_stop.category)
        )

        for timestamp in fill_time(segment_start, segment_end):
            frame_time = timestamp.isoformat()
            features.append(
                {
                    "type": "Feature",
                    "geometry": {
                        "type": "LineString",
                        "coordinates": [
                            (origin.longitude, origin.latitude),
                            (destination.longitude, destination.latitude),
                        ],
                    },
                    "properties": {
                        "times": [frame_time] * 2,
                        "style": {
                            "color": segment_color,
                            "weight": segment_weight,
                        },
                        "tooltip": tooltip,
                    },
                }
            )
            features.append(
                {
                    "type": "Feature",
                    "geometry": {
                        "type": "Point",
                        "coordinates": (destination.longitude, destination.latitude),
                    },
                    "properties": {
                        "icon": "marker",
                        "iconstyle": {
                            "iconUrl": marker_url,
                            "iconSize": [24, 24],
                            "fillOpacity": 1,
                        },
                        "tooltip": tooltip,
                        "name": "",
                        "times": [frame_time],
                    },
                }
            )

    return features
class StatsChart(MacroElement):
    """Helper class to compute and embed the train count chart."""

    def __init__(self, df: pd.DataFrame, *args, **kwargs):
        """Initialize a new object.

        Args:
            df (pd.DataFrame): the train stop data
        """
        super().__init__(*args, **kwargs)

        # Prepare dataset: one row per train with its overall departure
        # time, arrival time and mean delay.
        trains = df.groupby("train_hash")
        self.data = pd.DataFrame(index=df.train_hash.unique())
        self.data["departure"] = trains.first()["departure_actual"].fillna(
            trains.first()["departure_expected"]
        )
        # Bug fix: fall back on the *last* stop's expected arrival.
        # First stops have no arrival data at all (see TrainStop), so the
        # previous fillna with trains.first()["arrival_expected"] was
        # effectively a no-op and left NaN arrivals in place.
        self.data["arrival"] = trains.last()["arrival_actual"].fillna(
            trains.last()["arrival_expected"]
        )
        self.data["delay"] = trains.mean(numeric_only=True)["departure_delay"].fillna(
            trains.mean(numeric_only=True)["arrival_delay"]
        )

    def get_train_count_data(self) -> list[dict[str, str | int]]:
        """Return circulating train count in a JS-likable format."""
        ret: list[dict[str, str | int]] = []
        for time in fill_time(self.data.departure.min(), self.data.arrival.max()):
            # A train is "circulating" when `time` falls inside its
            # departure-arrival interval.
            subset: pd.DataFrame = self.data.loc[
                (time >= self.data.departure) & (time <= self.data.arrival)
            ]
            ret.append(
                {
                    "x": time.isoformat(),
                    "y": len(subset),
                }
            )
        return ret

    def get_delays_data(self) -> list[dict[str, str | float]]:
        """Return the mean delay of circulating trains in a JS-likable format.

        Windows with 20 or fewer circulating trains yield the string "NaN"
        so the chart skips statistically weak points.
        """
        ret: list[dict[str, str | float]] = []
        for time in fill_time(self.data.departure.min(), self.data.arrival.max()):
            subset: pd.DataFrame = self.data.loc[
                (time >= self.data.departure) & (time <= self.data.arrival)
            ]
            ret.append(
                {
                    "x": time.isoformat(),
                    "y": subset.delay.mean() if len(subset) > 20 else "NaN",
                }
            )
        return ret


class MarkerLegend(MacroElement):
    """Helper class to embed the marker legend"""

    @staticmethod
    def get_markers_path() -> str:
        """Return the absolute path of assets"""
        return str(ASSETS_PATH / "markers")


def build_map(st: pd.DataFrame, df: pd.DataFrame) -> None:
    """Build a Folium map with train trajectories,
    and open it with a web browser.

    Args:
        st (pd.DataFrame): global station data
        df (pd.DataFrame): the train stop data
    """
    m = folium.Map(**MAP_KWARGS)

    # Drop cancelled stops and trains
    df = df.loc[(df.stop_type != "C") & (df.cancelled == False)].copy()

    logging.info("Generating GeoJSON features...")
    features = Parallel(n_jobs=-1, verbose=5)(
        train_stop_geojson(st, train_df) for _, train_df in df.groupby("train_hash")
    )

    # Add TimestampedGeoJson plugin
    folium.plugins.TimestampedGeoJson(
        {
            "type": "FeatureCollection",
            "features": list(itertools.chain(*features)),  # type: ignore
        },
        add_last_point=False,
        period=f"PT{WINDOW_SIZE}M",
        duration=f"PT{WINDOW_SIZE}M",
    ).add_to(m)

    # Add delay legend
    LinearColormap(
        colors=list(map(lambda c: c.get_rgb(), COLORS.values())),
        index=COLORS.keys(),
        vmin=min(COLORS.keys()),
        vmax=min(60, max(COLORS.keys())),
        max_labels=50,
        tick_labels=list(range(-5, 61, 5)),
        caption="Departure delay",
    ).add_to(m)

    # Add marker legend. f.read() preserves the template verbatim: the
    # previous "\n".join(f.readlines()) doubled every newline, because
    # readlines() keeps the line terminators.
    legend = MarkerLegend()
    with open(ASSETS_PATH / "templates" / "marker_legend.html", "r") as f:
        legend._template = Template(f.read())
    m.get_root().add_child(legend)

    # Add train count chart
    macro = StatsChart(df)
    with open(ASSETS_PATH / "templates" / "stats_chart.html", "r") as f:
        macro._template = Template(f.read())
    m.get_root().add_child(macro)

    # Save the map to a temporary file and open it with a web browser
    outfile = NamedTemporaryFile(delete=False, suffix=".html")
    m.save(outfile.file)

    webbrowser.open(outfile.name)