├── .codecov.yml
├── .dockerignore
├── .github
└── workflows
│ └── main.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── conda-linux-64.lock
├── conda-osx-64.lock
├── conda-win-64.lock
├── conf
├── insight-lane.conf
├── start.sh
└── supervisord.conf
├── docs
├── MassDOT RoadInvDictionary.pdf
└── model_data_dictionary.md
├── environment_linux.yml
├── environment_mac.yml
├── environment_pc.yml
├── models
└── .gitkeep
├── notebooks
├── .gitkeep
├── benchmark
│ ├── crash_predict_benchmark.ipynb
│ └── crash_predict_rnn.ipynb
├── clustering_segment_level.ipynb
├── general_analysis.ipynb
├── intervention_effect.ipynb
├── python2
│ ├── TMC_analysis.ipynb
│ ├── crash_predict_car.ipynb
│ ├── data_explore_12_14_crashes_road_details.ipynb
│ ├── make_shapefiles_for_tests.ipynb
│ ├── open_street_map.ipynb
│ └── tutorial.ipynb
├── route_level_risk_distribute.ipynb
└── vision_zero_analysis.ipynb
├── references
└── .gitkeep
├── requirements.txt
├── src
├── .coveragerc
├── .pylintrc
├── README.md
├── config
│ ├── config_ada_county.yml
│ ├── config_boise.yml
│ ├── config_boston.yml
│ ├── config_brisbane.yml
│ ├── config_buffalo.yml
│ ├── config_cambridge.yml
│ ├── config_chicago.yml
│ ├── config_dc.yml
│ ├── config_losangeles.yml
│ ├── config_melbourne.yml
│ ├── config_meridian.yml
│ ├── config_nyc.yml
│ ├── config_philly.yml
│ ├── config_pittsburgh.yml
│ └── config_somerville.yml
├── data
│ ├── .gitkeep
│ ├── README.md
│ ├── TMC_scraping
│ │ ├── README.md
│ │ ├── __init__.py
│ │ └── parse_tmc.py
│ ├── __init__.py
│ ├── add_map.py
│ ├── add_waze_data.py
│ ├── analysis_util.py
│ ├── config.py
│ ├── create_segments.py
│ ├── extract_intersections.py
│ ├── geocoding_util.py
│ ├── join_segments_crash.py
│ ├── make_dataset.py
│ ├── make_preds_viz.py
│ ├── osm_create_maps.py
│ ├── propagate_volume.py
│ ├── record.py
│ ├── see_click_fix
│ │ └── seeclickfix.py
│ ├── segment.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── data
│ │ │ ├── bad_intersection_test.geojson
│ │ │ ├── concern_joined.json
│ │ │ ├── concern_test_dummy.json
│ │ │ ├── config_brisbane_no_supplemental.yml
│ │ │ ├── config_brisbane_supplemental.yml
│ │ │ ├── config_features.yml
│ │ │ ├── crash_test_dummy.json
│ │ │ ├── missing_segments_test.geojson
│ │ │ ├── osm_crash_file.json
│ │ │ ├── osm_output.gpickle
│ │ │ ├── processed
│ │ │ │ ├── crash_joined.json
│ │ │ │ └── maps
│ │ │ │ │ ├── boston_test_elements.geojson
│ │ │ │ │ ├── inters.geojson
│ │ │ │ │ ├── non_inters_segments.geojson
│ │ │ │ │ ├── osm.gpkg
│ │ │ │ │ ├── osm_elements.geojson
│ │ │ │ │ ├── test_line_convert.cpg
│ │ │ │ │ ├── test_line_convert.dbf
│ │ │ │ │ ├── test_line_convert.shp
│ │ │ │ │ └── test_line_convert.shx
│ │ │ ├── raw
│ │ │ │ ├── ma_cob_spatially_joined_streets.cpg
│ │ │ │ ├── ma_cob_spatially_joined_streets.dbf
│ │ │ │ ├── ma_cob_spatially_joined_streets.prj
│ │ │ │ ├── ma_cob_spatially_joined_streets.shp
│ │ │ │ └── ma_cob_spatially_joined_streets.shx
│ │ │ ├── standardized
│ │ │ │ ├── Vision_Zero_Entry.csv
│ │ │ │ ├── concerns.json
│ │ │ │ ├── crashes.csv
│ │ │ │ └── crashes.json
│ │ │ ├── test_add_map
│ │ │ │ ├── ma_cob_small.cpg
│ │ │ │ ├── ma_cob_small.dbf
│ │ │ │ ├── ma_cob_small.shp
│ │ │ │ ├── ma_cob_small.shx
│ │ │ │ ├── osm3857.cpg
│ │ │ │ ├── osm3857.dbf
│ │ │ │ ├── osm3857.geojson
│ │ │ │ ├── osm3857.prj
│ │ │ │ ├── osm3857.shp
│ │ │ │ └── osm3857.shx
│ │ │ ├── test_create_segments
│ │ │ │ ├── additional_points.json
│ │ │ │ ├── empty_set_inter.geojson
│ │ │ │ ├── missing_int_segments.geojson
│ │ │ │ ├── no_non_inter.geojson
│ │ │ │ ├── points.geojson
│ │ │ │ ├── points_test.json
│ │ │ │ ├── test_adjacency.geojson
│ │ │ │ ├── test_get_connections1.geojson
│ │ │ │ ├── test_get_connections2.geojson
│ │ │ │ ├── test_linestring.geojson
│ │ │ │ └── unconnected.geojson
│ │ │ ├── test_get_roads_and_inters.geojson
│ │ │ ├── test_waze
│ │ │ │ ├── osm_elements.geojson
│ │ │ │ └── test_waze.json
│ │ │ └── viz_preds_tests
│ │ │ │ ├── crashes_rollup.geojson
│ │ │ │ ├── crashes_rollup_pedestrian.geojson
│ │ │ │ ├── single_prediction.json
│ │ │ │ ├── single_prediction_viz.geojson
│ │ │ │ └── single_segment.geojson
│ │ ├── test_add_map.py
│ │ ├── test_add_waze_data.py
│ │ ├── test_all.py
│ │ ├── test_analysis_util.py
│ │ ├── test_config.py
│ │ ├── test_create_segments.py
│ │ ├── test_extract_intersections.py
│ │ ├── test_initialize_city.py
│ │ ├── test_join_segments_crash.py
│ │ ├── test_make_preds_viz.py
│ │ ├── test_osm_create_maps.py
│ │ ├── test_pipeline.py
│ │ └── test_util.py
│ ├── util.py
│ └── weather
│ │ ├── BostonWeather2016_Wunderground.Rda
│ │ ├── BostonWeather2016_Wunderground.csv
│ │ ├── README.md
│ │ └── weatherScrapingScript.R
├── data_standardization
│ ├── __init__.py
│ ├── boston_volume.py
│ ├── standardization_util.py
│ ├── standardize_crashes.py
│ ├── standardize_point_data.py
│ ├── standardize_volume.py
│ ├── standardize_waze_data.py
│ └── tests
│ │ ├── __init__.py
│ │ ├── data
│ │ ├── 8811_NA_NA_83_PEARL-ST_CHARLESTOWN_24-HOURS_XXX_01-11-2017.XLSX
│ │ └── waze
│ │ │ ├── 2018-10-15-20-15.json.gz
│ │ │ ├── 2018-10-16-08-00.json
│ │ │ └── 2018-10-17-16-15.json.gz
│ │ ├── test-schema.json
│ │ ├── test_boston_volume.py
│ │ ├── test_standardization_util.py
│ │ ├── test_standardize_crashes.py
│ │ ├── test_standardize_point_data.py
│ │ └── test_standardize_waze_data.py
├── features
│ ├── .gitkeep
│ ├── __init__.py
│ ├── build_features.py
│ ├── make_canon_dataset.py
│ └── tests
│ │ ├── __init__.py
│ │ ├── data
│ │ └── processed
│ │ │ ├── concern_joined.json
│ │ │ ├── crash_joined.json
│ │ │ ├── inters_data.json
│ │ │ └── maps
│ │ │ └── inter_and_non_int.geojson
│ │ └── test_make_canon.py
├── initialize_city.py
├── models
│ ├── .gitkeep
│ ├── __init__.py
│ ├── make_weekly.py
│ ├── model_classes.py
│ ├── old_train.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── data
│ │ │ ├── data_model.csv
│ │ │ └── features.yml
│ │ └── test_train_model.py
│ └── train_model.py
├── pipeline.py
├── showcase
│ ├── .dockerignore
│ ├── .gcloudignore
│ ├── Dockerfile
│ ├── Dockerfile.gcp
│ ├── __init__.py
│ ├── app.py
│ ├── cloudbuild.yaml
│ ├── nginx.conf
│ ├── run_all_cities.py
│ ├── static
│ │ ├── config.js
│ │ ├── gcp_config.js
│ │ ├── make_plots.js
│ │ ├── style.css
│ │ └── update_map.js
│ └── templates
│ │ └── index.html
├── tools
│ ├── __init__.py
│ ├── geocode_batch.py
│ ├── make_map_subset.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── data
│ │ │ ├── geocoded_addresses.csv
│ │ │ ├── make_map_multilinestring.geojson
│ │ │ ├── test_make_map.geojson
│ │ │ └── to_geocode.csv
│ │ ├── test_geocode_batch.py
│ │ ├── test_make_map_subset.py
│ │ └── test_update_config.py
│ ├── update_configs.py
│ └── waze_feed.py
└── visualization
│ ├── .gitkeep
│ ├── README.md
│ ├── __init__.py
│ ├── plot_points.py
│ ├── risk_map.py
│ ├── tests
│ ├── __init__.py
│ ├── data
│ │ ├── single_segment.geojson
│ │ └── test_prediction.csv
│ └── test_visualization.py
│ └── visualize.py
├── standards
├── concerns-instance.json
├── concerns-schema.json
├── crashes-instance.json
├── crashes-schema.json
├── points-instance.json
├── points-schema.json
├── volumes-instance.json
└── volumes-schema.json
├── test_environment.py
└── tox.ini
/.codecov.yml:
--------------------------------------------------------------------------------
1 | codecov:
2 | notify:
3 | require_ci_to_pass: yes
4 |
5 | coverage:
6 | precision: 2
7 | round: down
8 | range: "39...100"
9 |
10 | status:
11 | project: yes
12 | patch: no
13 | changes: no
14 |
15 | parsers:
16 | gcov:
17 | branch_detection:
18 | conditional: yes
19 | loop: yes
20 | method: no
21 | macro: no
22 |
23 | comment:
24 | layout: "header, diff"
25 | behavior: default
26 | require_changes: no
27 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | data/
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Python application
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | linux:
14 | env:
15 | PYTHONFAULTHANDLER: "true"
16 | runs-on: ubuntu-latest
17 | defaults:
18 | run:
19 | shell: bash -l {0}
20 | steps:
21 | - uses: actions/checkout@v2
22 | - uses: conda-incubator/setup-miniconda@v2
23 | with:
24 | auto-update-conda: false
25 | # activate-environment: crash-model
26 | # environment-file: conda-linux-64.lock
27 | channels: conda-forge, defaults
28 | channel-priority: strict
29 | - name: Run linux tests
30 | run: |
31 | set -eo pipefail
32 | conda env create --name crash-model -f requirements.txt
33 | conda activate crash-model
34 | cd src
35 | py.test --cov=./ --cov-report=xml
36 | - name: Upload coverage to Codecov
37 | uses: codecov/codecov-action@v1
38 | with:
39 | file: ./src/coverage.xml
40 | flags: unittests
41 | env_vars: OS,PYTHON
42 | name: codecov-umbrella
43 | fail_ci_if_error: true
44 | mac:
45 | env:
46 | PYTHONFAULTHANDLER: "true"
47 | runs-on: macos-11
48 | steps:
49 | - uses: actions/checkout@v2
50 | - uses: conda-incubator/setup-miniconda@v2
51 | with:
52 | auto-update-conda: true
53 | #activate-environment: crash-model
54 | #environment-file: conda-osx-64.lock
55 | #environment-file: environment_mac.yml
56 | channels: conda-forge, defaults
57 | channel-priority: strict
58 | - name: Run mac tests
59 | run: |
60 | set -eo pipefail
61 | . /usr/local/miniconda/etc/profile.d/conda.sh
62 | sudo chown -R $USER $CONDA
63 | conda env create --name crash-model -f requirements.txt
64 | conda activate crash-model
65 | cd src
66 | pytest
67 | # unclear that we need to test on windows
68 | # windows:
69 | # env:
70 | # PYTHONFAULTHANDLER: "true"
71 | # runs-on: windows-latest
72 | # steps:
73 | # - uses: actions/checkout@v2
74 | # - uses: conda-incubator/setup-miniconda@v2
75 | # with:
76 | # auto-update-conda: true
77 | # #activate-environment: crash-model
78 | # #environment-file: conda-win-64.lock
79 | # channels: conda-forge, defaults
80 | # channel-priority: strict
81 | # - name: Run windows tests
82 | # shell: bash -l {0}
83 | # run: |
84 | # set -eo pipefail
85 | # conda env create --name crash-model -f requirements.txt
86 | # cd src
87 | # py.test
88 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ### Python template
2 | # Byte-compiled / optimized / DLL files
3 | __pycache__/
4 | *.py[cod]
5 | *$py.class
6 |
7 | # C extensions
8 | *.so
9 |
10 | # Distribution / packaging
11 | .Python
12 | env/
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *,cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # dotenv
80 | .env
81 |
82 | # virtualenv
83 | .venv/
84 | venv/
85 | ENV/
86 |
87 | # Spyder project settings
88 | .spyderproject
89 |
90 | # Rope project settings
91 | .ropeproject
92 |
93 | # Conda envs
94 | Scripts
95 |
96 | # Data
97 | /data/
98 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda3
2 | # Update conda itself; -y so the update never waits on a confirmation prompt during a non-interactive build
3 | RUN conda update -y -n base conda
4 |
5 | # Set package installer as non-interactive
6 | ENV DEBIAN_FRONTEND=noninteractive
7 |
8 | # Set a terminal type
9 | ENV TERM=xterm-256color
10 |
11 | WORKDIR /app
12 |
13 | # Install packages; the apt package index is removed in the same layer so it is not stored in the image
14 | RUN apt-get update -qq && apt-get install -y --no-install-recommends \
15 | # apache for serving the visualisation
16 | apache2 \
17 | # easier management of services via supervisor
18 | supervisor \
19 | # base anaconda image seems to lack libgl support required for our virtual environment
20 | libgl1-mesa-glx \
21 | # handy text editor
22 | vim \
23 | && rm -rf /var/lib/apt/lists/*
24 |
25 | # Setup apache & supervisor (COPY rather than ADD: these are plain local files, no archive extraction or URL fetch is wanted)
26 | RUN rm -rf /var/www/html && ln -s /app/reports /var/www/html
27 | COPY conf/insight-lane.conf /etc/apache2/sites-available/insight-lane.conf
28 | RUN ln -s /etc/apache2/sites-available/insight-lane.conf /etc/apache2/sites-enabled/insight-lane.conf
29 | RUN a2enmod rewrite
30 | COPY conf/supervisord.conf /etc/supervisord.conf
31 |
32 | # Make sure processes are stopped
33 | RUN service apache2 stop && service supervisor stop
34 |
35 | # Entrypoint script that will kick off supervisor (which in turn starts apache)
36 | COPY conf/start.sh /start.sh
37 | RUN chmod +x /start.sh
38 |
39 | # Setup the project's virtual environment
40 | COPY conda-linux-64.lock /app/conda-linux-64.lock
41 | RUN ["conda", "create", "--name", "crash-model", "--file", "conda-linux-64.lock"]
42 |
43 |
44 | # Use bash for the entrypoint rather than sh, for 'conda activate' compatibility
45 | # NOTE(review): 'bash -c' starts a non-interactive shell, which does not read ~/.bashrc, so the
46 | # 'conda activate' line appended below presumably only applies to interactive shells - confirm
47 | ENTRYPOINT ["/bin/bash", "-c"]
48 |
49 | # Activate the project's virtual environment
50 | RUN echo "conda activate crash-model" >> ~/.bashrc
51 |
52 | # this startup script runs supervisor in foreground (which in turn starts apache) to keep container running
53 | CMD ["/start.sh"]
54 |
55 | # Make the apache port available
56 | EXPOSE 8080
57 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | The MIT License (MIT)
3 | Copyright (c) 2017, terryf82
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 |
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8 |
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10 |
11 |
--------------------------------------------------------------------------------
/conf/insight-lane.conf:
--------------------------------------------------------------------------------
1 | Listen 8080
2 |
3 |
4 | DocumentRoot /var/www/html
5 |
6 |
7 | # Base Apache config
8 | Options -Indexes +FollowSymLinks -MultiViews
9 | AllowOverride all
10 | Require all granted
11 |
12 | RewriteEngine on
13 |
14 | # if a directory or a file exists, use it directly
15 | RewriteCond %{REQUEST_FILENAME} !-f
16 | RewriteCond %{REQUEST_FILENAME} !-d
17 |
18 | # otherwise forward it to index.html
19 | RewriteRule ^ index.html [QSA,L]
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/conf/start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # -e: exit immediately if a command exits with a non-zero status
4 | set -e
5 | echo "starting supervisor in foreground"
6 | # exec replaces this shell with supervisord, so supervisord itself receives the signals sent to this script's process
7 | exec supervisord -c /etc/supervisord.conf -n
8 |
9 | # don't put anything else in this file, it won't run!
10 |
--------------------------------------------------------------------------------
/conf/supervisord.conf:
--------------------------------------------------------------------------------
1 | [supervisord]
2 | http_port=/var/tmp/supervisor.sock ; (default is to run a UNIX domain socket server)
3 |
4 | logfile=/var/log/supervisor/supervisord.log ; (main log file;default $CWD/supervisord.log)
5 | logfile_maxbytes=50MB ; (max main logfile bytes b4 rotation;default 50MB)
6 | logfile_backups=10 ; (num of main logfile rotation backups;default 10)
7 | loglevel=info ; (logging level;default info; others: debug,warn)
8 | pidfile=/var/run/supervisord.pid ; (supervisord pidfile;default supervisord.pid)
9 | nodaemon=false ; (start in foreground if true;default false)
10 | minfds=1024 ; (min. avail startup file descriptors;default 1024)
11 | minprocs=200 ; (min. avail process descriptors;default 200)
12 |
13 | [supervisorctl]
14 | serverurl=unix:///var/tmp/supervisor.sock ; use a unix:// URL for a unix socket
15 |
16 | [program:apache2]
17 | command=apachectl -e info -DFOREGROUND
18 | autostart=true
19 | autorestart=true
20 | user=root
21 |
--------------------------------------------------------------------------------
/docs/MassDOT RoadInvDictionary.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/docs/MassDOT RoadInvDictionary.pdf
--------------------------------------------------------------------------------
/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/models/.gitkeep
--------------------------------------------------------------------------------
/notebooks/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/notebooks/.gitkeep
--------------------------------------------------------------------------------
/notebooks/python2/tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Before you get started, you need to get an account for data.world, and download the data/ directory. That directory goes in the boston-crash-modeling directory."
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 4,
13 | "metadata": {
14 | "collapsed": true
15 | },
16 | "outputs": [],
17 | "source": [
18 | "PROCESSED_DATA_FP = '../osm-data/processed/'"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 5,
24 | "metadata": {
25 | "collapsed": true
26 | },
27 | "outputs": [],
28 | "source": [
29 | "import json\n",
30 | "import os\n",
31 | "BASE_DIR = os.path.dirname(os.getcwd())\n",
32 | "os.chdir(BASE_DIR + '/src/data/')\n",
33 | "import util\n",
34 | "os.chdir(BASE_DIR + '/notebooks/')"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "In data/processed/ there is a file called crash_joined.json. This is a list of dicts containing each crash event and which road segment (intersection or non-intersection) it is closest to."
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 6,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "name": "stdout",
51 | "output_type": "stream",
52 | "text": [
53 | "7220 crashes found\n",
54 | "{u'vehicles': u\"[{u'category': u'car'}]\", u'summary': u'REPORTED INJURIES (P) (E) (F)', u'persons': u'{}', u'address': u'None', u'id': u'1403832', u'near_id': u'0011059', u'dateOccurred': u'2016-01-01 00:56:45-05:00'}\n",
55 | "3379 unique crash locations found\n",
56 | "\n",
57 | "Highest number of crashes at any one location: [38, 34, 31, 29, 29, 27, 27, 27, 22, 21]\n"
58 | ]
59 | }
60 | ],
61 | "source": [
62 | "items = json.load(open(PROCESSED_DATA_FP + 'crash_joined.json'))\n",
63 | "crash_list, crashes_by_id = util.group_json_by_location(items)\n",
64 | "print str(len(crash_list)) + \" crashes found\"\n",
65 | "\n",
66 | "# Crash_list is a list of dicts about each individual crash\n",
67 | "print crash_list[0]\n",
68 | "\n",
69 | "# Crashes by id is a dict where the key is the segment id. It contains the count of crashes at that intersection\n",
70 | "# Can also contain other information about this location, if called with arg otherfields. In this case, we're\n",
71 | "# Looking at time\n",
72 | "sorted = [value['count'] for key,value in crashes_by_id.items() if key != '']\n",
73 | "sorted.sort(reverse=True)\n",
74 | "print str(len(sorted)) + \" unique crash locations found\\n\"\n",
75 | "\n",
76 | "print \"Highest number of crashes at any one location: \" + str(sorted[0:10])\n",
77 | "worst_id = [key for key, value in crashes_by_id.items() if value['count'] == sorted[0]][0]\n"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 7,
83 | "metadata": {},
84 | "outputs": [
85 | {
86 | "name": "stdout",
87 | "output_type": "stream",
88 | "text": [
89 | "(u'Oxford St & Beacon St, Somerville, MA 02143, USA', 42.3860143, -71.1161841)\n"
90 | ]
91 | }
92 | ],
93 | "source": [
94 | "x = util.geocode_address('Beacon and Oxford, Somerville MA')\n",
95 | "print x"
96 | ]
97 | }
98 | ],
99 | "metadata": {
100 | "kernelspec": {
101 | "display_name": "Python 2",
102 | "language": "python",
103 | "name": "python2"
104 | },
105 | "language_info": {
106 | "codemirror_mode": {
107 | "name": "ipython",
108 | "version": 2
109 | },
110 | "file_extension": ".py",
111 | "mimetype": "text/x-python",
112 | "name": "python",
113 | "nbconvert_exporter": "python",
114 | "pygments_lexer": "ipython2",
115 | "version": "2.7.13"
116 | }
117 | },
118 | "nbformat": 4,
119 | "nbformat_minor": 2
120 | }
121 |
--------------------------------------------------------------------------------
/references/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/references/.gitkeep
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | codecov==2.1.13
2 | fiona==1.9.4
3 | flask==3.0.2
4 | folium==0.16.0
5 | geopandas==0.14.3
6 | geocoder==1.38.1
7 | geojson==3.1.0
8 | matplotlib==3.8.3
9 | numpy==1.26.4
10 | openpyxl==3.1.2
11 | osmnx==1.9.1
12 | pandas==2.2.1
13 | pyproj==3.6.1
14 | pytest==8.0.2
15 | pyyaml==6.0.1
16 | pytest-cov==4.1.0
17 | rtree==1.2.0
18 | ruamel.yaml==0.17.33
19 | scikit-learn==1.4.1.post1
20 | shapely==2.0.3
21 | tzlocal==5.2
22 | xlrd==2.0.1
23 | xgboost==2.0.3
24 | tox==4.14.1
25 | jsonschema==4.21.1
26 | pylint==3.1.0
--------------------------------------------------------------------------------
/src/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit = */tests/*
3 | showcase/*
--------------------------------------------------------------------------------
/src/config/config_ada_county.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Ada County
3 | # City center point latitude & longitude (default geocoded values set)
4 | city_latitude: 43.60764000000006
5 | city_longitude: -116.19339999999994
6 |
7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file
8 | timezone: America/Boise
9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
10 | city_radius: 20
11 | speed_unit: mph
12 |
13 | # By default, maps are created from OSM's polygon data and fall back to radius
14 | # if there is no polygon data, but you can change map_geography
15 | # to 'radius' if preferred
16 | map_geography: polygon
17 |
18 | # The folder under data where this city's data is stored
19 | name: ada_county
20 |
21 | # If given, limit crashes to after startdate and no later than enddate
22 | # Recommended to limit to just a few years for now
23 | startdate: 2015-01-01
24 | enddate: 2019-12-31
25 |
26 | #################################################################
27 | # Configuration for data standardization
28 |
29 | # crash file configurations
30 | crashes_files:
31 | Ada_County_Crashes_2015_2019_filtered.csv:
32 | required:
33 | id: FID
34 | latitude: latitude
35 | longitude: longitude
36 | # If date supplied in single column:
37 | date_complete: accident_date
38 | # If date is separated into year/month/day:
39 | date_year:
40 | date_month:
41 | # Leave date_day empty if not available
42 | date_day:
43 | # If time is available and separate from date:
44 | time: accident_time
45 | # If time specified, time_format is one of:
46 | # default (HH:MM:SS)
47 | # seconds (since midnight)
48 | # military (HHMM)
49 | time_format: default
50 | optional:
51 | summary:
52 | # If the crash file doesn't have a lat/lon, you must give the address field
53 | # and you will need to run the geocode_batch script - see the README
54 | address:
55 | # This section allows you to specify additional features in the crash file
56 | # (split_columns) to go into the training set
57 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle)
58 | # but you can specify other fields in the crash data file.
59 | # See the README for examples
60 | split_columns:
61 | pedestrian:
62 | column_name: crash_type
63 | column_value: pedestrian
64 | bike:
65 | column_name: crash_type
66 | column_value: bicycle
67 | vehicle:
68 | column_name: crash_type
69 | column_value: vehicle
70 |
71 | #################################################################
72 | # Configuration for default features
73 |
74 | # Default features from open street map. You can remove features you don't want
75 | # Note: we don't support adding features in the config file.
76 | # If there is an additional feature you want from open street map, contact the development team
77 | openstreetmap_features:
78 | categorical:
79 | width: Width
80 | cycleway_type: Bike lane
81 | signal: Signal
82 | oneway: One Way
83 | lanes: Number of lanes
84 | continuous:
85 | width_per_lane: Average width per lane
86 |
87 | # Speed limit is a required feature
88 | # If you choose to override OpenStreetMap's speed limit, replace 'osm_speed' with the feature name here
89 | speed_limit: osm_speed
90 |
91 |
92 |
93 |
--------------------------------------------------------------------------------
/src/config/config_boise.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Boise, Idaho, USA
3 |
4 | # City centerpoint latitude & longitude (default geocoded values set)
5 | city_latitude: 43.60764000000006
6 | city_longitude: -116.19339999999994
7 |
8 | # City's time zone: defaults to the local time zone of computer initializing the city's config file
9 | timezone: America/Denver
10 |
11 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
12 | city_radius: 20
13 | speed_unit: mph
14 |
15 | # By default, maps are created from OSM's polygon data and fall back to radius
16 | # if there is no polygon data, but you can change map_geography
17 | # to 'radius' if preferred
18 | map_geography: polygon
19 |
20 | # The folder under data where this city's data is stored
21 | name: boise
22 |
23 | # If given, limit crashes to after startdate and no later than enddate
24 | # Recommended to limit to just a few years for now
25 | startdate: 2015-01-01
26 | enddate: 2019-12-31
27 |
28 | #################################################################
29 | # Configuration for data standardization
30 |
31 | # crash file configurations
32 | crashes_files:
33 | Boise_Crash_Data_2005_-_Present.csv:
34 | required:
35 | id: OBJECTID
36 | latitude: Y
37 | longitude: X
38 | # If date supplied in single column:
39 | date_complete: Accident_Date_Time
40 | # If date is separated into year/month/day:
41 | date_year:
42 | date_month:
43 | # Leave date_day empty if not available
44 | date_day:
45 | # If time is available and separate from date:
46 | time:
47 | # If time specified, time_format is one of:
48 | # default (HH:MM:SS)
49 | # seconds (since midnight)
50 | # military (HHMM)
51 | time_format:
52 | optional:
53 | summary:
54 | # If the crash file doesn't have a lat/lon, you must give the address field
55 | # and you will need to run the geocode_batch script - see the README
56 | address:
57 | # This section allows you to specify additional features in the crash file
58 | # (split_columns) to go into the training set
59 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle)
60 | # but you can specify other fields in the crash data file.
61 | # See the README for examples
62 |
63 | #################################################################
64 | # Configuration for default features
65 |
66 | # Default features from open street map. You can remove features you don't want
67 | # Note: we don't support adding features in the config file.
68 | # If there is an additional feature you want from open street map, contact the development team
69 | openstreetmap_features:
70 | categorical:
71 | width: Width
72 | cycleway_type: Bike lane
73 | signal: Signal
74 | oneway: One Way
75 | lanes: Number of lanes
76 | continuous:
77 | width_per_lane: Average width per lane
78 |
79 | # Speed limit is a required feature
80 | # If you choose to override OpenStreetMap's speed limit, replace 'osm_speed' with the feature name here
81 | speed_limit: osm_speed
82 |
83 |
84 |
85 |
--------------------------------------------------------------------------------
/src/config/config_boston.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Boston, Massachusetts, USA
3 | # The folder under data where this city's data is stored
4 | name: boston
5 | # City centerpoint latitude & longitude
6 | city_latitude: 42.3600825
7 | city_longitude: -71.0588801
8 | timezone: America/New_York
9 | # Radius of city's road network from centerpoint in km (required if OSM has no polygon data)
10 | city_radius: 15
11 | # If given, limit crashes to after startdate and no later than enddate
12 | startdate: 2021-01-01
13 | enddate: 2024-03-01
14 |
15 | ##############################################################################
16 | # Configuration for data standardization
17 |
18 | # crash file configurations
19 | crashes_files:
20 | vzopendata.csv:
21 | required:
22 | id: ID
23 | latitude: lat
24 | longitude: long
25 | # If date supplied in single column:
26 | date_complete: dispatch_ts
27 | # If date is separated into year/month/day:
28 | date_year:
29 | date_month:
30 | # Leave date_day empty if not available
31 | date_day:
32 | # If time is available and separate from date:
33 | time:
34 | # If time specified, time_format is one of:
35 | # default (HH:MM:SS)
36 | # seconds (since midnight)
37 | # military (HHMM)
38 | time_format:
39 | optional:
40 | summary: location_type
41 | address:
42 | split_columns:
43 | pedestrian:
44 | column_name: mode_type
45 | column_value: ped
46 | bike:
47 | column_name: mode_type
48 | column_value: bike
49 | vehicle:
50 | column_name: mode_type
51 | column_value: mv
52 |
53 | ##############################################################################
54 |
55 | # atr filepath (should be in processed subfolder)
56 | # leave as '' if not adding atrs
57 | atr: 'atrs_predicted.csv'
58 | # atr column names as list
59 | atr_cols:
60 | - speed_coalesced
61 | - volume_coalesced
62 |
63 | # tmc filepath (should be in processed subfolder)
64 | # leave as '' if not adding tmcs
65 | tmc: 'tmc_summary.json'
66 | # tmc column names as list
67 | tmc_cols:
68 | - Conflict
69 |
70 | #################################################################
71 | # Configuration for default features
72 |
73 | # Default features from open street map. You can remove features you don't want
74 | # Note: we don't support adding features in the config file.
75 | # If there is an additional feature you want from open street map, contact the development team
76 | openstreetmap_features:
77 | categorical:
78 | width: Width
79 | cycleway_type: Bike lane
80 | oneway: One Way
81 | lanes: Number of lanes
82 | continuous:
83 | width_per_lane: Average width per lane
84 |
85 | # Configuration for default waze features
86 | waze_features:
87 | categorical:
88 | jam: Existence of a jam
89 | continuous:
90 | jam_percent: Percent of time there was a jam
91 |
92 |
93 | # Additional city-specific features can be added from alternate map
94 | additional_map_features:
95 | # The path to the extra map, must be in 3857 projection
96 | extra_map: ../data/boston/raw/maps/ma_cob_spatially_joined_streets.shp
97 | continuous:
98 | AADT: Average annual daily traffic
99 | categorical:
100 | Struct_Cnd: Condition
101 | Surface_Tp: Road type
102 | F_F_Class: Functional class
103 | speed_limit: SPEEDLIMIT
104 |
105 | # Additional data sources
106 | data_source:
107 | - name: visionzero
108 | filename: Vision_Zero_Entry.csv
109 | latitude: Y
110 | longitude: X
111 | date: REQUESTDATE
112 | category: REQUESTTYPE
113 | - name: see_click_fix
114 | filename: bos_scf.csv
115 | latitude: Y
116 | longitude: X
117 | date: created
118 |
119 |
--------------------------------------------------------------------------------
/src/config/config_brisbane.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Brisbane, Australia
3 | # City centerpoint latitude & longitude (default geocoded values set)
4 | city_latitude: -27.4697707
5 | city_longitude: 153.0251235
6 | timezone: Australia/Brisbane
7 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
8 | city_radius: 10
9 | # The folder under data where this city's data is stored
10 | name: brisbane
11 | # If given, limit crashes to after startdate and no later than enddate
12 | # Recommended to limit to just a few years for now
13 | startdate: 2015-01-01
14 | enddate: 2017-12-31
15 |
16 | #################################################################
17 | # Configuration for data standardization
18 |
19 | # crash file configurations
20 | crashes_files:
21 | locations_2014_2017.csv:
22 | required:
23 | id: Crash_Ref_Number
24 | latitude: Crash_Latitude_GDA94
25 | longitude: Crash_Longitude_GDA94
26 | # If date supplied in single column:
27 | date_complete: Crash_Date
28 | # If date is separated into year/month/day:
29 | date_year:
30 | date_month:
31 | # Leave date_day empty if not available
32 | date_day:
33 | # If time is available and separate from date:
34 | time:
35 | # If time specified, time_format is one of:
36 | # default (HH:MM:SS)
37 | # seconds (since midnight)
38 | # military (HHMM)
39 | time_format:
40 | optional:
41 | summary: Crash_DCA_Description
42 | address: Crash_Street
43 | split_columns:
44 | pedestrian:
45 | column_name: Count_Unit_Pedestrian
46 | column_value: any
47 | bike:
48 | column_name: Count_Unit_Bicycle
49 | column_value: any
50 | vehicle:
51 | not_column: pedestrian bike
52 |
53 |
54 | #################################################################
55 | # Configuration for default features
56 |
57 | # Default features from open street map. You can remove features you don't want
58 | # Note: we don't support adding features in the config file.
59 | # If there is an additional feature you want from open street map, contact the development team
60 | openstreetmap_features:
61 | categorical:
62 | width: Width
63 | cycleway_type: Bike lane
64 | oneway: One Way
65 | lanes: Number of lanes
66 | continuous:
67 | width_per_lane: Average width per lane
68 |
69 |
--------------------------------------------------------------------------------
/src/config/config_buffalo.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Buffalo, NY, USA
3 | # City centerpoint latitude & longitude (default geocoded values set)
4 | city_latitude: 42.885440000000074
5 | city_longitude: -78.87845999999996
6 |
7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file
8 | timezone: America/New_York
9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
10 | city_radius: 20
11 |
12 | # By default, maps are created from OSM's polygon data and fall back to radius
13 | # if there is no polygon data, but you can change map_geography
14 | # to 'radius' if preferred
15 | map_geography: polygon
16 |
17 | # The folder under data where this city's data is stored
18 | name: buffalo
19 |
20 | # If given, limit crashes to after startdate and no later than enddate
21 | # Recommended to limit to just a few years for now
22 | startdate: 2016-01-01
23 | enddate: 2018-12-31
24 |
25 | #################################################################
26 | # Configuration for data standardization
27 |
28 | # crash file configurations
29 | crashes_files:
30 | buffalo_converted.csv:
31 | required:
32 | id: CASE_NUM
33 | latitude: lat_converted
34 | longitude: lon_converted
35 | # If date supplied in single column:
36 | date_complete: ACC_DATE
37 | # If date is separated into year/month/day:
38 | date_year:
39 | date_month:
40 | # Leave date_day empty if not available
41 | date_day:
42 | # If time is available and separate from date:
43 | time: ACCD_TME
44 | # If time specified, time_format is one of:
45 | # default (HH:MM:SS)
46 | # seconds (since midnight)
47 | # military (HHMM)
48 | time_format:
49 | optional:
50 | summary:
51 | # If the crash file doesn't have a lat/lon, you must give the address field
52 | # and you will need to run the geocode_batch script - see the README
53 | address:
54 | # This section allows you to specify additional features in the crash file
55 | # (split_columns) to go into the training set
56 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle)
57 | # but you can specify other fields in the crash data file.
58 | # See the README for examples
59 |
60 | #################################################################
61 | # Configuration for default features
62 |
63 | # Default features from open street map. You can remove features you don't want
64 | # Note: we don't support adding features in the config file.
65 | # If there is an additional feature you want from open street map, contact the development team
66 | openstreetmap_features:
67 | categorical:
68 | width: Width
69 | cycleway_type: Bike lane
70 | signal: Signal
71 | oneway: One Way
72 | lanes: Number of lanes
73 | continuous:
74 | width_per_lane: Average width per lane
75 |
76 | # Speed limit is a required feature
77 | # If you choose to override OpenStreetMap's speed limit, replace 'osm_speed' with the feature name here
78 | speed_limit: osm_speed
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
/src/config/config_cambridge.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Cambridge, Massachusetts, USA
3 | # The folder under data where this city's data is stored
4 | name: cambridge
5 | # City centerpoint latitude & longitude
6 | city_latitude: 42.3736158
7 | city_longitude: -71.10973349999999
8 | timezone: America/New_York
9 | # Radius of city's road network from centerpoint in km (required if OSM has no polygon data)
10 | city_radius: 10
11 | # If given, limit crashes to after startdate and no later than enddate
12 | startdate:
13 | enddate:
14 |
15 | ##############################################################################
16 | # Configuration for data standardization
17 |
18 | # crash file configurations
19 | crashes_files:
20 | Crashes20152017.csv:
21 | required:
22 | id: ID
23 | latitude: Y
24 | longitude: X
25 | # If date supplied in single column:
26 | date_complete: DateTime
27 | # If date is separated into year/month/day:
28 | date_year:
29 | date_month:
30 | # Leave date_day empty if not available
31 | date_day:
32 | # If time is available and separate from date:
33 | time:
34 | # If time specified, time_format is one of:
35 | # default (HH:MM:SS)
36 | # seconds (since midnight)
37 | # military (HHMM)
38 | time_format:
39 | optional:
40 | summary:
41 | address: Address
42 | split_columns:
43 | pedestrian:
44 | column_name: Type
45 | column_value: PED
46 | bike:
47 | column_name: Type
48 | column_value: CYC
49 | vehicle:
50 | column_name: Type
51 | column_value: AUTO
52 |
53 | # Additional data sources
54 | data_source:
55 | - name: parking_tickets
56 | filename: Cambridge_Parking_Tickets.csv
57 | address: Location
58 | date: Ticket Issue Date
59 | time: Issue Time
60 | category: Violation Description
61 | notes:
62 | # Feature can be categorical (f_cat) or continuous (f_cont)
63 | feat_type: continuous
64 | - name: seeclickfix
65 | filename: Commonwealth_Connect_Service_Requests.csv
66 | latitude: lat
67 | longitude: lng
68 | date: ticket_created_date_time
69 | category: issue_type
70 | # feat_type: categorical
71 |
72 | openstreetmap_features:
73 | categorical:
74 | width: Width
75 | cycleway_type: Bike lane
76 | oneway: One Way
77 | lanes: Number of lanes
78 | continuous:
79 | width_per_lane: Average width per lane
80 | waze_features:
81 | categorical:
82 | jam: Existence of a jam
83 | continuous:
84 | jam_percent: Percent of time there was a jam
85 | avg_jam_level: Jam level
86 | # alert_WEATHERHAZARD: Waze weather hazard alert
87 | # alert_JAM: Waze jam alert
88 | # alert_ROAD_CLOSED: Waze road closed alert
89 | # alert_ACCIDENT: Waze crash alert
90 |
--------------------------------------------------------------------------------
/src/config/config_chicago.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Chicago,IL,USA
3 | # City centerpoint latitude & longitude (default geocoded values set)
4 | city_latitude: 41.884250000000065
5 | city_longitude: -87.63244999999995
6 |
7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file
8 | timezone: America/Chicago
9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
10 | city_radius: 20
11 | speed_unit: mph
12 |
13 | # By default, maps are created from OSM's polygon data and fall back to radius
14 | # if there is no polygon data, but you can change map_geography
15 | # to 'radius' if preferred
16 | map_geography: shapefile
17 | boundary_shapefile: chicago_boundaries.geojson
18 |
19 | # The folder under data where this city's data is stored
20 | name: chicago
21 |
22 | # If given, limit crashes to after startdate and no later than enddate
23 | # Recommended to limit to just a few years for now
24 | startdate: 2019-01-01
25 | enddate: 2020-12-31
26 |
27 | crashes_files:
28 | Chicago_Crashes_Joined_Crashes_People.csv:
29 | required:
30 | id: ID
31 | latitude: LATITUDE
32 | longitude: LONGITUDE
33 | # If date supplied in single column:
34 | date_complete: CRASH_DATE_x
35 | # If date is separated into year/month/day:
36 | date_year:
37 | date_month:
38 | # Leave date_day empty if not available
39 | date_day:
40 | # If time is available and separate from date:
41 | time:
42 | # If time specified, time_format is one of:
43 | # default (HH:MM:SS)
44 | # seconds (since midnight)
45 | # military (HHMM)
46 | time_format:
47 | optional:
48 | summary:
49 | # If the crash file doesn't have a lat/lon, you must give the address field
50 | # and you will need to run the geocode_batch script - see the README
51 | address:
52 | # This section allows you to specify additional features in the crash file
53 | # (split_columns) to go into the training set
54 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle)
55 | # but you can specify other fields in the crash data file.
56 | # See the README for examples
57 | split_columns:
58 | pedestrian:
59 | column_name: PERSON_TYPE
60 | column_value: PEDESTRIAN
61 | bike:
62 | column_name: PERSON_TYPE
63 | column_value: BICYCLE
64 | vehicle:
65 | column_name: PERSON_TYPE
66 | column_value: DRIVER
67 |
68 | #################################################################
69 | # Configuration for default features
70 |
71 | # Default features from open street map. You can remove features you don't want
72 | # Note: we don't support adding features in the config file.
73 | # If there is an additional feature you want from open street map, contact the development team
74 | openstreetmap_features:
75 | categorical:
76 | width: Width
77 | cycleway_type: Bike lane
78 | signal: Signal
79 | oneway: One Way
80 | lanes: Number of lanes
81 | continuous:
82 | width_per_lane: Average width per lane
83 |
84 | # Speed limit is a required feature
85 | # If you choose to override OpenStreetMap's speed limit, replace 'osm_speed' with the feature name here
86 | speed_limit: osm_speed
87 |
88 |
89 |
90 | # Additional data sources
91 | # Any csv file with rows corresponding to location points
92 | data_source:
93 | - name: 311_DOT_Requests
94 | filename: 311_Service_Requests.csv
95 | latitude: LATITUDE
96 | longitude: LONGITUDE
97 | date: CREATED_DATE
98 | time:
99 | category: SR_TYPE
100 | notes:
101 | # Feature is 'categorical' or 'continuous'
102 | feat: categorical
103 | # feat_agg (feature aggregation) can be total count 'default' or 'latest value'
104 | feat_agg:
105 | # if latest, the column name where the value can be found
106 | value:
107 |
108 |
109 |
--------------------------------------------------------------------------------
/src/config/config_dc.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Washington, DC, USA
3 | # City centerpoint latitude & longitude
4 | city_latitude: 38.9071923
5 | city_longitude: -77.0368707
6 | timezone: America/New_York
7 | # Radius of city's road network from centerpoint in km (required if OSM has no polygon data)
8 | city_radius: 25
9 | # The folder under data where this city's data is stored
10 | name: dc
11 | # If given, limit crashes to after startdate and no later than enddate
12 | # Recommended to limit to just a few years for now
13 | startdate: 2014-01-01
14 | enddate: 2016-12-31
15 |
16 | #################################################################
17 | # Configuration for data standardization
18 |
19 | # crash file configurations
20 | crashes_files:
21 | Crashes_in_DC.csv:
22 | required:
23 | id: OBJECTID
24 | latitude: Y
25 | longitude: X
26 | # If date supplied in single column:
27 | date_complete: REPORTDATE
28 | # If date is separated into year/month/day:
29 | date_year:
30 | date_month:
31 | # Leave date_day empty if not available
32 | date_day:
33 | # If time is available and separate from date:
34 | time:
35 | # If time specified, time_format is one of:
36 | # default (HH:MM:SS)
37 | # seconds (since midnight)
38 | # military (HHMM)
39 | time_format:
40 | optional:
41 | summary: MAR_ADDRESS
42 | split_columns:
43 | pedestrian:
44 | column_name: TOTAL_PEDESTRIANS
45 | column_value: any
46 | bike:
47 | column_name: TOTAL_BICYCLES
48 | column_value: any
49 | vehicle:
50 | column_name: TOTAL_VEHICLES
51 | column_value: any
52 |
53 | #################################################################
54 | # Configuration for default features
55 |
56 | # Default features from open street map. You can remove features you don't want
57 | # Note: we don't support adding features in the config file.
58 | # If there is an additional feature you want from open street map, contact the development team
59 | openstreetmap_features:
60 | categorical:
61 | width: Width
62 | cycleway_type: Bike lane
63 | oneway: One Way
64 | lanes: Number of lanes
65 | continuous:
66 | width_per_lane: Average width per lane
67 |
68 |
69 | data_source:
70 | - name: visionzero
71 | filename: Vision_Zero_Safety.csv
72 | latitude: Y
73 | longitude: X
74 | date: REQUESTDATE
75 |
--------------------------------------------------------------------------------
/src/config/config_losangeles.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Los Angeles, CA, USA
3 | # City centerpoint latitude & longitude (default geocoded values set)
4 | city_latitude: 34.0522
5 | city_longitude: -118.2437
6 | # City's time zone: defaults to the local time zone of computer initializing the city's config file
7 | timezone: America/Los_Angeles
8 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
9 | city_radius: 50
10 |
11 | # The folder under data where this city's data is stored
12 | name: losangeles
13 |
14 | # If given, limit crashes to after startdate and no later than enddate
15 | # Recommended to limit to just a few years for now
16 | startdate:
17 | enddate:
18 |
19 | #################################################################
20 | # Configuration for data standardization
21 |
22 | # crash file configurations
23 | crashes_files:
24 | LA_collision_data.csv:
25 | required:
26 | id: ID
27 | latitude: point_y
28 | longitude: point_x
29 | # If date supplied in single column:
30 | date_complete: collision_date
31 | # If date is separated into year/month/day:
32 | date_year:
33 | date_month:
34 | # Leave date_day empty if not available
35 | date_day:
36 | # If time is available and separate from date:
37 | time: collision_time
38 | # If time specified, time_format is one of:
39 | # default (HH:MM:SS)
40 | # seconds (since midnight)
41 | # military (HHMM)
42 | time_format: military
43 | optional:
44 | summary:
45 | # If the crash file doesn't have a lat/lon, you must give the address field
46 | # and you will need to run the geocode_batch script - see the README
47 | address:
48 | split_columns:
49 | pedestrian:
50 | column_name: pedestrian_accident
51 | column_value: any
52 | bike:
53 | column_name: bicycle_accident
54 | column_value: any
55 | vehicle:
56 | not_column: pedestrian bike
57 |
58 | #################################################################
59 | # Configuration for default features
60 |
61 | # Default features from open street map. You can remove features you don't want
62 | # Note: we don't support adding features in the config file.
63 | # If there is an additional feature you want from open street map, contact the development team
64 | openstreetmap_features:
65 | categorical:
66 | width: Width
67 | cycleway_type: Bike lane
68 | signal: Signal
69 | oneway: One Way
70 | lanes: Number of lanes
71 | continuous:
72 | width_per_lane: Average width per lane
73 |
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/src/config/config_melbourne.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Melbourne, VIC, Australia
3 | # City centerpoint latitude & longitude (default geocoded values set)
4 | city_latitude: -37.8163
5 | city_longitude: 144.9631
6 |
7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file
8 | timezone: Australia/Melbourne
9 |
10 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
11 | city_radius: 10
12 | #map_geography: shapefile
13 | #boundary_shapefile: VicRoads_Regions.shp
14 |
15 | # The folder under data where this city's data is stored
16 | name: melbourne
17 |
18 | # If given, limit crashes to after startdate and no later than enddate
19 | # Recommended to limit to just a few years for now
20 | startdate: 1/8/2013
21 | enddate:
22 |
23 |
24 | #################################################################
25 | # Configuration for data standardization
26 |
27 | # crash file configurations
28 | crashes_files:
29 | Crashes_Last_Five_Years.csv:
30 | required:
31 | id: ID
32 | latitude: LATITUDE
33 | longitude: LONGITUDE
34 | # If date supplied in single column:
35 | date_complete: ACCIDENT_DATE
36 | # If date is separated into year/month/day:
37 | date_year:
38 | date_month:
39 | # Leave date_day empty if not available
40 | date_day:
41 | # If time is available and separate from date:
42 | time: ACCIDENT_TIME
43 | # If time specified, time_format is one of:
44 | # default (HH:MM:SS)
45 | # seconds (since midnight)
46 | # military (HHMM)
47 | time_format:
48 | optional:
49 | summary:
50 | # If the crash file doesn't have a lat/lon, you must give the address field
51 | # and you will need to run the geocode_batch script - see the README
52 | address:
53 | split_columns:
54 | pedestrian:
55 | column_name: PEDESTRIAN
56 | column_value: any
57 | bike:
58 | column_name: BICYCLIST
59 | column_value: any
60 | vehicle:
61 | not_column: pedestrian bike
62 |
63 | #################################################################
64 | # Configuration for default features
65 |
66 | # Default features from open street map. You can remove features you don't want
67 | # Note: we don't support adding features in the config file.
68 | # If there is an additional feature you want from open street map, contact the development team
69 | openstreetmap_features:
70 | categorical:
71 | width: Width
72 | cycleway_type: Bike lane
73 | signal: Signal
74 | oneway: One Way
75 | lanes: Number of lanes
76 | continuous:
77 | width_per_lane: Average width per lane
78 |
--------------------------------------------------------------------------------
/src/config/config_meridian.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Meridian, Idaho, USA
3 | # City centerpoint latitude & longitude (default geocoded values set)
4 | city_latitude: 43.61102000000005
5 | city_longitude: -116.39257999999995
6 |
7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file
8 | timezone: America/Denver
9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
10 | city_radius: 20
11 | speed_unit: mph
12 |
13 | # By default, maps are created from OSM's polygon data and fall back to radius
14 | # if there is no polygon data, but you can change map_geography
15 | # to 'radius' if preferred
16 | map_geography: polygon
17 |
18 | # The folder under data where this city's data is stored
19 | name: meridian
20 |
21 | # If given, limit crashes to after startdate and no later than enddate
22 | # Recommended to limit to just a few years for now
23 | startdate: 2015-01-01
24 | enddate: 2019-12-31
25 |
26 | #################################################################
27 | # Configuration for data standardization
28 |
29 | # crash file configurations
30 | crashes_files:
31 | Meridian_ID_Crash_Data_2005_-_Present.csv:
32 | required:
33 | id: OBJECTID
34 | latitude: Y
35 | longitude: X
36 | # If date supplied in single column:
37 | date_complete: Accident_Date_Time
38 | # If date is separated into year/month/day:
39 | date_year:
40 | date_month:
41 | # Leave date_day empty if not available
42 | date_day:
43 | # If time is available and separate from date:
44 | time:
45 | # If time specified, time_format is one of:
46 | # default (HH:MM:SS)
47 | # seconds (since midnight)
48 | # military (HHMM)
49 | time_format:
50 | optional:
51 | summary:
52 | # If the crash file doesn't have a lat/lon, you must give the address field
53 | # and you will need to run the geocode_batch script - see the README
54 | address:
55 | # This section allows you to specify additional features in the crash file
56 | # (split_columns) to go into the training set
57 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle)
58 | # but you can specify other fields in the crash data file.
59 | # See the README for examples
60 |
61 | #################################################################
62 | # Configuration for default features
63 |
64 | # Default features from open street map. You can remove features you don't want
65 | # Note: we don't support adding features in the config file.
66 | # If there is an additional feature you want from open street map, contact the development team
67 | openstreetmap_features:
68 | categorical:
69 | width: Width
70 | cycleway_type: Bike lane
71 | signal: Signal
72 | oneway: One Way
73 | lanes: Number of lanes
74 | continuous:
75 | width_per_lane: Average width per lane
76 |
77 | # Speed limit is a required feature
78 | # If you choose to override OpenStreetMap's speed limit, replace 'osm_speed' with the feature name here
79 | speed_limit: osm_speed
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/src/config/config_nyc.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: New York, NY, USA
3 | # City centerpoint latitude & longitude (default geocoded values set)
4 | city_latitude: 40.71455000000003
5 | city_longitude: -74.00713999999994
6 |
7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file
8 | timezone: America/New_York
9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
10 | city_radius: 20
11 | speed_unit: kph
12 |
13 | # By default, maps are created from OSM's polygon data and fall back to radius
14 | # if there is no polygon data, but you can change map_geography
15 | # to 'radius' if preferred
16 | map_geography: polygon
17 |
18 | # The folder under data where this city's data is stored
19 | name: nyc
20 |
21 | # If given, limit crashes to after startdate and no later than enddate
22 | # Recommended to limit to just a few years for now
23 | startdate:
24 | enddate:
25 |
26 | #################################################################
27 | # Configuration for data standardization
28 |
29 | # crash file configurations
30 | crashes_files:
31 | Motor_Vehicle_Collisions_-_Crashes.csv:
32 | required:
33 | id: COLLISION_ID
34 | latitude: LATITUDE
35 | longitude: LONGITUDE
36 | # If date supplied in single column:
37 | date_complete: CRASH DATE
38 | # If date is separated into year/month/day:
39 | date_year:
40 | date_month:
41 | # Leave date_day empty if not available
42 | date_day:
43 | # If time is available and separate from date:
44 | time: CRASH TIME
45 | # If time specified, time_format is one of:
46 | # default (HH:MM:SS)
47 | # seconds (since midnight)
48 | # military (HHMM)
49 | time_format:
50 | optional:
51 | # summary:
52 | # If the crash file doesn't have a lat/lon, you must give the address field
53 | # and you will need to run the geocode_batch script - see the README
54 | # address:
55 | # Currently only considering number of persons injured for pedestrian/cyclists,
56 | # which removes a small number of fatal accidents (less than .01%)
57 | split_columns:
58 | pedestrian:
59 | column_name: NUMBER OF PEDESTRIANS INJURED
60 | column_value: any
61 | bike:
62 | column_name: NUMBER OF CYCLIST INJURED
63 | column_value: any
64 | vehicle:
65 | not_column: pedestrian bike
66 |
67 | #################################################################
68 | # Configuration for default features
69 |
70 | # Default features from open street map. You can remove features you don't want
71 | # Note: we don't support adding features in the config file.
72 | # If there is an additional feature you want from open street map, contact the development team
73 | openstreetmap_features:
74 | categorical:
75 | width: Width
76 | cycleway_type: Bike lane
77 | signal: Signal
78 | oneway: One Way
79 | lanes: Number of lanes
80 | continuous:
81 | width_per_lane: Average width per lane
82 |
83 | # Speed limit is a required feature
84 | # If you choose to override OpenStreetMap's speed limit, replace 'osm_speed' with the feature name here
85 | speed_limit: osm_speed
86 |
--------------------------------------------------------------------------------
/src/config/config_philly.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Philadelphia, Pennsylvania, USA
3 | # The folder under data where this city's data is stored
4 | name: philly
5 | # City centerpoint latitude & longitude
6 | city_latitude: 39.9526
7 | city_longitude: -75.1652
8 | timezone: America/New_York
9 | # Radius of city's road network from centerpoint in km (required if OSM has no polygon data)
10 | city_radius: 15
11 | # If given, limit crashes to after startdate and no later than enddate
12 | startdate: 2017-01-01
13 | enddate: 2017-12-31
14 |
15 | ##############################################################################
16 | # Configuration for data standardization
17 |
18 | # crash file configurations
19 | crashes_files:
20 | crash_data_collision_crash_2007_2017.csv:
21 | required:
22 | id: objectid_1
23 | latitude: dec_lat
24 | longitude: dec_long
25 | date_complete:
26 | date_year: crash_year
27 | date_month: crash_month
28 | date_day:
29 | time: time_of_day
30 | time_format: military
31 | optional:
32 | intersection: intersect_type
33 | split_columns:
34 | pedestrian:
35 | column_name: ped_count
36 | column_value: any
37 | bike:
38 | column_name: bicycle_count
39 | column_value: any
40 | vehicle:
41 | column_name: vehicle_count
42 | column_value: any
43 |
44 | #################################################################
45 | # Configuration for default features
46 |
47 | # Default features from open street map. You can remove features you don't want
48 | # Note: we don't support adding features in the config file.
49 | # If there is an additional feature you want from open street map, contact the development team
50 | openstreetmap_features:
51 | categorical:
52 | width: Width
53 | cycleway_type: Bike lane
54 | signal: Signal
55 | oneway: One Way
56 | lanes: Number of lanes
57 | hwy_type: Highway type
58 | continuous:
59 | width_per_lane: Average width per lane
60 |
61 | # Additional data sources
62 | data_source:
63 | - name: DVRPCTrafficVolume
64 | filename: DVRPC_Traffic_Counts.csv
65 | latitude: LATITUDE
66 | longitude: LONGITUDE
67 | date: SETDATE
68 | notes:
69 | # Feature is categorical (f_cat) or continuous (f_cont)
70 | feat: f_cont
71 | # feat_agg (feature aggregation) can be total count 'default' or 'latest value'
72 | feat_agg: latest
73 | # if latest, the desired value
74 | value: AADT
75 | - name: DVRPCWalkVolume
76 | filename: DVRPC__Pedestrian_Counts.csv
77 | latitude: LATITUDE
78 | longitude: LONGITUDE
79 | date: SETDATE
80 | notes:
81 | # Feature is categorical (f_cat) or continuous (f_cont)
82 | feat: f_cont
83 | # feat_agg (feature aggregation) can be total count 'default' or 'latest value'
84 | feat_agg: latest
85 | # if latest, the desired value
86 | value: AADP
87 | - name: DVRPCBikeVolume
88 | filename: DVRPC__Bicycle__Counts.csv
89 | latitude: LATITUDE
90 | longitude: LONGITUDE
91 | date: SETDATE
92 | notes:
93 | # Feature is categorical (f_cat) or continuous (f_cont)
94 | feat: f_cont
95 | # feat_agg (feature aggregation) can be total count ('default') or latest value ('latest')
96 | feat_agg: latest
97 | # if latest, the desired value
98 | value: AADT
99 | - name: parking_tickets
100 | filename: parking_violations_14.csv
101 | latitude: lat
102 | longitude: lon
103 | date: issue_datetime
104 | category: violation_desc
105 | notes:
106 | # Feature can be categorical (f_cat) or continuous (f_cont)
107 | feat: f_cont
108 |
109 |
--------------------------------------------------------------------------------
/src/config/config_pittsburgh.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Pittsburgh, PA, USA
3 | # City centerpoint latitude & longitude (default geocoded values set)
4 | city_latitude: 40.44062479999999
5 | city_longitude: -79.9958864
6 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
7 | city_radius: 20
8 | # The folder under data where this city's data is stored
9 | name: pittsburgh
10 | # If given, limit crashes to after startdate and no later than enddate
11 | # Recommended to limit to just a few years for now
12 | startdate:
13 | enddate:
14 |
15 |
16 | #################################################################
17 | # Configuration for data standardization
18 |
19 | # crash file configurations
20 | crashes_files:
21 | pittsburgh_2017.csv:
22 | required:
23 | id: _id
24 | latitude: DEC_LAT
25 | longitude: DEC_LONG
26 | # If date supplied in single column:
27 | date_complete:
28 | # If date is separated into year/month/day:
29 | date_year: CRASH_YEAR
30 | date_month: CRASH_MONTH
31 | # Leave date_day empty if not available
32 | date_day:
33 | # If time is available and separate from date:
34 | time: TIME_OF_DAY
35 | # If time specified, time_format is one of:
36 | # default (HH:MM:SS)
37 | # seconds (since midnight)
38 | # military (HHMM)
39 | time_format: military
40 | optional:
41 | summary:
42 | address:
43 |
44 |
--------------------------------------------------------------------------------
/src/config/config_somerville.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Somerville, Massachusetts, USA
3 | # City centerpoint latitude & longitude (default geocoded values set)
4 | city_latitude: 42.3876
5 | city_longitude: -71.0995
6 |
7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file
8 | timezone: America/New_York
9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
10 | city_radius: 20
11 |
12 | # The folder under data where this city's data is stored
13 | name: somerville
14 |
15 | # If given, limit crashes to after startdate and no later than enddate
16 | # Recommended to limit to just a few years for now
17 | startdate:
18 | enddate:
19 |
20 | #################################################################
21 | # Configuration for data standardization
22 |
23 | # crash file configurations
24 | crashes_files:
25 | Motor_Vehicle_Crash_Reports.csv:
26 | required:
27 | id: ID
28 | latitude:
29 | longitude:
30 | # If date supplied in single column:
31 | date_complete: Date
32 | # If date is separated into year/month/day:
33 | date_year:
34 | date_month:
35 | # Leave date_day empty if not available
36 | date_day:
37 | # If time is available and separate from date:
38 | time:
39 | # If time specified, time_format is one of:
40 | # default (HH:MM:SS)
41 | # seconds (since midnight)
42 | # military (HHMM)
43 | time_format:
44 | optional:
45 | summary:
46 | # If the crash file doesn't have a lat/lon, you must give the address field
47 | # and you will need to run the geocode_batch script - see the README
48 | address: Location
49 | split_columns:
50 | pedestrian:
51 | column_name: Pedestrian
52 | column_value: any
53 | bike:
54 | column_name: Bicycle
55 | column_value: any
56 | vehicle:
57 | not_column: pedestrian bike
58 |
59 |
60 |
61 | #################################################################
62 | # Configuration for default features
63 |
64 | # Default features from open street map. You can remove features you don't want
65 | # Note: we don't support adding features in the config file.
66 | # If there is an additional feature you want from open street map, contact the development team
67 | openstreetmap_features:
68 | categorical:
69 | width: Width
70 | cycleway_type: Bike lane
71 | signal: Signal
72 | oneway: One Way
73 | lanes: Number of lanes
74 | continuous:
75 | width_per_lane: Average width per lane
76 |
77 |
78 | # Configuration for default waze features
79 | waze_features:
80 | categorical:
81 | jam: Existence of a jam
82 | continuous:
83 | jam_percent: Percent of time there was a jam
84 |
85 |
86 |
--------------------------------------------------------------------------------
/src/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/.gitkeep
--------------------------------------------------------------------------------
/src/data/TMC_scraping/README.md:
--------------------------------------------------------------------------------
1 | # TMC scraping
--------------------------------------------------------------------------------
/src/data/TMC_scraping/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This package parses turning movement count files
3 | """
4 |
--------------------------------------------------------------------------------
/src/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/__init__.py
--------------------------------------------------------------------------------
/src/data/record.py:
--------------------------------------------------------------------------------
1 | from pyproj import Transformer
2 | from . import util
3 | from dateutil.parser import parse
4 |
# Module-level pyproj transformer from projection 4326 to projection 3857,
# built once at import time; Record.__init__ in this module passes it to
# util.get_reproject_point.
# NOTE(review): always_xy=True presumably fixes the axis order to x/y
# (lon/lat) - confirm against the pyproj documentation.
transformer_4326_to_3857 = Transformer.from_proj(
    4326, 3857, always_xy=True)
# Module-level pyproj transformer for the reverse direction (3857 to 4326),
# created with the same always_xy=True setting.
transformer_3857_to_4326 = Transformer.from_proj(
    3857, 4326, always_xy=True)
11 |
12 |
class Record(object):
    """
    A record pairs a dict of properties with a point.

    Args:
        properties (dict): attributes of the record.  When no point is
            supplied it must contain properties['location'] with
            'latitude' and 'longitude' entries.
        point (optional): a ready-made point.  When omitted (or falsy) the
            point is computed by util.get_reproject_point from the
            latitude/longitude using the 4326 -> 3857 transformer
            (presumably yielding 3857 coordinates - confirm against
            util.get_reproject_point).
    """

    def __init__(self, properties, point=None):
        if not point:
            # No usable point was handed in: derive one from the location
            location = properties['location']
            point = util.get_reproject_point(
                location['latitude'],
                location['longitude'],
                transformer_4326_to_3857)
        self.point = point
        self.properties = properties

    @property
    def schema(self):
        """Schema built by util.make_schema for a 'Point' with these properties"""
        return util.make_schema('Point', self.properties)

    def _get_near_id(self):
        # None when the record has not been given a near_id yet
        return self.properties.get('near_id')

    def _set_near_id(self, near_id):
        self.properties['near_id'] = near_id

    # near_id is stored inside the properties dict rather than on the object
    near_id = property(_get_near_id, _set_near_id)

    @property
    def timestamp(self):
        """The 'timestamp' property, or an empty string when it is absent"""
        return self.properties.get('timestamp', '')
46 |
47 |
class Crash(Record):
    """
    A Record describing a crash.  The point is always derived from
    properties['location'], since no explicit point is accepted.
    """

    def __init__(self, properties):
        super(Crash, self).__init__(properties)

    @property
    def timestamp(self):
        """The 'dateOccurred' property run through dateutil's parse"""
        occurred = self.properties['dateOccurred']
        return parse(occurred)
55 |
56 |
--------------------------------------------------------------------------------
/src/data/see_click_fix/seeclickfix.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import requests
3 | import time
4 | import json
5 | import os
6 | import csv
7 | from dateutil.parser import parse
8 |
9 |
def convert_to_csv(filename):
    """
    Convert a See Click Fix ticket dump from json to csv.

    Reads <filename>.json (a list of ticket dicts) and writes
    <filename>.csv with the columns X, Y, type, created, summary and
    description.

    Args:
        filename (str): path prefix, without extension, shared by the
            json input and the csv output.
    Raises:
        KeyError: if a ticket lacks 'lng', 'lat', 'created_at',
            'summary' or 'description'.
    """
    with open(filename + '.json', 'r', encoding='utf-8') as f:
        tickets = json.load(f)

    print("Converting " + str(len(tickets)) + " tickets to csv")
    # Since this so far only looks at Boston, hard coding
    # fields we care about.  Will need to check against other cities
    fieldnames = ['X', 'Y', 'type', 'created', 'summary', 'description']

    # newline='' is what the csv module asks for, and writing text with an
    # explicit utf-8 encoding replaces the Python 2 era .encode() calls,
    # which on Python 3 would put b'...' reprs into the file
    with open(filename + '.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for t in tickets:
            # request_type may be missing, null or lack a title
            request_type = t.get('request_type') or {}
            writer.writerow({
                'X': t['lng'],
                'Y': t['lat'],
                'type': request_type.get('title', ''),
                'created': t['created_at'],
                # summary and description can both be null in the dump
                'summary': t['summary'] or '',
                'description': t['description'] or ''
            })
36 |
37 |
def get_tickets(place_url, outfile, statuses=(
        'open', 'acknowledged', 'closed', 'archived'), start_date=None):
    """
    Download all See Click Fix issues for a place and dump them as json.

    Nothing is queried when outfile already exists.  Otherwise every page
    of the issues endpoint is fetched, following next_page_url and
    sleeping half a second between pages, and the combined issue list is
    written to outfile.

    Args:
        place_url (str): value for the API's place_url parameter.
        outfile (str): path of the json file to write.
        statuses (sequence of str): issue statuses to request; joined with
            commas into the status parameter.
        start_date (str, optional): if given, parsed with dateutil and
            sent in ISO format as the 'after' parameter.
    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    print(outfile)
    if os.path.exists(outfile):
        print("See click fix file already exists, skipping query...")
        return

    request_str = 'https://seeclickfix.com/api/v2/issues?place_url=' \
        + place_url \
        + '&status=' + ','.join(statuses)
    if start_date:
        start_date = parse(start_date).isoformat()
        request_str += '&after=' + start_date

    response = requests.get(request_str)
    # Fail loudly on an HTTP error instead of with a KeyError below
    response.raise_for_status()
    payload = response.json()

    md = payload['metadata']['pagination']
    print("Getting " + str(md['pages']) + " pages of see click fix data")

    issues = payload['issues']
    print("page:" + str(md['page']))
    next_page_url = md['next_page_url']
    while next_page_url:
        response = requests.get(next_page_url)
        response.raise_for_status()
        payload = response.json()
        md = payload['metadata']['pagination']
        print("page:" + str(md['page']))
        issues += payload['issues']
        next_page_url = md['next_page_url']
        # Pause between successive page requests
        time.sleep(.5)

    with open(outfile, 'w') as f:
        json.dump(issues, f)
70 |
if __name__ == '__main__':
    # Command line entry point: download the tickets for a city into
    # <outputfile>.json, then convert that dump to <outputfile>.csv

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "outputfile", type=str, help="output file prefix")
    parser.add_argument(
        "-c", "--city", type=str, default='Boston')
    parser.add_argument(
        "-status", "--status_list", nargs="+",
        default=['open', 'acknowledged', 'closed', 'archived'])
    parser.add_argument(
        "-start", "--start_date")

    args = parser.parse_args()
    filename, city = args.outputfile, args.city

    # The city argument is passed as the API's place_url
    get_tickets(city, filename + '.json',
                statuses=args.status_list,
                start_date=args.start_date)
    convert_to_csv(filename)
94 |
--------------------------------------------------------------------------------
/src/data/segment.py:
--------------------------------------------------------------------------------
1 |
class Segment(object):
    """
    A segment pairs a shapely shape with a dict of properties.

    Args:
        geometry: the shape of the segment
        properties (dict): attributes describing the segment
    """

    def __init__(self, geometry, properties):
        self.geometry, self.properties = geometry, properties
9 |
10 |
class Intersection(object):
    """
    Creates an Intersection object
    Args:
        segment_id: Unique identifier for the intersection, stored as `id`.
        lines (list of shapely.geometry.linestring): List of lines forming the intersection.
        data (dict): Additional data associated with the intersection.
        properties (list of dict): List of dictionaries containing properties for each line.
        nodes (list of dict, optional): List of dictionaries containing information about nodes in the intersection. Defaults to a new empty list.
        connected_segments (list of int, optional): List of IDs of segments connected to the intersection. Defaults to a new empty list.

    The geometry attribute starts out as None.
    """

    def __init__(self, segment_id, lines, data, properties,
                 nodes=None, connected_segments=None):
        self.id = segment_id
        self.lines = lines
        self.data = data
        self.properties = properties
        self.geometry = None
        # Nodes are the points (with openstreetmap node id) in the intersection.
        # A fresh list is created per instance: a `[]` default in the
        # signature would be one shared list mutated by every intersection.
        self.nodes = [] if nodes is None else nodes
        self.connected_segments = (
            [] if connected_segments is None else connected_segments)
34 |
35 |
class IntersectionBuffer(object):
    """
    Holds the buffer polygon of an intersection together with the list
    of records associated with the intersection points

    Args:
        buffer: the polygon around the intersection
        points: the records tied to the intersection points
    """

    def __init__(self, buffer, points):
        self.buffer, self.points = buffer, points
44 |
--------------------------------------------------------------------------------
/src/data/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for data_generation
3 | """
4 |
--------------------------------------------------------------------------------
/src/data/tests/data/concern_test_dummy.json:
--------------------------------------------------------------------------------
1 | [{"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14807, "GLOBALID": "", "REQUESTTYPE": "bike facilities", "COMMENTS": "Broadway Bridge is wide & off highway ramps. Vehicles speed over. Key connection for bikes (& everyone) to get back into Southie fr much of city. W.Broadway is wide enough for bike lanes all the way across this bridge down to at least E.Broadway.", "USERTYPE": "bikes", "REQUESTDATE": "2016-01-19T14:43:50.000Z", "REQUESTID": 14807.0, "near_id": "004581", "Y": 42.343488869715976, "X": -71.05869817972585}, {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14808, "GLOBALID": "", "REQUESTTYPE": "other", "COMMENTS": "This intersection is dangerous. Cars don't follow the lane markings (ie go straight while in turn lane) so it's nearly impossible to safely position yourself on a bike. In a car, people are trying to move over/it's unclear where people are going.", "USERTYPE": "bikes", "REQUESTDATE": "2016-01-19T14:48:45.000Z", "REQUESTID": 14808.0, "near_id": 6391, "Y": 42.354167552594284, "X": -71.05414378860903}, {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14809, "GLOBALID": "", "REQUESTTYPE": "people don't yield while going straight", "COMMENTS": "It's terrifying to walk over here. 
It seems like it's impossible to get the cars to stop stop, even at the crosswalks.", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T14:57:03.000Z", "REQUESTID": 14809.0, "near_id": "0013093", "Y": 42.33938397670106, "X": -71.0994798889095}, {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14810, "GLOBALID": "", "REQUESTTYPE": "low visibility", "COMMENTS": "cars coming around the corner of this wide one street are speeding and not visible for persons on the crosswalk", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T15:36:25.000Z", "REQUESTID": 14810.0, "near_id": 6083, "Y": 42.349364649630935, "X": -71.06656509857143}, {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14811, "GLOBALID": "", "REQUESTTYPE": "people don't yield while turning", "COMMENTS": "as you come off the bike path, it's unclear how to get across to continue south on Washington, and cars turn right into your path as you try to cross west with traffic to position yourself to go south with traffic.", "USERTYPE": "bikes", "REQUESTDATE": "2016-01-19T21:26:54.000Z", "REQUESTID": 14811.0, "near_id": "0011866", "Y": 42.30199319771136, "X": -71.11441432121919},
2 | {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14809, "GLOBALID": "", "REQUESTTYPE": "people don't yield while going straight", "COMMENTS": "TEST", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T14:57:03.000Z", "REQUESTID": 14809.0, "near_id": 5492, "Y": 42.342341340478789, "X": -71.065894221691337},
3 | {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14809, "GLOBALID": "", "REQUESTTYPE": "people don't yield while going straight", "COMMENTS": "TEST2", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T14:57:03.000Z", "REQUESTID": 14809.0, "near_id": "005598", "Y": 42.325676212533196, "X": -71.065894221691337},
4 | {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14809, "GLOBALID": "", "REQUESTTYPE": "bike facilities", "COMMENTS": "TEST2", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T14:57:03.000Z", "REQUESTID": 14809.0, "near_id": "005598", "Y": 42.325676212533196, "X": -71.065894221691337},
5 | {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14809, "GLOBALID": "", "REQUESTTYPE": "bike facilities", "COMMENTS": "TEST2", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T14:57:03.000Z", "REQUESTID": 14809.0, "near_id": "005598", "Y": 42.325676212533196, "X": -71.065894221691337},
6 | {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14809, "GLOBALID": "", "REQUESTTYPE": "bike facilities", "COMMENTS": "TEST2", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T14:57:03.000Z", "REQUESTID": 14809.0, "near_id": "005593", "Y": 42.330339, "X": -71.05727900000001}]
7 |
8 |
--------------------------------------------------------------------------------
/src/data/tests/data/config_brisbane_no_supplemental.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Brisbane, Australia
3 | # City centerpoint latitude & longitude (default geocoded values set)
4 | city_latitude: -27.4697707
5 | city_longitude: 153.0251235
6 |
7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file
8 | timezone: Australia/Brisbane
9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
10 | city_radius: 20
11 | speed_unit: kph
12 |
13 | # By default, maps are created from OSM's polygon data and fall back to radius
14 | # if there is no polygon data, but you can change map_geography
15 | # to 'radius' if preferred
16 | map_geography: polygon
17 |
18 | # The folder under data where this city's data is stored
19 | name: brisbane
20 |
21 | # If given, limit crashes to after startdate and no later than enddate
22 | # Recommended to limit to just a few years for now
23 | startdate:
24 | enddate:
25 |
26 | #################################################################
27 | # Configuration for data standardization
28 |
29 | # crash file configurations
30 | crashes_files:
31 | test_crashes.csv:
32 | required:
33 | id:
34 | latitude:
35 | longitude:
36 | # If date supplied in single column:
37 | date_complete:
38 | # If date is separated into year/month/day:
39 | date_year:
40 | date_month:
41 | # Leave date_day empty if not available
42 | date_day:
43 | # If time is available and separate from date:
44 | time:
45 | # If time specified, time_format is one of:
46 | # default (HH:MM:SS)
47 | # seconds (since midnight)
48 | # military (HHMM)
49 | time_format:
50 | optional:
51 | summary:
52 | # If the crash file doesn't have a lat/lon, you must give the address field
53 | # and you will need to run the geocode_batch script - see the README
54 | address:
55 | # This section allows you to specify additional features in the crash file
56 | # (split_columns) to go into the training set
57 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle)
58 | # but you can specify other fields in the crash data file.
59 | # See the README for examples
60 |
61 | #################################################################
62 | # Configuration for default features
63 |
64 | # Default features from open street map. You can remove features you don't want
65 | # Note: we don't support adding features in the config file.
66 | # If there is an additional feature you want from open street map, contact the development team
67 | openstreetmap_features:
68 | categorical:
69 | width: Width
70 | cycleway_type: Bike lane
71 | signal: Signal
72 | oneway: One Way
73 | lanes: Number of lanes
74 | continuous:
75 | width_per_lane: Average width per lane
76 |
77 | # Speed limit is a required feature
78 | # If you choose to override OpenStreetMaps' speed limit, replace 'osm_speed' with the feature name here
79 | speed_limit: osm_speed
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/src/data/tests/data/config_brisbane_supplemental.yml:
--------------------------------------------------------------------------------
1 | # City name
2 | city: Brisbane, Australia
3 | # City centerpoint latitude & longitude (default geocoded values set)
4 | city_latitude: -27.4697707
5 | city_longitude: 153.0251235
6 | # City's time zone: defaults to the local time zone of computer initializing the city's config file
7 | timezone: Australia/Brisbane
8 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km)
9 | city_radius: 20
10 | speed_unit: kph
11 |
12 | # By default, maps are created from OSM's polygon data and fall back to radius
13 | # if there is no polygon data, but you can change map_geography
14 | # to 'radius' if preferred
15 | map_geography: polygon
16 |
17 | # The folder under data where this city's data is stored
18 | name: brisbane
19 | # If given, limit crashes to after startdate and no later than enddate
20 | # Recommended to limit to just a few years for now
21 | startdate:
22 | enddate:
23 | #################################################################
24 | # Configuration for data standardization
25 |
26 | # crash file configurations
27 | crashes_files:
28 | test_crashes.csv:
29 | required:
30 | id:
31 | latitude:
32 | longitude:
33 | # If date supplied in single column:
34 | date_complete:
35 | # If date is separated into year/month/day:
36 | date_year:
37 | date_month:
38 | # Leave date_day empty if not available
39 | date_day:
40 | # If time is available and separate from date:
41 | time:
42 | # If time specified, time_format is one of:
43 | # default (HH:MM:SS)
44 | # seconds (since midnight)
45 | # military (HHMM)
46 | time_format:
47 | optional:
48 | summary:
49 | # If the crash file doesn't have a lat/lon, you must give the address field
50 | # and you will need to run the geocode_batch script - see the README
51 | address:
52 | # This section allows you to specify additional features in the crash file
53 | # (split_columns) to go into the training set
54 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle)
55 | # but you can specify other fields in the crash data file.
56 | # See the README for examples
57 |
58 | # List of concern type information
59 | concern_files:
60 | - name: concern
61 | filename: test_concerns.csv
62 | latitude:
63 | longitude:
64 | time:
65 |
66 | # Additional data sources
67 | data_source:
68 | - name:
69 | filename: parking_tickets_dummy_file_1.csv
70 | address:
71 | date:
72 | time:
73 | category:
74 | notes:
75 | # Feature is categorical (f_cat) or continuous (f_cont)
76 | feat:
77 | # feat_agg (feature aggregation) can be total count 'default' or 'latest value'
78 | feat_agg:
79 | # if latest, the column name where the value can be found
80 | value:
81 |
82 | #################################################################
83 | # Configuration for default features
84 |
85 | # Default features from open street map. You can remove features you don't want
86 | # Note: we don't support adding features in the config file.
87 | # If there is an additional feature you want from open street map, contact the development team
88 | openstreetmap_features:
89 | categorical:
90 | width: Width
91 | cycleway_type: Bike lane
92 | signal: Signal
93 | oneway: One Way
94 | lanes: Number of lanes
95 | continuous:
96 | width_per_lane: Average width per lane
97 |
98 | # Speed limit is a required feature
99 | # If you choose to override OpenStreetMaps' speed limit, replace 'osm_speed' with the feature name here
100 | # speed_limit: osm_speed
101 |
102 |
103 |
--------------------------------------------------------------------------------
/src/data/tests/data/config_features.yml:
--------------------------------------------------------------------------------
1 | city: Boston, Massachusetts, USA
2 | name: boston
3 | city_latitude: 42.3600825
4 | city_longitude: -71.0588801
5 | city_radius: 15
6 | timezone: America/New_York
7 | crashes_files:
8 | test:
9 | dummy
10 |
11 | openstreetmap_features:
12 | categorical:
13 | osm_speed: Speed limit
14 | width: Width
15 | cycleway_type: Bike lane
16 | oneway: One Way
17 | lanes: Number of lanes
18 | signal: Traffic signal
19 | crosswalk: Crosswalk
20 | continuous:
21 | width_per_lane: Average width per lane
22 | waze_features:
23 | categorical:
24 | jam: Existence of a jam
25 | continuous:
26 | jam_percent: Percent of time there was a jam
27 |
--------------------------------------------------------------------------------
/src/data/tests/data/crash_test_dummy.json:
--------------------------------------------------------------------------------
1 | [{"CAD_EVENT_REL_COMMON_ID": "1481358", "YCOORD": "2950076.65", "CALENDAR_DATE": "2016-02-08 00:00:00.000", "X": "-71.065840037158878", "FIRST_EVENT_SUBTYPE": "UNKNOWN IF INJURIES - ADVISE NEED FOR EMS (P) (E) (F)", "XCOORD": "773540.86", "N_EVENTS": "1", "TIME": "20:40:23", "Y": "42.342341340478789", "near_id": 5492}, {"CAD_EVENT_REL_COMMON_ID": "1666213", "YCOORD": "2944003.51", "CALENDAR_DATE": "2016-05-06 00:00:00.000", "X": "-71.065894221691337", "FIRST_EVENT_SUBTYPE": "REPORTED INJURIES (P) (E) (F)", "XCOORD": "773557.12", "N_EVENTS": "1", "TIME": "14:14:30", "Y": "42.325676212533196", "near_id": "005598"}, {"CAD_EVENT_REL_COMMON_ID": "1689595", "YCOORD": "2945764.85", "CALENDAR_DATE": "2016-05-16 00:00:00.000", "X": "-71.057661166075064", "FIRST_EVENT_SUBTYPE": "REPORTED INJURIES (P) (E) (F)", "XCOORD": "775774.4", "N_EVENTS": "1", "TIME": "18:08:29", "Y": "42.330478134494818", "near_id": "005593"}, {"CAD_EVENT_REL_COMMON_ID": "2069106", "YCOORD": "2945652", "CALENDAR_DATE": "2016-10-28 00:00:00.000", "X": "-71.057275245294946", "FIRST_EVENT_SUBTYPE": "PEDESTRIAN STRUCK (P) (E) (F)", "XCOORD": "775879.34", "N_EVENTS": "1", "TIME": "10:27:18", "Y": "42.330166976202662", "near_id": "005593"}, {"YCOORD": "", "CAD_EVENT_REL_COMMON_ID": "618110", "CALENDAR_DATE": "2015-01-19", "mode_type": "mv", "X": "-71.05727900000001", "TIME,": "85064", "FIRST_EVENT_SUBTYPE": "REPORTED INJURIES (P) (E) (F)", "XCOORD": "", "N_EVENTS": "", "TIME": "", "Y": "42.330339", "near_id": "005593"}, {"YCOORD": "", "CAD_EVENT_REL_COMMON_ID": "862107", "CALENDAR_DATE": "2015-05-09", "mode_type": "mv", "X": "-71.027179", "TIME,": "64588", "FIRST_EVENT_SUBTYPE": "REPORTED INJURIES (P) (E) (F)", "XCOORD": "", "N_EVENTS": "", "TIME": "", "Y": "42.335786999999996", "near_id": 5829}, {"YCOORD": "", "CAD_EVENT_REL_COMMON_ID": "982316", "CALENDAR_DATE": "2015-06-28", "mode_type": "mv", "X": "-71.05727900000001", "TIME,": "23951", "FIRST_EVENT_SUBTYPE": "UNKNOWN IF INJURIES - ADVISE NEED FOR 
EMS (P) (E) (F)", "XCOORD": "", "N_EVENTS": "", "TIME": "", "Y": "42.330339", "near_id": "005593"}, {"YCOORD": "", "CAD_EVENT_REL_COMMON_ID": "2246232", "CALENDAR_DATE": "2017-01-23", "mode_type": "mv", "X": "-71.06572800000001", "TIME,": "45770", "FIRST_EVENT_SUBTYPE": "REPORTED INJURIES (P) (E) (F)", "XCOORD": "", "N_EVENTS": "", "TIME": "", "Y": "42.342415", "near_id": 5492}]
--------------------------------------------------------------------------------
/src/data/tests/data/osm_crash_file.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "id": 1403832,
4 | "dateOccurred": "2016-01-01T00:56:45-05:00",
5 | "location": {
6 | "latitude": 42.3311855,
7 | "longitude": -71.0748389
8 | },
9 | "vehicles": [
10 | {
11 | "category": "car"
12 | }
13 | ],
14 | "summary": "REPORTED INJURIES (P) (E) (F)"
15 | },
16 | {
17 | "id": 1403832,
18 | "dateOccurred": "2016-01-01T00:56:45-05:00",
19 | "location": {
20 | "latitude": 42.3611498,
21 | "longitude": -71.0645559
22 | },
23 | "vehicles": [
24 | {
25 | "category": "car"
26 | }
27 | ],
28 | "summary": "REPORTED INJURIES (P) (E) (F)"
29 | },
30 | {
31 | "id": 1403832,
32 | "dateOccurred": "2016-01-01T00:56:45-05:00",
33 | "location": {
34 | "latitude": 42.333375,
35 | "longitude": -71.07736
36 | },
37 | "vehicles": [
38 | {
39 | "category": "car"
40 | }
41 | ],
42 | "summary": "REPORTED INJURIES (P) (E) (F)"
43 | }
44 |
45 | ]
46 |
--------------------------------------------------------------------------------
/src/data/tests/data/osm_output.gpickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/osm_output.gpickle
--------------------------------------------------------------------------------
/src/data/tests/data/processed/maps/inters.geojson:
--------------------------------------------------------------------------------
1 | {"type": "FeatureCollection", "features": [{ "type": "Feature", "properties": { "id_2": 2, "id_1": 1, "intersection": 1, "connected_segments": [10, 11]}, "geometry": { "type": "Point", "coordinates": [ -71.130919614355903, 42.236953123165236 ] } },
2 | { "type": "Feature", "properties": { "id_2": 6, "id_1": 2, "intersection": 1 }, "geometry": { "type": "Point", "coordinates": [ -71.130457692279407, 42.236823198783938 ] } },
3 | { "type": "Feature", "properties": { "id_2": 4, "id_1": 0, "intersection": 1 }, "geometry": { "type": "Point", "coordinates": [ -71.130747480344851, 42.234915582812036 ] } },
4 | { "type": "Feature", "properties": { "id_2": 5, "id_1": 3, "intersection": 1 }, "geometry": { "type": "Point", "coordinates": [ -71.129743187575258, 42.236620170391106 ] } },
5 | { "type": "Feature", "properties": { "id_2": 3, "id_1": 0, "intersection": 1 }, "geometry": { "type": "Point", "coordinates": [ -71.129934237072263, 42.236675963780598 ] } },
6 | { "type": "Feature", "properties": { "id_2": 4, "id_1": 1, "intersection": 1 }, "geometry": { "type": "Point", "coordinates": [ -71.131724767847999, 42.235164325359385 ] } }
7 | ]
8 | }
9 |
--------------------------------------------------------------------------------
/src/data/tests/data/processed/maps/osm.gpkg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/processed/maps/osm.gpkg
--------------------------------------------------------------------------------
/src/data/tests/data/processed/maps/test_line_convert.cpg:
--------------------------------------------------------------------------------
1 | ISO-8859-1
--------------------------------------------------------------------------------
/src/data/tests/data/processed/maps/test_line_convert.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/processed/maps/test_line_convert.dbf
--------------------------------------------------------------------------------
/src/data/tests/data/processed/maps/test_line_convert.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/processed/maps/test_line_convert.shp
--------------------------------------------------------------------------------
/src/data/tests/data/processed/maps/test_line_convert.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/processed/maps/test_line_convert.shx
--------------------------------------------------------------------------------
/src/data/tests/data/raw/ma_cob_spatially_joined_streets.cpg:
--------------------------------------------------------------------------------
1 | ISO-8859-1
--------------------------------------------------------------------------------
/src/data/tests/data/raw/ma_cob_spatially_joined_streets.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/raw/ma_cob_spatially_joined_streets.dbf
--------------------------------------------------------------------------------
/src/data/tests/data/raw/ma_cob_spatially_joined_streets.prj:
--------------------------------------------------------------------------------
1 | PROJCS["WGS_1984_Web_Mercator_Auxiliary_Sphere",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mercator_Auxiliary_Sphere"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Standard_Parallel_1",0.0],PARAMETER["Auxiliary_Sphere_Type",0.0],UNIT["Meter",1.0]]
--------------------------------------------------------------------------------
/src/data/tests/data/raw/ma_cob_spatially_joined_streets.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/raw/ma_cob_spatially_joined_streets.shp
--------------------------------------------------------------------------------
/src/data/tests/data/raw/ma_cob_spatially_joined_streets.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/raw/ma_cob_spatially_joined_streets.shx
--------------------------------------------------------------------------------
/src/data/tests/data/standardized/Vision_Zero_Entry.csv:
--------------------------------------------------------------------------------
1 | X,Y,OBJECTID,GLOBALID,REQUESTID,REQUESTTYPE,REQUESTDATE,STATUS,STREETSEGID,COMMENTS,USERTYPE
2 | -71.129924,42.236677,14807,,14807,bike facilities don't exist or need improvement,2016-01-19T14:43:50.000Z,Unassigned,0,Broadway Bridge is wide & off highway ramps. Vehicles speed over. Key connection for bikes (& everyone) to get back into Southie fr much of city. W.Broadway is wide enough for bike lanes all the way across this bridge down to at least E.Broadway.,bikes
3 |
--------------------------------------------------------------------------------
/src/data/tests/data/standardized/concerns.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "id": 67658,
4 | "dateCreated": "2016-02-12T21:22:30.000Z",
5 | "status": "Unassigned",
6 | "tags": ["poorSignange", "driversIgnoreSignage"],
7 | "location": {
8 | "latitude": 42.236677,
9 | "longitude": -71.129924
10 | },
11 | "address": "685 Tremont Street, Boston",
12 | "summary": "Drivers do not stop for pedestrians in the designated crosswalks"
13 | }
14 | ]
15 |
--------------------------------------------------------------------------------
/src/data/tests/data/standardized/crashes.csv:
--------------------------------------------------------------------------------
1 | X,Y,CAD_EVENT_REL_COMMON_ID,FIRST_EVENT_SUBTYPE,XCOORD,YCOORD,CALENDAR_DATE,TIME,N_EVENTS
2 | -71.130909,42.236942,1403832,REPORTED INJURIES (P) (E) (F),772136.21,2934954.42,2016-01-01 00:00:00.000,00:56:45,1
3 |
--------------------------------------------------------------------------------
/src/data/tests/data/standardized/crashes.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "id": 1403832,
4 | "dateOccurred": "2016-01-01T00:56:45-05:00",
5 | "mode": "vehicle",
6 | "location": {
7 | "latitude": 42.236942,
8 | "longitude": -71.130909
9 | },
10 | "address": "14 Corona Street",
11 | "summary": "REPORTED INJURIES (P) (E) (F)"
12 | }
13 | ]
14 |
--------------------------------------------------------------------------------
/src/data/tests/data/test_add_map/ma_cob_small.cpg:
--------------------------------------------------------------------------------
1 | ISO-8859-1
--------------------------------------------------------------------------------
/src/data/tests/data/test_add_map/ma_cob_small.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/test_add_map/ma_cob_small.dbf
--------------------------------------------------------------------------------
/src/data/tests/data/test_add_map/ma_cob_small.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/test_add_map/ma_cob_small.shp
--------------------------------------------------------------------------------
/src/data/tests/data/test_add_map/ma_cob_small.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/test_add_map/ma_cob_small.shx
--------------------------------------------------------------------------------
/src/data/tests/data/test_add_map/osm3857.cpg:
--------------------------------------------------------------------------------
1 | ISO-8859-1
--------------------------------------------------------------------------------
/src/data/tests/data/test_add_map/osm3857.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/test_add_map/osm3857.dbf
--------------------------------------------------------------------------------
/src/data/tests/data/test_add_map/osm3857.prj:
--------------------------------------------------------------------------------
1 | PROJCS["WGS_1984_Web_Mercator_Auxiliary_Sphere",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mercator_Auxiliary_Sphere"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Standard_Parallel_1",0.0],PARAMETER["Auxiliary_Sphere_Type",0.0],UNIT["Meter",1.0]]
--------------------------------------------------------------------------------
/src/data/tests/data/test_add_map/osm3857.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/test_add_map/osm3857.shp
--------------------------------------------------------------------------------
/src/data/tests/data/test_add_map/osm3857.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/test_add_map/osm3857.shx
--------------------------------------------------------------------------------
/src/data/tests/data/test_create_segments/additional_points.json:
--------------------------------------------------------------------------------
1 | [{"feature": "parking_tickets", "date": "2016-05-17T00:00:00Z", "location": {"latitude": 42.38404209999999, "longitude": -71.1370766}, "category": "NO PARKING"},
2 | {"feature": "parking_tickets", "date": "2014-01-04T15:50:00Z", "location": {"latitude": 42.38404209999999, "longitude": -71.1370766}, "category": "METER EXPIRED"},
3 | {"feature": "traffic_volume", "date": "2014-01-04T15:50:00Z", "location": {"latitude": 42.38404209999999, "longitude": -71.1370766}, "feat_agg":"latest", "value":100},
4 | {"feature": "traffic_volume", "date": "2015-01-04T15:50:00Z", "location": {"latitude": 42.38404209999999, "longitude": -71.1370766}, "feat_agg":"latest", "value":200}]
5 |
--------------------------------------------------------------------------------
/src/data/tests/data/test_create_segments/empty_set_inter.geojson:
--------------------------------------------------------------------------------
1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.13578386374338, 42.258548279519616], [-71.13582699649501, 42.25852287054979], [-71.13587333014675, 42.25849538531848], [-71.13591916288622, 42.258460997638835]]}, "properties": {"id": "13321", "id_1": null, "id_2": null, "intersection": null, "ST_NAME": "Boundary", "ST_TYPE": "RD", "SUF_DIR": null, "CFCC": "A31", "SPEEDLIMIT": 1, "ONEWAY": "N", "FT_COST": 0.0, "TF_COST": 0.0, "TF_DIR": null, "FT_DIR": null, "Route_ID": "L120383 EB", "F_F_Class": 7, "Jurisdictn": "0", "Hwy_Dist": "6", "Hwy_Subdst": "6A", "Med_Width": 0.0, "Med_Type": 0, "Mile_Count": 1, "Num_Lanes": 2, "Opp_Lanes": 0, "Shldr_Lt_W": 0.0, "Shldr_Lt_T": 0, "Shldr_Rt_W": 0.0, "Shldr_Rt_T": 0, "Speed_Lim": 0, "Op_Dir_SL": 0, "ST_Name_1": "BLUE LEDGE DRIVE", "Fm_St_Name": "WASHINGTON STREET", "To_St_Name": "ENNEKING PARKWAY", "City": 35, "County": "M", "Operation": 2, "Struct_Cnd": 2, "Surface_Tp": 6, "Surface_Wd": 26.0, "Terrain": 1, "Toll_Road": 0, "AADT": 0, "AADT_Year": 0, "AADT_Deriv": 0, "Statn_Num": 0, "Curb": 3, "Shldr_UL_W": 0.0, "Shldr_UL_T": 0, "Lt_Sidewlk": 4, "Rt_Sidewlk": 4, "orig_id": 9913321}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.13577927225563, 42.258377648139316], [-71.13599614724784, 42.2585068664975], [-71.13603227173553, 42.25852824802771], [-71.13605920937619, 42.25854419145341]]}, "properties": {"id": "2657", "id_1": null, "id_2": null, "intersection": null, "ST_NAME": "Enneking", "ST_TYPE": "PKWY", "SUF_DIR": null, "CFCC": "A31", "SPEEDLIMIT": 30, "ONEWAY": null, "FT_COST": 0.535, "TF_COST": 0.535, "TF_DIR": null, "FT_DIR": null, "Route_ID": "L233833 EB", "F_F_Class": 7, "Jurisdictn": "0", "Hwy_Dist": "6", "Hwy_Subdst": "6A", "Med_Width": 0.0, "Med_Type": 0, "Mile_Count": 1, "Num_Lanes": 0, "Opp_Lanes": 0, "Shldr_Lt_W": 0.0, "Shldr_Lt_T": 0, "Shldr_Rt_W": 0.0, "Shldr_Rt_T": 0, "Speed_Lim": 0, "Op_Dir_SL": 0, "ST_Name_1": 
"BOLD KNOB PATH", "Fm_St_Name": "ENNEKING PARKWAY", "To_St_Name": "EAST BOUNDARY PATH", "City": 35, "County": "M", "Operation": 0, "Struct_Cnd": 0, "Surface_Tp": 0, "Surface_Wd": 0.0, "Terrain": 1, "Toll_Road": 0, "AADT": 0, "AADT_Year": 0, "AADT_Deriv": 0, "Statn_Num": 0, "Curb": 0, "Shldr_UL_W": 0.0, "Shldr_UL_T": 0, "Lt_Sidewlk": 0, "Rt_Sidewlk": 0, "orig_id": 992657}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.13591916281045, 42.25846099769571]}, "properties": {"intersection": 1}}]}
--------------------------------------------------------------------------------
/src/data/tests/data/test_create_segments/missing_int_segments.geojson:
--------------------------------------------------------------------------------
1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.10680621667301, 42.365203534606216], [-71.1068893, 42.36508589999996]]}, "properties": {"id": "626", "access": null, "bridge": null, "from": "61327145", "highway": "tertiary", "junction": null, "key": "0", "lanes": 1, "length": "40.807", "maxspeed": null, "name": "Pleasant Street", "oneway": 1, "osmid": "8615853", "ref": null, "to": "61317661", "tunnel": null, "width": 12, "hwy_type": 5, "osm_speed": 0, "signal": 0, "width_per_lane": 12, "segment_id": "8615853-61327145-61317661", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.10719479999999, 42.3648758], [-71.10716289999999, 42.36485609999997], [-71.10705704275215, 42.364790669448304]]}, "properties": {"id": "669", "access": null, "bridge": null, "from": "61318588", "highway": "residential", "junction": null, "key": "0", "lanes": 1, "length": "55.25", "maxspeed": "25 mph", "name": "Auburn Street", "oneway": 1, "osmid": "13583073", "ref": null, "to": "61327311", "tunnel": null, "width": 12, "hwy_type": 0, "osm_speed": 25, "signal": 0, "width_per_lane": 12, "segment_id": "13583073-61318588-61327311", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.10731257878744, 42.3647757307271], [-71.1072207, 42.36485379999999], [-71.10719479999999, 42.3648758]]}, "properties": {"id": "1651", "access": null, "bridge": null, "from": "61333677", "highway": "residential", "junction": null, "key": "0", "lanes": 1, "length": "116.20300000000002", "maxspeed": "25 mph", "name": "Pleasant Street", "oneway": 1, "osmid": "164024921", "ref": null, "to": "61318588", "tunnel": null, "width": 12, "hwy_type": 0, "osm_speed": 25, "signal": 0, "width_per_lane": 12, "segment_id": "164024921-61333677-61318588", "dead_end": null, "streets": null, 
"intersection": null}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1068893, 42.36508589999996], [-71.1072174, 42.36505979999999]]}, "properties": {"id": "1827", "access": null, "bridge": null, "from": "61317661", "highway": "secondary", "junction": null, "key": "0", "lanes": 4, "length": "27.112", "maxspeed": null, "name": "Western Avenue", "oneway": 1, "osmid": "138749294", "ref": null, "to": "61317663", "tunnel": null, "width": 20, "hwy_type": 1, "osm_speed": 0, "signal": 0, "width_per_lane": 5, "segment_id": "138749294-61317661-61317663", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.10671117302242, 42.365102253508525], [-71.1068893, 42.36508589999996]]}, "properties": {"id": "1828", "access": null, "bridge": null, "from": "61317667", "highway": "secondary", "junction": null, "key": "0", "lanes": 4, "length": "56.09", "maxspeed": null, "name": "Western Avenue", "oneway": 1, "osmid": "519812477", "ref": null, "to": "61317661", "tunnel": null, "width": 20, "hwy_type": 1, "osm_speed": 0, "signal": 0, "width_per_lane": 5, "segment_id": "519812477-61317667-61317661", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.10719479999999, 42.3648758], [-71.1072174, 42.36505979999999]]}, "properties": {"id": "1829", "access": null, "bridge": null, "from": "61318588", "highway": "residential", "junction": null, "key": "0", "lanes": 2, "length": "20.544", "maxspeed": "25 mph", "name": "Pleasant Street", "oneway": 0, "osmid": "13583074", "ref": null, "to": "61317663", "tunnel": null, "width": 12, "hwy_type": 0, "osm_speed": 25, "signal": 0, "width_per_lane": 6, "segment_id": "13583074-61318588-61317663", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1072174, 42.36505979999999], 
[-71.10739575202363, 42.36504456744246]]}, "properties": {"id": "1830", "access": null, "bridge": null, "from": "61317663", "highway": "secondary", "junction": null, "key": "0", "lanes": 4, "length": "75.436", "maxspeed": null, "name": "Western Avenue", "oneway": 1, "osmid": "138749294", "ref": null, "to": "61317675", "tunnel": null, "width": 20, "hwy_type": 1, "osm_speed": 0, "signal": 0, "width_per_lane": 5, "segment_id": "138749294-61317663-61317675", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.1068893, 42.36508589999996]}, "properties": {"intersection": 1}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.1072174, 42.36505979999999]}, "properties": {"intersection": 1}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.10719479999999, 42.3648758]}, "properties": {"intersection": 1}}]}
--------------------------------------------------------------------------------
/src/data/tests/data/test_create_segments/points.geojson:
--------------------------------------------------------------------------------
1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "id": "61170342", "geometry": {"type": "Point", "coordinates": [-71.138121, 42.383125]}, "properties": {"feature": "signal"}}, {"type": "Feature", "id": "61171136", "geometry": {"type": "Point", "coordinates": [-71.1161581, 42.386904]}, "properties": {"feature": "signal"}}, {"type": "Feature", "id": "61172660", "geometry": {"type": "Point", "coordinates": [-71.1377047, 42.3834466]}, "properties": {"feature": "crosswalk"}}]}
--------------------------------------------------------------------------------
/src/data/tests/data/test_create_segments/test_get_connections1.geojson:
--------------------------------------------------------------------------------
1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.1200424, 42.379453]}, "properties": {"intersection": 1}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.11993622994846, 42.379346131508484], [-71.11993139999998, 42.3793892], [-71.11991448286459, 42.37954613062321]]}, "properties": {"id": "259", "access": null, "bridge": null, "from": "61327430", "highway": "primary", "junction": null, "key": "0", "lanes": 2, "length": "253.047", "maxspeed": null, "name": "Massachusetts Avenue", "oneway": 1, "osmid": "507868287", "ref": "MA 2A", "to": "2559968287", "tunnel": null, "width": 20, "hwy_type": 2, "osm_speed": "0", "signal": 0, "width_per_lane": 10, "segment_id": "507868287-61327430-2559968287", "dead_end": null, "streets": null, "intersection": null, "orig_id": 994}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.12002868592982, 42.379585218759324], [-71.1200424, 42.379453]]}, "properties": {"id": "263", "access": null, "bridge": null, "from": "2559968299", "highway": "primary", "junction": null, "key": "0", "lanes": 2, "length": "60.21", "maxspeed": null, "name": "Massachusetts Avenue", "oneway": 1, "osmid": "249388676", "ref": "MA 2A", "to": "3306934506", "tunnel": null, "width": 20, "hwy_type": 2, "osm_speed": "0", "signal": 0, "width_per_lane": 10, "segment_id": "249388676-2559968299-3306934506", "dead_end": null, "streets": null, "intersection": null, "orig_id": 998}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1200424, 42.379453], [-71.12005019999998, 42.379381099999996], [-71.12005628884195, 42.37932078730472]]}, "properties": {"id": "1167", "access": null, "bridge": null, "from": "3306934506", "highway": "primary", "junction": null, "key": "0", "lanes": 2, "length": "195.353", "maxspeed": null, "name": "Massachusetts Avenue", "oneway": 1, "osmid": "249388676", "ref": "MA 2A", "to": "61321358", "tunnel": 
null, "width": 20, "hwy_type": 2, "osm_speed": "0", "signal": 0, "width_per_lane": 10, "segment_id": "249388676-3306934506-61321358", "dead_end": null, "streets": null, "intersection": null, "orig_id": 9913}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1200424, 42.379453], [-71.1201581, 42.3794316], [-71.12018899999998, 42.3794069], [-71.12019574286946, 42.37938414519778]]}, "properties": {"id": "1168", "access": null, "bridge": null, "from": "3306934506", "highway": "residential", "junction": null, "key": "0", "lanes": 1, "length": "187.493", "maxspeed": "25 mph", "name": "Massachusetts Avenue Branch", "oneway": 1, "osmid": "323920752", "ref": null, "to": "61324087", "tunnel": null, "width": 20, "hwy_type": 0, "osm_speed": "25", "signal": 0, "width_per_lane": 20, "segment_id": "323920752-3306934506-61324087", "dead_end": null, "streets": null, "intersection": null, "orig_id": 9914}}]}
--------------------------------------------------------------------------------
/src/data/tests/data/test_create_segments/unconnected.geojson:
--------------------------------------------------------------------------------
1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.1337941, 42.371415999999996]}, "properties": {"intersection": 1}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.1337188, 42.37130749999999]}, "properties": {"intersection": 1}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1337941, 42.371415999999996], [-71.1339089, 42.37140499999999], [-71.1339730821058, 42.37140575955156]]}, "properties": {"id": "70", "access": null, "bridge": null, "from": "61326095", "highway": "trunk", "junction": null, "key": "0", "lanes": 2, "length": "110.983", "maxspeed": null, "name": "Gerry's Landing Road", "oneway": 1, "osmid": "42161639", "ref": null, "to": "61325507", "tunnel": null, "width": 26, "hwy_type": 7, "osm_speed": 0, "signal": 0, "width_per_lane": 13, "segment_id": "42161639-61326095-61325507", "dead_end": null, "streets": null, "intersection": null, "orig_id": 9970}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1337941, 42.371415999999996], [-71.13396706786295, 42.37138053992674]]}, "properties": {"id": "212", "access": null, "bridge": null, "from": "61326095", "highway": "secondary_link", "junction": null, "key": "0", "lanes": 3, "length": "76.199", "maxspeed": null, "name": null, "oneway": 1, "osmid": "41743733", "ref": null, "to": "61316733", "tunnel": null, "width": 37, "hwy_type": 4, "osm_speed": 0, "signal": 0, "width_per_lane": 12, "segment_id": "41743733-61326095-61316733", "dead_end": null, "streets": null, "intersection": null, "orig_id": 99212}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.13382560991367, 42.371200968244224], [-71.1337188, 42.37130749999999]]}, "properties": {"id": "199", "access": null, "bridge": null, "from": "61326039", "highway": "secondary_link", "junction": null, "key": "0", "lanes": 2, "length": "144.49499999999998", "maxspeed": null, "name": null, 
"oneway": 1, "osmid": "93128876", "ref": null, "to": "61283383", "tunnel": null, "width": 37, "hwy_type": 4, "osm_speed": 0, "signal": 0, "width_per_lane": 18, "segment_id": "93128876-61326039-61283383", "dead_end": null, "streets": null, "intersection": null, "orig_id": 99199}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.13362851835943, 42.371467428137386], [-71.1337033, 42.3714401], [-71.1337941, 42.371415999999996]]}, "properties": {"id": "213", "access": null, "bridge": "yes", "from": "4223586646", "highway": "trunk", "junction": null, "key": "0", "lanes": 2, "length": "27.222", "maxspeed": null, "name": "Gerrys Landing Road/Eliot Bridge", "oneway": 1, "osmid": "[42161642, 42161639]", "ref": null, "to": "61326095", "tunnel": null, "width": 0, "hwy_type": 7, "osm_speed": 0, "signal": 0, "width_per_lane": 0, "segment_id": "[42161642, 42161639]-4223586646-61326095", "dead_end": null, "streets": null, "intersection": null, "orig_id": 99213}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1337188, 42.37130749999999], [-71.133626, 42.37135879999998], [-71.13357330574013, 42.371385118021024]]}, "properties": {"id": "1198", "access": null, "bridge": "yes", "from": "61283383", "highway": "trunk", "junction": null, "key": "0", "lanes": 2, "length": "27.810000000000002", "maxspeed": null, "name": "Eliot Bridge/Gerrys Landing Road", "oneway": 1, "osmid": "[41743732, 353942598]", "ref": null, "to": "4223586645", "tunnel": null, "width": 0, "hwy_type": 7, "osm_speed": 0, "signal": 0, "width_per_lane": 0, "segment_id": "[41743732, 353942598]-61283383-4223586645", "dead_end": null, "streets": null, "intersection": null, "orig_id": 991198}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.13385555213752, 42.371221558865074], [-71.1338096, 42.37125439999999], [-71.1337188, 42.37130749999999]]}, "properties": {"id": "1551", "access": null, "bridge": null, "from": "61316733", "highway": 
"trunk", "junction": null, "key": "0", "lanes": 2, "length": "80.116", "maxspeed": null, "name": "Gerry's Landing Road", "oneway": 1, "osmid": "353942598", "ref": null, "to": "61283383", "tunnel": null, "width": 26, "hwy_type": 7, "osm_speed": 0, "signal": 0, "width_per_lane": 13, "segment_id": "353942598-61316733-61283383", "dead_end": null, "streets": null, "intersection": null, "orig_id": 991551}}]}
--------------------------------------------------------------------------------
/src/data/tests/data/test_get_roads_and_inters.geojson:
--------------------------------------------------------------------------------
1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "id": "442", "geometry": {"type": "LineString", "coordinates": [[-71.10053800000001, 42.36912999999998], [-71.1006832, 42.3691875], [-71.10092959999999, 42.369286499999966], [-71.1010043, 42.36931649999998]]}, "properties": {"id": "442", "access": null, "bridge": null, "from": "61317355", "highway": "tertiary", "junction": null, "key": "0", "lanes": 3, "length": "43.561", "maxspeed": null, "name": "Broadway", "oneway": 0, "osmid": "[426455459, 302156855]", "ref": null, "to": "61326778", "tunnel": null, "width": 24, "hwy_type": 5, "osm_speed": "0", "signal": 0, "width_per_lane": 8, "segment_id": "[426455459, 302156855]-61317355-61326778", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "id": "443", "geometry": {"type": "LineString", "coordinates": [[-71.101981, 42.36970099999999], [-71.1013635, 42.369456799999966], [-71.1013231, 42.369440799999985], [-71.1011784, 42.3693844], [-71.101119, 42.369361199999986], [-71.1010043, 42.36931649999998]]}, "properties": {"id": "443", "access": null, "bridge": null, "from": "61321175", "highway": "tertiary", "junction": null, "key": "0", "lanes": 3, "length": "90.918", "maxspeed": null, "name": "Broadway", "oneway": 0, "osmid": "[426455462, 33720646]", "ref": null, "to": "61326778", "tunnel": null, "width": 24, "hwy_type": 5, "osm_speed": "0", "signal": 0, "width_per_lane": 8, "segment_id": "[426455462, 33720646]-61321175-61326778", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "id": "444", "geometry": {"type": "LineString", "coordinates": [[-71.1017379, 42.36829510000001], [-71.1016837, 42.368371200000006], [-71.1014763, 42.368662400000005], [-71.1013023, 42.36890659999999], [-71.1010657, 42.36923209999999], [-71.1010043, 42.36931649999998]]}, "properties": {"id": "444", "access": null, "bridge": null, "from": "61317367", "highway": "secondary", "junction": null, "key": "0", "lanes": 3, "length": 
"128.576", "maxspeed": null, "name": "Prospect Street", "oneway": 0, "osmid": "[426455475, 426455483, 426455486]", "ref": null, "to": "61326778", "tunnel": null, "width": 15, "hwy_type": 1, "osm_speed": "0", "signal": 0, "width_per_lane": 5, "segment_id": "[426455475, 426455483, 426455486]-61317367-61326778", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "id": "445", "geometry": {"type": "LineString", "coordinates": [[-71.100067, 42.370669999999954], [-71.100104, 42.3706169], [-71.1002278, 42.3704355], [-71.1003334, 42.3702808], [-71.1003786, 42.37021659999997], [-71.1005678, 42.3699383], [-71.1007327, 42.3696959], [-71.10094689999998, 42.36939659999999], [-71.1010043, 42.36931649999998]]}, "properties": {"id": "445", "access": null, "bridge": null, "from": "61321196", "highway": "secondary", "junction": null, "key": "0", "lanes": 3, "length": "169.066", "maxspeed": null, "name": "Prospect Street", "oneway": 0, "osmid": "[426455489, 302156882]", "ref": null, "to": "61326778", "tunnel": null, "width": 15, "hwy_type": 1, "osm_speed": "0", "signal": 0, "width_per_lane": 5, "segment_id": "[426455489, 302156882]-61321196-61326778", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.1010043, 42.36931649999998]}, "properties": {"id": null, "access": null, "bridge": null, "from": null, "highway": "traffic_signals", "junction": null, "key": null, "lanes": null, "length": null, "maxspeed": null, "name": null, "oneway": null, "osmid": "61326778", "ref": null, "to": null, "tunnel": null, "width": null, "hwy_type": null, "osm_speed": null, "signal": 1, "width_per_lane": null, "segment_id": null, "dead_end": null, "streets": "Broadway, Prospect Street", "intersection": 1}}]}
--------------------------------------------------------------------------------
/src/data/tests/data/viz_preds_tests/crashes_rollup.geojson:
--------------------------------------------------------------------------------
1 | {
2 | "type": "FeatureCollection",
3 | "features": [
4 | { "type": "Feature", "properties": { "total_crashes": 2, "crash_dates": "2015-04-03T09:10:00-04:00,2015-09-18T08:45:00-04:00" }, "geometry": { "type": "Point", "coordinates": [ -71.107141, 42.353438 ] } },
5 | { "type": "Feature", "properties": { "total_crashes": 2, "crash_dates": "2015-06-25T17:00:00-04:00,2016-06-01T20:30:00-04:00" }, "geometry": { "type": "Point", "coordinates": [ -71.10526852, 42.35351 ] } }
6 | ]}
--------------------------------------------------------------------------------
/src/data/tests/data/viz_preds_tests/crashes_rollup_pedestrian.geojson:
--------------------------------------------------------------------------------
1 | {
2 | "type": "FeatureCollection",
3 | "features": [
4 | { "type": "Feature", "properties": { "total_crashes": 1, "crash_dates": "2015-04-03T09:10:00-04:00" }, "geometry": { "type": "Point", "coordinates": [ -71.107141, 42.353438 ] } }
5 | ]}
--------------------------------------------------------------------------------
/src/data/tests/data/viz_preds_tests/single_prediction.json:
--------------------------------------------------------------------------------
1 | {
2 | "0": {
3 | "segment_id": "001",
4 | "year": 2017,
5 | "week": 51,
6 | "prediction": 0.1223508492,
7 | "crash": 0,
8 | "pre_week": 0,
9 | "pre_month": 0,
10 | "pre_quarter": 0,
11 | "avg_week": 0.0065359477,
12 | "AADT": 22222,
13 | "SPEEDLIMIT": 20,
14 | "Struct_Cnd": 2,
15 | "Surface_Tp": 6,
16 | "F_F_Class": 3,
17 | "visionzero": 0,
18 | "id": "001",
19 | "speed_coalesced": 20.3420443264,
20 | "volume_coalesced": 3941.1356224647,
21 | "near_id": null,
22 | "Conflict": 0,
23 | "SPEEDLIMIT0": 0,
24 | "SPEEDLIMIT1": 0,
25 | "SPEEDLIMIT5": 0,
26 | "SPEEDLIMIT10": 0,
27 | "SPEEDLIMIT15": 0,
28 | "SPEEDLIMIT20": 1,
29 | "SPEEDLIMIT25": 0,
30 | "SPEEDLIMIT30": 0,
31 | "SPEEDLIMIT35": 0,
32 | "SPEEDLIMIT45": 0,
33 | "SPEEDLIMIT55": 0,
34 | "SPEEDLIMIT65": 0,
35 | "Struct_Cnd0": 0,
36 | "Struct_Cnd1": 0,
37 | "Struct_Cnd2": 1,
38 | "Struct_Cnd3": 0,
39 | "Struct_Cnd4": 0,
40 | "Surface_Tp0": 0,
41 | "Surface_Tp1": 0,
42 | "Surface_Tp2": 0,
43 | "Surface_Tp3": 0,
44 | "Surface_Tp4": 0,
45 | "Surface_Tp5": 0,
46 | "Surface_Tp6": 1,
47 | "Surface_Tp7": 0,
48 | "Surface_Tp8": 0,
49 | "F_F_Class0": 0,
50 | "F_F_Class1": 0,
51 | "F_F_Class2": 0,
52 | "F_F_Class3": 1,
53 | "F_F_Class4": 0,
54 | "F_F_Class5": 0,
55 | "F_F_Class7": 0,
56 | "log_AADT": 10.0088830676,
57 | "intersection": 0
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/src/data/tests/data/viz_preds_tests/single_prediction_viz.geojson:
--------------------------------------------------------------------------------
1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.06858488357565, 42.35165031556542], [-71.06876751642436, 42.35161688446769]]}, "properties": {"segment_id": "001", "crash": 0, "prediction": 0.1223508492, "SPEEDLIMIT": 20, "segment": {"id": "001", "display_name": "Park Plaza between Columbus Avenue and Hadassah Way", "center_x": -71.06867620000001, "center_y": 42.35163360001877}}}]}
2 |
--------------------------------------------------------------------------------
/src/data/tests/data/viz_preds_tests/single_segment.geojson:
--------------------------------------------------------------------------------
1 | {
2 | "type": "FeatureCollection",
3 | "features": [
4 | {
5 | "type": "Feature",
6 | "id": "001",
7 | "geometry": {
8 | "type": "LineString",
9 | "coordinates": [
10 | [
11 | -71.06858488357565,
12 | 42.35165031556542
13 | ],
14 | [
15 | -71.06876751642436,
16 | 42.35161688446769
17 | ]
18 | ]
19 | },
20 | "properties": {
21 | "id": "001",
22 | "access": null,
23 | "area": null,
24 | "bridge": null,
25 | "from": "61341696",
26 | "highway": "secondary",
27 | "junction": null,
28 | "key": "0",
29 | "lanes": 2,
30 | "length": "44.954",
31 | "maxspeed": null,
32 | "name": "Park Plaza",
33 | "oneway": 1,
34 | "osmid": "8652528",
35 | "ref": null,
36 | "to": "61341267",
37 | "tunnel": null,
38 | "width": 30,
39 | "hwy_type": 1,
40 | "osm_speed": "0",
41 | "signal": 0,
42 | "width_per_lane": 15,
43 | "segment_id": "8652528-61341696-61341267",
44 | "dead_end": null,
45 | "streets": null,
46 | "intersection": null,
47 | "orig_id": 991,
48 | "inter": 0,
49 | "display_name": "Park Plaza between Columbus Avenue and Hadassah Way",
50 | "center_y": 42.35163360001877,
51 | "center_x": -71.06867620000001
52 | }
53 | }
54 | ]
55 | }
56 |
--------------------------------------------------------------------------------
/src/data/tests/test_add_map.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import shutil
4 |
5 |
def test_add_map(tmpdir):
    """
    Run the map pipeline on small fixture maps and check that every step
    exits successfully.

    The steps are run as subprocesses (`python -m data.<module>`), so the
    test must be started from a directory where the `data` package is
    importable. `check_call` raises CalledProcessError on a non-zero exit
    status, which is what fails the test.
    """
    # Copy test data into temp directory in appropriate place
    base_path = os.path.dirname(
        os.path.abspath(__file__)) + '/data/'
    orig_path = base_path + 'test_add_map'
    path = tmpdir.strpath + '/data'

    data_path = os.path.join(path, "processed/maps")
    shutil.copytree(orig_path, data_path)
    features_config = os.path.join(base_path, 'config_features.yml')

    # To test the mapping, use much smaller versions of the osm
    # and osm3857 files, as well as much smaller versions of boston data

    # Then as in the standard workflow, extract_intersections
    # and create_segments need to be run (in the test directory)
    # and then the mapping can be run and tested

    # NOTE: the argument lists are passed without shell=True. With
    # shell=True and a list, POSIX runs only the first item ('python') as
    # the command and hands the rest to the shell, so the module would
    # never be executed.

    # Extract and create on osm data
    subprocess.check_call([
        'python',
        '-m',
        'data.extract_intersections',
        os.path.join(data_path, 'osm3857.shp'),
        '-d',
        path
    ])

    subprocess.check_call([
        'python',
        '-m',
        'data.create_segments',
        '-d',
        path,
        '-r',
        os.path.join(data_path, 'elements.geojson'),
        '-c',
        features_config
    ])

    # Extract and create on supplemental map
    subprocess.check_call([
        'python',
        '-m',
        'data.extract_intersections',
        os.path.join(data_path, 'ma_cob_small.shp'),
        '-d',
        path,
        '-n',
        'boston'
    ])

    subprocess.check_call([
        'python',
        '-m',
        'data.create_segments',
        '-d',
        path,
        '-r',
        os.path.join(data_path, 'boston/elements.geojson'),
        '-n',
        'boston',
        '-c',
        features_config
    ])

    # Above was all set up, now the testing part
    # and add features
    subprocess.check_call([
        'python',
        '-m',
        'data.add_map',
        path,
        'boston',
    ])
82 |
--------------------------------------------------------------------------------
/src/data/tests/test_add_waze_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import geojson
4 | from .. import add_waze_data
5 |
# Absolute path of the directory containing this test module; the tests
# below build their fixture paths from it
TEST_FP = os.path.dirname(os.path.abspath(__file__))
7 |
8 |
def test_make_map(tmpdir):
    """
    make_map should write one feature to waze.geojson for every 'jam'
    event found in the waze fixture file.
    """
    waze_file = os.path.join(TEST_FP, 'data', 'test_waze', 'test_waze.json')

    # Count the jam events in the raw fixture
    with open(waze_file) as f:
        events = geojson.load(f)
    jam_events = [event for event in events if event['eventType'] == 'jam']

    add_waze_data.make_map(waze_file, tmpdir.strpath)

    # Read back in the resulting map
    with open(os.path.join(tmpdir.strpath, 'waze.geojson')) as f:
        waze_map = geojson.load(f)

    # The number of jam events in the original json file should
    # equal the number of features in the resulting geojson map
    assert len(jam_events) == len(waze_map['features'])
25 |
26 |
def test_map_segments(tmpdir):
    """
    Run map_segments on the waze fixture against a copy of the
    osm_elements fixture, then check jams.geojson and the
    osm_elements.geojson read back from the temp directory.
    """
    fixture_dir = os.path.join(TEST_FP, 'data', 'test_waze')
    maps_dir = os.path.join(tmpdir.strpath, 'processed', 'maps')
    elements_file = os.path.join(maps_dir, 'osm_elements.geojson')

    # Copy test data into temp directory
    os.makedirs(maps_dir)
    shutil.copyfile(
        os.path.join(fixture_dir, 'osm_elements.geojson'),
        elements_file
    )

    add_waze_data.map_segments(
        tmpdir.strpath,
        os.path.join(fixture_dir, 'test_waze.json')
    )

    # Read back in the jams information
    with open(os.path.join(maps_dir, 'jams.geojson')) as f:
        jams = geojson.load(f)
    # Test that the number of jam segments is consistent
    # This is not the number of jams total, since jams can
    # encompass more than one segment from osm_elements
    assert len(jams['features']) == 22

    # Read back in the osm_elements, make sure number of elements
    # with a jam percentage matches the number of jam segments
    with open(elements_file) as f:
        osm_items = geojson.load(f)
    features = osm_items['features']

    jammed_lines = [
        feature for feature in features
        if feature['geometry']['type'] == 'LineString'
        and feature['properties']['jam_percent'] > 0
    ]
    assert len(jammed_lines) == 22

    # Test that the points in the file still exist
    # after modifying the linestrings
    assert len(features) == 90

    # Test the average level of delay is accurate on a segment
    matching_segments = [
        feature for feature in features
        if feature['properties']['segment_id']
        == '426492374-61330572-5720026211'
    ]
    assert matching_segments[0]['properties']['avg_jam_level'] == 2

    # Test that alerts get added
    segments_with_alert = [
        feature for feature in features
        if 'alert_JAM' in feature['properties']
    ]
    assert segments_with_alert[0]['properties']['alert_JAM'] == 1
74 |
--------------------------------------------------------------------------------
/src/data/tests/test_all.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import json
4 | import shutil
5 |
6 |
def test_all(tmpdir):
    """
    Run extract_intersections, create_segments and join_segments_crash as
    subprocesses on a copy of the test data, then check the first record of
    the resulting crash_joined.json.

    The steps are invoked as `python -m data.<module>`, so the test must be
    started from a directory where the `data` package is importable.
    """
    # Copy test data into temp directory
    orig_path = os.path.dirname(
        os.path.abspath(__file__)) + '/data/'
    path = tmpdir.strpath + '/data'
    shutil.copytree(orig_path, path)
    filename = path + '/raw/ma_cob_spatially_joined_streets.shp'

    subprocess.check_call([
        'python',
        '-m',
        'data.extract_intersections',
        filename,
        '-d',
        path
    ])

    subprocess.check_call([
        'python',
        '-m',
        'data.create_segments',
        '-d',
        path,
        '-r',
        path + '/processed/maps/elements.geojson',
        '-c',
        path + '/config_features.yml'
    ])

    subprocess.check_call([
        'python',
        '-m',
        'data.join_segments_crash',
        '-d',
        path,
        '-c',
        path + '/config_features.yml'
    ])

    # Use a context manager so the file handle is closed deterministically
    with open(path + '/processed/crash_joined.json') as f:
        data = json.load(f)
    # TODO : previously 2, now 4, this may be because of ordering issues with update
    assert data[0]['near_id'] == 4
50 |
51 |
52 |
--------------------------------------------------------------------------------
/src/data/tests/test_config.py:
--------------------------------------------------------------------------------
1 | import os
2 | import ruamel.yaml
3 | import data.config
4 |
5 |
def write_to_file(filename, d):
    """
    Dump `d` as YAML into `filename` using ruamel's round-trip dumper,
    overwriting the file if it already exists.
    """
    with open(filename, "w") as yaml_file:
        ruamel.yaml.round_trip_dump(d, yaml_file)
9 |
10 |
def test_get_feature_list(tmpdir):
    """
    Check the continuous/categorical/all feature lists that Configuration
    exposes for several config shapes: openstreetmap features only, with
    waze features added, with an additional map plus a speed limit, and
    with a `data_source` entry.
    """
    yml_file = os.path.join(tmpdir, 'test.yml')

    config_dict = {
        'city': 'Boston, Massachusetts, USA',
        'name': 'boston',
        'city_latitude': 42.3600825,
        'city_longitude': -71.0588801,
        'city_radius': 15,
        'timezone': 'America/New_York',
        'crashes_files': {'test': {}},
        'openstreetmap_features': {
            'categorical': {
                'width': 'Width',
                'cycleway_type': 'Bike lane',
                'signal': 'Signal',
                'oneway': 'One Way',
                'lanes': 'Number of lanes'
            },
            'continuous': {
                'width_per_lane': 'Average width per lane'
            }
        },
    }

    def reload_config():
        # Write the current state of config_dict and parse it again
        write_to_file(yml_file, config_dict)
        return data.config.Configuration(yml_file)

    # openstreetmap features only
    config = reload_config()
    assert config.continuous_features == ['width_per_lane']
    assert config.categorical_features == [
        'width', 'cycleway_type', 'signal', 'oneway', 'lanes', 'osm_speed']
    assert set(config.features) == {
        'width', 'cycleway_type', 'signal',
        'oneway', 'lanes', 'width_per_lane',
        'osm_speed'
    }

    # add waze features
    config_dict['waze_features'] = {
        'categorical': {'jam': 'Existence of a jam'},
        'continuous': {'jam_percent': 'Percent of time there was a jam'}
    }
    config = reload_config()

    assert config.continuous_features == ['width_per_lane', 'jam_percent']
    assert config.categorical_features == [
        'width', 'cycleway_type', 'signal',
        'oneway', 'lanes', 'jam', 'osm_speed']
    assert set(config.features) == {
        'width_per_lane', 'jam_percent',
        'width', 'cycleway_type', 'signal',
        'oneway', 'lanes', 'jam', 'osm_speed'}

    # additional map features and a speed limit, no osm or waze features
    config_dict['waze_features'] = {}
    config_dict['openstreetmap_features'] = {}
    config_dict['additional_map_features'] = {
        'extra_map': 'test',
        'continuous': {'AADT': 'test name'},
        'categorical': {
            'Struct_Cnd': 'test name3',
            'Surface_Tp': 'test name4',
            'F_F_Class': 'test name5'
        }
    }
    config_dict['speed_limit'] = 'SPEEDLIMIT'
    config = reload_config()

    assert set(config.categorical_features) == {
        'SPEEDLIMIT', 'Struct_Cnd', 'Surface_Tp', 'F_F_Class'}
    assert config.continuous_features == ['AADT']
    assert set(config.features) == {
        'SPEEDLIMIT', 'Struct_Cnd', 'Surface_Tp', 'F_F_Class', 'AADT'}

    # data_source features: one categorical, one continuous, and one with
    # no feat_type, which is expected among the continuous features
    config_dict['data_source'] = [
        {'filename': 'test_multi',
         'feats': [
             {'name': 'cat_test',
              'feat_type': 'categorical'},
             {'name': 'cont_test',
              'feat_type': 'continuous'},
             {'name': 'default_test'},
         ]}]
    config = reload_config()

    for feature_name in ('cont_test', 'default_test'):
        assert feature_name in config.continuous_features
    assert 'cat_test' in config.categorical_features
99 |
--------------------------------------------------------------------------------
/src/data/tests/test_extract_intersections.py:
--------------------------------------------------------------------------------
1 | from shapely.geometry import Point, LineString
2 | from .. import extract_intersections
3 |
4 |
def test_generate_intersections():
    """
    generate_intersections should report the two crossing points of the
    line with id 3 (with lines 1 and 2) and nothing for line 0.
    """
    line_0 = LineString([Point(-1, -1), Point(0, 0)])
    line_1 = LineString([Point(1, 0), Point(3, 1)])
    line_2 = LineString([Point(0, 5), Point(3, 5)])
    line_3 = LineString([Point(2, -1), Point(2, 10)])
    lines = [(0, line_0), (1, line_1), (2, line_2), (3, line_3)]

    expected = [
        (Point(2.0, 0.5), {'id_1': 1, 'id_2': 3}),
        (Point(2.0, 5.0), {'id_1': 2, 'id_2': 3})
    ]

    result = extract_intersections.generate_intersections(lines)

    assert result == expected
30 |
31 |
--------------------------------------------------------------------------------
/src/data/tests/test_initialize_city.py:
--------------------------------------------------------------------------------
1 | import os
2 | import initialize_city
3 |
# Absolute path of the directory containing this test module; used to
# locate the expected config file under data/
TEST_FP = os.path.dirname(os.path.abspath(__file__))
5 |
6 |
def mockreturn(address):
    """
    Stand-in for initialize_city.geocode_address: ignores `address` and
    always returns the same fixed 4-tuple for Brisbane.
    """
    brisbane = ("Brisbane, Australia", -27.4697707, 153.0251235, 'S')
    return brisbane
9 |
10 |
def test_initialize_city_brisbane_no_supplemental(tmpdir, monkeypatch):
    """
    A config generated for Brisbane with no supplemental files should match
    the stored config_brisbane_no_supplemental.yml fixture exactly.
    """
    # Replace geocoding with the fixed mock result
    monkeypatch.setattr(initialize_city, 'geocode_address', mockreturn)

    generated_path = tmpdir.join('/test_config_brisbane_no_supplemental.yml')
    expected_path = os.path.join(
        TEST_FP, 'data', 'config_brisbane_no_supplemental.yml')

    # Generate a test config for Brisbane
    initialize_city.make_config_file(
        generated_path,
        'Brisbane, Australia',
        'Australia/Brisbane',
        'brisbane',
        'test_crashes.csv',
        False
    )

    # check that the file contents generated is identical to a pre-built string
    with open(generated_path, 'r') as generated_file:
        test_file_contents = generated_file.read()
    with open(expected_path, 'r') as expected_file:
        expected_file_contents = expected_file.read()
    print(tmpdir)
    assert test_file_contents == expected_file_contents
35 |
36 |
def test_supplemental_arg_changes_content_of_config_file(tmpdir, monkeypatch):
    """
    Passing supplemental files to make_config_file should produce a config
    whose contents differ from one generated without them.

    The config is generated twice with identical arguments except for the
    supplemental argument, and the two file contents are compared.
    """
    # Replace geocoding with the fixed mock result
    monkeypatch.setattr(initialize_city, 'geocode_address', mockreturn)

    def generate_config(filename, supplemental):
        # Write a Brisbane config into tmpdir and return its contents
        config_path = tmpdir.join(filename)
        initialize_city.make_config_file(
            config_path,
            'Brisbane, Australia',
            'Australia/Brisbane',
            'brisbane',
            'test_crashes.csv',
            supplemental
        )
        with open(config_path, 'r') as config_file:
            return config_file.read()

    baseline_contents = generate_config(
        '/test_config_brisbane_no_supplemental.yml', False)
    supplemental_contents = generate_config(
        '/test_config_brisbane_supplemental.yml',
        ['parking_tickets_dummy_file_1.csv'])

    # Comparing the supplemental file against itself would always pass,
    # so compare against the config generated without supplemental files
    assert supplemental_contents != baseline_contents
59 |
60 |
61 |
--------------------------------------------------------------------------------
/src/data/tests/test_join_segments_crash.py:
--------------------------------------------------------------------------------
1 | import geopandas as gpd
2 | from shapely.geometry import Point
3 | from pandas.testing import assert_frame_equal
4 | from .. import join_segments_crash
5 |
6 |
def test_make_rollup():
    """
    Tests total number of crashes per crash location is correctly calculated and
    list of unique crash dates per location is correctly generated, both for
    all crashes and for each split column (pedestrian, bike, vehicle)
    """
    standardized_crashes = [{
        "id": 1,
        "dateOccurred": "2015-01-01T00:45:00-05:00",
        "location": {
            "latitude": 42.365,
            "longitude": -71.106
        },
        "address": "GREEN ST & PLEASANT ST",
        "vehicle": 1
    }, {
        "id": 1,
        "dateOccurred": "2015-04-15T00:45:00-05:00",
        "location": {
            "latitude": 42.365,
            "longitude": -71.106
        },
        "address": "GREEN ST & PLEASANT ST",
        "pedestrian": 1
    }, {
        "id": 1,
        "dateOccurred": "2015-10-20T00:45:00-05:00",
        "location": {
            "latitude": 42.365,
            "longitude": -71.106
        },
        "address": "GREEN ST & PLEASANT ST",
        "vehicle": 1
    }, {
        "id": 2,
        "dateOccurred": "2015-01-01T01:12:00-05:00",
        "location": {
            "latitude": 42.361,
            "longitude": -71.097
        },
        "address": "LANDSDOWNE ST & MASSACHUSETTS AVE",
        "bike": 1
    }, {
        "id": 3,
        "dateOccurred": "2015-01-01T01:54:00-05:00",
        "location": {
            "latitude": 42.396,
            "longitude": -71.127
        },
        "address": "LOCKE ST & SHEA RD",
        "bike": 1
    }, {
        "id": 3,
        "dateOccurred": "2015-01-01T01:54:00-05:00",
        "location": {
            "latitude": 42.396,
            "longitude": -71.127
        },
        "address": "LOCKE ST & SHEA RD",
        "vehicle": 1
    }]
    expected_rollup_total = gpd.GeoDataFrame()
    expected_rollup_total["coordinates"] = gpd.GeoSeries([
        Point(-71.106, 42.365),
        Point(-71.097, 42.361),
        Point(-71.127, 42.396)])
    expected_rollup_total["total_crashes"] = [3, 1, 2]
    expected_rollup_total["crash_dates"] = [
        "2015-01-01T00:45:00-05:00,2015-04-15T00:45:00-05:00,2015-10-20T00:45:00-05:00",
        "2015-01-01T01:12:00-05:00",
        "2015-01-01T01:54:00-05:00"
    ]

    expected_rollup_pedestrian = gpd.GeoDataFrame()
    expected_rollup_pedestrian["coordinates"] = gpd.GeoSeries([
        Point(-71.106, 42.365)
    ])
    expected_rollup_pedestrian["total_crashes"] = [1]
    expected_rollup_pedestrian["crash_dates"] = [
        "2015-04-15T00:45:00-05:00"
    ]

    expected_rollup_bike = gpd.GeoDataFrame()
    expected_rollup_bike["coordinates"] = gpd.GeoSeries([
        Point(-71.097, 42.361),
        Point(-71.127, 42.396)
    ])
    expected_rollup_bike["total_crashes"] = [1, 1]
    expected_rollup_bike["crash_dates"] = [
        "2015-01-01T01:12:00-05:00",
        "2015-01-01T01:54:00-05:00"
    ]

    # Built with a GeoSeries like the other expected frames so the
    # coordinates column is constructed the same way in every comparison
    expected_rollup_vehicle = gpd.GeoDataFrame()
    expected_rollup_vehicle["coordinates"] = gpd.GeoSeries([
        Point(-71.106, 42.365),
        Point(-71.127, 42.396)
    ])
    expected_rollup_vehicle["total_crashes"] = [2, 1]
    expected_rollup_vehicle["crash_dates"] = [
        "2015-01-01T00:45:00-05:00,2015-10-20T00:45:00-05:00",
        "2015-01-01T01:54:00-05:00"
    ]
    split_columns = ['pedestrian', 'bike', 'vehicle']

    results = join_segments_crash.make_crash_rollup(standardized_crashes, split_columns)

    assert_frame_equal(results['all'], expected_rollup_total)
    assert_frame_equal(results['pedestrian'], expected_rollup_pedestrian)
    assert_frame_equal(results['bike'], expected_rollup_bike)
    # The vehicle expectation was previously built but never checked
    assert_frame_equal(results['vehicle'], expected_rollup_vehicle)
116 |
--------------------------------------------------------------------------------
/src/data/tests/test_make_preds_viz.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | from pandas.testing import assert_frame_equal
4 | import ruamel.yaml
5 | import shutil
6 | import data.config
7 | from .. import make_preds_viz
8 |
# Absolute path of the data/viz_preds_tests directory next to this test
# module; the prediction and segment fixtures used below are read from it
DATA_FP = os.path.join(
    os.path.dirname(
        os.path.abspath(__file__)),
    "data",
    "viz_preds_tests",
)
15 |
16 |
def test_make_preds_viz_boston(tmpdir):
    """
    Combine the single-prediction and single-segment fixtures, write the
    result as geojson, and check the written file parses to the same frame
    as the stored single_prediction_viz.geojson fixture.
    """
    predictions_file = os.path.join(DATA_FP, "single_prediction.json")
    segments_file = os.path.join(DATA_FP, "single_segment.geojson")
    expected_file = os.path.join(DATA_FP, "single_prediction_viz.geojson")
    output_file = os.path.join(tmpdir.strpath, "preds_viz.geojson")

    # load the test predictions & segments
    predictions = pd.read_json(
        predictions_file, orient="index", typ="series", dtype=False)
    segments = pd.read_json(segments_file)["features"]

    # combine the two and write to file
    combined = make_preds_viz.combine_predictions_and_segments(
        predictions, segments)
    make_preds_viz.write_preds_as_geojson(combined, output_file)

    # compare the new file's contents to test data
    written = pd.read_json(output_file)
    expected = pd.read_json(expected_file)

    assert_frame_equal(written, expected)
45 |
46 |
47 |
def test_write_all_preds(tmpdir):
    """
    With no split columns configured, write_all_preds should create
    processed/preds_viz.geojson from seg_with_predicted.json and
    maps/inter_and_non_int.geojson.
    """
    # Minimal city config, written to disk and parsed back
    config_filename = os.path.join(tmpdir, 'test.yml')
    with open(config_filename, "w") as f:
        ruamel.yaml.round_trip_dump({
            'name': 'cambridge',
            'crashes_files': {
                'file1': {}
            },
            'city_latitude': 42.3600825,
            'city_longitude': -71.0588801,
            'city_radius': 15,
            'city': "Cambridge, Massachusetts, USA",
            'timezone': "America/New_York",
        }, f)
    config = data.config.Configuration(config_filename)

    processed_dir = os.path.join(tmpdir, 'processed')
    maps_dir = os.path.join(tmpdir, 'processed', 'maps')
    os.makedirs(processed_dir)
    os.makedirs(maps_dir)

    # Put the fixtures where write_all_preds is expected to read them
    fixture_targets = [
        ('single_prediction.json',
         os.path.join(tmpdir, 'processed', 'seg_with_predicted.json')),
        ('single_segment.geojson',
         os.path.join(tmpdir, 'processed', 'maps',
                      'inter_and_non_int.geojson')),
    ]
    for fixture_name, target in fixture_targets:
        shutil.copy(os.path.join(DATA_FP, fixture_name), target)

    make_preds_viz.write_all_preds(tmpdir, config)

    output_file = os.path.join(tmpdir, 'processed', 'preds_viz.geojson')
    assert os.path.exists(output_file)
93 |
94 |
def test_write_all_preds_split_column(tmpdir):
    """
    With a 'pedestrian' split column configured, write_all_preds should
    create processed/preds_viz_pedestrian.geojson from
    seg_with_predicted_pedestrian.json and maps/inter_and_non_int.geojson.
    """
    # Minimal city config with one split column, written and parsed back
    config_filename = os.path.join(tmpdir, 'test.yml')
    with open(config_filename, "w") as f:
        ruamel.yaml.round_trip_dump({
            'name': 'cambridge',
            'crashes_files': {
                'file1': {
                    'optional': {
                        'split_columns': {
                            'pedestrian': {}
                        }
                    }
                }
            },
            'city_latitude': 42.3600825,
            'city_longitude': -71.0588801,
            'city_radius': 15,
            'city': "Cambridge, Massachusetts, USA",
            'timezone': "America/New_York",
        }, f)
    config = data.config.Configuration(config_filename)

    processed_dir = os.path.join(tmpdir, 'processed')
    maps_dir = os.path.join(tmpdir, 'processed', 'maps')
    os.makedirs(processed_dir)
    os.makedirs(maps_dir)

    # Put the fixtures where write_all_preds is expected to read them
    fixture_targets = [
        ('single_prediction.json',
         os.path.join(tmpdir, 'processed',
                      'seg_with_predicted_pedestrian.json')),
        ('single_segment.geojson',
         os.path.join(tmpdir, 'processed', 'maps',
                      'inter_and_non_int.geojson')),
    ]
    for fixture_name, target in fixture_targets:
        shutil.copy(os.path.join(DATA_FP, fixture_name), target)

    make_preds_viz.write_all_preds(tmpdir, config)

    output_file = os.path.join(
        tmpdir, 'processed', 'preds_viz_pedestrian.geojson')
    assert os.path.exists(output_file)
147 |
--------------------------------------------------------------------------------
/src/data/tests/test_osm_create_maps.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | from shapely.geometry import Polygon
4 | import networkx as nx
5 | import json
6 | import fiona
7 | import pickle
8 | from .. import osm_create_maps
9 | from .. import util
10 | from .. import config
11 | from ..record import transformer_4326_to_3857
12 |
# Absolute path of the directory containing this test module; fixture
# files are located under its data/ subdirectory
TEST_FP = os.path.dirname(os.path.abspath(__file__))
14 |
15 |
def test_get_width():
    """
    get_width should give 15 for '15.2' and 0 for an empty string, a
    stringified list, or a non-numeric string.
    """
    cases = [
        ('15.2', 15),
        ('', 0),
        ("['14.9', '12.2']", 0),
        ('t', 0),
    ]
    for raw_width, expected in cases:
        assert osm_create_maps.get_width(raw_width) == expected
21 |
22 |
def test_get_speed():
    """
    get_speed should give 0 for empty or non-numeric input, the number for
    a numeric string, and 100 for the stringified list of '90' and '100'.
    """
    cases = [
        ('', 0),
        ('signals', 0),
        ('60', 60),
        ("['90', '100']", 100),
    ]
    for raw_speed, expected in cases:
        assert osm_create_maps.get_speed(raw_speed) == expected
28 |
29 |
def test_reproject_and_clean_feats(tmpdir):
    """
    Smoke test: clean_ways should run without raising on a copy of the
    osm.gpkg fixture. No output is inspected.
    """
    work_dir = tmpdir.strpath
    fixture = TEST_FP + '/data/processed/maps/osm.gpkg'
    shutil.copy(fixture, work_dir)

    # For now, just make sure it runs
    gpkg_path = work_dir + '/osm.gpkg'
    docs_path = work_dir + '/docs'
    osm_create_maps.clean_ways(gpkg_path, docs_path)
43 |
44 |
def test_expand_polygon():
    """
    expand_polygon should return None with the default settings for this
    fixture, and with max_percent=.7 return a larger polygon that takes in
    the first crash record but not the second.
    """

    def reprojected_polygon(coordinates):
        # Build a Polygon from (x[0], x[1]) pairs, passing them to
        # get_reproject_point as (x[1], x[0]) with the 4326 -> 3857 transformer
        return Polygon([
            util.get_reproject_point(
                pair[1], pair[0], transformer_4326_to_3857, coords=True
            ) for pair in coordinates
        ])

    test_polygon = {
        'type': 'Polygon',
        'coordinates': [[[-71.0770265, 42.3364517], [-71.0810509, 42.3328703],
                         [-71.0721386, 42.3325241]]]
    }
    points_file = os.path.join(TEST_FP, 'data', 'osm_crash_file.json')

    # Too many points fall outside of the polygon to buffer
    unexpanded = osm_create_maps.expand_polygon(test_polygon, points_file)
    assert unexpanded is None

    orig_shape = reprojected_polygon(test_polygon['coordinates'][0])

    expanded = osm_create_maps.expand_polygon(test_polygon, points_file,
                                              max_percent=.7)
    result_shape = reprojected_polygon(expanded.exterior.coords)

    # Check whether the new polygon has a larger area than the old one
    assert result_shape.area > orig_shape.area

    records = util.read_records(points_file, 'crash')
    first, second, third = records[0], records[1], records[2]

    # The first two points are outside the original shape
    # and the last point is within
    assert orig_shape.contains(first.point) is False
    assert orig_shape.contains(second.point) is False
    assert orig_shape.contains(third.point)

    # The first point should be within the new shape, but not the
    # second point, since it was too far from the original shape
    assert result_shape.contains(first.point)
    assert result_shape.contains(second.point) is False
    assert result_shape.contains(third.point)
87 |
88 |
def mockreturn(config):
    """
    Stand-in for osm_create_maps.get_graph: ignores `config` and returns
    the object unpickled from the osm_output.gpickle fixture.
    """
    # Context manager so the fixture's file handle is closed after loading.
    # The pickle is a fixture file from this repository, not external input.
    with open(os.path.join(TEST_FP, 'data', 'osm_output.gpickle'), 'rb') as f:
        G1 = pickle.load(f)
    return G1
92 |
93 |
def test_simple_get_roads(tmpdir, monkeypatch):
    """
    With get_graph replaced by the pickled fixture graph, simple_get_roads
    should write features.geojson with the expected numbers of signals,
    intersections and crosswalks, and an osm.gpkg whose 'nodes' and 'edges'
    layers have the expected sizes.
    """
    monkeypatch.setattr(osm_create_maps, 'get_graph', mockreturn)
    c = config.Configuration(
        os.path.join(TEST_FP, 'data', 'config_features.yml'))
    osm_create_maps.simple_get_roads(c, tmpdir)

    with open(os.path.join(tmpdir, 'features.geojson')) as f:
        data = json.load(f)

    def count_features(feature_type):
        # Number of features whose 'feature' property equals feature_type
        return len([x for x in data['features']
                    if x['properties']['feature'] == feature_type])

    assert count_features('signal') == 2
    assert count_features('intersection') == 14
    assert count_features('crosswalk') == 9

    # Open both layers in context managers so the collections are closed
    # once they have been counted
    gpkg_path = os.path.join(tmpdir, 'osm.gpkg')
    with fiona.open(gpkg_path, layer='nodes') as nodes, \
            fiona.open(gpkg_path, layer='edges') as ways:
        # It's just coincidence that the number of ways and nodes is the same
        assert len(nodes) == 28
        assert len(ways) == 28
119 |
--------------------------------------------------------------------------------
/src/data/weather/BostonWeather2016_Wunderground.Rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/weather/BostonWeather2016_Wunderground.Rda
--------------------------------------------------------------------------------
/src/data/weather/README.md:
--------------------------------------------------------------------------------
1 | # Weather data
--------------------------------------------------------------------------------
/src/data/weather/weatherScrapingScript.R:
--------------------------------------------------------------------------------
1 |
# To run the script, insert an api key from Wunderground below:

apikey <- ""

library(jsonlite) #for parsing json output (fromJSON function)
library(plyr)

# Number of days in each month of 2016 (February has 29: 2016 is a leap year)
days_in_month <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)

# Download the Boston daily summary for one day of 2016 and return it as a
# data frame. The nested date data frame in the first column is dropped and
# replaced by its "pretty" string, stored in a trailing dateFull column.
# The request uses the apikey variable defined at the top of the script.
fetch_daily_summary <- function(month, day)
{
  url <- paste("http://api.wunderground.com/api/", apikey, "/history_2016",
               sprintf("%02d", month), sprintf("%02d", day),
               "/q/MA/Boston.json", sep="")
  daily <- fromJSON(txt = url)
  daily <- as.data.frame(daily$history$dailysummary)
  daily$dateFull <- daily$date$pretty
  daily[-1]
}

#Extract data for Jan-01 and then append daily weather data for Boston, 2016 to this dataset
data <- fetch_daily_summary(1, 1)

for (month in 1:12)
{
  for (day in seq_len(days_in_month[month]))
  {
    data <- rbind(data, fetch_daily_summary(month, day))
  }
}

data <- data[-1,] #remove duplicate January 1st observation
data <- plyr::rename(data, replace = c("dateFull" = "date")) #rename date variable
weatherBoston <- data
save(weatherBoston, file = "BostonWeather2016_Wunderground.Rda")
# Write the csv copy to its own file so it does not overwrite the .Rda saved above
write.csv(weatherBoston, file = "BostonWeather2016_Wunderground.csv")
80 |
--------------------------------------------------------------------------------
/src/data_standardization/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data_standardization/__init__.py
--------------------------------------------------------------------------------
/src/data_standardization/standardization_util.py:
--------------------------------------------------------------------------------
1 | import dateutil.parser as date_parser
2 | from datetime import datetime, timedelta
3 | import json
4 | from jsonschema import validate
5 | from dateutil import tz
6 |
7 |
def parse_date(date: str, timezone: str, time=None, time_format=None):
    """
    Turn a date (and optional time) into a datetime string
    in standardized format

    Args:
        date - a date or datetime string, parsed with dateutil
        timezone - timezone applied when the parsed date has none.
            NOTE(review): annotated as str, but the code calls
            timezone.localize(...) and passes it to astimezone(...), so a
            pytz-style timezone object appears to be expected - confirm
        time - optional time value, only used when the parsed date has
            hour, minute and second all equal to 0
        time_format - "military" (e.g. 155 or "0155"), "seconds" (offset
            in seconds added to the date), or anything else to have
            dateutil parse str(time)
    Returns:
        The datetime as an ISO format string, or None if the date could
        not be parsed. A time that cannot be interpreted is ignored and
        the bare date is used.
    """

    # If date is badly formatted, skip
    try:
        # Date can either be a date or a date time
        date = date_parser.parse(date)
    except (ValueError, OverflowError):
        print("{} is badly formatted, skipping".format(date))
        return None

    # If there's no time in the date given, look at the time field
    # if available
    if date.hour == 0 and date.minute == 0 and date.second == 0 and time:
        day_prefix = date.strftime('%Y-%m-%d ')

        if time_format == "military":
            # military times less than 4 chars require padding with leading zeros
            # e.g 155 becomes 0155
            military = str(time).zfill(4)

            try:
                if int(military) > 2359:
                    raise ValueError("military time out of range")
                clock = datetime.strptime(military, '%H%M')
                date = date_parser.parse(
                    day_prefix + clock.strftime('%I:%M%p').lower()
                )
            except ValueError:
                # ignore invalid times (out of range, non-numeric, or
                # minutes above 59) and keep just the date
                date = date_parser.parse(day_prefix)

        elif time_format == "seconds":
            try:
                date = date + timedelta(seconds=int(time))
            # if time isn't a whole number of seconds, just use bare date
            except ValueError:
                pass

        else:
            try:
                date = date_parser.parse(day_prefix + str(time))
            # if time can't be parsed, just use bare date
            except ValueError:
                pass

    # Add timezone if it wasn't included in the string formatting originally
    if not date.tzinfo:
        date = timezone.localize(date)
    # If the timezone was set to utc, reformat into local time with offset
    elif date.tzinfo == tz.tzutc():
        date = date.astimezone(timezone)
    date_time = date.isoformat()

    return date_time
64 |
65 |
def parse_address(address):
    """
    Some cities have the lat/lon as part of the address.
    If that's the format, parse out these values

    The expected format is three newline-separated lines with a non-empty
    third line. The street is the first line with its first
    whitespace-separated token dropped; the third line, minus its first and
    last characters, is split on ', ' into latitude and longitude.

    Returns:
        (street, lat, lon) with lat/lon as floats, or (None, None, None)
        if the address does not have that shape
    """
    parts = address.split('\n')
    if len(parts) != 3 or not parts[2]:
        return None, None, None

    first_line, coordinates = parts[0], parts[2]
    street = ' '.join(first_line.split()[1:])
    # Strip the surrounding pair of characters before splitting
    lat_text, lon_text = coordinates[1:-1].split(', ')
    return street, float(lat_text), float(lon_text)
78 |
79 |
def validate_and_write_schema(schema_path, schema_values, output_file):
    """
    Check values against a JSON schema file, then save them as JSON
    Args:
        schema_path - filename of the JSON schema
        schema_values - the values to validate and write (a list of dicts)
        output_file - filename the values are written to
    """
    with open(schema_path) as schema_file:
        schema_definition = json.load(schema_file)

    # Raises if the values don't match the schema, before anything is written
    validate(schema_values, schema_definition)

    with open(output_file, "w") as out_file:
        json.dump(schema_values, out_file)

    print("- output written to {}".format(output_file))
96 |
--------------------------------------------------------------------------------
/src/data_standardization/standardize_volume.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | from jsonschema import validate
4 | import json
5 | from .boston_volume import BostonVolumeParser
6 | import data.config
7 |
8 | BASE_FP = None
9 | PROCESSED_DATA_FP = None
10 | CURR_FP = os.path.dirname(
11 | os.path.abspath(__file__))
12 |
13 |
def write_volume(volume_counts):
    """
    Validate volume counts against standards/volumes-schema.json (located
    two directories above this file) and write them to
    standardized/volume.json under BASE_FP
    """
    standards_dir = os.path.join(
        os.path.dirname(os.path.dirname(CURR_FP)), "standards")
    with open(os.path.join(standards_dir, "volumes-schema.json")) as schema_file:
        schema = json.load(schema_file)
    validate(volume_counts, schema)

    output_path = os.path.join(BASE_FP, "standardized", "volume.json")
    with open(output_path, "w") as out_file:
        json.dump(volume_counts, out_file)
23 |
24 |
if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", type=str, required=True,
                        help="city config filename")
    # The data directory is always used to build BASE_FP, so report a
    # usage error when it is missing rather than failing with a TypeError
    parser.add_argument("-d", "--datadir", type=str, required=True,
                        help="data directory")

    args = parser.parse_args()
    BASE_FP = os.path.join(args.datadir)

    config = data.config.Configuration(args.config)
    # Boston is the only city with a volume parser
    if config.name == 'boston':
        volume_counts = BostonVolumeParser(args.datadir).get_volume()
        write_volume(volume_counts)
    else:
        print("No volume data given for {}".format(config.name))
42 |
--------------------------------------------------------------------------------
/src/data_standardization/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data_standardization/tests/__init__.py
--------------------------------------------------------------------------------
/src/data_standardization/tests/data/8811_NA_NA_83_PEARL-ST_CHARLESTOWN_24-HOURS_XXX_01-11-2017.XLSX:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data_standardization/tests/data/8811_NA_NA_83_PEARL-ST_CHARLESTOWN_24-HOURS_XXX_01-11-2017.XLSX
--------------------------------------------------------------------------------
/src/data_standardization/tests/data/waze/2018-10-15-20-15.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data_standardization/tests/data/waze/2018-10-15-20-15.json.gz
--------------------------------------------------------------------------------
/src/data_standardization/tests/data/waze/2018-10-16-08-00.json:
--------------------------------------------------------------------------------
1 | {"startTimeMillis": 1539676620000, "alerts": [{"type": "WEATHERHAZARD", "subtype": "HAZARD_ON_ROAD_CONSTRUCTION", "city": "Cambridge, MA", "pubMillis": 1539607721062, "location": {"y": 42.371072, "x": -71.114300}}, {"type": "NONE", "city": "Boston, MA", "pubMillis": 1537582060620, "location": {"y": 42.371072, "x": -71.114300}}], "jams": [{"roadType": 1, "city": "Cambridge, MA", "pubMillis": 1539670005835}, {"city": "Boston, MA", "pubMillis": 1539610200000}], "startTime": "2018-10-16 07:57:00:000", "endTime": "2018-10-16 08:00:00:000"}
2 |
--------------------------------------------------------------------------------
/src/data_standardization/tests/data/waze/2018-10-17-16-15.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data_standardization/tests/data/waze/2018-10-17-16-15.json.gz
--------------------------------------------------------------------------------
/src/data_standardization/tests/test-schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-06/schema#",
3 | "title": "Test",
4 | "description": "Defines the structure of a set of information",
5 | "type": "array",
6 | "items": {
7 | "title": "Test",
8 | "description": "Defines the structure of a test",
9 | "type": "object",
10 | "properties": {
11 | "id": {
12 | "description": "Unique identifier of test",
13 | "type": ["string", "number"]
14 | },
15 | "dateOccurred": {
16 | "description": "Date test occurred, ISO8601 formatted",
17 | "type": "string",
18 | "format": "date-time"
19 | },
20 | "location": {
21 | "description": "Coordinates of test, WGS84 formatted",
22 | "type": "object",
23 | "properties": {
24 | "latitude": {
25 | "description": "Latitude of test",
26 | "type": "number"
27 | },
28 | "longitude": {
29 | "description": "Longitude of test",
30 | "type": "number"
31 | }
32 | }
33 | }
34 | },
35 | "required": ["id", "dateOccurred", "location"]
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/src/data_standardization/tests/test_boston_volume.py:
--------------------------------------------------------------------------------
1 | from ..boston_volume import BostonVolumeParser
2 | import os
3 |
4 |
def test_is_readable_ATR():
    """Two filenames must be rejected by is_readable_ATR and one accepted"""
    parser = BostonVolumeParser(os.path.abspath(__file__))

    rejected = (
        '7147_NA_NA_53_CLAPP-ST_DORCHESTER_24-HOURS_SPEED_02-25-2013.XLS',
        '8652_NA_NA_0_SOUTHWEST-CORRIDOR_ROXBURY_48-HOURS_XXX_09-27-2016.XLS',
    )
    for fname in rejected:
        assert not parser.is_readable_ATR(fname)

    accepted = '8811_NA_NA_83_PEARL-ST_CHARLESTOWN_24-HOURS_XXX_01-11-2017.XLSX'
    assert parser.is_readable_ATR(accepted)
17 |
18 |
def test_clean_ATR_fname():
    """clean_ATR_fname turns an ATR filename into an address string"""
    fname = '7362_NA_NA_147_TRAIN-ST_DORCHESTER_24-HOURS_XXX_03-19-2014.XLSX'
    expected_address = '147 TRAIN ST Boston, MA'

    parser = BostonVolumeParser(os.path.abspath(__file__))
    assert parser.clean_ATR_fname(fname) == expected_address
24 |
25 |
def test_read_ATR():
    """read_ATR on the sample workbook returns the expected tuple"""
    data_dir = os.path.dirname(
        os.path.abspath(__file__)) + '/data/'
    workbook = os.path.join(
        data_dir,
        '8811_NA_NA_83_PEARL-ST_CHARLESTOWN_24-HOURS_XXX_01-11-2017.XLSX')

    total = 243
    speed = 14
    motos_bikes = 14
    light_vehicles = 215
    heavy_vehicles = 14
    date = '2017-01-11'
    hourly_totals = [2, 0, 1, 0, 3, 3, 6, 26, 21, 15, 11, 12, 7, 20, 12, 15,
                     11, 16, 23, 11, 10, 11, 4, 3]

    parser = BostonVolumeParser(data_dir)
    assert parser.read_ATR(workbook) == (
        total, speed, motos_bikes, light_vehicles, heavy_vehicles,
        date, hourly_totals)
50 |
51 |
--------------------------------------------------------------------------------
/src/data_standardization/tests/test_standardization_util.py:
--------------------------------------------------------------------------------
1 | from .. import standardization_util
2 | import json
3 | import os
4 | import pytz
5 |
6 | TEST_FP = os.path.dirname(os.path.abspath(__file__))
7 |
8 |
def test_parse_date():
    """parse_date returns the expected ISO strings for New York inputs"""
    timezone = pytz.timezone('America/New_York')
    parse = standardization_util.parse_date

    # (date string, extra keyword arguments, expected result)
    cases = [
        ('01/08/2009 08:53:00 PM', {}, '2009-01-08T20:53:00-05:00'),
        ('01/08/2009', {'time': '08:53:00 PM'},
         '2009-01-08T20:53:00-05:00'),
        ('01/08/2009', {'time': '75180', 'time_format': 'seconds'},
         '2009-01-08T20:53:00-05:00'),
        ('01/08/2009', {'time': '0201', 'time_format': 'military'},
         '2009-01-08T02:01:00-05:00'),
        ('01/08/2009', {'time': '1201', 'time_format': 'military'},
         '2009-01-08T12:01:00-05:00'),
        # An out-of-range military time leaves the date at midnight
        ('01/08/2009', {'time': '9999', 'time_format': 'military'},
         '2009-01-08T00:00:00-05:00'),
        # Test daylight savings time
        ('08/08/2009 08:53:00 PM', {}, '2009-08-08T20:53:00-04:00'),
        # Test UTC conversion
        ('2009-01-08T08:53:00.000Z', {}, '2009-01-08T03:53:00-05:00'),
    ]
    for date_string, extra, expected in cases:
        assert parse(date_string, timezone, **extra) == expected

    # A date that can't be parsed gives None
    assert parse('01/08/2009 unk', timezone) is None

    # A bare year keeps that year
    assert parse('2009', timezone)[:4] == '2009'
56 |
57 |
def test_parse_address():
    """A three-line address is split into street, latitude and longitude"""
    address = "\n".join([
        "29 OXFORD ST",
        "Cambridge, MA",
        "(42.37857940800046, -71.11657724799966)",
    ])

    parsed = standardization_util.parse_address(address)
    assert parsed[0] == 'OXFORD ST'
    assert parsed[1] == 42.37857940800046
    assert parsed[2] == -71.11657724799966
    assert len(parsed) == 3
68 |
69 |
def test_validate_and_write_schema(tmpdir):
    """Values that satisfy test-schema.json are written out unchanged"""
    output_path = os.path.join(tmpdir.strpath, 'test.json')

    values = [{
        "id": "1",
        "dateOccurred": "2009-01-08T20:53:00Z",
        "location": {
            "latitude": 42.37857940800046,
            "longitude": -71.11657724799966
        }
    }]
    standardization_util.validate_and_write_schema(
        os.path.join(TEST_FP, 'test-schema.json'),
        values,
        output_path
    )

    # Now load the json back and make sure it matches; the context manager
    # closes the file instead of leaving the handle to the garbage collector
    with open(output_path) as written:
        items = json.load(written)
    assert items == values
91 |
92 |
93 |
94 |
--------------------------------------------------------------------------------
/src/data_standardization/tests/test_standardize_waze_data.py:
--------------------------------------------------------------------------------
1 | import ruamel.yaml
2 | from .. import standardize_waze_data
3 | import data.config
4 | import os
5 | import pytz
6 |
7 | TEST_FP = os.path.dirname(os.path.abspath(__file__))
8 |
9 |
def test_get_datetime():
    """
    get_datetime results match the expected New York ISO strings, on both
    sides of the November 2018 offset change
    """
    timezone = pytz.timezone("America/New_York")

    # (waze timestamp, expected isoformat() of the result)
    cases = (
        ('2018-10-04 12:13:00:000', '2018-10-04T08:13:00-04:00'),
        ('2018-11-04 01:13:00:000', '2018-11-03T21:13:00-04:00'),
        ('2018-11-04 06:13:00:000', '2018-11-04T01:13:00-05:00'),
    )
    for timestamp, expected in cases:
        converted = standardize_waze_data.get_datetime(timestamp, timezone)
        assert converted.isoformat() == expected
24 |
25 |
def test_read_snapshots(tmpdir):
    """
    read_snapshots on the waze test data returns the expected events, for
    all snapshots and when restricted to a single day
    """
    config_dict = {
        'name': 'cambridge',
        'city_latitude': 42.3600825,
        'city_longitude': -71.0588801,
        'city_radius': 15,
        'timezone': 'America/New_York',
        'crashes_files': {'test': {}},
        'city': "Cambridge, Massachusetts, USA"
    }
    filename = os.path.join(tmpdir, 'test.yml')
    with open(filename, "w") as f:
        ruamel.yaml.round_trip_dump(config_dict, f)
    config = data.config.Configuration(filename)

    waze_dir = os.path.join(TEST_FP, 'data', 'waze')
    results = standardize_waze_data.read_snapshots(waze_dir, config)

    expected_results = [
        {
            'pubMillis': 1539632995870,
            'city': 'Cambridge, MA',
            'eventType': 'jam',
            'pubTimeStamp': '2018-10-15 15:49:55',
            'snapshotId': 1
        },
        {
            'country': 'US',
            'subtype': '',
            'pubMillis': 1539632447442,
            'city': 'Cambridge, MA',
            'type': 'JAM',
            'reportRating': 2,
            'location': {
                'latitude': 42.373807,
                'longitude': -71.112465
            },
            'eventType': 'alert',
            'pubTimeStamp': '2018-10-15 15:40:47',
            'snapshotId': 1
        },
        {
            'roadType': 1,
            'city': 'Cambridge, MA',
            'pubMillis': 1539670005835,
            'eventType': 'jam',
            'pubTimeStamp': '2018-10-16 02:06:45',
            'snapshotId': 2
        },
        {
            'type': 'WEATHERHAZARD',
            'subtype': 'HAZARD_ON_ROAD_CONSTRUCTION',
            'city': 'Cambridge, MA',
            'pubMillis': 1539607721062,
            'location': {
                'latitude': 42.371072,
                'longitude': -71.1143
            },
            'eventType': 'alert',
            'pubTimeStamp': '2018-10-15 08:48:41',
            'snapshotId': 2
        },
        {
            'updateDate': 'Wed Oct 17 16:14:17 +0000 2018',
            'speed': 3.79,
            'city': 'Cambridge, MA',
            'detectionDateMillis': 1539788890781,
            'detectionDate': 'Wed Oct 17 15:08:10 +0000 2018',
            'type': 'Small',
            'eventType': 'irregularity',
            'snapshotId': 3
        }
    ]
    assert results == expected_results

    results = standardize_waze_data.read_snapshots(
        waze_dir,
        config,
        startdate='2018-10-16',
        enddate='2018-10-16'
    )
    # Only the 2018-10-16 snapshot remains, and it is now numbered 1
    assert results == [
        {
            'roadType': 1,
            'city': 'Cambridge, MA',
            'pubMillis': 1539670005835,
            'eventType': 'jam',
            'pubTimeStamp': '2018-10-16 02:06:45',
            'snapshotId': 1
        },
        {
            'type': 'WEATHERHAZARD',
            'subtype': 'HAZARD_ON_ROAD_CONSTRUCTION',
            'city': 'Cambridge, MA',
            'pubMillis': 1539607721062,
            'location': {
                'latitude': 42.371072,
                'longitude': -71.1143
            },
            'eventType': 'alert',
            'pubTimeStamp': '2018-10-15 08:48:41',
            'snapshotId': 1
        },
    ]
131 |
--------------------------------------------------------------------------------
/src/features/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/features/.gitkeep
--------------------------------------------------------------------------------
/src/features/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/features/__init__.py
--------------------------------------------------------------------------------
/src/features/build_features.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/features/build_features.py
--------------------------------------------------------------------------------
/src/features/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for feature generation
3 | """
4 |
--------------------------------------------------------------------------------
/src/features/tests/test_make_canon.py:
--------------------------------------------------------------------------------
1 | import os
2 | import warnings
3 | import pandas as pd
4 | from .. import make_canon_dataset
5 |
6 |
7 | TEST_FP = os.path.dirname(os.path.abspath(__file__))
8 | DATA_FP = os.path.join(TEST_FP, 'data', 'processed')
9 |
10 |
def test_read_records(tmpdir):
    """read_records on crash_joined.json yields the expected per-near_id frame"""
    crash_file = os.path.join(DATA_FP, 'crash_joined.json')
    modes = ['bike', 'pedestrian', 'vehicle']

    expected = pd.DataFrame({
        'near_id': [1, 2, 3, '000', '002', '003', '004', '005', '007', '008'],
        'crash': [2, 18, 2, 5, 3, 14, 2, 11, 1, 4],
        'bike': [0, 3, 0, 0, 1, 1, 0, 3, 0, 1],
        'pedestrian': [0, 3, 1, 1, 0, 0, 1, 0, 0, 0],
        'vehicle': [2, 12, 1, 4, 2, 13, 1, 8, 1, 3]
    })

    actual = make_canon_dataset.read_records(crash_file, 'near_id', modes)
    pd.testing.assert_frame_equal(actual, expected, check_dtype=False)
26 |
27 |
def test_aggregate_roads():
    """
    Test case for the aggregate_roads function in the make_canon_dataset module.

    This test case verifies that the aggregate_roads function correctly aggregates road data
    and combines it with crash data.

    It performs the following checks:
    - Verifies that the expected columns are present in the resulting dataframe.
    - Verifies that the inferred dtype of the 'segment_id' column is 'string'.
    - Verifies the shape of the resulting dataframe.
    - Verifies the values of the 'width' column in the resulting dataframe.

    """

    aggregated, cr_con = make_canon_dataset.aggregate_roads(
        ['width', 'lanes', 'hwy_type', 'signal', 'oneway'],
        ['osm_speed'],
        DATA_FP,
        ['bike', 'pedestrian', 'vehicle']
    )
    expected_columns = {'width', 'lanes', 'hwy_type', 'osm_speed', 'signal', 'oneway',
                        'segment_id', 'crash', 'bike', 'pedestrian', 'vehicle'}

    # Compared as a set, so only the distinct widths matter
    expected_width = set([24, 24, 24, 15, 15, 24, 5, 24, 12, 12, 24, 24, 24, 24])

    cr_con_roads = make_canon_dataset.combine_crash_with_segments(
        cr_con, aggregated)

    assert pd.api.types.infer_dtype(cr_con_roads.segment_id) == 'string'
    assert set(cr_con_roads.columns.tolist()) == expected_columns
    assert cr_con_roads.shape == (14, 11)
    assert set(cr_con_roads.width) == expected_width
63 |
64 |
def test_road_make():
    """
    road_make warns once about the two unknown features and returns the
    remaining four columns indexed by segment id
    """
    requested = ['test1', 'test2', 'width', 'lanes', 'hwy_type', 'osm_speed']
    map_file = os.path.join(DATA_FP, 'maps', 'inter_and_non_int.geojson')

    with warnings.catch_warnings(record=True) as caught:
        result = make_canon_dataset.road_make(requested, map_file)

    assert len(caught) == 1
    assert str(caught[0].message) \
        == "2 feature(s) missing, skipping (test1, test2)"
    assert list(result.columns) == [
        'width', 'lanes', 'hwy_type', 'osm_speed']

    expected = pd.DataFrame({
        'id': ['000', '001', '002', '003', '004', '005', '006',
               '007', '008', '009', '0', '1', '2', '3'],
        'width': [24, 24, 24, 15, 15, 24, 5, 24, 12, 12, 24, 24, 24, 24],
        'lanes': [2, 3, 3, 3, 3, 2, 1, 2, 1, 1, 2, 3, 3, 3],
        'hwy_type': [6, 6, 6, 3, 6, 6, 1, 6, 1, 1, 1, 1, 3, 1],
        'osm_speed': [0, 0, 0, 0, 25, 0, 25, 0, 25, 25, 25, 25, 25, 25]
    })
    expected.set_index('id', inplace=True)
    pd.testing.assert_frame_equal(expected, result)
86 |
87 |
--------------------------------------------------------------------------------
/src/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/models/.gitkeep
--------------------------------------------------------------------------------
/src/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/models/__init__.py
--------------------------------------------------------------------------------
/src/models/make_weekly.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/models/make_weekly.py
--------------------------------------------------------------------------------
/src/models/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/models/tests/__init__.py
--------------------------------------------------------------------------------
/src/models/tests/data/features.yml:
--------------------------------------------------------------------------------
1 | f_cat:
2 | - width
3 | f_cont:
4 | - lanes
5 | - hwy_type
6 | - osm_speed
7 | - signal
8 | - oneway
9 | - width_per_lane
10 | - jam_percent
11 |
--------------------------------------------------------------------------------
/src/models/tests/test_train_model.py:
--------------------------------------------------------------------------------
1 | import os
2 | import ruamel.yaml
3 | import pandas as pd
4 | from .. import train_model
5 | import data.config
6 |
7 |
8 | TEST_FP = os.path.dirname(os.path.abspath(__file__))
9 |
10 |
def test_get_features(tmpdir):
    """
    get_features returns only 'signal' for this config and data: the other
    configured features are not columns of the data
    """
    osm_features = {
        'categorical': {
            'signal': 'Signal',
            'test_missing': 'Missing Field'
        },
        'continuous': {'missing': 'Missing Field'}
    }
    config_dict = {
        'name': 'cambridge',
        'city_latitude': 42.3600825,
        'city_longitude': -71.0588801,
        'city_radius': 15,
        'crashes_files': {'test': {}},
        'city': "Cambridge, Massachusetts, USA",
        'timezone': "America/New_York",
        'openstreetmap_features': osm_features,
        'atr': '',
        'tmc': '',
        'concern': ''
    }
    config_filename = os.path.join(tmpdir, 'test.yml')
    with open(config_filename, "w") as config_file:
        ruamel.yaml.round_trip_dump(config_dict, config_file)
    config = data.config.Configuration(config_filename)

    test_data = pd.DataFrame(data={
        'width': [10, 12],
        'signal': [1, 0],
        'jam_percent': [1, 12],
        'lanes': [2, 1]
    })

    categorical, continuous, combined = train_model.get_features(
        config, test_data)
    assert categorical == ['signal']
    assert continuous == []
    assert combined == ['signal']
47 |
def test_process_features(tmpdir):
    """process_features returns the expected feature and lm feature names"""
    categorical = ['signal', 'lanes']
    continuous = ['width', 'jam_percent']
    frame = pd.DataFrame(data={
        'width': [10, 12],
        'signal': [1, 0],
        'jam_percent': [0, 0],
        'lanes': [2, 1],
        'segment_id': ['001', '002']
    })

    frame, features, lm_features = train_model.process_features(
        ['signal', 'lanes', 'width', 'jam_percent'],
        categorical, continuous, frame)

    assert set(features) == {
        'intersection', 'signal_1', 'signal_0', 'log_width',
        'lanes_2', 'lanes_1'}
    assert set(lm_features) == {
        'intersection', 'signal_1', 'log_width', 'lanes_2'}
62 |
63 |
def test_initialize_and_run(tmpdir):
    """Smoke test: initialize_and_run completes on the sample model data"""
    # For now, just test the model runs
    model_data = pd.read_csv(os.path.join(TEST_FP, 'data', 'data_model.csv'))

    # The same list is passed for both feature arguments
    feature_names = [
        'lanes0', 'oneway1', 'log_width', 'lanes1', 'signal2',
        'hwy_type1', 'hwy_type5', 'oneway0', 'signal1', 'hwy_type9',
        'lanes3', 'lanes2', 'intersection', 'osm_speed0',
        'osm_speed25', 'signal0', 'hwy_type0',
    ]
    train_model.initialize_and_run(
        model_data, feature_names, feature_names, 'target', tmpdir, seed=1)
74 |
--------------------------------------------------------------------------------
/src/showcase/.dockerignore:
--------------------------------------------------------------------------------
1 | data/
--------------------------------------------------------------------------------
/src/showcase/.gcloudignore:
--------------------------------------------------------------------------------
1 | data/
--------------------------------------------------------------------------------
/src/showcase/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:alpine3.7
2 | RUN pip install flask
3 | COPY . /app
4 | WORKDIR /app
5 | EXPOSE 5000
6 | CMD ["python", "app.py"]
7 |
--------------------------------------------------------------------------------
/src/showcase/Dockerfile.gcp:
--------------------------------------------------------------------------------
1 | FROM nginx
2 |
3 | COPY templates/index.html /usr/share/nginx/html
4 | COPY static /usr/share/nginx/html/static/
5 |
6 | COPY nginx.conf /etc/nginx/conf.d/default.conf
--------------------------------------------------------------------------------
/src/showcase/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/showcase/__init__.py
--------------------------------------------------------------------------------
/src/showcase/app.py:
--------------------------------------------------------------------------------
1 | import os
2 | from flask import Flask, render_template, send_from_directory
3 |
4 |
5 | app = Flask(__name__)
6 |
7 |
8 | CONFIG_FILE = os.path.join('static', 'config.js')
9 |
10 |
# The URL rule must declare the `path` variable that the view receives;
# the `path` converter also accepts slashes, so nested files can be served
@app.route('/data/<path:path>')
def static_files(path):
    """Serve the file at `path` from the local 'data' directory."""
    return send_from_directory('data', path)
14 |
15 |
@app.route('/', methods=['GET', 'POST'])
def index():
    """
    Render the showcase page.

    The config file passed to the template is the CONFIG_FILE environment
    variable when it is set, otherwise the module-level CONFIG_FILE
    default. The Mapbox token comes from the MAPBOX_TOKEN environment
    variable; a KeyError is raised if it is not set.
    """
    return render_template(
        'index.html',
        mapbox_token=os.environ['MAPBOX_TOKEN'],
        # Look the override up directly instead of rewriting the module
        # global on every request
        config_file=os.environ.get('CONFIG_FILE', CONFIG_FILE)
    )
26 |
27 |
if __name__ == '__main__':
    # Bind to every network interface rather than only localhost
    bind_address = '0.0.0.0'
    app.run(host=bind_address)
31 |
--------------------------------------------------------------------------------
/src/showcase/cloudbuild.yaml:
--------------------------------------------------------------------------------
1 | steps:
2 | - name: 'gcr.io/cloud-builders/docker'
3 | args: ['build', '-t', 'gcr.io/insight-lane/showcase', '--file', 'Dockerfile.gcp', '.']
4 | images:
5 | - 'gcr.io/insight-lane/showcase'
--------------------------------------------------------------------------------
/src/showcase/nginx.conf:
--------------------------------------------------------------------------------
1 | server {
2 | sub_filter "{{ config_file }}" "static/gcp_config.js";
3 | sub_filter "{{ mapbox_token }}" "pk.eyJ1IjoidGVycnlmODIiLCJhIjoiY2poOXlvc2NnMGdoNDM3cWc1bHVlejNtMSJ9.JPUsgcaeW0r12m5sBEcvVw";
4 | listen 8080;
5 | server_name localhost;
6 | location / {
7 | root /usr/share/nginx/html;
8 | index index.html index.htm;
9 | }
10 | }
--------------------------------------------------------------------------------
/src/showcase/run_all_cities.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import subprocess
4 | import argparse
5 |
6 |
7 | DATA_FP = os.path.dirname(
8 | os.path.dirname(
9 | os.path.dirname(
10 | os.path.abspath(__file__)))) + '/data/'
11 |
12 |
if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('--forceupdate', action='store_true',
                        help='Whether to force update the maps')
    # Can also choose which steps of the process to run
    parser.add_argument('--onlysteps',
                        help="Give list of steps to run, as comma-separated " +
                        "string. Has to be among 'standardization', " +
                        "'generation', 'model', 'visualization'")
    args = parser.parse_args()

    # Each directory under DATA_FP is treated as a city name. Plain files
    # and hidden entries are skipped: they would only produce a config
    # filename that does not exist and abort the whole run.
    cities = [
        entry for entry in os.listdir(DATA_FP)
        if not entry.startswith('.')
        and os.path.isdir(os.path.join(DATA_FP, entry))
    ]

    # Options forwarded unchanged to every pipeline invocation
    forwarded = []
    if args.forceupdate:
        forwarded.append('--forceupdate')
    if args.onlysteps:
        forwarded.extend(['--onlysteps', args.onlysteps])

    for city in cities:
        config_file = os.path.join('config', 'config_{}.yml'.format(city))

        print("Running pipeline for {}".format(city))
        print(args.onlysteps)
        # check_call raises on a non-zero exit, stopping at the first failure
        subprocess.check_call(
            ['python', 'pipeline.py', '-c', config_file] + forwarded)
    city_list = ", ".join(cities)
    print("Ran pipeline on {}".format(city_list))
41 |
--------------------------------------------------------------------------------
/src/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/tools/__init__.py
--------------------------------------------------------------------------------
/src/tools/geocode_batch.py:
--------------------------------------------------------------------------------
1 |
2 | import argparse
3 | import os
4 | import csv
5 | from data.geocoding_util import lookup_address, read_geocode_cache
6 |
7 |
def parse_addresses(directory, filename, city, addressfield,
                    mapboxtoken=None):
    """
    Geocode every address in a csv file and rewrite the geocode cache

    Args:
        directory - the cache is read from and written back to
            <directory>/processed/geocoded_addresses.csv
        filename - csv file with a header row, containing the addresses
        city - string appended (after a space) to each address before lookup
        addressfield - name of the csv column holding the address
        mapboxtoken - optional token passed through to lookup_address
    Returns:
        results - a list that is always empty, since nothing is appended
        to it
        NOTE(review): presumably this was meant to carry the geocoded
        rows; confirm with callers before changing it
    """
    cache_file = os.path.join(directory, 'processed', 'geocoded_addresses.csv')
    cached = read_geocode_cache(filename=cache_file)

    results = []
    succeeded = failed = timed_out = 0

    # Read in the csv file; newline='' leaves newline handling to the csv
    # module so quoted fields containing line breaks are read correctly
    with open(filename, newline='') as f:
        for row in csv.DictReader(f):
            address = row[addressfield] + ' ' + city
            geocoded_add, lat, lng, status = lookup_address(
                address, cached, mapboxtoken=mapboxtoken, city=city,
                strict=True)
            cached[address] = [geocoded_add, lat, lng, status]

            # 'S' and 'F' are counted separately; any other status is
            # reported as a timeout
            if status == 'S':
                succeeded += 1
            elif status == 'F':
                failed += 1
            else:
                timed_out += 1

    print('Number successfully geocoded: {}'.format(succeeded))
    print('Unable to geocode: {}'.format(failed))
    print('Timed out on {} addresses'.format(timed_out))

    # Write out the cache
    with open(cache_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow([
            'Input Address',
            'Output Address',
            'Latitude',
            'Longitude',
            'Status'
        ])

        for name, value in cached.items():
            writer.writerow([name] + value)

    return results
54 |
55 |
if __name__ == '__main__':
    # Command line entry point: every option except the mapbox token is required
    arg_parser = argparse.ArgumentParser()
    for short_flag, long_flag in (("-d", "--directory"),
                                  ("-f", "--filename"),
                                  ("-c", "--city")):
        arg_parser.add_argument(short_flag, long_flag, type=str,
                                required=True)
    arg_parser.add_argument("-a", "--address", type=str, required=True,
                            help="Address column name")
    arg_parser.add_argument('-m', '--mapboxtoken', type=str,
                            help="mapbox token")
    options = arg_parser.parse_args()

    parse_addresses(
        options.directory,
        options.filename,
        options.city,
        options.address,
        options.mapboxtoken,
    )
68 |
69 |
--------------------------------------------------------------------------------
/src/tools/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/tools/tests/__init__.py
--------------------------------------------------------------------------------
/src/tools/tests/data/geocoded_addresses.csv:
--------------------------------------------------------------------------------
1 | Input Address,Output Address,Latitude,Longitude,Status
2 | "21 GREYCLIFF RD Boston, MA","21 Greycliff Rd, Brighton, MA 02135, USA",42.3408948,-71.16084219999999,S
--------------------------------------------------------------------------------
/src/tools/tests/data/make_map_multilinestring.geojson:
--------------------------------------------------------------------------------
1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "id": "682", "geometry": {"type": "MultiLineString", "coordinates": [[[-71.10108933357631, 42.36919958642794], [-71.1010657, 42.3692321], [-71.1010043, 42.36931649999999]], [[-71.1008465100983, 42.36925311568192], [-71.10092959999999, 42.369286499999994], [-71.1010043, 42.36931649999999]], [[-71.10116315079033, 42.369378444080525], [-71.101119, 42.36936119999999], [-71.1010043, 42.36931649999999]], [[-71.10092027806371, 42.369433798702836], [-71.10094689999998, 42.36939659999999], [-71.1010043, 42.36931649999999]]]}, "properties": {"id": "682", "access": null, "bridge": null, "cycleway": null, "from": null, "highway": null, "junction": null, "key": null, "lanes": 3, "length": null, "maxspeed": null, "name": null, "oneway": 0, "osmid": null, "ref": null, "to": null, "tunnel": null, "width": 24, "hwy_type": null, "cycleway_type": 1, "osm_speed": null, "signal": null, "width_per_lane": 8, "segment_id": null, "dead_end": null, "streets": null, "intersection": null, "jam_percent": 24.80037576326914, "jam": 1, "avg_jam_speed": null, "avg_jam_level": 4, "alert_WEATHERHAZARD": null, "alert_JAM": null, "alert_ROAD_CLOSED": null, "alert_ACCIDENT": null, "orig_id": null, "inter": null, "display_name": "Broadway and Prospect Street", "center_y": 42.36931670366326, "center_x": -71.10100482259885, "intersection_segments": null, "parking_tickets": 69, "crosswalk": null}}]}
--------------------------------------------------------------------------------
/src/tools/tests/data/to_geocode.csv:
--------------------------------------------------------------------------------
1 | Date,Location,ID
2 | 01/02/2016,"21 GREYCLIFF RD",1
3 | 01/03/2016,"216 SAVIN HILL AVE Boston, MA",2
--------------------------------------------------------------------------------
/src/tools/tests/test_geocode_batch.py:
--------------------------------------------------------------------------------
1 | import os
2 | from .. import geocode_batch
3 | import shutil
4 |
5 | TEST_FP = os.path.dirname(os.path.abspath(__file__))
6 |
7 |
def mockreturn(address, cached, city, mapboxtoken, strict):
    """Offline replacement for geocode_batch.lookup_address.

    Returns the cached entry for `address` when there is one; otherwise
    returns a fixed result for 216 Savin Hill Ave. The remaining
    arguments are accepted only to match the patched function's
    signature and are ignored.
    """
    canned_result = ["216 Savin Hill Ave, Dorchester, MA 02125",
                     42.3092288, -71.0480357, 'S']
    return cached[address] if address in cached else canned_result
14 |
15 |
def test_parse_addresses(tmpdir, monkeypatch):
    """Run parse_addresses against a pre-seeded results file and check
    the exact contents of that file afterwards."""
    # Swap in the offline lookup so no real geocoding request is made
    monkeypatch.setattr(geocode_batch, 'lookup_address', mockreturn)

    fixture_dir = os.path.join(TEST_FP, 'data')

    # Seed <tmpdir>/processed with the already-geocoded fixture
    processed_dir = os.path.join(tmpdir.strpath, 'processed')
    os.makedirs(processed_dir)
    geocoded_fp = os.path.join(processed_dir, 'geocoded_addresses.csv')
    shutil.copyfile(
        os.path.join(fixture_dir, 'geocoded_addresses.csv'),
        geocoded_fp
    )

    geocode_batch.parse_addresses(
        tmpdir.strpath,
        os.path.join(fixture_dir, 'to_geocode.csv'),
        "Boston, MA",
        'Location'
    )

    # check that the resulting geocoded file is correct
    with open(geocoded_fp, 'r') as geocoded_file:
        actual_contents = geocoded_file.read()

    assert actual_contents == """Input Address,Output Address,Latitude,Longitude,Status
"21 GREYCLIFF RD Boston, MA","21 Greycliff Rd, Brighton, MA 02135, USA",42.3408948,-71.16084219999999,S
"216 SAVIN HILL AVE Boston, MA Boston, MA","216 Savin Hill Ave, Dorchester, MA 02125",42.3092288,-71.0480357,S
"""
44 |
45 |
--------------------------------------------------------------------------------
/src/tools/tests/test_make_map_subset.py:
--------------------------------------------------------------------------------
1 |
2 | from .. import make_map_subset
3 | from data.util import get_reproject_point, reproject_records
4 | import os
5 | from data.record import transformer_4326_to_3857
6 |
7 |
8 | TEST_FP = os.path.dirname(os.path.abspath(__file__))
9 |
10 |
def test_get_buffer():
    """Exercise make_map_subset.get_buffer on the fixture maps: a buffer
    that contains features, one that contains none, and a MultiLineString."""
    map_fp = os.path.join(TEST_FP, 'data', 'test_make_map.geojson')
    center_lat = 42.3693239
    center_lon = -71.10103649999999
    radius = 20

    results = make_map_subset.get_buffer(map_fp, center_lat, center_lon, radius)
    features = results['features']
    assert len(features) == 5

    lines = [feat for feat in features
             if feat['geometry']['type'] == 'LineString']
    assert len(lines) == 4

    point = [feat for feat in features
             if feat['geometry']['type'] == 'Point']
    assert len(point) == 1

    # Make sure that all the resulting features are at least partially
    # within the buffer
    center_point = get_reproject_point(
        center_lat, center_lon, transformer_4326_to_3857)
    buff_poly = center_point.buffer(radius)

    # To do this, have to convert the points and linestrings back to 3857
    for record in reproject_records(lines):
        assert record['geometry'].intersects(buff_poly)

    point_coords = point[0]['geometry']['coordinates']
    point_3857 = get_reproject_point(
        point_coords[1], point_coords[0], transformer_4326_to_3857)
    assert point_3857.within(buff_poly)

    # A center with no features inside the buffer yields an empty list
    results = make_map_subset.get_buffer(map_fp, 42.3601, 71.0589, radius)
    assert results == []

    # Test multilinestring
    multiline_fp = os.path.join(
        TEST_FP, 'data', 'make_map_multilinestring.geojson')
    results = make_map_subset.get_buffer(
        multiline_fp, 42.3693167036633, -71.1010048225989, radius)
    assert len(results['features']) == 1
61 |
--------------------------------------------------------------------------------
/src/tools/tests/test_update_config.py:
--------------------------------------------------------------------------------
1 | from .. import update_configs
2 | import os
3 |
4 |
5 | TEST_FP = os.path.dirname(os.path.abspath(__file__))
6 |
7 |
def test_add_feature(tmpdir):
    """Add one feature to an existing feature type and one to a new
    feature type, then check the exact text of the rewritten config
    (including that the leading comment survives)."""

    # Write a test config to file
    # NOTE(review): the YAML nesting in both literals below is assumed to be
    # 2-space indented — confirm against the checked-in test file.
    test_config = """
# Test comments are preserved

openstreetmap_features:
  categorical:
    width: Width
"""
    config_filename = os.path.join(tmpdir, 'test.yml')
    with open(config_filename, "w") as f:
        f.write(test_config)

    # 'categorical' already exists in the config, so only the feature is new
    update_configs.add_feature(config_filename, [
        'openstreetmap_features',
        'categorical',
        'test',
        'Test Name'
    ])
    # 'continuous' is not in the config yet, so the feature type is new too
    update_configs.add_feature(config_filename, [
        'openstreetmap_features',
        'continuous',
        'another_test',
        'Test Name2'
    ])

    with open(config_filename) as f:
        result = f.read()
    # The expected text has no leading blank line, unlike test_config
    assert result == """# Test comments are preserved

openstreetmap_features:
  categorical:
    width: Width
    test: Test Name
  continuous:
    another_test: Test Name2
"""
46 |
--------------------------------------------------------------------------------
/src/tools/update_configs.py:
--------------------------------------------------------------------------------
1 | # Update config files
2 | import argparse
3 | import ruamel.yaml
4 |
5 |
def add_feature(filename, feat_info):
    """
    Add a new feature to a config file, keeping the file's comments
    Args:
        filename - config file
        feat_info - a list consisting of
            - feature set type (e.g. openstreetmap_features)
            - feature type (categorical or continuous)
            - feature (the name of the feature, e.g. width)
            - feature name (human readable feature name)
    If the feature set type doesn't exist, it will be added,
    but it needs to be in the set of feature set types possible:
    openstreetmap_features or waze_features

    Returns None. Invalid arguments are reported with print() and the
    file is not opened. If the feature is already present the file is
    left untouched.
    """
    # Validate the arguments first so a bad call never reads or
    # rewrites the config file
    if len(feat_info) != 4:
        print("Wrong number of args to -a")
        return

    feat_set, feat_type, feat, feat_name = feat_info
    if feat_set not in ('openstreetmap_features', 'waze_features'):
        print("feature set given is not valid")
        return

    # YAML() defaults to the round-trip loader/dumper, which preserves
    # comments; the module-level round_trip_load/round_trip_dump helpers
    # are deprecated and no longer available in ruamel.yaml >= 0.18
    yaml = ruamel.yaml.YAML()
    with open(filename, 'r') as myfile:
        config = yaml.load(myfile)

    # An empty (or comment-only) file loads as None
    if config is None:
        config = ruamel.yaml.comments.CommentedMap()

    # If the feature set doesn't exist (or is an empty key), add it
    if config.get(feat_set) is None:
        config[feat_set] = ruamel.yaml.comments.CommentedMap()
    feature_set = config[feat_set]

    # if the feat_type doesn't exist (or is an empty key), add it
    if feature_set.get(feat_type) is None:
        feature_set[feat_type] = ruamel.yaml.comments.CommentedMap()
    features = feature_set[feat_type]

    if feat in features:
        # Nothing changed, so there is nothing to write back
        print("Feature already exists, skipping")
        return

    features[feat] = feat_name
    with open(filename, "w") as f:
        yaml.dump(config, f)
53 |
54 |
if __name__ == '__main__':
    # Examples
    #     - Add a feature to open street map features
    #         -f config.yml -a openstreetmap_features categorical test "human readable name"
    #     - Add a feature to waze features
    #         -f config.yml -a waze_features categorical test "human readable name"
    #     - Remove a feature from osm or waze (no option for this exists yet)
    #
    # The four values after -a are separate arguments; only the human
    # readable name needs quoting (when it contains spaces). Passing all
    # four inside one quoted string is rejected by add_feature as the
    # wrong number of args.

    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--filenames", nargs="+",
                        help="config filenames",
                        required=True)
    parser.add_argument("-a", "--addfeatures", nargs="+",
                        help="Feature to add, given as four values: " +
                        "feature set (e.g. openstreetmap_features), " +
                        "feature type (categorical or continuous), " +
                        "feature name, human readable feature name in quotes")
    args = parser.parse_args()

    if args.addfeatures:
        # The same feature is added to every config file listed
        for filename in args.filenames:
            add_feature(filename, args.addfeatures)
79 |
--------------------------------------------------------------------------------
/src/tools/waze_feed.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import requests
4 | import argparse
5 | import datetime
6 | import json
7 | import gzip
8 | import os
9 | import yaml
10 |
11 |
if __name__ == '__main__':
    # Given a yml file of city -> waze feed url, and a directory to write
    # to, fetch every feed and write the gzipped json to
    # <dirname>/<city>/<utc timestamp>.json.gz
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', "--file", type=str, required=True,
                        help="yml file containing city and waze feed urls")
    parser.add_argument('-d', "--dirname", type=str, required=True,
                        help="directory to write results to")

    args = parser.parse_args()

    os.makedirs(args.dirname, exist_ok=True)

    with open(args.file) as f:
        feeds = yaml.safe_load(f)

    failed_cities = []
    # An empty yml file loads as None; treat it as "no feeds"
    for city, url in (feeds or {}).items():
        try:
            # Without a timeout a stalled feed would hang the run forever
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError) as err:
            # Keep going so one bad feed does not block the other cities;
            # the failure is reported through the exit status below
            print("Could not fetch waze feed for {}: {}".format(city, err))
            failed_cities.append(str(city))
            continue

        dirname = os.path.join(args.dirname, city)
        os.makedirs(dirname, exist_ok=True)

        # Filename is the current minute, in utc time
        timestamp = datetime.datetime.now(
            datetime.timezone.utc).strftime("%Y-%m-%d-%H-%M")
        json_bytes = json.dumps(payload).encode('utf-8')

        outfile = os.path.join(dirname,
                               timestamp + '.json.gz')

        with gzip.open(outfile, 'wb') as gz_file:
            gz_file.write(json_bytes)

    if failed_cities:
        # A string argument is printed to stderr and gives exit status 1
        raise SystemExit(
            "Failed to fetch feeds for: " + ", ".join(failed_cities))
47 |
48 |
49 |
--------------------------------------------------------------------------------
/src/visualization/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/visualization/.gitkeep
--------------------------------------------------------------------------------
/src/visualization/README.md:
--------------------------------------------------------------------------------
1 | # Crash model visualization
2 |
3 | This directory contains the code relevant to the visualization efforts for this project.
4 |
5 | ## Visualization Products
6 |
7 | _risk_map.py_ - This script can be used to plot predictions generated from multiple models on a single Leaflet map of Boston. It color-codes each segment based on the magnitude of the predicted risk.
8 |
9 | To run this script, you need the following inputs:
10 | - inter_and_non_int.shp (created in create_segments.py)
11 | - csv files of predictions (each row should have 1 prediction per segment and be stored in the `data/processed/` directory)
12 |
13 | The script takes the following flag arguments on the command line:
14 |
15 | -m = model names (these will be the names of the layers on your map)
16 |
17 | -f = csv file names (one for each model and specified in the same order as the model names)
18 |
19 | -c = names of the prediction columns (one for each file and specified in the same order as the model names)
20 |
21 | -n = optional flag to indicate if predictions need to be normalized
22 |
23 | An example of how to run this script to plot the output from two models is as follows:
24 | ```
25 | python risk_map.py -m model1 model2 -f model1_output.csv model2_output.csv -c risk_score preds
26 | ```
27 |
28 | _plot_points.py_ - This script can be used to plot point-level data on a Leaflet map of Boston.
29 |
30 | To run this script, you need the following inputs:
31 | - csv files of point-level data (there should be separate columns named "X" and "Y" for the X and Y coordinates. The files should be stored in the `data/processed/` directory)
32 |
33 | The script takes the following flag arguments on the command line:
34 |
35 | -n = name of the data to be plotted (these will be the names of the layers on your map)
36 |
37 | -f = csv file names (one for each set of data and specified in the same order as the layer names)
38 |
39 | An example of how to run this script is as follows:
40 | ```
41 | python plot_points.py -n crashes -f cad_crash_events.csv
42 | ```
43 |
44 | _historical_crash_map.html_ - This static site plots historical crash data and model predictions for a given week in 2016. Users can scrub the slider to see different weeks visualized on the map. A bar graph at the bottom summarizes the total number of crashes by week.
45 |
46 | To run this site, you need the following:
47 | - the /css and /js subdirectories with the files contained within
48 | - cad.geojson and car_preds_named.json
49 | - to run historical_crash_map.py to generate the data needed for the bar graph. Place the generated csv file in the same directory as the html file.
50 |
--------------------------------------------------------------------------------
/src/visualization/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/visualization/__init__.py
--------------------------------------------------------------------------------
/src/visualization/plot_points.py:
--------------------------------------------------------------------------------
1 | """
2 | Title: plot_points.py
3 |
4 | Author: @andhint, @bpben, @alicefeng
5 |
6 | This script visualizes point-level data on a map.
7 |
8 | Usage:
9 | --name: name of the data to be plotted
10 | this will be used as the name of the layers in the map so they must be unique
11 | --filename: filename of the dataset
12 | must be csvs with separate columns named "X" and "Y" for the X and Y coordinates
13 |
14 | Inputs:
15 | csv files of point-level data to be visualized
16 |
17 | Output:
18 | plot_points.html - a Leaflet map with point-level data plotted on it
19 | """
20 |
21 | import pandas as pd
22 | import geopandas as gpd
23 | from shapely.geometry import Point
24 | import folium
25 | from folium import FeatureGroup, CircleMarker
26 | import argparse
27 | import os
28 |
29 |
# all datasets must be stored in the "data/processed/" directory
# (unless an alternate directory is given with --datadir)
BASE_DIR = os.path.dirname(
    os.path.dirname(
        os.path.dirname(
            os.path.abspath(__file__))))

DATA_FP = BASE_DIR + '/data/processed/'


# parse arguments
parser = argparse.ArgumentParser(description="Plot point-level data on a map")
# --name and --filename are required: without them there is nothing to plot,
# and argparse now reports that instead of a TypeError on len(None)
parser.add_argument("-n", "--name", nargs="+", required=True,
                    help="name of the layer, must be unique")
parser.add_argument("-f", "--filename", nargs="+", required=True,
                    help="name of the dataset file to be plotted on the map, must specify at least 1")
# Coordinates are parsed as floats so the map center is numeric whether it
# comes from the command line or from the defaults below
parser.add_argument("-lat", "--latitude", type=float,
                    help="alternate latitude for the base map")
parser.add_argument("-lon", "--longitude", type=float,
                    help="alternate longitude for the base map")
parser.add_argument("-dir", "--datadir",
                    help="alternate data directory for the files")

args = parser.parse_args()

# zip layer names and filenames
if len(args.name) != len(args.filename):
    raise Exception("Number of layers and files must match")
match = list(zip(args.name, args.filename))

# Compare against None so that a coordinate of 0 is not replaced by the default
latitude = args.latitude if args.latitude is not None else 42.3601
longitude = args.longitude if args.longitude is not None else -71.0589

if args.datadir:
    # Joining with '' guarantees a trailing path separator, so a filename
    # can be appended to DATA_FP safely
    DATA_FP = os.path.join(args.datadir, '')
65 |
def process_data(filename):
    """Preps data for plotting on a map

    Reads in a csv with separate "X" and "Y" coordinate columns and
    attaches a point geometry built from each (X, Y) pair.

    Args:
        filename: name of the csv file, looked up inside DATA_FP

    Returns:
        a GeoDataFrame holding the csv's columns plus the point geometries
    """
    # os.path.join works whether or not DATA_FP ends with a separator
    df = pd.read_csv(os.path.join(DATA_FP, filename))
    geometry = [Point(xy) for xy in zip(df.X, df.Y)]

    # "EPSG:4326" is the authority-string form of the CRS; the
    # {'init': 'epsg:4326'} dict form is deprecated
    geo_df = gpd.GeoDataFrame(df, crs="EPSG:4326", geometry=geometry)

    return geo_df
84 |
def add_layer(dataset, layername, mapname, color):
    """Draws a dataset's points on a Leaflet map as a single layer

    All markers are collected in one FeatureGroup, which is then added
    to the map.

    Args:
        dataset: a dataframe whose 'geometry' column holds the points to plot
        layername: name given to the FeatureGroup
        mapname: the map object the FeatureGroup is added to
        color: color used for both the outline and fill of each marker

    Returns:
        None; the layer is added to the map that was passed in
    """
    layer = FeatureGroup(name=layername)
    for geom in dataset['geometry']:
        # Marker locations are given as [y, x]
        marker = CircleMarker(location=[geom.y, geom.x],
                              radius=4,
                              color=color,
                              fill_color=color)
        marker.add_to(layer)

    layer.add_to(mapname)
108 |
109 |
110 |
111 |
### Make map

# First create basemap
boston_map = folium.Map(
    [latitude, longitude], tiles='Cartodb dark_matter', zoom_start=12)
folium.TileLayer('Cartodb Positron').add_to(boston_map)

# Create sequence of colors so different layers appear in different colors
colors = ['#66c2a5','#fc8d62','#8da0cb','#e78ac3','#a6d854']

# Plot data as separate layers
for i, (layername, filename) in enumerate(match):
    data = process_data(filename)
    # Wrap around the palette so more layers than colors reuses colors
    # instead of raising IndexError
    add_layer(data, layername, boston_map, colors[i % len(colors)])

# Add control to toggle between model layers
folium.LayerControl(position='bottomright').add_to(boston_map)


# Save map as separate html file
boston_map.save('plot_points.html')
133 |
--------------------------------------------------------------------------------
/src/visualization/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/visualization/tests/__init__.py
--------------------------------------------------------------------------------
/src/visualization/tests/data/single_segment.geojson:
--------------------------------------------------------------------------------
1 | {
2 | "type": "FeatureCollection",
3 | "features": [
4 | {
5 | "type": "Feature",
6 | "id": "001",
7 | "geometry": {
8 | "type": "LineString",
9 | "coordinates": [
10 | [
11 | -71.06858488357565,
12 | 42.35165031556542
13 | ],
14 | [
15 | -71.06876751642436,
16 | 42.35161688446769
17 | ]
18 | ]
19 | },
20 | "properties": {
21 | "id": "001",
22 | "access": null,
23 | "area": null,
24 | "bridge": null,
25 | "from": "61341696",
26 | "highway": "secondary",
27 | "junction": null,
28 | "key": "0",
29 | "lanes": 2,
30 | "length": "44.954",
31 | "maxspeed": null,
32 | "name": "Park Plaza",
33 | "oneway": 1,
34 | "osmid": "8652528",
35 | "ref": null,
36 | "to": "61341267",
37 | "tunnel": null,
38 | "width": 30,
39 | "hwy_type": 1,
40 | "osm_speed": "0",
41 | "signal": 0,
42 | "width_per_lane": 15,
43 | "segment_id": "8652528-61341696-61341267",
44 | "dead_end": null,
45 | "streets": null,
46 | "intersection": null,
47 | "orig_id": 991,
48 | "inter": 0,
49 | "display_name": "Park Plaza between Columbus Avenue and Hadassah Way",
50 | "center_y": 42.35163360001877,
51 | "center_x": -71.06867620000001
52 | }
53 | }
54 | ]
55 | }
56 |
--------------------------------------------------------------------------------
/src/visualization/tests/data/test_prediction.csv:
--------------------------------------------------------------------------------
1 | ,segment_id,prediction
2 | 0,001,0.1223508492
3 | 0,001,0.0
4 |
--------------------------------------------------------------------------------
/src/visualization/tests/test_visualization.py:
--------------------------------------------------------------------------------
1 | from ..risk_map import process_data
2 | import os
3 | import geopandas as gpd
4 |
5 | TEST_FP = os.path.dirname(os.path.abspath(__file__))
6 |
def test_process_data():
    """Run process_data on the single-segment fixture and check the row
    count and the rounded mean of the 'prediction' column."""
    data_dir = os.path.join(TEST_FP, 'data')
    streets = gpd.read_file(os.path.join(data_dir, 'single_segment.geojson'))
    predictions_fp = os.path.join(data_dir, 'test_prediction.csv')

    streets_w_risk = process_data(streets, predictions_fp, 'prediction')

    row_count = streets_w_risk.shape[0]
    assert row_count == 1
    assert streets_w_risk['prediction'].mean().round(2) == 0.12
--------------------------------------------------------------------------------
/src/visualization/visualize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/visualization/visualize.py
--------------------------------------------------------------------------------
/standards/concerns-instance.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "id": 14808,
4 | "source": "visionzero",
5 | "dateCreated": "2016-01-19T14:48:45.000Z",
6 | "status": "Unassigned",
7 | "category": "of something that is not listed here",
8 | "location": {
9 | "latitude": 42.354167552594284,
10 | "longitude": -71.05414378860903
11 | },
12 | "summary": "This intersection is dangerous. Cars don't follow the lane markings (ie go straight while in turn lane) so it's nearly impossible to safely position yourself on a bike. In a car, people are trying to move over/it's unclear where people are going."
13 | },
14 | {
15 | "id": 14809,
16 | "source": "visionzero",
17 | "dateCreated": "2016-01-19T14:57:03.000Z",
18 | "status": "Unassigned",
19 | "category": "people don't yield while going straight",
20 | "location": {
21 | "latitude": 42.33938397670106,
22 | "longitude": -71.0994798889095
23 | },
24 | "summary": "It's terrifying to walk over here. It seems like it's impossible to get the cars to stop stop, even at the crosswalks."
25 | },
26 | {
27 | "id": 14810,
28 | "source": "visionzero",
29 | "dateCreated": "2016-01-19T15:36:25.000Z",
30 | "status": "Unassigned",
31 | "category": "it’s hard to see / low visibility",
32 | "location": {
33 | "latitude": 42.349364649630935,
34 | "longitude": -71.06656509857143
35 | },
36 | "summary": "cars coming around the corner of this wide one street are speeding and not visible for persons on the crosswalk"
37 | }
38 | ]
39 |
--------------------------------------------------------------------------------
/standards/concerns-schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-06/schema#",
3 | "title": "Concerns",
4 | "description": "Defines the structure of a set of concerns",
5 | "type": "array",
6 | "items": {
7 | "title": "Concern",
8 | "description": "Defines the structure of a concern",
9 | "type": "object",
10 | "properties": {
11 | "id": {
12 | "description": "Unique identifier of concern",
13 | "type": ["string", "number"]
14 | },
15 | "source": {
16 | "description": "Source of concern",
17 | "type": "string",
18 | "enum": ["seeclickfix", "visionzero"]
19 | },
20 | "dateCreated": {
21 | "description": "Date concern was created, ISO8601 formatted",
22 | "type": "string",
23 | "format": "date-time"
24 | },
25 | "dateResolved": {
26 | "description": "Date concern was resolved, ISO8601 formatted",
27 | "format": "date-time"
28 | },
29 | "status": {
30 | "description": "Status of concern",
31 | "type": "string"
32 | },
33 | "category": {
34 | "description": "Primary category of concern",
35 | "type": "string"
36 | },
37 | "subCategories": {
38 | "description": "Subcategories of concern",
39 | "type": "array",
40 | "items": {
41 | "type": "string"
42 | },
43 | "uniqueItems": true
44 | },
45 | "location": {
46 | "description": "Coordinates of concern, WGS84 formatted",
47 | "type": "object",
48 | "properties": {
49 | "latitude": {
50 | "description": "Latitude of concern",
51 | "type": "number"
52 | },
53 | "longitude": {
54 | "description": "Longitude of concern",
55 | "type": "number"
56 | }
57 | }
58 | },
59 | "address": {
60 | "description": "Address of concern",
61 | "type": "string"
62 | },
63 | "summary": {
64 | "description": "Summary of concern",
65 | "type": "string"
66 | }
67 | },
68 | "required": ["id", "source", "dateCreated", "status", "category", "location", "summary"]
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/standards/crashes-instance.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "id": 1403832,
4 | "dateOccurred": "2016-01-01T00:56:45-05:00",
5 | "location": {
6 | "latitude": 42.300864811284534,
7 | "longitude": -71.0713167869833
8 | },
9 | "vehicles": [
10 | {
11 | "category": "car"
12 | }
13 | ],
14 | "summary": "REPORTED INJURIES (P) (E) (F)"
15 | },
16 | {
17 | "id": 1404159,
18 | "dateOccurred": "2016-01-01T02:30:23-05:00",
19 | "location": {
20 | "latitude": 42.317987926802246,
21 | "longitude": -71.06188127008645
22 | },
23 | "vehicles": [
24 | {
25 | "category": "car"
26 | }
27 | ],
28 | "summary": "REPORTED INJURIES (P) (E) (F)"
29 | },
30 | {
31 | "id": 1404194,
32 | "dateOccurred": "2016-01-01T02:49:56-05:00",
33 | "location": {
34 | "latitude": 42.356046190978454,
35 | "longitude": -71.13132169601725
36 | },
37 | "vehicles": [
38 | {
39 | "category": "car"
40 | }
41 | ],
42 | "summary": "PEDESTRIAN STRUCK (P) (E) (F)"
43 | }
44 | ]
45 |
--------------------------------------------------------------------------------
/standards/crashes-schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-06/schema#",
3 | "title": "Crashes",
4 | "description": "Defines the structure of a set of crashes",
5 | "type": "array",
6 | "items": {
7 | "title": "Crash",
8 | "description": "Defines the structure of a crash",
9 | "type": "object",
10 | "properties": {
11 | "id": {
12 | "description": "Unique identifier of crash",
13 | "type": ["string", "number"]
14 | },
15 | "dateOccurred": {
16 | "description": "Date crash occurred, ISO8601 formatted",
17 | "type": "string",
18 | "format": "date-time"
19 | },
20 | "location": {
21 | "description": "Coordinates of crash, WGS84 formatted",
22 | "type": "object",
23 | "properties": {
24 | "latitude": {
25 | "description": "Latitude of crash",
26 | "type": "number"
27 | },
28 | "longitude": {
29 | "description": "Longitude of crash",
30 | "type": "number"
31 | }
32 | }
33 | },
34 | "vehicles": {
35 | "description": "Vehicles involved in crash",
36 | "type": "array",
37 | "items": {
38 | "title": "Vehicle",
39 | "description": "Defines the structure of a vehicle",
40 | "type": "object",
41 | "properties": {
42 | "category": {
43 | "description": "Category of vehicle",
44 | "type": "string",
45 | "enum": ["car", "bike", "taxi", "bus", "truck"]
46 | },
47 | "quantity": {
48 | "description": "Quantity of vehicles of this category",
49 | "type": "number"
50 | }
51 | },
52 | "required": ["category"]
53 | },
54 | "uniqueItems": true
55 | },
56 | "persons": {
57 | "description": "Persons involved in crash",
58 | "type": "array",
59 | "items": {
60 | "title": "Person",
61 | "description": "Defines the structure of a person",
62 | "type": "object",
63 | "properties": {
64 | "category": {
65 | "description": "Category of person",
66 | "type": "string",
67 | "enum": ["driver", "pedestrian", "cyclist", "other"]
68 | },
69 | "quantity": {
70 | "description": "Quantity of persons",
71 | "type": "number"
72 | },
73 | "injuryType": {
74 | "description": "Type of injury",
75 | "type": "string",
76 | "enum": ["minor", "major", "fatal", "unknown"]
77 | }
78 | },
79 | "required": ["category"]
80 | },
81 | "uniqueItems": true
82 | },
83 | "address": {
84 | "description": "Address of crash",
85 | "type": "string"
86 | },
87 | "summary": {
88 | "description": "Summary of crash",
89 | "type": "string"
90 | }
91 | },
92 | "required": ["id", "dateOccurred", "location"]
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/standards/points-instance.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "feature": "parking_ticket",
4 | "date": "2016-01-19T14:48:45.000Z",
5 | "category": "RESIDENT PERMIT ONLY",
6 | "location": {
7 | "latitude": 42.354167552594284,
8 | "longitude": -71.05414378860903
9 | }
10 | },
11 | {
12 | "feature": "taxi_stands",
13 | "location": {
14 | "latitude": 42.33938397670106,
15 | "longitude": -71.0994798889095
16 | }
17 | },
18 | {
19 | "feature": "crime_report",
20 | "date": "2016-01-19T15:36:25.000Z",
21 | "category": "Auto Theft",
22 | "location": {
23 | "latitude": 42.349364649630935,
24 | "longitude": -71.06656509857143
25 | },
26 | "notes": "West Cambridge"
27 | }
28 | ]
29 |
--------------------------------------------------------------------------------
/standards/points-schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-06/schema#",
3 | "title": "Points",
4 | "description": "Defines the structure of a set of point-based features",
5 | "type": "array",
6 | "items": {
7 | "title": "Point Feature",
8 | "description": "Defines the structure of a point-based feature",
9 | "type": "object",
10 | "properties": {
11 | "feature": {
12 | "description": "Type of feature, e.g. traffic tickets",
13 | "type": "string"
14 | },
15 | "date": {
16 | "description": "Date for feature, ISO8601 formatted",
17 | "type": "string",
18 | "format": "date-time"
19 | },
20 | "category": {
21 | "description": "Category of entry",
22 | "type": "string"
23 | },
24 | "notes": {
25 | "description": "Notes for entry",
26 | "type": "string"
27 | },
28 | "location": {
29 | "description": "Coordinates of entry, WGS84 formatted",
30 | "type": "object",
31 | "properties": {
32 | "latitude": {
33 | "description": "Latitude of entry",
34 | "type": "number"
35 | },
36 | "longitude": {
37 | "description": "Longitude of entry",
38 | "type": "number"
39 | }
40 | }
41 | },
42 | "feat_agg": {
43 | "description": "Type of Feature Aggregation",
44 | "type": "string"
45 | },
46 | "value": {
47 | "description": "Value of entry",
48 | "type": "number"
49 | }
50 | },
51 | "required": ["feature", "location"]
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/standards/volumes-instance.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "startDateTime": "2014-03-12",
4 | "location": {
5 | "latitude": 42.3408948,
6 | "longitude": -71.16084219999999,
7 | "address": "21 Greycliff Rd, Brighton, MA 02135, USA"
8 | },
9 | "speed": {
10 | "averageSpeed": 25
11 | },
12 | "volume": {
13 | "totalVolume": 518,
14 | "totalLightVehicles": 508,
15 | "totalHeavyVehicles": 8,
16 | "bikes": 2,
17 | "hourlyVolume": [6, 6, 0, 1, 2, 11, 18, 47, 22, 21, 17, 11, 10, 25, 38, 33, 44, 42, 30, 30, 31, 43, 20, 10]
18 | },
19 | }
20 | ]
21 |
--------------------------------------------------------------------------------
/standards/volumes-schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-06/schema#",
3 | "title": "Traffic",
4 | "description": "Defines the structure of traffic studies",
5 | "type": "array",
6 | "items": {
7 | "startDateTime": {
8 | "description": "DateTime of traffic count start, ISO8601 formatted",
9 | "type": "string",
10 | "format": "date"
11 | },
12 | "endDateTime": {
13 | "description": "DateTime of traffic count end, ISO8601 formatted. Can be left blank if the traffic count is 24 hours",
14 | "type": "string",
15 | "format": "date"
16 | },
17 | "location": {
18 | "description": "Coordinates of traffic count, WGS84 formatted",
19 | "type": "object",
20 | "properties": {
21 | "latitude": {
22 | "description": "Latitude of traffic count",
23 | "type": "number"
24 | },
25 | "longitude": {
26 | "description": "Longitude of traffic count",
27 | "type": "number"
28 | },
29 | "address": {
30 | "description": "Address of traffic count",
31 | "type": "string"
32 | }
33 | }
34 | },
35 | "speed": {
36 | "description": "Traffic speed information",
37 | "type": "object",
38 | "properties": {
39 | "averageSpeed": {
40 | "description": "Average speed in miles per hour",
41 | "type": "number"
42 | },
43 | "85thPercentileSpeed": {
44 | "description": "85th percentile speed in miles per hour",
45 | "type": "number"
46 | }
47 | }
48 | },
49 | "volume": {
50 | "description": "Traffic volume information",
51 | "type": "object",
52 | "properties": {
53 | "hourlyVolume": {
54 | "description": "Hourly total vehicle count",
55 | "type": "array"
56 | },
57 | "totalVolume": {
58 | "description": "Total vehicle count, averaged over a 24 hour period",
59 | "type": "number"
60 | },
61 | "totalCars": {
62 | "description": "Total car count, averaged over a 24 hour period",
63 | "type": "number"
64 | },
65 | "totalHeavyVehicles": {
66 | "description": "Total heavy vehicle count, averaged over a 24 hour period",
67 | "type": "number"
68 | },
69 | "totalLightVehicles": {
70 | "description": "Total light vehicle count, averaged over a 24 hour period",
71 | "type": "number"
72 | },
73 | "bikes": {
74 | "description": "Total bicycle count, averaged over a 24 hour period",
75 | "type": "number"
76 | },
77 | "AMPeak": {
78 | "description": "Average Total vehicle count during AM peak",
79 | "type": "number"
80 | },
81 | "PMPeak": {
82 | "description": "Average Total vehicle count during PM peak",
83 | "type": "number"
84 | }
85 | }
86 | },
87 | "turningMovements": {
88 | }
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/test_environment.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | REQUIRED_PYTHON = "python"
4 |
5 |
6 | def main():
7 | system_major = sys.version_info.major
8 | if REQUIRED_PYTHON == "python":
9 | required_major = 2
10 | elif REQUIRED_PYTHON == "python3":
11 | required_major = 3
12 | else:
13 | raise ValueError("Unrecognized python interpreter: {}".format(
14 | REQUIRED_PYTHON))
15 |
16 | if system_major != required_major:
17 | raise TypeError(
18 | "This project requires Python {}. Found: Python {}".format(
19 | required_major, sys.version))
20 | else:
21 | print(">>> Development environment passes all tests!")
22 |
23 |
24 | if __name__ == '__main__':  # run the check only when executed as a script
25 |     main()
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | # tox configuration: a single environment that runs the pytest suite from src/.
2 | [tox]
3 | envlist = test_service
4 | # No source distribution of the project is built before the environments run.
5 | skipsdist = true
6 |
7 | # Settings in [testenv] are the defaults for the environments defined below.
8 | [testenv]
9 | install_command = pip install {opts} {packages}
10 | basepython = python3.9
11 | # Test commands execute with src/ as the working directory.
12 | changedir = src
13 |
14 | [testenv:test_service]
15 | deps = -r requirements.txt
16 | # Combined with changedir = src, "." puts src/ on the import path.
17 | setenv = PYTHONPATH=.
18 | commands = pytest
19 |
--------------------------------------------------------------------------------