├── .codecov.yml ├── .dockerignore ├── .github └── workflows │ └── main.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── conda-linux-64.lock ├── conda-osx-64.lock ├── conda-win-64.lock ├── conf ├── insight-lane.conf ├── start.sh └── supervisord.conf ├── docs ├── MassDOT RoadInvDictionary.pdf └── model_data_dictionary.md ├── environment_linux.yml ├── environment_mac.yml ├── environment_pc.yml ├── models └── .gitkeep ├── notebooks ├── .gitkeep ├── benchmark │ ├── crash_predict_benchmark.ipynb │ └── crash_predict_rnn.ipynb ├── clustering_segment_level.ipynb ├── general_analysis.ipynb ├── intervention_effect.ipynb ├── python2 │ ├── TMC_analysis.ipynb │ ├── crash_predict_car.ipynb │ ├── data_explore_12_14_crashes_road_details.ipynb │ ├── make_shapefiles_for_tests.ipynb │ ├── open_street_map.ipynb │ └── tutorial.ipynb ├── route_level_risk_distribute.ipynb └── vision_zero_analysis.ipynb ├── references └── .gitkeep ├── requirements.txt ├── src ├── .coveragerc ├── .pylintrc ├── README.md ├── config │ ├── config_ada_county.yml │ ├── config_boise.yml │ ├── config_boston.yml │ ├── config_brisbane.yml │ ├── config_buffalo.yml │ ├── config_cambridge.yml │ ├── config_chicago.yml │ ├── config_dc.yml │ ├── config_losangeles.yml │ ├── config_melbourne.yml │ ├── config_meridian.yml │ ├── config_nyc.yml │ ├── config_philly.yml │ ├── config_pittsburgh.yml │ └── config_somerville.yml ├── data │ ├── .gitkeep │ ├── README.md │ ├── TMC_scraping │ │ ├── README.md │ │ ├── __init__.py │ │ └── parse_tmc.py │ ├── __init__.py │ ├── add_map.py │ ├── add_waze_data.py │ ├── analysis_util.py │ ├── config.py │ ├── create_segments.py │ ├── extract_intersections.py │ ├── geocoding_util.py │ ├── join_segments_crash.py │ ├── make_dataset.py │ ├── make_preds_viz.py │ ├── osm_create_maps.py │ ├── propagate_volume.py │ ├── record.py │ ├── see_click_fix │ │ └── seeclickfix.py │ ├── segment.py │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ ├── bad_intersection_test.geojson │ │ │ ├── 
concern_joined.json │ │ │ ├── concern_test_dummy.json │ │ │ ├── config_brisbane_no_supplemental.yml │ │ │ ├── config_brisbane_supplemental.yml │ │ │ ├── config_features.yml │ │ │ ├── crash_test_dummy.json │ │ │ ├── missing_segments_test.geojson │ │ │ ├── osm_crash_file.json │ │ │ ├── osm_output.gpickle │ │ │ ├── processed │ │ │ │ ├── crash_joined.json │ │ │ │ └── maps │ │ │ │ │ ├── boston_test_elements.geojson │ │ │ │ │ ├── inters.geojson │ │ │ │ │ ├── non_inters_segments.geojson │ │ │ │ │ ├── osm.gpkg │ │ │ │ │ ├── osm_elements.geojson │ │ │ │ │ ├── test_line_convert.cpg │ │ │ │ │ ├── test_line_convert.dbf │ │ │ │ │ ├── test_line_convert.shp │ │ │ │ │ └── test_line_convert.shx │ │ │ ├── raw │ │ │ │ ├── ma_cob_spatially_joined_streets.cpg │ │ │ │ ├── ma_cob_spatially_joined_streets.dbf │ │ │ │ ├── ma_cob_spatially_joined_streets.prj │ │ │ │ ├── ma_cob_spatially_joined_streets.shp │ │ │ │ └── ma_cob_spatially_joined_streets.shx │ │ │ ├── standardized │ │ │ │ ├── Vision_Zero_Entry.csv │ │ │ │ ├── concerns.json │ │ │ │ ├── crashes.csv │ │ │ │ └── crashes.json │ │ │ ├── test_add_map │ │ │ │ ├── ma_cob_small.cpg │ │ │ │ ├── ma_cob_small.dbf │ │ │ │ ├── ma_cob_small.shp │ │ │ │ ├── ma_cob_small.shx │ │ │ │ ├── osm3857.cpg │ │ │ │ ├── osm3857.dbf │ │ │ │ ├── osm3857.geojson │ │ │ │ ├── osm3857.prj │ │ │ │ ├── osm3857.shp │ │ │ │ └── osm3857.shx │ │ │ ├── test_create_segments │ │ │ │ ├── additional_points.json │ │ │ │ ├── empty_set_inter.geojson │ │ │ │ ├── missing_int_segments.geojson │ │ │ │ ├── no_non_inter.geojson │ │ │ │ ├── points.geojson │ │ │ │ ├── points_test.json │ │ │ │ ├── test_adjacency.geojson │ │ │ │ ├── test_get_connections1.geojson │ │ │ │ ├── test_get_connections2.geojson │ │ │ │ ├── test_linestring.geojson │ │ │ │ └── unconnected.geojson │ │ │ ├── test_get_roads_and_inters.geojson │ │ │ ├── test_waze │ │ │ │ ├── osm_elements.geojson │ │ │ │ └── test_waze.json │ │ │ └── viz_preds_tests │ │ │ │ ├── crashes_rollup.geojson │ │ │ │ ├── 
crashes_rollup_pedestrian.geojson │ │ │ │ ├── single_prediction.json │ │ │ │ ├── single_prediction_viz.geojson │ │ │ │ └── single_segment.geojson │ │ ├── test_add_map.py │ │ ├── test_add_waze_data.py │ │ ├── test_all.py │ │ ├── test_analysis_util.py │ │ ├── test_config.py │ │ ├── test_create_segments.py │ │ ├── test_extract_intersections.py │ │ ├── test_initialize_city.py │ │ ├── test_join_segments_crash.py │ │ ├── test_make_preds_viz.py │ │ ├── test_osm_create_maps.py │ │ ├── test_pipeline.py │ │ └── test_util.py │ ├── util.py │ └── weather │ │ ├── BostonWeather2016_Wunderground.Rda │ │ ├── BostonWeather2016_Wunderground.csv │ │ ├── README.md │ │ └── weatherScrapingScript.R ├── data_standardization │ ├── __init__.py │ ├── boston_volume.py │ ├── standardization_util.py │ ├── standardize_crashes.py │ ├── standardize_point_data.py │ ├── standardize_volume.py │ ├── standardize_waze_data.py │ └── tests │ │ ├── __init__.py │ │ ├── data │ │ ├── 8811_NA_NA_83_PEARL-ST_CHARLESTOWN_24-HOURS_XXX_01-11-2017.XLSX │ │ └── waze │ │ │ ├── 2018-10-15-20-15.json.gz │ │ │ ├── 2018-10-16-08-00.json │ │ │ └── 2018-10-17-16-15.json.gz │ │ ├── test-schema.json │ │ ├── test_boston_volume.py │ │ ├── test_standardization_util.py │ │ ├── test_standardize_crashes.py │ │ ├── test_standardize_point_data.py │ │ └── test_standardize_waze_data.py ├── features │ ├── .gitkeep │ ├── __init__.py │ ├── build_features.py │ ├── make_canon_dataset.py │ └── tests │ │ ├── __init__.py │ │ ├── data │ │ └── processed │ │ │ ├── concern_joined.json │ │ │ ├── crash_joined.json │ │ │ ├── inters_data.json │ │ │ └── maps │ │ │ └── inter_and_non_int.geojson │ │ └── test_make_canon.py ├── initialize_city.py ├── models │ ├── .gitkeep │ ├── __init__.py │ ├── make_weekly.py │ ├── model_classes.py │ ├── old_train.py │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ ├── data_model.csv │ │ │ └── features.yml │ │ └── test_train_model.py │ └── train_model.py ├── pipeline.py ├── showcase │ ├── .dockerignore │ ├── 
.gcloudignore │ ├── Dockerfile │ ├── Dockerfile.gcp │ ├── __init__.py │ ├── app.py │ ├── cloudbuild.yaml │ ├── nginx.conf │ ├── run_all_cities.py │ ├── static │ │ ├── config.js │ │ ├── gcp_config.js │ │ ├── make_plots.js │ │ ├── style.css │ │ └── update_map.js │ └── templates │ │ └── index.html ├── tools │ ├── __init__.py │ ├── geocode_batch.py │ ├── make_map_subset.py │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ ├── geocoded_addresses.csv │ │ │ ├── make_map_multilinestring.geojson │ │ │ ├── test_make_map.geojson │ │ │ └── to_geocode.csv │ │ ├── test_geocode_batch.py │ │ ├── test_make_map_subset.py │ │ └── test_update_config.py │ ├── update_configs.py │ └── waze_feed.py └── visualization │ ├── .gitkeep │ ├── README.md │ ├── __init__.py │ ├── plot_points.py │ ├── risk_map.py │ ├── tests │ ├── __init__.py │ ├── data │ │ ├── single_segment.geojson │ │ └── test_prediction.csv │ └── test_visualization.py │ └── visualize.py ├── standards ├── concerns-instance.json ├── concerns-schema.json ├── crashes-instance.json ├── crashes-schema.json ├── points-instance.json ├── points-schema.json ├── volumes-instance.json └── volumes-schema.json ├── test_environment.py └── tox.ini /.codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | notify: 3 | require_ci_to_pass: yes 4 | 5 | coverage: 6 | precision: 2 7 | round: down 8 | range: "39...100" 9 | 10 | status: 11 | project: yes 12 | patch: no 13 | changes: no 14 | 15 | parsers: 16 | gcov: 17 | branch_detection: 18 | conditional: yes 19 | loop: yes 20 | method: no 21 | macro: no 22 | 23 | comment: 24 | layout: "header, diff" 25 | behavior: default 26 | require_changes: no 27 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /.github/workflows/main.yml: 
-------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python application 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | linux: 14 | env: 15 | PYTHONFAULTHANDLER: "true" 16 | runs-on: ubuntu-latest 17 | defaults: 18 | run: 19 | shell: bash -l {0} 20 | steps: 21 | - uses: actions/checkout@v2 22 | - uses: conda-incubator/setup-miniconda@v2 23 | with: 24 | auto-update-conda: false 25 | # activate-environment: crash-model 26 | # environment-file: conda-linux-64.lock 27 | channels: conda-forge, defaults 28 | channel-priority: strict 29 | - name: Run linux tests 30 | run: | 31 | set -eo pipefail 32 | conda env create --name crash-model -f requirements.txt 33 | conda activate crash-model 34 | cd src 35 | py.test --cov=./ --cov-report=xml 36 | - name: Upload coverage to Codecov 37 | uses: codecov/codecov-action@v1 38 | with: 39 | file: ./src/coverage.xml 40 | flags: unittests 41 | env_vars: OS,PYTHON 42 | name: codecov-umbrella 43 | fail_ci_if_error: true 44 | mac: 45 | env: 46 | PYTHONFAULTHANDLER: "true" 47 | runs-on: macos-11 48 | steps: 49 | - uses: actions/checkout@v2 50 | - uses: conda-incubator/setup-miniconda@v2 51 | with: 52 | auto-update-conda: true 53 | #activate-environment: crash-model 54 | #environment-file: conda-osx-64.lock 55 | #environment-file: environment_mac.yml 56 | channels: conda-forge, defaults 57 | channel-priority: strict 58 | - name: Run mac tests 59 | run: | 60 | set -eo pipefail 61 | . 
/usr/local/miniconda/etc/profile.d/conda.sh 62 | sudo chown -R $USER $CONDA 63 | conda env create --name crash-model -f requirements.txt 64 | conda activate crash-model 65 | cd src 66 | pytest 67 | # unclear that we need to test on windows 68 | # windows: 69 | # env: 70 | # PYTHONFAULTHANDLER: "true" 71 | # runs-on: windows-latest 72 | # steps: 73 | # - uses: actions/checkout@v2 74 | # - uses: conda-incubator/setup-miniconda@v2 75 | # with: 76 | # auto-update-conda: true 77 | # #activate-environment: crash-model 78 | # #environment-file: conda-win-64.lock 79 | # channels: conda-forge, defaults 80 | # channel-priority: strict 81 | # - name: Run windows tests 82 | # shell: bash -l {0} 83 | # run: | 84 | # set -eo pipefail 85 | # conda env create --name crash-model -f requirements.txt 86 | # cd src 87 | # py.test 88 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python template 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | .venv/ 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | # Conda envs 94 | Scripts 95 | 96 | # Data 97 | /data/ 98 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | # update 3 | RUN conda update -n base conda 4 | 5 | # Set package installer as non-interactive 6 | ENV DEBIAN_FRONTEND noninteractive 7 | 8 | # Set a terminal type 9 | ENV TERM xterm-256color 10 | 11 | WORKDIR /app 12 | 13 | # Install packages 14 | RUN apt-get update -qq && apt-get install -y --no-install-recommends \ 15 | # apache for serving the visualisation 16 | apache2 \ 17 | # easier management of services via supervisor 18 | supervisor \ 19 | # base anaconda image seems to lack libgl support required for our virtual environment 20 | libgl1-mesa-glx \ 21 | # handy text editor 22 | vim 23 | 24 | # Setup apache & supervisor 25 | RUN rm -rf /var/www/html && ln -s /app/reports /var/www/html 26 | ADD conf/insight-lane.conf /etc/apache2/sites-available/insight-lane.conf 27 | RUN ln 
-s /etc/apache2/sites-available/insight-lane.conf /etc/apache2/sites-enabled/insight-lane.conf 28 | RUN a2enmod rewrite 29 | ADD conf/supervisord.conf /etc/supervisord.conf 30 | 31 | # Make sure processes are stopped 32 | RUN service apache2 stop && service supervisor stop 33 | 34 | # Entrypoint script that will kick off supervisor (which in turn starts apache) 35 | ADD conf/start.sh /start.sh 36 | RUN chmod +x /start.sh 37 | 38 | # Setup the project's virtual environment 39 | COPY conda-linux-64.lock /app/conda-linux-64.lock 40 | RUN ["conda", "create", "--name", "crash-model", "--file", "conda-linux-64.lock"] 41 | 42 | 43 | # Use bash for the entrypoint rather than sh, for 'conda activate' compatibility 44 | ENTRYPOINT ["/bin/bash", "-c"] 45 | 46 | # Activate the project's virtual environment 47 | RUN echo "conda activate crash-model" >> ~/.bashrc 48 | 49 | # this startup script runs supervisor in foreground (which in turn starts apache) to keep container running 50 | CMD ["/start.sh"] 51 | 52 | # Make the apache port available 53 | EXPOSE 8080 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | The MIT License (MIT) 3 | Copyright (c) 2017, terryf82 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | 11 | -------------------------------------------------------------------------------- /conf/insight-lane.conf: -------------------------------------------------------------------------------- 1 | Listen 8080 2 | 3 | 4 | DocumentRoot /var/www/html 5 | 6 | 7 | # Base Apache config 8 | Options -Indexes +FollowSymLinks -MultiViews 9 | AllowOverride all 10 | Require all granted 11 | 12 | RewriteEngine on 13 | 14 | # if a directory or a file exists, use it directly 15 | RewriteCond %{REQUEST_FILENAME} !-f 16 | RewriteCond %{REQUEST_FILENAME} !-d 17 | 18 | # otherwise forward it to index.html 19 | RewriteRule ^ index.html [QSA,L] 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /conf/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # -e: exit immediately if a command exits with a non-zero status 4 | set -e 5 | echo "starting supervisor in foreground" 6 | supervisord -c /etc/supervisord.conf -n 7 | 8 | # don't put anything else in this file, it won't run! 
9 | -------------------------------------------------------------------------------- /conf/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | http_port=/var/tmp/supervisor.sock ; (default is to run a UNIX domain socket server) 3 | 4 | logfile=/var/log/supervisor/supervisord.log ; (main log file;default $CWD/supervisord.log) 5 | logfile_maxbytes=50MB ; (max main logfile bytes b4 rotation;default 50MB) 6 | logfile_backups=10 ; (num of main logfile rotation backups;default 10) 7 | loglevel=info ; (logging level;default info; others: debug,warn) 8 | pidfile=/var/run/supervisord.pid ; (supervisord pidfile;default supervisord.pid) 9 | nodaemon=false ; (start in foreground if true;default false) 10 | minfds=1024 ; (min. avail startup file descriptors;default 1024) 11 | minprocs=200 ; (min. avail process descriptors;default 200) 12 | 13 | [supervisorctl] 14 | serverurl=unix:///var/tmp/supervisor.sock ; use a unix:// URL for a unix socket 15 | 16 | [program:apache2] 17 | command=apachectl -e info -DFOREGROUND 18 | autostart=true 19 | autorestart=true 20 | user=root 21 | -------------------------------------------------------------------------------- /docs/MassDOT RoadInvDictionary.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/docs/MassDOT RoadInvDictionary.pdf -------------------------------------------------------------------------------- /models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/models/.gitkeep -------------------------------------------------------------------------------- /notebooks/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/notebooks/.gitkeep -------------------------------------------------------------------------------- /notebooks/python2/tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Before you get started, you need to get an account for data.world, and download the data/ directory. That directory goes in the boston-crash-modeling directory." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 4, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "PROCESSED_DATA_FP = '../osm-data/processed/'" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 5, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import json\n", 30 | "import os\n", 31 | "BASE_DIR = os.path.dirname(os.getcwd())\n", 32 | "os.chdir(BASE_DIR + '/src/data/')\n", 33 | "import util\n", 34 | "os.chdir(BASE_DIR + '/notebooks/')" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "In data/processed/ there is a file called crash_joined.json. This is a list of dicts containing each crash event and which road segment (intersection or non-intersection) it is closest to." 
42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 6, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "7220 crashes found\n", 54 | "{u'vehicles': u\"[{u'category': u'car'}]\", u'summary': u'REPORTED INJURIES (P) (E) (F)', u'persons': u'{}', u'address': u'None', u'id': u'1403832', u'near_id': u'0011059', u'dateOccurred': u'2016-01-01 00:56:45-05:00'}\n", 55 | "3379 unique crash locations found\n", 56 | "\n", 57 | "Highest number of crashes at any one location: [38, 34, 31, 29, 29, 27, 27, 27, 22, 21]\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "items = json.load(open(PROCESSED_DATA_FP + 'crash_joined.json'))\n", 63 | "crash_list, crashes_by_id = util.group_json_by_location(items)\n", 64 | "print str(len(crash_list)) + \" crashes found\"\n", 65 | "\n", 66 | "# Crash_list is a list of dicts about each individual crash\n", 67 | "print crash_list[0]\n", 68 | "\n", 69 | "# Crashes by id is a dict where the key is the segment id. It contains the count of crashes at that intersection\n", 70 | "# Can also contain other information about this location, if called with arg otherfields. 
In this case, we're\n", 71 | "# Looking at time\n", 72 | "sorted = [value['count'] for key,value in crashes_by_id.items() if key != '']\n", 73 | "sorted.sort(reverse=True)\n", 74 | "print str(len(sorted)) + \" unique crash locations found\\n\"\n", 75 | "\n", 76 | "print \"Highest number of crashes at any one location: \" + str(sorted[0:10])\n", 77 | "worst_id = [key for key, value in crashes_by_id.items() if value['count'] == sorted[0]][0]\n" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 7, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "(u'Oxford St & Beacon St, Somerville, MA 02143, USA', 42.3860143, -71.1161841)\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "x = util.geocode_address('Beacon and Oxford, Somerville MA')\n", 95 | "print x" 96 | ] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 2", 102 | "language": "python", 103 | "name": "python2" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 2 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython2", 115 | "version": "2.7.13" 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 2 120 | } 121 | -------------------------------------------------------------------------------- /references/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/references/.gitkeep -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | codecov==2.1.13 2 | fiona==1.9.4 3 | flask==3.0.2 4 | folium==0.16.0 5 | geopandas==0.14.3 6 | geocoder==1.38.1 7 | geojson==3.1.0 
8 | matplotlib==3.8.3 9 | numpy==1.26.4 10 | openpyxl==3.1.2 11 | osmnx==1.9.1 12 | pandas==2.2.1 13 | pyproj==3.6.1 14 | pytest==8.0.2 15 | pyyaml==6.0.1 16 | pytest-cov==4.1.0 17 | rtree==1.2.0 18 | ruamel.yaml==0.17.33 19 | scikit-learn==1.4.1.post1 20 | shapely==2.0.3 21 | tzlocal==5.2 22 | xlrd==2.0.1 23 | xgboost==2.0.3 24 | tox==4.14.1 25 | jsonschema==4.21.1 26 | pylint==3.1.0 -------------------------------------------------------------------------------- /src/.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = */tests/* 3 | showcase/* -------------------------------------------------------------------------------- /src/config/config_ada_county.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Ada County 3 | # City center point latitude & longitude (default geocoded values set) 4 | city_latitude: 43.60764000000006 5 | city_longitude: -116.19339999999994 6 | 7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file 8 | timezone: America/Boise 9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 10 | city_radius: 20 11 | speed_unit: mph 12 | 13 | # By default, maps are created from OSM's polygon data and fall back to radius 14 | # if there is no polygon data, but you can change map_geography 15 | # to 'radius' if preferred 16 | map_geography: polygon 17 | 18 | # The folder under data where this city's data is stored 19 | name: ada_county 20 | 21 | # If given, limit crashes to after startdate and no later than enddate 22 | # Recommended to limit to just a few years for now 23 | startdate: 2015-01-01 24 | enddate: 2019-12-31 25 | 26 | ################################################################# 27 | # Configuration for data standardization 28 | 29 | # crash file configurations 30 | crashes_files: 31 | 
Ada_County_Crashes_2015_2019_filtered.csv: 32 | required: 33 | id: FID 34 | latitude: latitude 35 | longitude: longitude 36 | # If date supplied in single column: 37 | date_complete: accident_date 38 | # If date is separated into year/month/day: 39 | date_year: 40 | date_month: 41 | # Leave date_day empty if not available 42 | date_day: 43 | # If time is available and separate from date: 44 | time: accident_time 45 | # If time specified, time_format is one of: 46 | # default (HH:MM:SS) 47 | # seconds (since midnight) 48 | # military (HHMM) 49 | time_format: default 50 | optional: 51 | summary: 52 | # If the crash file doesn't have a lat/lon, you must give the address field 53 | # and you will need to run the geocode_batch script - see the README 54 | address: 55 | # This section allows you to specify additional feature in the crash file 56 | # (split_columns) to go into the training set 57 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle) 58 | # but you can specify other fields in the crash data file. 59 | # See the README for examples 60 | split_columns: 61 | pedestrian: 62 | column_name: crash_type 63 | column_value: pedestrian 64 | bike: 65 | column_name: crash_type 66 | column_value: bicycle 67 | vehicle: 68 | column_name: crash_type 69 | column_value: vehicle 70 | 71 | ################################################################# 72 | # Configuration for default features 73 | 74 | # Default features from open street map. You can remove features you don't want 75 | # Note: we don't support adding features in the config file. 
76 | # If there is an additional feature you want from open street map, contact the development team 77 | openstreetmap_features: 78 | categorical: 79 | width: Width 80 | cycleway_type: Bike lane 81 | signal: Signal 82 | oneway: One Way 83 | lanes: Number of lanes 84 | continuous: 85 | width_per_lane: Average width per lane 86 | 87 | # Speed limit is a required feature 88 | # If you choose to override OpenStreetMaps' speed limit, replace 'osm_speed' with the feature name here 89 | speed_limit: osm_speed 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /src/config/config_boise.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Boise, Idaho, USA 3 | 4 | # City centerpoint latitude & longitude (default geocoded values set) 5 | city_latitude: 43.60764000000006 6 | city_longitude: -116.19339999999994 7 | 8 | # City's time zone: defaults to the local time zone of computer initializing the city's config file 9 | timezone: America/Denver 10 | 11 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 12 | city_radius: 20 13 | speed_unit: mph 14 | 15 | # By default, maps are created from OSM's polygon data and fall back to radius 16 | # if there is no polygon data, but you can change map_geography 17 | # to 'radius' if preferred 18 | map_geography: polygon 19 | 20 | # The folder under data where this city's data is stored 21 | name: boise 22 | 23 | # If given, limit crashes to after startdate and no later than enddate 24 | # Recommended to limit to just a few years for now 25 | startdate: 2015-01-01 26 | enddate: 2019-12-31 27 | 28 | ################################################################# 29 | # Configuration for data standardization 30 | 31 | # crash file configurations 32 | crashes_files: 33 | Boise_Crash_Data_2005_-_Present.csv: 34 | required: 35 | id: OBJECTID 36 | latitude: 
Y 37 | longitude: X 38 | # If date supplied in single column: 39 | date_complete: Accident_Date_Time 40 | # If date is separated into year/month/day: 41 | date_year: 42 | date_month: 43 | # Leave date_day empty if not available 44 | date_day: 45 | # If time is available and separate from date: 46 | time: 47 | # If time specified, time_format is one of: 48 | # default (HH:MM:SS) 49 | # seconds (since midnight) 50 | # military (HHMM) 51 | time_format: 52 | optional: 53 | summary: 54 | # If the crash file doesn't have a lat/lon, you must give the address field 55 | # and you will need to run the geocode_batch script - see the README 56 | address: 57 | # This section allows you to specify additional feature in the crash file 58 | # (split_columns) to go into the training set 59 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle) 60 | # but you can specify other fields in the crash data file. 61 | # See the README for examples 62 | 63 | ################################################################# 64 | # Configuration for default features 65 | 66 | # Default features from open street map. You can remove features you don't want 67 | # Note: we don't support adding features in the config file. 
68 | # If there is an additional feature you want from open street map, contact the development team 69 | openstreetmap_features: 70 | categorical: 71 | width: Width 72 | cycleway_type: Bike lane 73 | signal: Signal 74 | oneway: One Way 75 | lanes: Number of lanes 76 | continuous: 77 | width_per_lane: Average width per lane 78 | 79 | # Speed limit is a required feature 80 | # If you choose to override OpenStreetMaps' speed limit, replace 'osm_speed' with the feature name here 81 | speed_limit: osm_speed 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /src/config/config_boston.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Boston, Massachusetts, USA 3 | # The folder under data where this city's data is stored 4 | name: boston 5 | # City centerpoint latitude & longitude 6 | city_latitude: 42.3600825 7 | city_longitude: -71.0588801 8 | timezone: America/New_York 9 | # Radius of city's road network from centerpoint in km (required if OSM has no polygon data) 10 | city_radius: 15 11 | # If given, limit crashes to after startdate and no later than enddate 12 | startdate: 2021-01-01 13 | enddate: 2024-03-01 14 | 15 | ############################################################################## 16 | # Configuration for data standardization 17 | 18 | # crash file configurations 19 | crashes_files: 20 | vzopendata.csv: 21 | required: 22 | id: ID 23 | latitude: lat 24 | longitude: long 25 | # If date supplied in single column: 26 | date_complete: dispatch_ts 27 | # If date is separated into year/month/day: 28 | date_year: 29 | date_month: 30 | # Leave date_day empty if not available 31 | date_day: 32 | # If time is available and separate from date: 33 | time: 34 | # If time specified, time_format is one of: 35 | # default (HH:MM:SS) 36 | # seconds (since midnight) 37 | # military (HHMM) 38 | time_format: 39 | optional: 40 | summary: location_type 41 | 
address: 42 | split_columns: 43 | pedestrian: 44 | column_name: mode_type 45 | column_value: ped 46 | bike: 47 | column_name: mode_type 48 | column_value: bike 49 | vehicle: 50 | column_name: mode_type 51 | column_value: mv 52 | 53 | ############################################################################## 54 | 55 | # atr filepath (should be in processed subfolder) 56 | # leave as '' if not adding atrs 57 | atr: 'atrs_predicted.csv' 58 | # atr column names as list 59 | atr_cols: 60 | - speed_coalesced 61 | - volume_coalesced 62 | 63 | # tmc filepath (should be in processed subfolder) 64 | # leave as '' if not adding tmcs 65 | tmc: 'tmc_summary.json' 66 | # tmc column names as list 67 | tmc_cols: 68 | - Conflict 69 | 70 | ################################################################# 71 | # Configuration for default features 72 | 73 | # Default features from open street map. You can remove features you don't want 74 | # Note: we don't support adding features in the config file. 75 | # If there is an additional feature you want from open street map, contact the development team 76 | openstreetmap_features: 77 | categorical: 78 | width: Width 79 | cycleway_type: Bike lane 80 | oneway: One Way 81 | lanes: Number of lanes 82 | continuous: 83 | width_per_lane: Average width per lane 84 | 85 | # Configuration for default waze features 86 | waze_features: 87 | categorical: 88 | jam: Existence of a jam 89 | continuous: 90 | jam_percent: Percent of time there was a jam 91 | 92 | 93 | # Additional city-specific features can be added from alternate map 94 | additional_map_features: 95 | # The path to the extra map, must be in 3857 projection 96 | extra_map: ../data/boston/raw/maps/ma_cob_spatially_joined_streets.shp 97 | continuous: 98 | AADT: Average annual daily traffic 99 | categorical: 100 | Struct_Cnd: Condition 101 | Surface_Tp: Road type 102 | F_F_Class: Functional class 103 | speed_limit: SPEEDLIMIT 104 | 105 | # Additional data sources 106 | data_source: 107 | 
- name: visionzero 108 | filename: Vision_Zero_Entry.csv 109 | latitude: Y 110 | longitude: X 111 | date: REQUESTDATE 112 | category: REQUESTTYPE 113 | - name: see_click_fix 114 | filename: bos_scf.csv 115 | latitude: Y 116 | longitude: X 117 | date: created 118 | 119 | -------------------------------------------------------------------------------- /src/config/config_brisbane.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Brisbane, Australia 3 | # City centerpoint latitude & longitude (default geocoded values set) 4 | city_latitude: -27.4697707 5 | city_longitude: 153.0251235 6 | timezone: Australia/Brisbane 7 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 8 | city_radius: 10 9 | # The folder under data where this city's data is stored 10 | name: brisbane 11 | # If given, limit crashes to after startdate and no later than enddate 12 | # Recommended to limit to just a few years for now 13 | startdate: 2015-01-01 14 | enddate: 2017-12-31 15 | 16 | ################################################################# 17 | # Configuration for data standardization 18 | 19 | # crash file configurations 20 | crashes_files: 21 | locations_2014_2017.csv: 22 | required: 23 | id: Crash_Ref_Number 24 | latitude: Crash_Latitude_GDA94 25 | longitude: Crash_Longitude_GDA94 26 | # If date supplied in single column: 27 | date_complete: Crash_Date 28 | # If date is separated into year/month/day: 29 | date_year: 30 | date_month: 31 | # Leave date_day empty if not available 32 | date_day: 33 | # If time is available and separate from date: 34 | time: 35 | # If time specified, time_format is one of: 36 | # default (HH:MM:SS) 37 | # seconds (since midnight) 38 | # military (HHMM) 39 | time_format: 40 | optional: 41 | summary: Crash_DCA_Description 42 | address: Crash_Street 43 | split_columns: 44 | pedestrian: 45 | column_name: Count_Unit_Pedestrian 46 | 
 column_value: any 47 | bike: 48 | column_name: Count_Unit_Bicycle 49 | column_value: any 50 | vehicle: 51 | not_column: pedestrian bike 52 | 53 | 54 | ################################################################# 55 | # Configuration for default features 56 | 57 | # Default features from open street map. You can remove features you don't want 58 | # Note: we don't support adding features in the config file. 59 | # If there is an additional feature you want from open street map, contact the development team 60 | openstreetmap_features: 61 | categorical: 62 | width: Width 63 | cycleway_type: Bike lane 64 | oneway: One Way 65 | lanes: Number of lanes 66 | continuous: 67 | width_per_lane: Average width per lane 68 | 69 | -------------------------------------------------------------------------------- /src/config/config_buffalo.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Buffalo, NY, USA 3 | # City centerpoint latitude & longitude (default geocoded values set) 4 | city_latitude: 42.885440000000074 5 | city_longitude: -78.87845999999996 6 | 7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file 8 | timezone: America/New_York 9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 10 | city_radius: 20 11 | 12 | # By default, maps are created from OSM's polygon data and fall back to radius 13 | # if there is no polygon data, but you can change map_geography 14 | # to 'radius' if preferred 15 | map_geography: polygon 16 | 17 | # The folder under data where this city's data is stored 18 | name: buffalo 19 | 20 | # If given, limit crashes to after startdate and no later than enddate 21 | # Recommended to limit to just a few years for now 22 | startdate: 2016-01-01 23 | enddate: 2018-12-31 24 | 25 | ################################################################# 26 | # 
Configuration for data standardization 27 | 28 | # crash file configurations 29 | crashes_files: 30 | buffalo_converted.csv: 31 | required: 32 | id: CASE_NUM 33 | latitude: lat_converted 34 | longitude: lon_converted 35 | # If date supplied in single column: 36 | date_complete: ACC_DATE 37 | # If date is separated into year/month/day: 38 | date_year: 39 | date_month: 40 | # Leave date_day empty if not available 41 | date_day: 42 | # If time is available and separate from date: 43 | time: ACCD_TME 44 | # If time specified, time_format is one of: 45 | # default (HH:MM:SS) 46 | # seconds (since midnight) 47 | # military (HHMM) 48 | time_format: 49 | optional: 50 | summary: 51 | # If the crash file doesn't have a lat/lon, you must give the address field 52 | # and you will need to run the geocode_batch script - see the README 53 | address: 54 | # This section allows you to specify additional feature in the crash file 55 | # (split_columns) to go into the training set 56 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle) 57 | # but you can specify other fields in the crash data file. 58 | # See the README for examples 59 | 60 | ################################################################# 61 | # Configuration for default features 62 | 63 | # Default features from open street map. You can remove features you don't want 64 | # Note: we don't support adding features in the config file. 
65 | # If there is an additional feature you want from open street map, contact the development team 66 | openstreetmap_features: 67 | categorical: 68 | width: Width 69 | cycleway_type: Bike lane 70 | signal: Signal 71 | oneway: One Way 72 | lanes: Number of lanes 73 | continuous: 74 | width_per_lane: Average width per lane 75 | 76 | # Speed limit is a required feature 77 | # If you choose to override OpenStreetMaps' speed limit, replace 'osm_speed' with the feature name here 78 | speed_limit: osm_speed 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /src/config/config_cambridge.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Cambridge, Massachusetts, USA 3 | # The folder under data where this city's data is stored 4 | name: cambridge 5 | # City centerpoint latitude & longitude 6 | city_latitude: 42.3736158 7 | city_longitude: -71.10973349999999 8 | timezone: America/New_York 9 | # Radius of city's road network from centerpoint in km (required if OSM has no polygon data) 10 | city_radius: 10 11 | # If given, limit crashes to after startdate and no later than enddate 12 | startdate: 13 | enddate: 14 | 15 | ############################################################################## 16 | # Configuration for data standardization 17 | 18 | # crash file configurations 19 | crashes_files: 20 | Crashes20152017.csv: 21 | required: 22 | id: ID 23 | latitude: Y 24 | longitude: X 25 | # If date supplied in single column: 26 | date_complete: DateTime 27 | # If date is separated into year/month/day: 28 | date_year: 29 | date_month: 30 | # Leave date_day empty if not available 31 | date_day: 32 | # If time is available and separate from date: 33 | time: 34 | # If time specified, time_format is one of: 35 | # default (HH:MM:SS) 36 | # seconds (since midnight) 37 | # military (HHMM) 38 | time_format: 39 | optional: 40 | summary: 41 | address: Address 42 | 
split_columns: 43 | pedestrian: 44 | column_name: Type 45 | column_value: PED 46 | bike: 47 | column_name: Type 48 | column_value: CYC 49 | vehicle: 50 | column_name: Type 51 | column_value: AUTO 52 | 53 | # Additional data sources 54 | data_source: 55 | - name: parking_tickets 56 | filename: Cambridge_Parking_Tickets.csv 57 | address: Location 58 | date: Ticket Issue Date 59 | time: Issue Time 60 | category: Violation Description 61 | notes: 62 | # Feature can be categorical (f_cat) or continuous (f_cont) 63 | feat_type: continuous 64 | - name: seeclickfix 65 | filename: Commonwealth_Connect_Service_Requests.csv 66 | latitude: lat 67 | longitude: lng 68 | date: ticket_created_date_time 69 | category: issue_type 70 | # feat_type: categorical 71 | 72 | openstreetmap_features: 73 | categorical: 74 | width: Width 75 | cycleway_type: Bike lane 76 | oneway: One Way 77 | lanes: Number of lanes 78 | continuous: 79 | width_per_lane: Average width per lane 80 | waze_features: 81 | categorical: 82 | jam: Existence of a jam 83 | continuous: 84 | jam_percent: Percent of time there was a jam 85 | avg_jam_level: Jam level 86 | # alert_WEATHERHAZARD: Waze weather hazard alert 87 | # alert_JAM: Waze jam alert 88 | # alert_ROAD_CLOSED: Waze road closed alert 89 | # alert_ACCIDENT: Waze crash alert 90 | -------------------------------------------------------------------------------- /src/config/config_chicago.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Chicago,IL,USA 3 | # City centerpoint latitude & longitude (default geocoded values set) 4 | city_latitude: 41.884250000000065 5 | city_longitude: -87.63244999999995 6 | 7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file 8 | timezone: America/Chicago 9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 10 | city_radius: 20 11 | speed_unit: mph 12 | 13 | # 
By default, maps are created from OSM's polygon data and fall back to radius 14 | # if there is no polygon data, but you can change map_geography 15 | # to 'radius' if preferred 16 | map_geography: shapefile 17 | boundary_shapefile: chicago_boundaries.geojson 18 | 19 | # The folder under data where this city's data is stored 20 | name: chicago 21 | 22 | # If given, limit crashes to after startdate and no later than enddate 23 | # Recommended to limit to just a few years for now 24 | startdate: 2019-01-01 25 | enddate: 2020-12-31 26 | 27 | crashes_files: 28 | Chicago_Crashes_Joined_Crashes_People.csv: 29 | required: 30 | id: ID 31 | latitude: LATITUDE 32 | longitude: LONGITUDE 33 | # If date supplied in single column: 34 | date_complete: CRASH_DATE_x 35 | # If date is separated into year/month/day: 36 | date_year: 37 | date_month: 38 | # Leave date_day empty if not available 39 | date_day: 40 | # If time is available and separate from date: 41 | time: 42 | # If time specified, time_format is one of: 43 | # default (HH:MM:SS) 44 | # seconds (since midnight) 45 | # military (HHMM) 46 | time_format: 47 | optional: 48 | summary: 49 | # If the crash file doesn't have a lat/lon, you must give the address field 50 | # and you will need to run the geocode_batch script - see the README 51 | address: 52 | # This section allows you to specify additional feature in the crash file 53 | # (split_columns) to go into the training set 54 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle) 55 | # but you can specify other fields in the crash data file. 
56 | # See the README for examples 57 | split_columns: 58 | pedestrian: 59 | column_name: PERSON_TYPE 60 | column_value: PEDESTRIAN 61 | bike: 62 | column_name: PERSON_TYPE 63 | column_value: BICYCLE 64 | vehicle: 65 | column_name: PERSON_TYPE 66 | column_value: DRIVER 67 | 68 | ################################################################# 69 | # Configuration for default features 70 | 71 | # Default features from open street map. You can remove features you don't want 72 | # Note: we don't support adding features in the config file. 73 | # If there is an additional feature you want from open street map, contact the development team 74 | openstreetmap_features: 75 | categorical: 76 | width: Width 77 | cycleway_type: Bike lane 78 | signal: Signal 79 | oneway: One Way 80 | lanes: Number of lanes 81 | continuous: 82 | width_per_lane: Average width per lane 83 | 84 | # Speed limit is a required feature 85 | # If you choose to override OpenStreetMaps' speed limit, replace 'osm_speed' with the feature name here 86 | speed_limit: osm_speed 87 | 88 | 89 | 90 | # Additional data sources 91 | # Any csv file with rows corresponding to location points 92 | data_source: 93 | - name: 311_DOT_Requests 94 | filename: 311_Service_Requests.csv 95 | latitude: LATITUDE 96 | longitude: LONGITUDE 97 | date: CREATED_DATE 98 | time: 99 | category: SR_TYPE 100 | notes: 101 | # Feature is 'categorical' or 'continuous' 102 | feat: categorical 103 | # feat_agg (feature aggregation) can be total count 'default' or 'latest value' 104 | feat_agg: 105 | # if latest, the column name where the value can be found 106 | value: 107 | 108 | 109 | -------------------------------------------------------------------------------- /src/config/config_dc.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Washington, DC, USA 3 | # City centerpoint latitude & longitude 4 | city_latitude: 38.9071923 5 | city_longitude: -77.0368707 6 | timezone: 
America/New_York 7 | # Radius of city's road network from centerpoint in km (required if OSM has no polygon data) 8 | city_radius: 25 9 | # The folder under data where this city's data is stored 10 | name: dc 11 | # If given, limit crashes to after startdate and no later than enddate 12 | # Recommended to limit to just a few years for now 13 | startdate: 2014-01-01 14 | enddate: 2016-12-31 15 | 16 | ################################################################# 17 | # Configuration for data standardization 18 | 19 | # crash file configurations 20 | crashes_files: 21 | Crashes_in_DC.csv: 22 | required: 23 | id: OBJECTID 24 | latitude: Y 25 | longitude: X 26 | # If date supplied in single column: 27 | date_complete: REPORTDATE 28 | # If date is separated into year/month/day: 29 | date_year: 30 | date_month: 31 | # Leave date_day empty if not available 32 | date_day: 33 | # If time is available and separate from date: 34 | time: 35 | # If time specified, time_format is one of: 36 | # default (HH:MM:SS) 37 | # seconds (since midnight) 38 | # military (HHMM) 39 | time_format: 40 | optional: 41 | summary: MAR_ADDRESS 42 | split_columns: 43 | pedestrian: 44 | column_name: TOTAL_PEDESTRIANS 45 | column_value: any 46 | bike: 47 | column_name: TOTAL_BICYCLES 48 | column_value: any 49 | vehicle: 50 | column_name: TOTAL_VEHICLES 51 | column_value: any 52 | 53 | ################################################################# 54 | # Configuration for default features 55 | 56 | # Default features from open street map. You can remove features you don't want 57 | # Note: we don't support adding features in the config file. 
58 | # If there is an additional feature you want from open street map, contact the development team 59 | openstreetmap_features: 60 | categorical: 61 | width: Width 62 | cycleway_type: Bike lane 63 | oneway: One Way 64 | lanes: Number of lanes 65 | continuous: 66 | width_per_lane: Average width per lane 67 | 68 | 69 | data_source: 70 | - name: visionzero 71 | filename: Vision_Zero_Safety.csv 72 | latitude: Y 73 | longitude: X 74 | date: REQUESTDATE 75 | -------------------------------------------------------------------------------- /src/config/config_losangeles.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Los Angeles, CA, USA 3 | # City centerpoint latitude & longitude (default geocoded values set) 4 | city_latitude: 34.0522 5 | city_longitude: -118.2437 6 | # City's time zone: defaults to the local time zone of computer initializing the city's config file 7 | timezone: America/Los_Angeles 8 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 9 | city_radius: 50 10 | 11 | # The folder under data where this city's data is stored 12 | name: losangeles 13 | 14 | # If given, limit crashes to after startdate and no later than enddate 15 | # Recommended to limit to just a few years for now 16 | startdate: 17 | enddate: 18 | 19 | ################################################################# 20 | # Configuration for data standardization 21 | 22 | # crash file configurations 23 | crashes_files: 24 | LA_collision_data.csv: 25 | required: 26 | id: ID 27 | latitude: point_y 28 | longitude: point_x 29 | # If date supplied in single column: 30 | date_complete: collision_date 31 | # If date is separated into year/month/day: 32 | date_year: 33 | date_month: 34 | # Leave date_day empty if not available 35 | date_day: 36 | # If time is available and separate from date: 37 | time: collision_time 38 | # If time specified, time_format is one of: 39 | # 
default (HH:MM:SS) 40 | # seconds (since midnight) 41 | # military (HHMM) 42 | time_format: military 43 | optional: 44 | summary: 45 | # If the crash file doesn't have a lat/lon, you must give the address field 46 | # and you will need to run the geocode_batch script - see the README 47 | address: 48 | split_columns: 49 | pedestrian: 50 | column_name: pedestrian_accident 51 | column_value: any 52 | bike: 53 | column_name: bicycle_accident 54 | column_value: any 55 | vehicle: 56 | not_column: pedestrian bike 57 | 58 | ################################################################# 59 | # Configuration for default features 60 | 61 | # Default features from open street map. You can remove features you don't want 62 | # Note: we don't support adding features in the config file. 63 | # If there is an additional feature you want from open street map, contact the development team 64 | openstreetmap_features: 65 | categorical: 66 | width: Width 67 | cycleway_type: Bike lane 68 | signal: Signal 69 | oneway: One Way 70 | lanes: Number of lanes 71 | continuous: 72 | width_per_lane: Average width per lane 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /src/config/config_melbourne.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Melbourne, VIC, Australia 3 | # City centerpoint latitude & longitude (default geocoded values set) 4 | city_latitude: -37.8163 5 | city_longitude: 144.9631 6 | 7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file 8 | timezone: Australia/Melbourne 9 | 10 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 11 | city_radius: 10 12 | #map_geography: shapefile 13 | #boundary_shapefile: VicRoads_Regions.shp 14 | 15 | # The folder under data where this city's data is stored 16 | name: melbourne 17 | 18 | # If given, limit 
crashes to after startdate and no later than enddate 19 | # Recommended to limit to just a few years for now 20 | startdate: 1/8/2013 21 | enddate: 22 | 23 | 24 | ################################################################# 25 | # Configuration for data standardization 26 | 27 | # crash file configurations 28 | crashes_files: 29 | Crashes_Last_Five_Years.csv: 30 | required: 31 | id: ID 32 | latitude: LATITUDE 33 | longitude: LONGITUDE 34 | # If date supplied in single column: 35 | date_complete: ACCIDENT_DATE 36 | # If date is separated into year/month/day: 37 | date_year: 38 | date_month: 39 | # Leave date_day empty if not available 40 | date_day: 41 | # If time is available and separate from date: 42 | time: ACCIDENT_TIME 43 | # If time specified, time_format is one of: 44 | # default (HH:MM:SS) 45 | # seconds (since midnight) 46 | # military (HHMM) 47 | time_format: 48 | optional: 49 | summary: 50 | # If the crash file doesn't have a lat/lon, you must give the address field 51 | # and you will need to run the geocode_batch script - see the README 52 | address: 53 | split_columns: 54 | pedestrian: 55 | column_name: PEDESTRIAN 56 | column_value: any 57 | bike: 58 | column_name: BICYCLIST 59 | column_value: any 60 | vehicle: 61 | not_column: pedestrian bike 62 | 63 | ################################################################# 64 | # Configuration for default features 65 | 66 | # Default features from open street map. You can remove features you don't want 67 | # Note: we don't support adding features in the config file. 
68 | # If there is an additional feature you want from open street map, contact the development team 69 | openstreetmap_features: 70 | categorical: 71 | width: Width 72 | cycleway_type: Bike lane 73 | signal: Signal 74 | oneway: One Way 75 | lanes: Number of lanes 76 | continuous: 77 | width_per_lane: Average width per lane 78 | -------------------------------------------------------------------------------- /src/config/config_meridian.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Meridian, Idaho, USA 3 | # City centerpoint latitude & longitude (default geocoded values set) 4 | city_latitude: 43.61102000000005 5 | city_longitude: -116.39257999999995 6 | 7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file 8 | timezone: America/Denver 9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 10 | city_radius: 20 11 | speed_unit: mph 12 | 13 | # By default, maps are created from OSM's polygon data and fall back to radius 14 | # if there is no polygon data, but you can change map_geography 15 | # to 'radius' if preferred 16 | map_geography: polygon 17 | 18 | # The folder under data where this city's data is stored 19 | name: meridian 20 | 21 | # If given, limit crashes to after startdate and no later than enddate 22 | # Recommended to limit to just a few years for now 23 | startdate: 2015-01-01 24 | enddate: 2019-12-31 25 | 26 | ################################################################# 27 | # Configuration for data standardization 28 | 29 | # crash file configurations 30 | crashes_files: 31 | Meridian_ID_Crash_Data_2005_-_Present.csv: 32 | required: 33 | id: OBJECTID 34 | latitude: Y 35 | longitude: X 36 | # If date supplied in single column: 37 | date_complete: Accident_Date_Time 38 | # If date is separated into year/month/day: 39 | date_year: 40 | date_month: 41 | # 
Leave date_day empty if not available 42 | date_day: 43 | # If time is available and separate from date: 44 | time: 45 | # If time specified, time_format is one of: 46 | # default (HH:MM:SS) 47 | # seconds (since midnight) 48 | # military (HHMM) 49 | time_format: 50 | optional: 51 | summary: 52 | # If the crash file doesn't have a lat/lon, you must give the address field 53 | # and you will need to run the geocode_batch script - see the README 54 | address: 55 | # This section allows you to specify additional feature in the crash file 56 | # (split_columns) to go into the training set 57 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle) 58 | # but you can specify other fields in the crash data file. 59 | # See the README for examples 60 | 61 | ################################################################# 62 | # Configuration for default features 63 | 64 | # Default features from open street map. You can remove features you don't want 65 | # Note: we don't support adding features in the config file. 
66 | # If there is an additional feature you want from open street map, contact the development team 67 | openstreetmap_features: 68 | categorical: 69 | width: Width 70 | cycleway_type: Bike lane 71 | signal: Signal 72 | oneway: One Way 73 | lanes: Number of lanes 74 | continuous: 75 | width_per_lane: Average width per lane 76 | 77 | # Speed limit is a required feature 78 | # If you choose to override OpenStreetMaps' speed limit, replace 'osm_speed' with the feature name here 79 | speed_limit: osm_speed 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/config/config_nyc.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: New York, NY, USA 3 | # City centerpoint latitude & longitude (default geocoded values set) 4 | city_latitude: 40.71455000000003 5 | city_longitude: -74.00713999999994 6 | 7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file 8 | timezone: America/New_York 9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 10 | city_radius: 20 11 | speed_unit: kph 12 | 13 | # By default, maps are created from OSM's polygon data and fall back to radius 14 | # if there is no polygon data, but you can change map_geography 15 | # to 'radius' if preferred 16 | map_geography: polygon 17 | 18 | # The folder under data where this city's data is stored 19 | name: nyc 20 | 21 | # If given, limit crashes to after startdate and no later than enddate 22 | # Recommended to limit to just a few years for now 23 | startdate: 24 | enddate: 25 | 26 | ################################################################# 27 | # Configuration for data standardization 28 | 29 | # crash file configurations 30 | crashes_files: 31 | Motor_Vehicle_Collisions_-_Crashes.csv: 32 | required: 33 | id: COLLISION_ID 34 | latitude: LATITUDE 35 | longitude: 
LONGITUDE 36 | # If date supplied in single column: 37 | date_complete: CRASH DATE 38 | # If date is separated into year/month/day: 39 | date_year: 40 | date_month: 41 | # Leave date_day empty if not available 42 | date_day: 43 | # If time is available and separate from date: 44 | time: CRASH TIME 45 | # If time specified, time_format is one of: 46 | # default (HH:MM:SS) 47 | # seconds (since midnight) 48 | # military (HHMM) 49 | time_format: 50 | optional: 51 | # summary: 52 | # If the crash file doesn't have a lat/lon, you must give the address field 53 | # and you will need to run the geocode_batch script - see the README 54 | # address: 55 | # Currently only considering number of persons injured for pedestrian/cyclists, 56 | # which removes a small number of fatal accidents (less than .01%) 57 | split_columns: 58 | pedestrian: 59 | column_name: NUMBER OF PEDESTRIANS INJURED 60 | column_value: any 61 | bike: 62 | column_name: NUMBER OF CYCLIST INJURED 63 | column_value: any 64 | vehicle: 65 | not_column: pedestrian bike 66 | 67 | ################################################################# 68 | # Configuration for default features 69 | 70 | # Default features from open street map. You can remove features you don't want 71 | # Note: we don't support adding features in the config file. 
72 | # If there is an additional feature you want from open street map, contact the development team 73 | openstreetmap_features: 74 | categorical: 75 | width: Width 76 | cycleway_type: Bike lane 77 | signal: Signal 78 | oneway: One Way 79 | lanes: Number of lanes 80 | continuous: 81 | width_per_lane: Average width per lane 82 | 83 | # Speed limit is a required feature 84 | # If you choose to override OpenStreetMaps' speed limit, replace 'osm_speed' with the feature name here 85 | speed_limit: osm_speed 86 | -------------------------------------------------------------------------------- /src/config/config_philly.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Philadelphia, Pennslyvania, USA 3 | # The folder under data where this city's data is stored 4 | name: philly 5 | # City centerpoint latitude & longitude 6 | city_latitude: 39.9526 7 | city_longitude: -75.1652 8 | timezone: America/New_York 9 | # Radius of city's road network from centerpoint in km (required if OSM has no polygon data) 10 | city_radius: 15 11 | # If given, limit crashes to after startdate and no later than enddate 12 | startdate: 2017-01-01 13 | enddate: 2017-12-31 14 | 15 | ############################################################################## 16 | # Configuration for data standardization 17 | 18 | # crash file configurations 19 | crashes_files: 20 | crash_data_collision_crash_2007_2017.csv: 21 | required: 22 | id: objectid_1 23 | latitude: dec_lat 24 | longitude: dec_long 25 | date_complete: 26 | date_year: crash_year 27 | date_month: crash_month 28 | date_day: 29 | time: time_of_day 30 | time_format: military 31 | optional: 32 | intersection: intersect_type 33 | split_columns: 34 | pedestrian: 35 | column_name: ped_count 36 | column_value: any 37 | bike: 38 | column_name: bicycle_count 39 | column_value: any 40 | vehicle: 41 | column_name: vehicle_count 42 | column_value: any 43 | 44 | 
################################################################# 45 | # Configuration for default features 46 | 47 | # Default features from open street map. You can remove features you don't want 48 | # Note: we don't support adding features in the config file. 49 | # If there is an additional feature you want from open street map, contact the development team 50 | openstreetmap_features: 51 | categorical: 52 | width: Width 53 | cycleway_type: Bike lane 54 | signal: Signal 55 | oneway: One Way 56 | lanes: Number of lanes 57 | hwy_type: Highway type 58 | continuous: 59 | width_per_lane: Average width per lane 60 | 61 | # Additional data sources 62 | data_source: 63 | - name: DVRPCTrafficVolume 64 | filename: DVRPC_Traffic_Counts.csv 65 | latitude: LATITUDE 66 | longitude: LONGITUDE 67 | date: SETDATE 68 | notes: 69 | # Feature is categorical (f_cat) or continuous (f_cont) 70 | feat: f_cont 71 | # feat_agg (feature aggregation) can be total count 'default' or 'latest value' 72 | feat_agg: latest 73 | # if latest, the desired value 74 | value: AADT 75 | - name: DVRPCWalkVolume 76 | filename: DVRPC__Pedestrian_Counts.csv 77 | latitude: LATITUDE 78 | longitude: LONGITUDE 79 | date: SETDATE 80 | notes: 81 | # Feature is categorical (f_cat) or continuous (f_cont) 82 | feat: f_cont 83 | # feat_agg (feature aggregation) can be total count 'default' or 'latest value' 84 | feat_agg: latest 85 | # if latest, the desired value 86 | value: AADP 87 | - name: DVRPCBikeVolume 88 | filename: DVRPC__Bicycle__Counts.csv 89 | latitude: LATITUDE 90 | longitude: LONGITUDE 91 | date: SETDATE 92 | notes: 93 | # Feature is categorical (f_cat) or continuous (f_cont) 94 | feat: f_cont 95 | # feat_agg (feature aggregation) can be total count 'default' or 'latest value' 96 | feat_agg: latest 97 | # if latest, the desired value 98 | value: AADT 99 | - name: parking_tickets 100 | filename: parking_violations_14.csv 101 | latitude: lat 102 | longitude: lon 103 | 
date: issue_datetime 104 | category: violation_desc 105 | notes: 106 | # Feature can be categorical (f_cat) or continuous (f_cont) 107 | feat: f_cont 108 | 109 | -------------------------------------------------------------------------------- /src/config/config_pittsburgh.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Pittsburgh, PA, USA 3 | # City centerpoint latitude & longitude (default geocoded values set) 4 | city_latitude: 40.44062479999999 5 | city_longitude: -79.9958864 6 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 7 | city_radius: 20 8 | # The folder under data where this city's data is stored 9 | name: pittsburgh 10 | # If given, limit crashes to after startdate and no later than enddate 11 | # Recommended to limit to just a few years for now 12 | startdate: 13 | enddate: 14 | 15 | 16 | ################################################################# 17 | # Configuration for data standardization 18 | 19 | # crash file configurations 20 | crashes_files: 21 | pittsburgh_2017.csv: 22 | required: 23 | id: _id 24 | latitude: DEC_LAT 25 | longitude: DEC_LONG 26 | # If date supplied in single column: 27 | date_complete: 28 | # If date is separated into year/month/day: 29 | date_year: CRASH_YEAR 30 | date_month: CRASH_MONTH 31 | # Leave date_day empty if not available 32 | date_day: 33 | # If time is available and separate from date: 34 | time: TIME_OF_DAY 35 | # If time specified, time_format is one of: 36 | # default (HH:MM:SS) 37 | # seconds (since midnight) 38 | # military (HHMM) 39 | time_format: military 40 | optional: 41 | summary: 42 | address: 43 | 44 | -------------------------------------------------------------------------------- /src/config/config_somerville.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Somerville, Massachusetts, USA 3 | # City 
centerpoint latitude & longitude (default geocoded values set) 4 | city_latitude: 42.3876 5 | city_longitude: -71.0995 6 | 7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file 8 | timezone: America/New_York 9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 10 | city_radius: 20 11 | 12 | # The folder under data where this city's data is stored 13 | name: somerville 14 | 15 | # If given, limit crashes to after startdate and no later than enddate 16 | # Recommended to limit to just a few years for now 17 | startdate: 18 | enddate: 19 | 20 | ################################################################# 21 | # Configuration for data standardization 22 | 23 | # crash file configurations 24 | crashes_files: 25 | Motor_Vehicle_Crash_Reports.csv: 26 | required: 27 | id: ID 28 | latitude: 29 | longitude: 30 | # If date supplied in single column: 31 | date_complete: Date 32 | # If date is separated into year/month/day: 33 | date_year: 34 | date_month: 35 | # Leave date_day empty if not available 36 | date_day: 37 | # If time is available and separate from date: 38 | time: 39 | # If time specified, time_format is one of: 40 | # default (HH:MM:SS) 41 | # seconds (since midnight) 42 | # military (HHMM) 43 | time_format: 44 | optional: 45 | summary: 46 | # If the crash file doesn't have a lat/lon, you must give the address field 47 | # and you will need to run the geocode_batch script - see the README 48 | address: Location 49 | split_columns: 50 | pedestrian: 51 | column_name: Pedestrian 52 | column_value: any 53 | bike: 54 | column_name: Bicycle 55 | column_value: any 56 | vehicle: 57 | not_column: pedestrian bike 58 | 59 | 60 | 61 | ################################################################# 62 | # Configuration for default features 63 | 64 | # Default features from open street map. 
You can remove features you don't want 65 | # Note: we don't support adding features in the config file. 66 | # If there is an additional feature you want from open street map, contact the development team 67 | openstreetmap_features: 68 | categorical: 69 | width: Width 70 | cycleway_type: Bike lane 71 | signal: Signal 72 | oneway: One Way 73 | lanes: Number of lanes 74 | continuous: 75 | width_per_lane: Average width per lane 76 | 77 | 78 | # Configuration for default waze features 79 | waze_features: 80 | categorical: 81 | jam: Existence of a jam 82 | continuous: 83 | jam_percent: Percent of time there was a jam 84 | 85 | 86 | -------------------------------------------------------------------------------- /src/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/.gitkeep -------------------------------------------------------------------------------- /src/data/TMC_scraping/README.md: -------------------------------------------------------------------------------- 1 | # TMC scraping -------------------------------------------------------------------------------- /src/data/TMC_scraping/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This package parses turning movement count files 3 | """ 4 | -------------------------------------------------------------------------------- /src/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/__init__.py -------------------------------------------------------------------------------- /src/data/record.py: -------------------------------------------------------------------------------- 1 | from pyproj import Transformer 2 | from . 
class Record:
    """A dict of properties paired with the point geometry it describes.

    Args:
        properties (dict): Attributes of the record. When ``point`` is not
            given, ``properties['location']['latitude']`` and
            ``properties['location']['longitude']`` must be present.
        point (optional): A pre-built point geometry. When omitted (``None``),
            the point is built by ``util.get_reproject_point`` from the
            record's latitude/longitude.

    Attributes are exposed as ``point`` and ``properties``.
    """
    # NOTE(review): the original summary described the point as being "in 4326
    # projection", but the fallback below passes the 4326 -> 3857 transformer
    # to util.get_reproject_point -- confirm which projection is intended.

    def __init__(self, properties, point=None):
        # Compare against None explicitly: a supplied geometry that happens to
        # be falsy (e.g. an empty geometry) must not be silently replaced by a
        # lookup into properties['location'].
        if point is not None:
            self.point = point
        else:
            self.point = util.get_reproject_point(
                properties['location']['latitude'],
                properties['location']['longitude'],
                transformer_4326_to_3857)
        self.properties = properties

    @property
    def schema(self):
        """Return the 'Point' schema produced by ``util.make_schema``."""
        return util.make_schema('Point', self.properties)

    def _get_near_id(self):
        """Return ``properties['near_id']``, or None when it is not set."""
        return self.properties.get('near_id')

    def _set_near_id(self, near_id):
        """Store ``near_id`` in the properties dict."""
        self.properties['near_id'] = near_id

    near_id = property(_get_near_id, _set_near_id)

    @property
    def timestamp(self):
        """Return ``properties['timestamp']``, or '' when it is not set."""
        return self.properties.get('timestamp', '')


class Crash(Record):
    """A crash record; its point is always derived from the properties."""

    def __init__(self, properties):
        super().__init__(properties)

    @property
    def timestamp(self):
        """Return ``properties['dateOccurred']`` parsed by dateutil.

        Raises:
            KeyError: if the record has no 'dateOccurred' property.
        """
        return parse(self.properties['dateOccurred'])
def convert_to_csv(filename):
    """Convert ``<filename>.json`` (a list of tickets) into ``<filename>.csv``.

    Args:
        filename (str): Path prefix, without extension. ``filename + '.json'``
            is read and ``filename + '.csv'`` is (over)written.

    Each ticket must provide the keys ``lng``, ``lat``, ``request_type``,
    ``created_at``, ``summary`` and ``description``. A missing request-type
    title, or a null summary/description, is written as an empty string.
    """
    with open(filename + '.json', 'r', encoding='utf-8') as f:
        tickets = json.load(f)

    print("Converting " + str(len(tickets)) + " tickets to csv")
    # Since this so far only looks at Boston, hard coding
    # fields we care about.  Will need to check against other cities
    fieldnames = ['X', 'Y', 'type', 'created', 'summary', 'description']
    # newline='' is what the csv module requires of files it writes to;
    # text is encoded once, by the file object, instead of per field
    # (str.encode() would put b'...' reprs into the file on Python 3).
    with open(filename + '.csv', 'w', newline='',
              encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for t in tickets:
            writer.writerow({
                'X': t['lng'],
                'Y': t['lat'],
                'type': (t['request_type'] or {}).get('title', ''),
                'created': t['created_at'],
                'summary': t['summary'] or '',
                'description': t['description'] or '',
            })


def get_tickets(place_url, outfile, statuses=(
        'open', 'acknowledged', 'closed', 'archived'), start_date=None):
    """Download every issue for a place from the SeeClickFix API into a file.

    Nothing is requested when ``outfile`` already exists.

    Args:
        place_url (str): Value of the API's ``place_url`` query parameter.
        outfile (str): Path of the JSON file that receives the list of issues.
        statuses (iterable of str): Issue statuses to request; joined with
            commas into the ``status`` query parameter.
        start_date (str, optional): If given, it is parsed with dateutil and
            sent in ISO format as the ``after`` query parameter.
    """
    print(outfile)
    if os.path.exists(outfile):
        print("See click fix file already exists, skipping query...")
        return

    request_str = 'https://seeclickfix.com/api/v2/issues?place_url=' \
        + place_url \
        + '&status=' + ','.join(statuses)
    if start_date:
        request_str += '&after=' + parse(start_date).isoformat()

    # Decode each response once and reuse the payload.
    payload = requests.get(request_str).json()
    pagination = payload['metadata']['pagination']
    print("Getting " + str(pagination['pages'])
          + " pages of see click fix data")

    issues = payload['issues']
    print("page:" + str(pagination['page']))
    next_page_url = pagination['next_page_url']
    while next_page_url:
        payload = requests.get(next_page_url).json()
        pagination = payload['metadata']['pagination']
        print("page:" + str(pagination['page']))
        issues += payload['issues']
        next_page_url = pagination['next_page_url']
        # Pause between page requests.
        time.sleep(.5)

    with open(outfile, 'w') as f:
        json.dump(issues, f)
class Segment(object):
    """A segment contains a dict of properties and a shapely shape.

    Args:
        geometry: The shape of the segment.
        properties (dict): Properties of the segment.
    """

    def __init__(self, geometry, properties):
        self.geometry = geometry
        self.properties = properties


class Intersection(object):
    """
    Creates an Intersection object
    Args:
        segment_id (int): Unique identifier for the intersection;
            stored as ``id``.
        lines (list of shapely.geometry.linestring): List of lines forming
            the intersection.
        data (dict): Additional data associated with the intersection.
        properties (list of dict): List of dictionaries containing
            properties for each line.
        nodes (list of dict, optional): List of dictionaries containing
            information about nodes in the intersection.
            Defaults to a new empty list.
        connected_segments (list of int, optional): List of IDs of segments
            connected to the intersection. Defaults to a new empty list.

    ``geometry`` starts out as None.
    """

    def __init__(self, segment_id, lines, data, properties,
                 nodes=None, connected_segments=None):
        self.id = segment_id
        self.lines = lines
        self.data = data
        self.properties = properties
        self.geometry = None
        # Nodes are the points (with openstreetmap node id) in the intersection
        # A fresh list per instance: a literal [] default would be one list
        # shared by every Intersection created without these arguments.
        self.nodes = [] if nodes is None else nodes
        self.connected_segments = (
            [] if connected_segments is None else connected_segments)


class IntersectionBuffer(object):
    """
    An intersection buffer consists of a polygon, and a list of
    records associated with the intersection points
    """

    def __init__(self, buffer, points):
        self.buffer = buffer
        self.points = points
In a car, people are trying to move over/it's unclear where people are going.", "USERTYPE": "bikes", "REQUESTDATE": "2016-01-19T14:48:45.000Z", "REQUESTID": 14808.0, "near_id": 6391, "Y": 42.354167552594284, "X": -71.05414378860903}, {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14809, "GLOBALID": "", "REQUESTTYPE": "people don't yield while going straight", "COMMENTS": "It's terrifying to walk over here. It seems like it's impossible to get the cars to stop stop, even at the crosswalks.", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T14:57:03.000Z", "REQUESTID": 14809.0, "near_id": "0013093", "Y": 42.33938397670106, "X": -71.0994798889095}, {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14810, "GLOBALID": "", "REQUESTTYPE": "low visibility", "COMMENTS": "cars coming around the corner of this wide one street are speeding and not visible for persons on the crosswalk", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T15:36:25.000Z", "REQUESTID": 14810.0, "near_id": 6083, "Y": 42.349364649630935, "X": -71.06656509857143}, {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14811, "GLOBALID": "", "REQUESTTYPE": "people don't yield while turning", "COMMENTS": "as you come off the bike path, it's unclear how to get across to continue south on Washington, and cars turn right into your path as you try to cross west with traffic to position yourself to go south with traffic.", "USERTYPE": "bikes", "REQUESTDATE": "2016-01-19T21:26:54.000Z", "REQUESTID": 14811.0, "near_id": "0011866", "Y": 42.30199319771136, "X": -71.11441432121919}, 2 | {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14809, "GLOBALID": "", "REQUESTTYPE": "people don't yield while going straight", "COMMENTS": "TEST", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T14:57:03.000Z", "REQUESTID": 14809.0, "near_id": 5492, "Y": 42.342341340478789, "X": -71.065894221691337}, 3 | {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14809, "GLOBALID": "", "REQUESTTYPE": "people don't 
yield while going straight", "COMMENTS": "TEST2", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T14:57:03.000Z", "REQUESTID": 14809.0, "near_id": "005598", "Y": 42.325676212533196, "X": -71.065894221691337}, 4 | {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14809, "GLOBALID": "", "REQUESTTYPE": "bike facilities", "COMMENTS": "TEST2", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T14:57:03.000Z", "REQUESTID": 14809.0, "near_id": "005598", "Y": 42.325676212533196, "X": -71.065894221691337}, 5 | {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14809, "GLOBALID": "", "REQUESTTYPE": "bike facilities", "COMMENTS": "TEST2", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T14:57:03.000Z", "REQUESTID": 14809.0, "near_id": "005598", "Y": 42.325676212533196, "X": -71.065894221691337}, 6 | {"STATUS": "Unassigned", "STREETSEGID": 0, "OBJECTID": 14809, "GLOBALID": "", "REQUESTTYPE": "bike facilities", "COMMENTS": "TEST2", "USERTYPE": "walks", "REQUESTDATE": "2016-01-19T14:57:03.000Z", "REQUESTID": 14809.0, "near_id": "005593", "Y": 42.330339, "X": -71.05727900000001}] 7 | 8 | -------------------------------------------------------------------------------- /src/data/tests/data/config_brisbane_no_supplemental.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Brisbane, Australia 3 | # City centerpoint latitude & longitude (default geocoded values set) 4 | city_latitude: -27.4697707 5 | city_longitude: 153.0251235 6 | 7 | # City's time zone: defaults to the local time zone of computer initializing the city's config file 8 | timezone: Australia/Brisbane 9 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 10 | city_radius: 20 11 | speed_unit: kph 12 | 13 | # By default, maps are created from OSM's polygon data and fall back to radius 14 | # if there is no polygon data, but but you can change the openstreetmap_geography 15 | # to 'radius' if 
preferred 16 | map_geography: polygon 17 | 18 | # The folder under data where this city's data is stored 19 | name: brisbane 20 | 21 | # If given, limit crashes to after startdate and no later than enddate 22 | # Recommended to limit to just a few years for now 23 | startdate: 24 | enddate: 25 | 26 | ################################################################# 27 | # Configuration for data standardization 28 | 29 | # crash file configurations 30 | crashes_files: 31 | test_crashes.csv: 32 | required: 33 | id: 34 | latitude: 35 | longitude: 36 | # If date supplied in single column: 37 | date_complete: 38 | # If date is separated into year/month/day: 39 | date_year: 40 | date_month: 41 | # Leave date_day empty if not available 42 | date_day: 43 | # If time is available and separate from date: 44 | time: 45 | # If time specified, time_format is one of: 46 | # default (HH:MM:SS) 47 | # seconds (since midnight) 48 | # military (HHMM) 49 | time_format: 50 | optional: 51 | summary: 52 | # If the crash file doesn't have a lat/lon, you must give the address field 53 | # and you will need to run the geocode_batch script - see the README 54 | address: 55 | # This section allows you to specify additional feature in the crash file 56 | # (split_columns) to go into the training set 57 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle) 58 | # but you can specify other fields in the crash data file. 59 | # See the README for examples 60 | 61 | ################################################################# 62 | # Configuration for default features 63 | 64 | # Default features from open street map. You can remove features you don't want 65 | # Note: we don't support adding features in the config file. 
66 | # If there is an additional feature you want from open street map, contact the development team 67 | openstreetmap_features: 68 | categorical: 69 | width: Width 70 | cycleway_type: Bike lane 71 | signal: Signal 72 | oneway: One Way 73 | lanes: Number of lanes 74 | continuous: 75 | width_per_lane: Average width per lane 76 | 77 | # Speed limit is a required feature 78 | # If you choose to override OpenStreetMaps' speed limit, replace 'osm_speed' with the feature name here 79 | speed_limit: osm_speed 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/data/tests/data/config_brisbane_supplemental.yml: -------------------------------------------------------------------------------- 1 | # City name 2 | city: Brisbane, Australia 3 | # City centerpoint latitude & longitude (default geocoded values set) 4 | city_latitude: -27.4697707 5 | city_longitude: 153.0251235 6 | # City's time zone: defaults to the local time zone of computer initializing the city's config file 7 | timezone: Australia/Brisbane 8 | # Radius of city's road network from centerpoint in km, required if OSM has no polygon data (defaults to 20km) 9 | city_radius: 20 10 | speed_unit: kph 11 | 12 | # By default, maps are created from OSM's polygon data and fall back to radius 13 | # if there is no polygon data, but you can change map_geography 14 | # to 'radius' if preferred 15 | map_geography: polygon 16 | 17 | # The folder under data where this city's data is stored 18 | name: brisbane 19 | # If given, limit crashes to after startdate and no later than enddate 20 | # Recommended to limit to just a few years for now 21 | startdate: 22 | enddate: 23 | ################################################################# 24 | # Configuration for data standardization 25 | 26 | # crash file configurations 27 | crashes_files: 28 | test_crashes.csv: 29 | required: 30 | id: 31 | latitude: 32 | longitude: 33 | # If date supplied in single
column: 34 | date_complete: 35 | # If date is separated into year/month/day: 36 | date_year: 37 | date_month: 38 | # Leave date_day empty if not available 39 | date_day: 40 | # If time is available and separate from date: 41 | time: 42 | # If time specified, time_format is one of: 43 | # default (HH:MM:SS) 44 | # seconds (since midnight) 45 | # military (HHMM) 46 | time_format: 47 | optional: 48 | summary: 49 | # If the crash file doesn't have a lat/lon, you must give the address field 50 | # and you will need to run the geocode_batch script - see the README 51 | address: 52 | # This section allows you to specify additional feature in the crash file 53 | # (split_columns) to go into the training set 54 | # Most commonly split_columns are used for mode (pedestrian/bike/vehicle) 55 | # but you can specify other fields in the crash data file. 56 | # See the README for examples 57 | 58 | # List of concern type information 59 | concern_files: 60 | - name: concern 61 | filename: test_concerns.csv 62 | latitude: 63 | longitude: 64 | time: 65 | 66 | # Additional data sources 67 | data_source: 68 | - name: 69 | filename: parking_tickets_dummy_file_1.csv 70 | address: 71 | date: 72 | time: 73 | category: 74 | notes: 75 | # Feature is categorical (f_cat) or continuous (f_cont) 76 | feat: 77 | # feat_agg (feature aggregation) can be total count 'default' or 'latest value' 78 | feat_agg: 79 | # if latest, the column name where the value can be found 80 | value: 81 | 82 | ################################################################# 83 | # Configuration for default features 84 | 85 | # Default features from open street map. You can remove features you don't want 86 | # Note: we don't support adding features in the config file. 
87 | # If there is an additional feature you want from open street map, contact the development team 88 | openstreetmap_features: 89 | categorical: 90 | width: Width 91 | cycleway_type: Bike lane 92 | signal: Signal 93 | oneway: One Way 94 | lanes: Number of lanes 95 | continuous: 96 | width_per_lane: Average width per lane 97 | 98 | # Speed limit is a required feature 99 | # If you choose to override OpenStreetMaps' speed limit, replace 'osm_speed' with the feature name here 100 | # speed_limit: osm_speed 101 | 102 | 103 | -------------------------------------------------------------------------------- /src/data/tests/data/config_features.yml: -------------------------------------------------------------------------------- 1 | city: Boston, Massachusetts, USA 2 | name: boston 3 | city_latitude: 42.3600825 4 | city_longitude: -71.0588801 5 | city_radius: 15 6 | timezone: America/New_York 7 | crashes_files: 8 | test: 9 | dummy 10 | 11 | openstreetmap_features: 12 | categorical: 13 | osm_speed: Speed limit 14 | width: Width 15 | cycleway_type: Bike lane 16 | oneway: One Way 17 | lanes: Number of lanes 18 | signal: Traffic signal 19 | crosswalk: Crosswalk 20 | continuous: 21 | width_per_lane: Average width per lane 22 | waze_features: 23 | categorical: 24 | jam: Existence of a jam 25 | continuous: 26 | jam_percent: Percent of time there was a jam 27 | -------------------------------------------------------------------------------- /src/data/tests/data/crash_test_dummy.json: -------------------------------------------------------------------------------- 1 | [{"CAD_EVENT_REL_COMMON_ID": "1481358", "YCOORD": "2950076.65", "CALENDAR_DATE": "2016-02-08 00:00:00.000", "X": "-71.065840037158878", "FIRST_EVENT_SUBTYPE": "UNKNOWN IF INJURIES - ADVISE NEED FOR EMS (P) (E) (F)", "XCOORD": "773540.86", "N_EVENTS": "1", "TIME": "20:40:23", "Y": "42.342341340478789", "near_id": 5492}, {"CAD_EVENT_REL_COMMON_ID": "1666213", "YCOORD": "2944003.51", "CALENDAR_DATE": "2016-05-06 
00:00:00.000", "X": "-71.065894221691337", "FIRST_EVENT_SUBTYPE": "REPORTED INJURIES (P) (E) (F)", "XCOORD": "773557.12", "N_EVENTS": "1", "TIME": "14:14:30", "Y": "42.325676212533196", "near_id": "005598"}, {"CAD_EVENT_REL_COMMON_ID": "1689595", "YCOORD": "2945764.85", "CALENDAR_DATE": "2016-05-16 00:00:00.000", "X": "-71.057661166075064", "FIRST_EVENT_SUBTYPE": "REPORTED INJURIES (P) (E) (F)", "XCOORD": "775774.4", "N_EVENTS": "1", "TIME": "18:08:29", "Y": "42.330478134494818", "near_id": "005593"}, {"CAD_EVENT_REL_COMMON_ID": "2069106", "YCOORD": "2945652", "CALENDAR_DATE": "2016-10-28 00:00:00.000", "X": "-71.057275245294946", "FIRST_EVENT_SUBTYPE": "PEDESTRIAN STRUCK (P) (E) (F)", "XCOORD": "775879.34", "N_EVENTS": "1", "TIME": "10:27:18", "Y": "42.330166976202662", "near_id": "005593"}, {"YCOORD": "", "CAD_EVENT_REL_COMMON_ID": "618110", "CALENDAR_DATE": "2015-01-19", "mode_type": "mv", "X": "-71.05727900000001", "TIME,": "85064", "FIRST_EVENT_SUBTYPE": "REPORTED INJURIES (P) (E) (F)", "XCOORD": "", "N_EVENTS": "", "TIME": "", "Y": "42.330339", "near_id": "005593"}, {"YCOORD": "", "CAD_EVENT_REL_COMMON_ID": "862107", "CALENDAR_DATE": "2015-05-09", "mode_type": "mv", "X": "-71.027179", "TIME,": "64588", "FIRST_EVENT_SUBTYPE": "REPORTED INJURIES (P) (E) (F)", "XCOORD": "", "N_EVENTS": "", "TIME": "", "Y": "42.335786999999996", "near_id": 5829}, {"YCOORD": "", "CAD_EVENT_REL_COMMON_ID": "982316", "CALENDAR_DATE": "2015-06-28", "mode_type": "mv", "X": "-71.05727900000001", "TIME,": "23951", "FIRST_EVENT_SUBTYPE": "UNKNOWN IF INJURIES - ADVISE NEED FOR EMS (P) (E) (F)", "XCOORD": "", "N_EVENTS": "", "TIME": "", "Y": "42.330339", "near_id": "005593"}, {"YCOORD": "", "CAD_EVENT_REL_COMMON_ID": "2246232", "CALENDAR_DATE": "2017-01-23", "mode_type": "mv", "X": "-71.06572800000001", "TIME,": "45770", "FIRST_EVENT_SUBTYPE": "REPORTED INJURIES (P) (E) (F)", "XCOORD": "", "N_EVENTS": "", "TIME": "", "Y": "42.342415", "near_id": 5492}] 
-------------------------------------------------------------------------------- /src/data/tests/data/osm_crash_file.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 1403832, 4 | "dateOccurred": "2016-01-01T00:56:45-05:00", 5 | "location": { 6 | "latitude": 42.3311855, 7 | "longitude": -71.0748389 8 | }, 9 | "vehicles": [ 10 | { 11 | "category": "car" 12 | } 13 | ], 14 | "summary": "REPORTED INJURIES (P) (E) (F)" 15 | }, 16 | { 17 | "id": 1403832, 18 | "dateOccurred": "2016-01-01T00:56:45-05:00", 19 | "location": { 20 | "latitude": 42.3611498, 21 | "longitude": -71.0645559 22 | }, 23 | "vehicles": [ 24 | { 25 | "category": "car" 26 | } 27 | ], 28 | "summary": "REPORTED INJURIES (P) (E) (F)" 29 | }, 30 | { 31 | "id": 1403832, 32 | "dateOccurred": "2016-01-01T00:56:45-05:00", 33 | "location": { 34 | "latitude": 42.333375, 35 | "longitude": -71.07736 36 | }, 37 | "vehicles": [ 38 | { 39 | "category": "car" 40 | } 41 | ], 42 | "summary": "REPORTED INJURIES (P) (E) (F)" 43 | } 44 | 45 | ] 46 | -------------------------------------------------------------------------------- /src/data/tests/data/osm_output.gpickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/osm_output.gpickle -------------------------------------------------------------------------------- /src/data/tests/data/processed/maps/inters.geojson: -------------------------------------------------------------------------------- 1 | {"type": "FeatureCollection", "features": [{ "type": "Feature", "properties": { "id_2": 2, "id_1": 1, "intersection": 1, "connected_segments": [10, 11]}, "geometry": { "type": "Point", "coordinates": [ -71.130919614355903, 42.236953123165236 ] } }, 2 | { "type": "Feature", "properties": { "id_2": 6, "id_1": 2, "intersection": 1 }, "geometry": { "type": "Point", 
"coordinates": [ -71.130457692279407, 42.236823198783938 ] } }, 3 | { "type": "Feature", "properties": { "id_2": 4, "id_1": 0, "intersection": 1 }, "geometry": { "type": "Point", "coordinates": [ -71.130747480344851, 42.234915582812036 ] } }, 4 | { "type": "Feature", "properties": { "id_2": 5, "id_1": 3, "intersection": 1 }, "geometry": { "type": "Point", "coordinates": [ -71.129743187575258, 42.236620170391106 ] } }, 5 | { "type": "Feature", "properties": { "id_2": 3, "id_1": 0, "intersection": 1 }, "geometry": { "type": "Point", "coordinates": [ -71.129934237072263, 42.236675963780598 ] } }, 6 | { "type": "Feature", "properties": { "id_2": 4, "id_1": 1, "intersection": 1 }, "geometry": { "type": "Point", "coordinates": [ -71.131724767847999, 42.235164325359385 ] } } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /src/data/tests/data/processed/maps/osm.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/processed/maps/osm.gpkg -------------------------------------------------------------------------------- /src/data/tests/data/processed/maps/test_line_convert.cpg: -------------------------------------------------------------------------------- 1 | ISO-8859-1 -------------------------------------------------------------------------------- /src/data/tests/data/processed/maps/test_line_convert.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/processed/maps/test_line_convert.dbf -------------------------------------------------------------------------------- /src/data/tests/data/processed/maps/test_line_convert.shp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/processed/maps/test_line_convert.shp -------------------------------------------------------------------------------- /src/data/tests/data/processed/maps/test_line_convert.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/processed/maps/test_line_convert.shx -------------------------------------------------------------------------------- /src/data/tests/data/raw/ma_cob_spatially_joined_streets.cpg: -------------------------------------------------------------------------------- 1 | ISO-8859-1 -------------------------------------------------------------------------------- /src/data/tests/data/raw/ma_cob_spatially_joined_streets.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/raw/ma_cob_spatially_joined_streets.dbf -------------------------------------------------------------------------------- /src/data/tests/data/raw/ma_cob_spatially_joined_streets.prj: -------------------------------------------------------------------------------- 1 | PROJCS["WGS_1984_Web_Mercator_Auxiliary_Sphere",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mercator_Auxiliary_Sphere"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Standard_Parallel_1",0.0],PARAMETER["Auxiliary_Sphere_Type",0.0],UNIT["Meter",1.0]] -------------------------------------------------------------------------------- /src/data/tests/data/raw/ma_cob_spatially_joined_streets.shp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/raw/ma_cob_spatially_joined_streets.shp -------------------------------------------------------------------------------- /src/data/tests/data/raw/ma_cob_spatially_joined_streets.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/raw/ma_cob_spatially_joined_streets.shx -------------------------------------------------------------------------------- /src/data/tests/data/standardized/Vision_Zero_Entry.csv: -------------------------------------------------------------------------------- 1 | X,Y,OBJECTID,GLOBALID,REQUESTID,REQUESTTYPE,REQUESTDATE,STATUS,STREETSEGID,COMMENTS,USERTYPE 2 | -71.129924,42.236677,14807,,14807,bike facilities don't exist or need improvement,2016-01-19T14:43:50.000Z,Unassigned,0,Broadway Bridge is wide & off highway ramps. Vehicles speed over. Key connection for bikes (& everyone) to get back into Southie fr much of city. 
W.Broadway is wide enough for bike lanes all the way across this bridge down to at least E.Broadway.,bikes 3 | -------------------------------------------------------------------------------- /src/data/tests/data/standardized/concerns.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 67658, 4 | "dateCreated": "2016-02-12T21:22:30.000Z", 5 | "status": "Unassigned", 6 | "tags": ["poorSignange", "driversIgnoreSignage"], 7 | "location": { 8 | "latitude": 42.236677, 9 | "longitude": -71.129924 10 | }, 11 | "address": "685 Tremont Street, Boston", 12 | "summary": "Drivers do not stop for pedestrians in the designated crosswalks" 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /src/data/tests/data/standardized/crashes.csv: -------------------------------------------------------------------------------- 1 | X,Y,CAD_EVENT_REL_COMMON_ID,FIRST_EVENT_SUBTYPE,XCOORD,YCOORD,CALENDAR_DATE,TIME,N_EVENTS 2 | -71.130909,42.236942,1403832,REPORTED INJURIES (P) (E) (F),772136.21,2934954.42,2016-01-01 00:00:00.000,00:56:45,1 3 | -------------------------------------------------------------------------------- /src/data/tests/data/standardized/crashes.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 1403832, 4 | "dateOccurred": "2016-01-01T00:56:45-05:00", 5 | "mode": "vehicle", 6 | "location": { 7 | "latitude": 42.236942, 8 | "longitude": -71.130909 9 | }, 10 | "address": "14 Corona Street", 11 | "summary": "REPORTED INJURIES (P) (E) (F)" 12 | } 13 | ] 14 | -------------------------------------------------------------------------------- /src/data/tests/data/test_add_map/ma_cob_small.cpg: -------------------------------------------------------------------------------- 1 | ISO-8859-1 -------------------------------------------------------------------------------- /src/data/tests/data/test_add_map/ma_cob_small.dbf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/test_add_map/ma_cob_small.dbf -------------------------------------------------------------------------------- /src/data/tests/data/test_add_map/ma_cob_small.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/test_add_map/ma_cob_small.shp -------------------------------------------------------------------------------- /src/data/tests/data/test_add_map/ma_cob_small.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/test_add_map/ma_cob_small.shx -------------------------------------------------------------------------------- /src/data/tests/data/test_add_map/osm3857.cpg: -------------------------------------------------------------------------------- 1 | ISO-8859-1 -------------------------------------------------------------------------------- /src/data/tests/data/test_add_map/osm3857.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/test_add_map/osm3857.dbf -------------------------------------------------------------------------------- /src/data/tests/data/test_add_map/osm3857.prj: -------------------------------------------------------------------------------- 1 | 
PROJCS["WGS_1984_Web_Mercator_Auxiliary_Sphere",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mercator_Auxiliary_Sphere"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Standard_Parallel_1",0.0],PARAMETER["Auxiliary_Sphere_Type",0.0],UNIT["Meter",1.0]] -------------------------------------------------------------------------------- /src/data/tests/data/test_add_map/osm3857.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/test_add_map/osm3857.shp -------------------------------------------------------------------------------- /src/data/tests/data/test_add_map/osm3857.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data/tests/data/test_add_map/osm3857.shx -------------------------------------------------------------------------------- /src/data/tests/data/test_create_segments/additional_points.json: -------------------------------------------------------------------------------- 1 | [{"feature": "parking_tickets", "date": "2016-05-17T00:00:00Z", "location": {"latitude": 42.38404209999999, "longitude": -71.1370766}, "category": "NO PARKING"}, 2 | {"feature": "parking_tickets", "date": "2014-01-04T15:50:00Z", "location": {"latitude": 42.38404209999999, "longitude": -71.1370766}, "category": "METER EXPIRED"}, 3 | {"feature": "traffic_volume", "date": "2014-01-04T15:50:00Z", "location": {"latitude": 42.38404209999999, "longitude": -71.1370766}, "feat_agg":"latest", "value":100}, 4 | {"feature": "traffic_volume", "date": "2015-01-04T15:50:00Z", "location": {"latitude": 42.38404209999999, "longitude": 
-71.1370766}, "feat_agg":"latest", "value":200}] 5 | -------------------------------------------------------------------------------- /src/data/tests/data/test_create_segments/empty_set_inter.geojson: -------------------------------------------------------------------------------- 1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.13578386374338, 42.258548279519616], [-71.13582699649501, 42.25852287054979], [-71.13587333014675, 42.25849538531848], [-71.13591916288622, 42.258460997638835]]}, "properties": {"id": "13321", "id_1": null, "id_2": null, "intersection": null, "ST_NAME": "Boundary", "ST_TYPE": "RD", "SUF_DIR": null, "CFCC": "A31", "SPEEDLIMIT": 1, "ONEWAY": "N", "FT_COST": 0.0, "TF_COST": 0.0, "TF_DIR": null, "FT_DIR": null, "Route_ID": "L120383 EB", "F_F_Class": 7, "Jurisdictn": "0", "Hwy_Dist": "6", "Hwy_Subdst": "6A", "Med_Width": 0.0, "Med_Type": 0, "Mile_Count": 1, "Num_Lanes": 2, "Opp_Lanes": 0, "Shldr_Lt_W": 0.0, "Shldr_Lt_T": 0, "Shldr_Rt_W": 0.0, "Shldr_Rt_T": 0, "Speed_Lim": 0, "Op_Dir_SL": 0, "ST_Name_1": "BLUE LEDGE DRIVE", "Fm_St_Name": "WASHINGTON STREET", "To_St_Name": "ENNEKING PARKWAY", "City": 35, "County": "M", "Operation": 2, "Struct_Cnd": 2, "Surface_Tp": 6, "Surface_Wd": 26.0, "Terrain": 1, "Toll_Road": 0, "AADT": 0, "AADT_Year": 0, "AADT_Deriv": 0, "Statn_Num": 0, "Curb": 3, "Shldr_UL_W": 0.0, "Shldr_UL_T": 0, "Lt_Sidewlk": 4, "Rt_Sidewlk": 4, "orig_id": 9913321}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.13577927225563, 42.258377648139316], [-71.13599614724784, 42.2585068664975], [-71.13603227173553, 42.25852824802771], [-71.13605920937619, 42.25854419145341]]}, "properties": {"id": "2657", "id_1": null, "id_2": null, "intersection": null, "ST_NAME": "Enneking", "ST_TYPE": "PKWY", "SUF_DIR": null, "CFCC": "A31", "SPEEDLIMIT": 30, "ONEWAY": null, "FT_COST": 0.535, "TF_COST": 0.535, "TF_DIR": null, "FT_DIR": null, "Route_ID": 
"L233833 EB", "F_F_Class": 7, "Jurisdictn": "0", "Hwy_Dist": "6", "Hwy_Subdst": "6A", "Med_Width": 0.0, "Med_Type": 0, "Mile_Count": 1, "Num_Lanes": 0, "Opp_Lanes": 0, "Shldr_Lt_W": 0.0, "Shldr_Lt_T": 0, "Shldr_Rt_W": 0.0, "Shldr_Rt_T": 0, "Speed_Lim": 0, "Op_Dir_SL": 0, "ST_Name_1": "BOLD KNOB PATH", "Fm_St_Name": "ENNEKING PARKWAY", "To_St_Name": "EAST BOUNDARY PATH", "City": 35, "County": "M", "Operation": 0, "Struct_Cnd": 0, "Surface_Tp": 0, "Surface_Wd": 0.0, "Terrain": 1, "Toll_Road": 0, "AADT": 0, "AADT_Year": 0, "AADT_Deriv": 0, "Statn_Num": 0, "Curb": 0, "Shldr_UL_W": 0.0, "Shldr_UL_T": 0, "Lt_Sidewlk": 0, "Rt_Sidewlk": 0, "orig_id": 992657}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.13591916281045, 42.25846099769571]}, "properties": {"intersection": 1}}]} -------------------------------------------------------------------------------- /src/data/tests/data/test_create_segments/missing_int_segments.geojson: -------------------------------------------------------------------------------- 1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.10680621667301, 42.365203534606216], [-71.1068893, 42.36508589999996]]}, "properties": {"id": "626", "access": null, "bridge": null, "from": "61327145", "highway": "tertiary", "junction": null, "key": "0", "lanes": 1, "length": "40.807", "maxspeed": null, "name": "Pleasant Street", "oneway": 1, "osmid": "8615853", "ref": null, "to": "61317661", "tunnel": null, "width": 12, "hwy_type": 5, "osm_speed": 0, "signal": 0, "width_per_lane": 12, "segment_id": "8615853-61327145-61317661", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.10719479999999, 42.3648758], [-71.10716289999999, 42.36485609999997], [-71.10705704275215, 42.364790669448304]]}, "properties": {"id": "669", "access": null, "bridge": null, "from": "61318588", "highway": 
"residential", "junction": null, "key": "0", "lanes": 1, "length": "55.25", "maxspeed": "25 mph", "name": "Auburn Street", "oneway": 1, "osmid": "13583073", "ref": null, "to": "61327311", "tunnel": null, "width": 12, "hwy_type": 0, "osm_speed": 25, "signal": 0, "width_per_lane": 12, "segment_id": "13583073-61318588-61327311", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.10731257878744, 42.3647757307271], [-71.1072207, 42.36485379999999], [-71.10719479999999, 42.3648758]]}, "properties": {"id": "1651", "access": null, "bridge": null, "from": "61333677", "highway": "residential", "junction": null, "key": "0", "lanes": 1, "length": "116.20300000000002", "maxspeed": "25 mph", "name": "Pleasant Street", "oneway": 1, "osmid": "164024921", "ref": null, "to": "61318588", "tunnel": null, "width": 12, "hwy_type": 0, "osm_speed": 25, "signal": 0, "width_per_lane": 12, "segment_id": "164024921-61333677-61318588", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1068893, 42.36508589999996], [-71.1072174, 42.36505979999999]]}, "properties": {"id": "1827", "access": null, "bridge": null, "from": "61317661", "highway": "secondary", "junction": null, "key": "0", "lanes": 4, "length": "27.112", "maxspeed": null, "name": "Western Avenue", "oneway": 1, "osmid": "138749294", "ref": null, "to": "61317663", "tunnel": null, "width": 20, "hwy_type": 1, "osm_speed": 0, "signal": 0, "width_per_lane": 5, "segment_id": "138749294-61317661-61317663", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.10671117302242, 42.365102253508525], [-71.1068893, 42.36508589999996]]}, "properties": {"id": "1828", "access": null, "bridge": null, "from": "61317667", "highway": "secondary", "junction": null, "key": "0", "lanes": 4, "length": "56.09", 
"maxspeed": null, "name": "Western Avenue", "oneway": 1, "osmid": "519812477", "ref": null, "to": "61317661", "tunnel": null, "width": 20, "hwy_type": 1, "osm_speed": 0, "signal": 0, "width_per_lane": 5, "segment_id": "519812477-61317667-61317661", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.10719479999999, 42.3648758], [-71.1072174, 42.36505979999999]]}, "properties": {"id": "1829", "access": null, "bridge": null, "from": "61318588", "highway": "residential", "junction": null, "key": "0", "lanes": 2, "length": "20.544", "maxspeed": "25 mph", "name": "Pleasant Street", "oneway": 0, "osmid": "13583074", "ref": null, "to": "61317663", "tunnel": null, "width": 12, "hwy_type": 0, "osm_speed": 25, "signal": 0, "width_per_lane": 6, "segment_id": "13583074-61318588-61317663", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1072174, 42.36505979999999], [-71.10739575202363, 42.36504456744246]]}, "properties": {"id": "1830", "access": null, "bridge": null, "from": "61317663", "highway": "secondary", "junction": null, "key": "0", "lanes": 4, "length": "75.436", "maxspeed": null, "name": "Western Avenue", "oneway": 1, "osmid": "138749294", "ref": null, "to": "61317675", "tunnel": null, "width": 20, "hwy_type": 1, "osm_speed": 0, "signal": 0, "width_per_lane": 5, "segment_id": "138749294-61317663-61317675", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.1068893, 42.36508589999996]}, "properties": {"intersection": 1}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.1072174, 42.36505979999999]}, "properties": {"intersection": 1}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.10719479999999, 42.3648758]}, "properties": {"intersection": 1}}]} 
-------------------------------------------------------------------------------- /src/data/tests/data/test_create_segments/points.geojson: -------------------------------------------------------------------------------- 1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "id": "61170342", "geometry": {"type": "Point", "coordinates": [-71.138121, 42.383125]}, "properties": {"feature": "signal"}}, {"type": "Feature", "id": "61171136", "geometry": {"type": "Point", "coordinates": [-71.1161581, 42.386904]}, "properties": {"feature": "signal"}}, {"type": "Feature", "id": "61172660", "geometry": {"type": "Point", "coordinates": [-71.1377047, 42.3834466]}, "properties": {"feature": "crosswalk"}}]} -------------------------------------------------------------------------------- /src/data/tests/data/test_create_segments/test_get_connections1.geojson: -------------------------------------------------------------------------------- 1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.1200424, 42.379453]}, "properties": {"intersection": 1}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.11993622994846, 42.379346131508484], [-71.11993139999998, 42.3793892], [-71.11991448286459, 42.37954613062321]]}, "properties": {"id": "259", "access": null, "bridge": null, "from": "61327430", "highway": "primary", "junction": null, "key": "0", "lanes": 2, "length": "253.047", "maxspeed": null, "name": "Massachusetts Avenue", "oneway": 1, "osmid": "507868287", "ref": "MA 2A", "to": "2559968287", "tunnel": null, "width": 20, "hwy_type": 2, "osm_speed": "0", "signal": 0, "width_per_lane": 10, "segment_id": "507868287-61327430-2559968287", "dead_end": null, "streets": null, "intersection": null, "orig_id": 994}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.12002868592982, 42.379585218759324], [-71.1200424, 42.379453]]}, "properties": {"id": "263", "access": 
null, "bridge": null, "from": "2559968299", "highway": "primary", "junction": null, "key": "0", "lanes": 2, "length": "60.21", "maxspeed": null, "name": "Massachusetts Avenue", "oneway": 1, "osmid": "249388676", "ref": "MA 2A", "to": "3306934506", "tunnel": null, "width": 20, "hwy_type": 2, "osm_speed": "0", "signal": 0, "width_per_lane": 10, "segment_id": "249388676-2559968299-3306934506", "dead_end": null, "streets": null, "intersection": null, "orig_id": 998}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1200424, 42.379453], [-71.12005019999998, 42.379381099999996], [-71.12005628884195, 42.37932078730472]]}, "properties": {"id": "1167", "access": null, "bridge": null, "from": "3306934506", "highway": "primary", "junction": null, "key": "0", "lanes": 2, "length": "195.353", "maxspeed": null, "name": "Massachusetts Avenue", "oneway": 1, "osmid": "249388676", "ref": "MA 2A", "to": "61321358", "tunnel": null, "width": 20, "hwy_type": 2, "osm_speed": "0", "signal": 0, "width_per_lane": 10, "segment_id": "249388676-3306934506-61321358", "dead_end": null, "streets": null, "intersection": null, "orig_id": 9913}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1200424, 42.379453], [-71.1201581, 42.3794316], [-71.12018899999998, 42.3794069], [-71.12019574286946, 42.37938414519778]]}, "properties": {"id": "1168", "access": null, "bridge": null, "from": "3306934506", "highway": "residential", "junction": null, "key": "0", "lanes": 1, "length": "187.493", "maxspeed": "25 mph", "name": "Massachusetts Avenue Branch", "oneway": 1, "osmid": "323920752", "ref": null, "to": "61324087", "tunnel": null, "width": 20, "hwy_type": 0, "osm_speed": "25", "signal": 0, "width_per_lane": 20, "segment_id": "323920752-3306934506-61324087", "dead_end": null, "streets": null, "intersection": null, "orig_id": 9914}}]} -------------------------------------------------------------------------------- 
/src/data/tests/data/test_create_segments/unconnected.geojson: -------------------------------------------------------------------------------- 1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.1337941, 42.371415999999996]}, "properties": {"intersection": 1}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.1337188, 42.37130749999999]}, "properties": {"intersection": 1}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1337941, 42.371415999999996], [-71.1339089, 42.37140499999999], [-71.1339730821058, 42.37140575955156]]}, "properties": {"id": "70", "access": null, "bridge": null, "from": "61326095", "highway": "trunk", "junction": null, "key": "0", "lanes": 2, "length": "110.983", "maxspeed": null, "name": "Gerry's Landing Road", "oneway": 1, "osmid": "42161639", "ref": null, "to": "61325507", "tunnel": null, "width": 26, "hwy_type": 7, "osm_speed": 0, "signal": 0, "width_per_lane": 13, "segment_id": "42161639-61326095-61325507", "dead_end": null, "streets": null, "intersection": null, "orig_id": 9970}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1337941, 42.371415999999996], [-71.13396706786295, 42.37138053992674]]}, "properties": {"id": "212", "access": null, "bridge": null, "from": "61326095", "highway": "secondary_link", "junction": null, "key": "0", "lanes": 3, "length": "76.199", "maxspeed": null, "name": null, "oneway": 1, "osmid": "41743733", "ref": null, "to": "61316733", "tunnel": null, "width": 37, "hwy_type": 4, "osm_speed": 0, "signal": 0, "width_per_lane": 12, "segment_id": "41743733-61326095-61316733", "dead_end": null, "streets": null, "intersection": null, "orig_id": 99212}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.13382560991367, 42.371200968244224], [-71.1337188, 42.37130749999999]]}, "properties": {"id": "199", "access": null, "bridge": null, "from": "61326039", 
"highway": "secondary_link", "junction": null, "key": "0", "lanes": 2, "length": "144.49499999999998", "maxspeed": null, "name": null, "oneway": 1, "osmid": "93128876", "ref": null, "to": "61283383", "tunnel": null, "width": 37, "hwy_type": 4, "osm_speed": 0, "signal": 0, "width_per_lane": 18, "segment_id": "93128876-61326039-61283383", "dead_end": null, "streets": null, "intersection": null, "orig_id": 99199}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.13362851835943, 42.371467428137386], [-71.1337033, 42.3714401], [-71.1337941, 42.371415999999996]]}, "properties": {"id": "213", "access": null, "bridge": "yes", "from": "4223586646", "highway": "trunk", "junction": null, "key": "0", "lanes": 2, "length": "27.222", "maxspeed": null, "name": "Gerrys Landing Road/Eliot Bridge", "oneway": 1, "osmid": "[42161642, 42161639]", "ref": null, "to": "61326095", "tunnel": null, "width": 0, "hwy_type": 7, "osm_speed": 0, "signal": 0, "width_per_lane": 0, "segment_id": "[42161642, 42161639]-4223586646-61326095", "dead_end": null, "streets": null, "intersection": null, "orig_id": 99213}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.1337188, 42.37130749999999], [-71.133626, 42.37135879999998], [-71.13357330574013, 42.371385118021024]]}, "properties": {"id": "1198", "access": null, "bridge": "yes", "from": "61283383", "highway": "trunk", "junction": null, "key": "0", "lanes": 2, "length": "27.810000000000002", "maxspeed": null, "name": "Eliot Bridge/Gerrys Landing Road", "oneway": 1, "osmid": "[41743732, 353942598]", "ref": null, "to": "4223586645", "tunnel": null, "width": 0, "hwy_type": 7, "osm_speed": 0, "signal": 0, "width_per_lane": 0, "segment_id": "[41743732, 353942598]-61283383-4223586645", "dead_end": null, "streets": null, "intersection": null, "orig_id": 991198}}, {"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.13385555213752, 42.371221558865074], [-71.1338096, 
42.37125439999999], [-71.1337188, 42.37130749999999]]}, "properties": {"id": "1551", "access": null, "bridge": null, "from": "61316733", "highway": "trunk", "junction": null, "key": "0", "lanes": 2, "length": "80.116", "maxspeed": null, "name": "Gerry's Landing Road", "oneway": 1, "osmid": "353942598", "ref": null, "to": "61283383", "tunnel": null, "width": 26, "hwy_type": 7, "osm_speed": 0, "signal": 0, "width_per_lane": 13, "segment_id": "353942598-61316733-61283383", "dead_end": null, "streets": null, "intersection": null, "orig_id": 991551}}]} -------------------------------------------------------------------------------- /src/data/tests/data/test_get_roads_and_inters.geojson: -------------------------------------------------------------------------------- 1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "id": "442", "geometry": {"type": "LineString", "coordinates": [[-71.10053800000001, 42.36912999999998], [-71.1006832, 42.3691875], [-71.10092959999999, 42.369286499999966], [-71.1010043, 42.36931649999998]]}, "properties": {"id": "442", "access": null, "bridge": null, "from": "61317355", "highway": "tertiary", "junction": null, "key": "0", "lanes": 3, "length": "43.561", "maxspeed": null, "name": "Broadway", "oneway": 0, "osmid": "[426455459, 302156855]", "ref": null, "to": "61326778", "tunnel": null, "width": 24, "hwy_type": 5, "osm_speed": "0", "signal": 0, "width_per_lane": 8, "segment_id": "[426455459, 302156855]-61317355-61326778", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "id": "443", "geometry": {"type": "LineString", "coordinates": [[-71.101981, 42.36970099999999], [-71.1013635, 42.369456799999966], [-71.1013231, 42.369440799999985], [-71.1011784, 42.3693844], [-71.101119, 42.369361199999986], [-71.1010043, 42.36931649999998]]}, "properties": {"id": "443", "access": null, "bridge": null, "from": "61321175", "highway": "tertiary", "junction": null, "key": "0", "lanes": 3, "length": "90.918", 
"maxspeed": null, "name": "Broadway", "oneway": 0, "osmid": "[426455462, 33720646]", "ref": null, "to": "61326778", "tunnel": null, "width": 24, "hwy_type": 5, "osm_speed": "0", "signal": 0, "width_per_lane": 8, "segment_id": "[426455462, 33720646]-61321175-61326778", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "id": "444", "geometry": {"type": "LineString", "coordinates": [[-71.1017379, 42.36829510000001], [-71.1016837, 42.368371200000006], [-71.1014763, 42.368662400000005], [-71.1013023, 42.36890659999999], [-71.1010657, 42.36923209999999], [-71.1010043, 42.36931649999998]]}, "properties": {"id": "444", "access": null, "bridge": null, "from": "61317367", "highway": "secondary", "junction": null, "key": "0", "lanes": 3, "length": "128.576", "maxspeed": null, "name": "Prospect Street", "oneway": 0, "osmid": "[426455475, 426455483, 426455486]", "ref": null, "to": "61326778", "tunnel": null, "width": 15, "hwy_type": 1, "osm_speed": "0", "signal": 0, "width_per_lane": 5, "segment_id": "[426455475, 426455483, 426455486]-61317367-61326778", "dead_end": null, "streets": null, "intersection": null}}, {"type": "Feature", "id": "445", "geometry": {"type": "LineString", "coordinates": [[-71.100067, 42.370669999999954], [-71.100104, 42.3706169], [-71.1002278, 42.3704355], [-71.1003334, 42.3702808], [-71.1003786, 42.37021659999997], [-71.1005678, 42.3699383], [-71.1007327, 42.3696959], [-71.10094689999998, 42.36939659999999], [-71.1010043, 42.36931649999998]]}, "properties": {"id": "445", "access": null, "bridge": null, "from": "61321196", "highway": "secondary", "junction": null, "key": "0", "lanes": 3, "length": "169.066", "maxspeed": null, "name": "Prospect Street", "oneway": 0, "osmid": "[426455489, 302156882]", "ref": null, "to": "61326778", "tunnel": null, "width": 15, "hwy_type": 1, "osm_speed": "0", "signal": 0, "width_per_lane": 5, "segment_id": "[426455489, 302156882]-61321196-61326778", "dead_end": null, "streets": null, 
"intersection": null}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-71.1010043, 42.36931649999998]}, "properties": {"id": null, "access": null, "bridge": null, "from": null, "highway": "traffic_signals", "junction": null, "key": null, "lanes": null, "length": null, "maxspeed": null, "name": null, "oneway": null, "osmid": "61326778", "ref": null, "to": null, "tunnel": null, "width": null, "hwy_type": null, "osm_speed": null, "signal": 1, "width_per_lane": null, "segment_id": null, "dead_end": null, "streets": "Broadway, Prospect Street", "intersection": 1}}]} -------------------------------------------------------------------------------- /src/data/tests/data/viz_preds_tests/crashes_rollup.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { "type": "Feature", "properties": { "total_crashes": 2, "crash_dates": "2015-04-03T09:10:00-04:00,2015-09-18T08:45:00-04:00" }, "geometry": { "type": "Point", "coordinates": [ -71.107141, 42.353438 ] } }, 5 | { "type": "Feature", "properties": { "total_crashes": 2, "crash_dates": "2015-06-25T17:00:00-04:00,2016-06-01T20:30:00-04:00" }, "geometry": { "type": "Point", "coordinates": [ -71.10526852, 42.35351 ] } } 6 | ]} -------------------------------------------------------------------------------- /src/data/tests/data/viz_preds_tests/crashes_rollup_pedestrian.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { "type": "Feature", "properties": { "total_crashes": 1, "crash_dates": "2015-04-03T09:10:00-04:00" }, "geometry": { "type": "Point", "coordinates": [ -71.107141, 42.353438 ] } }, 5 | ]} -------------------------------------------------------------------------------- /src/data/tests/data/viz_preds_tests/single_prediction.json: -------------------------------------------------------------------------------- 
1 | { 2 | "0": { 3 | "segment_id": "001", 4 | "year": 2017, 5 | "week": 51, 6 | "prediction": 0.1223508492, 7 | "crash": 0, 8 | "pre_week": 0, 9 | "pre_month": 0, 10 | "pre_quarter": 0, 11 | "avg_week": 0.0065359477, 12 | "AADT": 22222, 13 | "SPEEDLIMIT": 20, 14 | "Struct_Cnd": 2, 15 | "Surface_Tp": 6, 16 | "F_F_Class": 3, 17 | "visionzero": 0, 18 | "id": "001", 19 | "speed_coalesced": 20.3420443264, 20 | "volume_coalesced": 3941.1356224647, 21 | "near_id": null, 22 | "Conflict": 0, 23 | "SPEEDLIMIT0": 0, 24 | "SPEEDLIMIT1": 0, 25 | "SPEEDLIMIT5": 0, 26 | "SPEEDLIMIT10": 0, 27 | "SPEEDLIMIT15": 0, 28 | "SPEEDLIMIT20": 1, 29 | "SPEEDLIMIT25": 0, 30 | "SPEEDLIMIT30": 0, 31 | "SPEEDLIMIT35": 0, 32 | "SPEEDLIMIT45": 0, 33 | "SPEEDLIMIT55": 0, 34 | "SPEEDLIMIT65": 0, 35 | "Struct_Cnd0": 0, 36 | "Struct_Cnd1": 0, 37 | "Struct_Cnd2": 1, 38 | "Struct_Cnd3": 0, 39 | "Struct_Cnd4": 0, 40 | "Surface_Tp0": 0, 41 | "Surface_Tp1": 0, 42 | "Surface_Tp2": 0, 43 | "Surface_Tp3": 0, 44 | "Surface_Tp4": 0, 45 | "Surface_Tp5": 0, 46 | "Surface_Tp6": 1, 47 | "Surface_Tp7": 0, 48 | "Surface_Tp8": 0, 49 | "F_F_Class0": 0, 50 | "F_F_Class1": 0, 51 | "F_F_Class2": 0, 52 | "F_F_Class3": 1, 53 | "F_F_Class4": 0, 54 | "F_F_Class5": 0, 55 | "F_F_Class7": 0, 56 | "log_AADT": 10.0088830676, 57 | "intersection": 0 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/data/tests/data/viz_preds_tests/single_prediction_viz.geojson: -------------------------------------------------------------------------------- 1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "LineString", "coordinates": [[-71.06858488357565, 42.35165031556542], [-71.06876751642436, 42.35161688446769]]}, "properties": {"segment_id": "001", "crash": 0, "prediction": 0.1223508492, "SPEEDLIMIT": 20, "segment": {"id": "001", "display_name": "Park Plaza between Columbus Avenue and Hadassah Way", "center_x": -71.06867620000001, "center_y": 
def _run_module(module, *args):
    """Run ``python -m <module> <args...>`` and fail on a non-zero exit.

    The command is passed as an argument list WITHOUT ``shell=True``.
    Combining a list with ``shell=True`` on POSIX makes only the first
    item the shell command (the rest become arguments to the shell
    itself), so the module would never actually be executed.

    Raises:
        subprocess.CalledProcessError: if the module exits non-zero.
    """
    # Function-scope import so this block does not depend on the
    # module-level import list being changed.
    import sys

    # sys.executable keeps the child on the same interpreter (and
    # therefore the same environment) that is running the tests.
    subprocess.check_call([sys.executable, '-m', module] + list(args))


def test_add_map(tmpdir):
    """Run the map pipeline end to end on the small ``test_add_map`` data.

    There are no explicit assertions: the test passes when every
    pipeline step exits with status 0.
    """
    # Copy test data into temp directory in appropriate place
    base_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'data')
    orig_path = os.path.join(base_path, 'test_add_map')
    path = os.path.join(tmpdir.strpath, 'data')

    data_path = os.path.join(path, "processed/maps")
    shutil.copytree(orig_path, data_path)

    config_file = os.path.join(base_path, 'config_features.yml')

    # To test the mapping, use much smaller versions of the osm
    # and osm3857 files, as well as much smaller versions of boston data

    # Then as in the standard workflow, extract_intersections
    # and create_segments need to be run (in the test directory)
    # and then the mapping can be run and tested

    # Extract and create on osm data
    _run_module(
        'data.extract_intersections',
        os.path.join(data_path, 'osm3857.shp'),
        '-d', path,
    )
    _run_module(
        'data.create_segments',
        '-d', path,
        '-r', os.path.join(data_path, 'elements.geojson'),
        '-c', config_file,
    )

    # Extract and create on supplemental map
    _run_module(
        'data.extract_intersections',
        os.path.join(data_path, 'ma_cob_small.shp'),
        '-d', path,
        '-n', 'boston',
    )
    _run_module(
        'data.create_segments',
        '-d', path,
        '-r', os.path.join(data_path, 'boston/elements.geojson'),
        '-n', 'boston',
        '-c', config_file,
    )

    # Above was all set up, now the testing part
    # and add features
    _run_module('data.add_map', path, 'boston')
def test_make_map(tmpdir):
    """
    make_map should write one feature to waze.geojson for every
    'jam' event found in the waze test file.
    """
    waze_file = os.path.join(TEST_FP, 'data', 'test_waze', 'test_waze.json')

    # Count the jam events in the raw input
    with open(waze_file) as source:
        jams = [event for event in geojson.load(source)
                if event['eventType'] == 'jam']

    output_dir = tmpdir.strpath
    add_waze_data.make_map(waze_file, output_dir)

    # Read back in the resulting map
    with open(os.path.join(output_dir, 'waze.geojson')) as result:
        features = geojson.load(result)['features']

    # Every jam in the original file should have produced one feature
    assert len(jams) == len(features)
def test_all(tmpdir):
    """
    Run extract_intersections, create_segments and join_segments_crash
    in sequence on a copy of the test data, then check the segment id
    the first crash was joined to.
    """

    # Copy test data into temp directory
    orig_path = os.path.dirname(
        os.path.abspath(__file__)) + '/data/'
    path = tmpdir.strpath + '/data'
    shutil.copytree(orig_path, path)
    filename = path + '/raw/ma_cob_spatially_joined_streets.shp'

    subprocess.check_call([
        'python',
        '-m',
        'data.extract_intersections',
        filename,
        '-d',
        path
    ])

    subprocess.check_call([
        'python',
        '-m',
        'data.create_segments',
        '-d',
        path,
        '-r',
        path + '/processed/maps/elements.geojson',
        '-c',
        path + '/config_features.yml'
    ])

    subprocess.check_call([
        'python',
        '-m',
        'data.join_segments_crash',
        '-d',
        path,
        '-c',
        path + '/config_features.yml'
    ])

    # Use a context manager so the file handle is closed deterministically
    with open(path + '/processed/crash_joined.json') as f:
        data = json.load(f)

    # TODO: previously 2, now 4, this may be because of ordering issues
    # with update
    assert data[0]['near_id'] == 4
def write_to_file(filename, d):
    """
    Dump a config dict to a yaml file, overwriting any existing file
    Args:
        filename - path of the yaml file to write
        d - the dict to serialize
    """
    # The module-level round_trip_dump helper is deprecated and no longer
    # available in ruamel.yaml >= 0.18; YAML() defaults to round-trip mode
    yaml = ruamel.yaml.YAML()
    with open(filename, "w") as f:
        yaml.dump(d, f)
config_dict['waze_features'] = {} 64 | config_dict['openstreetmap_features'] = {} 65 | config_dict['additional_map_features'] = { 66 | 'extra_map': 'test', 67 | 'continuous': {'AADT': 'test name'}, 68 | 'categorical': { 69 | 'Struct_Cnd': 'test name3', 70 | 'Surface_Tp': 'test name4', 71 | 'F_F_Class': 'test name5' 72 | } 73 | } 74 | config_dict['speed_limit'] = 'SPEEDLIMIT' 75 | 76 | write_to_file(yml_file, config_dict) 77 | config = data.config.Configuration(yml_file) 78 | 79 | assert set(config.categorical_features) == set([ 80 | 'SPEEDLIMIT', 'Struct_Cnd', 'Surface_Tp', 'F_F_Class']) 81 | assert config.continuous_features == ['AADT'] 82 | assert set(config.features) == set([ 83 | 'SPEEDLIMIT', 'Struct_Cnd', 'Surface_Tp', 'F_F_Class', 'AADT']) 84 | 85 | config_dict['data_source'] = [ 86 | {'filename': 'test_multi', 87 | 'feats': [ 88 | {'name': 'cat_test', 89 | 'feat_type': 'categorical'}, 90 | {'name': 'cont_test', 91 | 'feat_type': 'continuous'}, 92 | {'name': 'default_test'}, 93 | ]}] 94 | 95 | write_to_file(yml_file, config_dict) 96 | config = data.config.Configuration(yml_file) 97 | assert all([c in config.continuous_features for c in ['cont_test', 'default_test']]) 98 | assert 'cat_test' in config.categorical_features 99 | -------------------------------------------------------------------------------- /src/data/tests/test_extract_intersections.py: -------------------------------------------------------------------------------- 1 | from shapely.geometry import Point, LineString 2 | from .. 
def test_supplemental_arg_changes_content_of_config_file(tmpdir, monkeypatch):
    """
    A config generated with a supplemental file should match the
    pre-built config_brisbane_supplemental.yml fixture.
    """

    monkeypatch.setattr(initialize_city, 'geocode_address', mockreturn)

    # Generate a test config for Brisbane
    config_path = tmpdir.join('/test_config_brisbane_supplemental.yml')
    initialize_city.make_config_file(
        config_path,
        'Brisbane, Australia',
        'Australia/Brisbane',
        'brisbane',
        'test_crashes.csv',
        ['parking_tickets_dummy_file_1.csv']
    )

    with open(config_path, 'r') as test_file:
        test_file_contents = test_file.read()

    # Compare against the checked-in fixture. Previously both reads opened
    # the generated file, so the assertion could never fail.
    with open(os.path.join(
            TEST_FP, 'data', 'config_brisbane_supplemental.yml'), 'r'
    ) as expected_file:
        expected_file_contents = expected_file.read()

    assert test_file_contents == expected_file_contents
def test_make_rollup():
    """
    Tests total number of crashes per crash location is correctly calculated and
    list of unique crash dates per location is correctly generated
    """
    def crash(crash_id, date_occurred, latitude, longitude, address, mode):
        # Build one standardized crash record; key order matches the
        # standardized crash format used by the other fixtures
        return {
            "id": crash_id,
            "dateOccurred": date_occurred,
            "location": {
                "latitude": latitude,
                "longitude": longitude
            },
            "address": address,
            mode: 1
        }

    standardized_crashes = [
        crash(1, "2015-01-01T00:45:00-05:00", 42.365, -71.106,
              "GREEN ST & PLEASANT ST", "vehicle"),
        crash(1, "2015-04-15T00:45:00-05:00", 42.365, -71.106,
              "GREEN ST & PLEASANT ST", "pedestrian"),
        crash(1, "2015-10-20T00:45:00-05:00", 42.365, -71.106,
              "GREEN ST & PLEASANT ST", "vehicle"),
        crash(2, "2015-01-01T01:12:00-05:00", 42.361, -71.097,
              "LANDSDOWNE ST & MASSACHUSETTS AVE", "bike"),
        crash(3, "2015-01-01T01:54:00-05:00", 42.396, -71.127,
              "LOCKE ST & SHEA RD", "bike"),
        crash(3, "2015-01-01T01:54:00-05:00", 42.396, -71.127,
              "LOCKE ST & SHEA RD", "vehicle"),
    ]

    expected_rollup_total = gpd.GeoDataFrame()
    expected_rollup_total["coordinates"] = gpd.GeoSeries([
        Point(-71.106, 42.365),
        Point(-71.097, 42.361),
        Point(-71.127, 42.396)])
    expected_rollup_total["total_crashes"] = [3, 1, 2]
    expected_rollup_total["crash_dates"] = [
        "2015-01-01T00:45:00-05:00,2015-04-15T00:45:00-05:00,2015-10-20T00:45:00-05:00",
        "2015-01-01T01:12:00-05:00",
        "2015-01-01T01:54:00-05:00"
    ]

    expected_rollup_pedestrian = gpd.GeoDataFrame()
    expected_rollup_pedestrian["coordinates"] = gpd.GeoSeries([
        Point(-71.106, 42.365)
    ])
    expected_rollup_pedestrian["total_crashes"] = [1]
    expected_rollup_pedestrian["crash_dates"] = [
        "2015-04-15T00:45:00-05:00"
    ]

    expected_rollup_bike = gpd.GeoDataFrame()
    expected_rollup_bike["coordinates"] = gpd.GeoSeries([
        Point(-71.097, 42.361),
        Point(-71.127, 42.396)
    ])
    expected_rollup_bike["total_crashes"] = [1, 1]
    expected_rollup_bike["crash_dates"] = [
        "2015-01-01T01:12:00-05:00",
        "2015-01-01T01:54:00-05:00"
    ]

    # Built as a GeoSeries like the other expectations so the column
    # type is constructed the same way for every mode
    expected_rollup_vehicle = gpd.GeoDataFrame()
    expected_rollup_vehicle["coordinates"] = gpd.GeoSeries([
        Point(-71.106, 42.365),
        Point(-71.127, 42.396)
    ])
    expected_rollup_vehicle["total_crashes"] = [2, 1]
    expected_rollup_vehicle["crash_dates"] = [
        "2015-01-01T00:45:00-05:00,2015-10-20T00:45:00-05:00",
        "2015-01-01T01:54:00-05:00"
    ]
    split_columns = ['pedestrian', 'bike', 'vehicle']

    results = join_segments_crash.make_crash_rollup(standardized_crashes, split_columns)

    assert_frame_equal(results['all'], expected_rollup_total)
    assert_frame_equal(results['pedestrian'], expected_rollup_pedestrian)
    assert_frame_equal(results['bike'], expected_rollup_bike)
    # The vehicle expectation was previously built but never checked
    assert_frame_equal(results['vehicle'], expected_rollup_vehicle)
def test_make_preds_viz_boston(tmpdir):
    """
    Combine a single prediction with a single segment, write the result
    as geojson and compare it with the stored expected output.
    """
    # load the test predictions & segments
    predictions_path = os.path.join(DATA_FP, "single_prediction.json")
    segments_path = os.path.join(DATA_FP, "single_segment.geojson")
    expected_path = os.path.join(DATA_FP, "single_prediction_viz.geojson")

    predictions = pd.read_json(
        predictions_path, orient="index", typ="series", dtype=False)
    segments = pd.read_json(segments_path)["features"]

    # combine the two and write to file
    combined = make_preds_viz.combine_predictions_and_segments(
        predictions, segments)
    output_path = os.path.join(tmpdir.strpath, "preds_viz.geojson")
    make_preds_viz.write_preds_as_geojson(combined, output_path)

    # compare the new file's contents to test data
    actual = pd.read_json(output_path)
    expected = pd.read_json(expected_path)
    assert_frame_equal(actual, expected)
def test_write_all_preds_split_column(tmpdir):
    """
    When the config declares a 'pedestrian' split column, write_all_preds
    should produce processed/preds_viz_pedestrian.geojson.
    """
    config_dict = {
        'name': 'cambridge',
        'crashes_files': {
            'file1': {
                'optional': {
                    'split_columns': {
                        'pedestrian': {}
                    }
                }
            }
        },
        'city_latitude': 42.3600825,
        'city_longitude': -71.0588801,
        'city_radius': 15,
        'city': "Cambridge, Massachusetts, USA",
        'timezone': "America/New_York",
    }
    config_filename = os.path.join(tmpdir, 'test.yml')

    # The module-level round_trip_dump helper is deprecated and no longer
    # available in ruamel.yaml >= 0.18; YAML() defaults to round-trip mode
    yaml = ruamel.yaml.YAML()
    with open(config_filename, "w") as f:
        yaml.dump(config_dict, f)
    config = data.config.Configuration(config_filename)

    # makedirs creates the intermediate 'processed' directory as well
    maps_dir = os.path.join(tmpdir, 'processed', 'maps')
    os.makedirs(maps_dir)

    shutil.copy(
        os.path.join(DATA_FP, 'single_prediction.json'),
        os.path.join(
            tmpdir, 'processed', 'seg_with_predicted_pedestrian.json')
    )
    shutil.copy(
        os.path.join(DATA_FP, 'single_segment.geojson'),
        os.path.join(maps_dir, 'inter_and_non_int.geojson')
    )

    make_preds_viz.write_all_preds(tmpdir, config)
    assert os.path.exists(os.path.join(
        tmpdir, 'processed', 'preds_viz_pedestrian.geojson'))
def test_get_width():
    """
    Check get_width against a table of raw width strings and the
    integer each one is expected to produce.
    """
    expectations = [
        ('15.2', 15),
        ('', 0),
        ("['14.9', '12.2']", 0),
        ('t', 0),
    ]
    for raw_width, expected in expectations:
        assert osm_create_maps.get_width(raw_width) == expected
def mockreturn(config):
    """
    Replacement for osm_create_maps.get_graph used via monkeypatch:
    ignores the config and returns the graph stored in the
    osm_output.gpickle fixture.
    """
    # The pickle is a fixture checked in next to the tests (trusted input).
    # Use a context manager so the file handle is closed after loading.
    with open(os.path.join(TEST_FP, 'data', 'osm_output.gpickle'), 'rb') as f:
        return pickle.load(f)
# Scrape the Wunderground daily weather summary for every day of 2016
# in Boston and save the combined table.

# To run the script, insert an api key from Wunderground below:

apikey <- ""

library(jsonlite) #for parsing json output (fromJSON function)
library(plyr)

if (!nzchar(apikey))
{
  stop("Set 'apikey' to a Wunderground api key before running this script")
}

# Download the daily summary for one day of 2016 and return it as a
# one-row data frame with the pretty-printed date in 'dateFull'.
# The key comes from 'apikey' for every request; previously the urls
# contained the literal text "api_key" or a hard-coded key.
fetch_daily_summary <- function(month, day)
{
  url <- sprintf(
    "http://api.wunderground.com/api/%s/history_2016%02d%02d/q/MA/Boston.json",
    apikey, month, day)
  daily <- fromJSON(txt = url)
  daily <- as.data.frame(daily$history$dailysummary)
  daily$dateFull <- daily$date$pretty
  #drop the nested date dataframe that carries same information as dateFull
  daily[-1]
}

#number of days in each month of 2016 (february has 29 days, leap year)
days_in_month <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)

daily_frames <- list()
for (month in seq_along(days_in_month))
{
  for (day in seq_len(days_in_month[month]))
  {
    daily_frames[[length(daily_frames) + 1]] <- fetch_daily_summary(month, day)
  }
}

data <- do.call(rbind, daily_frames)
data <- plyr::rename(data, replace = c("dateFull" = "date")) #rename date variable
weatherBoston <- data
save(weatherBoston, file = "BostonWeather2016_Wunderground.Rda")
# Write the csv under its own name; previously it overwrote the .Rda file
write.csv(weatherBoston, file = "BostonWeather2016_Wunderground.csv")
def parse_date(date: str, timezone, time=None, time_format=None):
    """
    Turn a date (and optional time) into a datetime string
    in standardized (ISO 8601) format
    Args:
        date - a date or date time string
        timezone - timezone object providing localize(), e.g. a pytz
            timezone; applied when the parsed date carries no timezone
        time - optional time of day, only used when the parsed date
            is exactly midnight
        time_format - "military" (e.g. 155 or "0155"), "seconds"
            (seconds since midnight), or None for a free-form time string
    Returns:
        the ISO formatted date time string, or None if the date
        can't be parsed
    """

    # If date is badly formatted, skip
    try:
        # Date can either be a date or a date time
        date = date_parser.parse(date)
    except (ValueError, OverflowError):
        print("{} is badly formatted, skipping".format(date))
        return None

    # If there's no time in the date given, look at the time field
    # if available
    if date.hour == 0 and date.minute == 0 and date.second == 0 and time:

        if time_format == "military":
            # military times less than 4 chars require padding with
            # leading zeros, e.g 155 becomes 0155
            padded = str(time).zfill(4)
            try:
                clock = datetime.strptime(padded, '%H%M').time()
            except ValueError:
                # ignore invalid times (e.g. 9999, 1275 or non-numeric)
                # and fall back to midnight of the given day
                clock = datetime.min.time()
            date = datetime.combine(date.date(), clock)

        elif time_format == "seconds":
            try:
                date = date + timedelta(seconds=int(time))
            # if time can't be parsed, just use bare date
            except (TypeError, ValueError):
                pass

        else:
            try:
                date = date_parser.parse(
                    date.strftime('%Y-%m-%d ') + str(time)
                )
            # if time can't be parsed, just use bare date
            except (ValueError, OverflowError):
                pass

    # Add timezone if it wasn't included in the string formatting originally
    if not date.tzinfo:
        date = timezone.localize(date)
    # If the timezone was set to utc, reformat into local time with offset
    elif date.tzinfo == tz.tzutc():
        date = date.astimezone(timezone)

    return date.isoformat()


def parse_address(address):
    """
    Some cities have the lat/lon as part of the address.
    If that's the format, parse out these values
    Args:
        address - three line string: "<number> <street>", city, and
            "(<lat>, <lon>)"
    Returns:
        (street, lat, lon) with the leading house number dropped from
        the street, or (None, None, None) if the address is empty or
        not in this format
    """
    if not address:
        return None, None, None

    # splitlines copes with \r\n line endings and a trailing newline
    lines = address.splitlines()
    if len(lines) != 3 or not lines[2].strip():
        return None, None, None

    # Tolerate surrounding whitespace and a missing space after the comma
    coords = lines[2].strip().strip('()').split(',')
    if len(coords) != 2:
        return None, None, None
    try:
        lat, lon = float(coords[0]), float(coords[1])
    except ValueError:
        return None, None, None

    street = ' '.join(lines[0].split()[1:])
    return street, lat, lon


def validate_and_write_schema(schema_path, schema_values, output_file):
    """
    Validate a schema according to a schema file, and write to file
    Nothing is written if validation raises
    Args:
        schema_path - the schema filename
        schema_values - a list of dicts
        output_file - filename the validated values are written to as json
    """

    with open(schema_path) as schema:
        validate(schema_values, json.load(schema))

    with open(output_file, "w") as f:
        json.dump(schema_values, f)

    print("- output written to {}".format(output_file))
= parser.parse_args() 34 | BASE_FP = os.path.join(args.datadir) 35 | 36 | config = data.config.Configuration(args.config) 37 | if config.name == 'boston': 38 | volume_counts = BostonVolumeParser(args.datadir).get_volume() 39 | write_volume(volume_counts) 40 | else: 41 | print("No volume data given for {}".format(config.name)) 42 | -------------------------------------------------------------------------------- /src/data_standardization/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data_standardization/tests/__init__.py -------------------------------------------------------------------------------- /src/data_standardization/tests/data/8811_NA_NA_83_PEARL-ST_CHARLESTOWN_24-HOURS_XXX_01-11-2017.XLSX: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data_standardization/tests/data/8811_NA_NA_83_PEARL-ST_CHARLESTOWN_24-HOURS_XXX_01-11-2017.XLSX -------------------------------------------------------------------------------- /src/data_standardization/tests/data/waze/2018-10-15-20-15.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data_standardization/tests/data/waze/2018-10-15-20-15.json.gz -------------------------------------------------------------------------------- /src/data_standardization/tests/data/waze/2018-10-16-08-00.json: -------------------------------------------------------------------------------- 1 | {"startTimeMillis": 1539676620000, "alerts": [{"type": "WEATHERHAZARD", "subtype": "HAZARD_ON_ROAD_CONSTRUCTION", "city": "Cambridge, MA", "pubMillis": 1539607721062, "location": {"y": 42.371072, "x": -71.114300}}, {"type": 
"NONE", "city": "Boston, MA", "pubMillis": 1537582060620, "location": {"y": 42.371072, "x": -71.114300}}], "jams": [{"roadType": 1, "city": "Cambridge, MA", "pubMillis": 1539670005835}, {"city": "Boston, MA", "pubMillis": 1539610200000}], "startTime": "2018-10-16 07:57:00:000", "endTime": "2018-10-16 08:00:00:000"} 2 | -------------------------------------------------------------------------------- /src/data_standardization/tests/data/waze/2018-10-17-16-15.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/data_standardization/tests/data/waze/2018-10-17-16-15.json.gz -------------------------------------------------------------------------------- /src/data_standardization/tests/test-schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-06/schema#", 3 | "title": "Test", 4 | "description": "Defines the structure of a set of information", 5 | "type": "array", 6 | "items": { 7 | "title": "Test", 8 | "description": "Defines the structure of a test", 9 | "type": "object", 10 | "properties": { 11 | "id": { 12 | "description": "Unique identifier of test", 13 | "type": ["string", "number"] 14 | }, 15 | "dateOccurred": { 16 | "description": "Date test occurred, ISO8601 formatted", 17 | "type": "string", 18 | "format": "date-time" 19 | }, 20 | "location": { 21 | "description": "Coordinates of test, WGS84 formatted", 22 | "type": "object", 23 | "properties": { 24 | "latitude": { 25 | "description": "Latitude of test", 26 | "type": "number" 27 | }, 28 | "longitude": { 29 | "description": "Longitude of test", 30 | "type": "number" 31 | } 32 | } 33 | } 34 | }, 35 | "required": ["id", "dateOccurred", "location"] 36 | } 37 | } 38 | -------------------------------------------------------------------------------- 
def test_is_readable_ATR():
    """
    is_readable_ATR should reject the two unreadable sample filenames
    and accept the readable one.
    """
    parser = BostonVolumeParser(os.path.abspath(__file__))

    unreadable = [
        '7147_NA_NA_53_CLAPP-ST_DORCHESTER_24-HOURS_SPEED_02-25-2013.XLS',
        '8652_NA_NA_0_SOUTHWEST-CORRIDOR_ROXBURY_48-HOURS_XXX_09-27-2016.XLS',
    ]
    for filename in unreadable:
        assert not parser.is_readable_ATR(filename)

    readable = '8811_NA_NA_83_PEARL-ST_CHARLESTOWN_24-HOURS_XXX_01-11-2017.XLSX'
    assert parser.is_readable_ATR(readable)
def test_validate_and_write_schema(tmpdir):
    """
    Round-trip a single record through validate_and_write_schema.

    Writes one record that has the fields required by test-schema.json
    (id, dateOccurred, location) to a file under tmpdir, then reads the
    file back and checks the loaded JSON equals the input.
    """
    schema_path = os.path.join(TEST_FP, 'test-schema.json')
    output_path = os.path.join(tmpdir.strpath, 'test.json')

    values = [{
        "id": "1",
        "dateOccurred": "2009-01-08T20:53:00Z",
        "location": {
            "latitude": 42.37857940800046,
            "longitude": -71.11657724799966
        }
    }]
    standardization_util.validate_and_write_schema(
        schema_path,
        values,
        output_path
    )

    # Now load the json back and make sure it matches; the context manager
    # closes the handle instead of leaving it to the garbage collector
    with open(output_path) as f:
        items = json.load(f)
    assert items == values
open(filename, "w") as f: 39 | ruamel.yaml.round_trip_dump(config_dict, f) 40 | config = data.config.Configuration(filename) 41 | 42 | results = standardize_waze_data.read_snapshots(os.path.join( 43 | TEST_FP, 'data', 'waze'), config) 44 | 45 | expected_results = [ 46 | { 47 | 'pubMillis': 1539632995870, 48 | 'city': 'Cambridge, MA', 49 | 'eventType': 'jam', 50 | 'pubTimeStamp': '2018-10-15 15:49:55', 51 | 'snapshotId': 1 52 | }, 53 | { 54 | 'country': 'US', 55 | 'subtype': '', 56 | 'pubMillis': 1539632447442, 57 | 'city': 'Cambridge, MA', 58 | 'type': 'JAM', 59 | 'reportRating': 2, 60 | 'location': { 61 | 'latitude': 42.373807, 62 | 'longitude': -71.112465 63 | }, 64 | 'eventType': 'alert', 65 | 'pubTimeStamp': '2018-10-15 15:40:47', 66 | 'snapshotId': 1 67 | }, 68 | { 69 | 'roadType': 1, 70 | 'city': 'Cambridge, MA', 71 | 'pubMillis': 1539670005835, 72 | 'eventType': 'jam', 73 | 'pubTimeStamp': '2018-10-16 02:06:45', 74 | 'snapshotId': 2 75 | }, 76 | { 77 | 'type': 'WEATHERHAZARD', 78 | 'subtype': 'HAZARD_ON_ROAD_CONSTRUCTION', 79 | 'city': 'Cambridge, MA', 80 | 'pubMillis': 1539607721062, 81 | 'location': { 82 | 'latitude': 42.371072, 83 | 'longitude': -71.1143 84 | }, 85 | 'eventType': 'alert', 86 | 'pubTimeStamp': '2018-10-15 08:48:41', 87 | 'snapshotId': 2 88 | }, 89 | { 90 | 'updateDate': 'Wed Oct 17 16:14:17 +0000 2018', 91 | 'speed': 3.79, 92 | 'city': 'Cambridge, MA', 93 | 'detectionDateMillis': 1539788890781, 94 | 'detectionDate': 'Wed Oct 17 15:08:10 +0000 2018', 95 | 'type': 'Small', 96 | 'eventType': 'irregularity', 97 | 'snapshotId': 3 98 | } 99 | ] 100 | assert results == expected_results 101 | 102 | results = standardize_waze_data.read_snapshots( 103 | os.path.join(TEST_FP, 'data', 'waze'), 104 | config, 105 | startdate='2018-10-16', 106 | enddate='2018-10-16' 107 | ) 108 | assert results == [ 109 | { 110 | 'roadType': 1, 111 | 'city': 'Cambridge, MA', 112 | 'pubMillis': 1539670005835, 113 | 'eventType': 'jam', 114 | 'pubTimeStamp': '2018-10-16 
02:06:45', 115 | 'snapshotId': 1 116 | }, 117 | { 118 | 'type': 'WEATHERHAZARD', 119 | 'subtype': 'HAZARD_ON_ROAD_CONSTRUCTION', 120 | 'city': 'Cambridge, MA', 121 | 'pubMillis': 1539607721062, 122 | 'location': { 123 | 'latitude': 42.371072, 124 | 'longitude': -71.1143 125 | }, 126 | 'eventType': 'alert', 127 | 'pubTimeStamp': '2018-10-15 08:48:41', 128 | 'snapshotId': 1 129 | }, 130 | ] 131 | -------------------------------------------------------------------------------- /src/features/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/features/.gitkeep -------------------------------------------------------------------------------- /src/features/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/features/__init__.py -------------------------------------------------------------------------------- /src/features/build_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/features/build_features.py -------------------------------------------------------------------------------- /src/features/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for feature generation 3 | """ 4 | -------------------------------------------------------------------------------- /src/features/tests/test_make_canon.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | import pandas as pd 4 | from .. 
def test_aggregate_roads():
    """
    Test aggregate_roads together with combine_crash_with_segments.

    Calls make_canon_dataset.aggregate_roads on the fixtures under DATA_FP,
    passes both of its return values to combine_crash_with_segments, and
    checks the combined dataframe:
    - the inferred dtype of the 'segment_id' column is 'string'
    - the columns are exactly the expected set
    - the shape is (14, 11)
    - the distinct values of the 'width' column match the expected set
    """
    aggregated, cr_con = make_canon_dataset.aggregate_roads(
        ['width', 'lanes', 'hwy_type', 'signal', 'oneway'],
        ['osm_speed'],
        DATA_FP,
        ['bike', 'pedestrian', 'vehicle']
    )
    expected_columns = {
        'width', 'lanes', 'hwy_type', 'osm_speed', 'signal', 'oneway',
        'segment_id', 'crash', 'bike', 'pedestrian', 'vehicle'
    }

    # The comparison below is set-based, so only distinct widths matter;
    # row order and multiplicity are not checked here
    expected_width = {24, 15, 5, 12}

    cr_con_roads = make_canon_dataset.combine_crash_with_segments(
        cr_con, aggregated)

    assert pd.api.types.infer_dtype(cr_con_roads.segment_id) == 'string'
    assert set(cr_con_roads.columns.tolist()) == expected_columns
    assert cr_con_roads.shape == (14, 11)
    assert set(cr_con_roads.width) == expected_width
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/models/.gitkeep -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/models/__init__.py -------------------------------------------------------------------------------- /src/models/make_weekly.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/models/make_weekly.py -------------------------------------------------------------------------------- /src/models/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/models/tests/__init__.py -------------------------------------------------------------------------------- /src/models/tests/data/features.yml: -------------------------------------------------------------------------------- 1 | f_cat: 2 | - width 3 | f_cont: 4 | - lanes 5 | - hwy_type 6 | - osm_speed 7 | - signal 8 | - oneway 9 | - width_per_lane 10 | - jam_percent 11 | -------------------------------------------------------------------------------- /src/models/tests/test_train_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ruamel.yaml 3 | import pandas as pd 4 | from .. 
import train_model 5 | import data.config 6 | 7 | 8 | TEST_FP = os.path.dirname(os.path.abspath(__file__)) 9 | 10 | 11 | def test_get_features(tmpdir): 12 | test_data = pd.DataFrame(data={ 13 | 'width': [10, 12], 14 | 'signal': [1, 0], 15 | 'jam_percent': [1, 12], 16 | 'lanes': [2, 1] 17 | }) 18 | config_dict = { 19 | 'name': 'cambridge', 20 | 'city_latitude': 42.3600825, 21 | 'city_longitude': -71.0588801, 22 | 'city_radius': 15, 23 | 'crashes_files': {'test': {}}, 24 | 'city': "Cambridge, Massachusetts, USA", 25 | 'timezone': "America/New_York", 26 | 'openstreetmap_features': { 27 | 'categorical': { 28 | 'signal': 'Signal', 29 | 'test_missing': 'Missing Field' 30 | }, 31 | 'continuous': {'missing': 'Missing Field'} 32 | }, 33 | 'atr': '', 34 | 'tmc': '', 35 | 'concern': '' 36 | } 37 | 38 | config_filename = os.path.join(tmpdir, 'test.yml') 39 | with open(config_filename, "w") as f: 40 | ruamel.yaml.round_trip_dump(config_dict, f) 41 | config = data.config.Configuration(config_filename) 42 | 43 | f_cat, f_cont, feats = train_model.get_features(config, test_data) 44 | assert f_cat == ['signal'] 45 | assert f_cont == [] 46 | assert feats == ['signal'] 47 | 48 | def test_process_features(tmpdir): 49 | test_data = pd.DataFrame(data={ 50 | 'width': [10, 12], 51 | 'signal': [1, 0], 52 | 'jam_percent': [0, 0], 53 | 'lanes': [2, 1], 54 | 'segment_id': ['001', '002'] 55 | }) 56 | f_cat = ['signal', 'lanes'] 57 | f_cont = ['width', 'jam_percent'] 58 | features = ['signal', 'lanes', 'width', 'jam_percent'] 59 | test_data, features, lm_features = train_model.process_features(features, f_cat, f_cont, test_data) 60 | assert set(features) == set(['intersection', 'signal_1', 'signal_0', 'log_width', 'lanes_2', 'lanes_1']) 61 | assert set(lm_features) == set(['intersection', 'signal_1', 'log_width', 'lanes_2']) 62 | 63 | 64 | def test_initialize_and_run(tmpdir): 65 | # For now, just test the model runs 66 | model = pd.read_csv(os.path.join(TEST_FP, 'data', 'data_model.csv')) 67 | 
# Since we're going to test 68 | features = ['lanes0', 'oneway1', 'log_width', 'lanes1', 'signal2', 69 | 'hwy_type1', 'hwy_type5', 'oneway0', 'signal1', 'hwy_type9', 70 | 'lanes3', 'lanes2', 'intersection', 'osm_speed0', 71 | 'osm_speed25', 'signal0', 'hwy_type0'] 72 | train_model.initialize_and_run(model, features, features, 'target', 73 | tmpdir, seed=1) 74 | -------------------------------------------------------------------------------- /src/showcase/.dockerignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /src/showcase/.gcloudignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /src/showcase/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:alpine3.7 2 | RUN pip install flask 3 | COPY . 
import os
from flask import Flask, render_template, send_from_directory


app = Flask(__name__)


# Default config script handed to the template; can be overridden through
# the CONFIG_FILE environment variable (see index)
CONFIG_FILE = os.path.join('static', 'config.js')


# The <path:path> converter binds the remainder of the URL (slashes
# included) to the view's `path` argument. Without it the rule only matches
# '/data/' and Flask calls static_files() with no argument.
@app.route('/data/<path:path>')
def static_files(path):
    """
    Serve a file from the 'data' directory.

    Args:
        path - path of the requested file below 'data'
    """
    return send_from_directory('data', path)


@app.route('/', methods=['GET', 'POST'])
def index():
    """
    Render index.html with the mapbox token and the config file to load.

    The token is read from the MAPBOX_TOKEN environment variable; a
    KeyError is raised if it is not set. If the CONFIG_FILE environment
    variable is set, it replaces the module-level CONFIG_FILE.
    """
    global CONFIG_FILE
    if 'CONFIG_FILE' in os.environ:
        CONFIG_FILE = os.environ['CONFIG_FILE']
    return render_template(
        'index.html',
        mapbox_token=os.environ['MAPBOX_TOKEN'],
        config_file=CONFIG_FILE
    )


if __name__ == '__main__':

    app.run(host='0.0.0.0')
-------------------------------------------------------------------------------- /src/showcase/nginx.conf: -------------------------------------------------------------------------------- 1 | server { 2 | sub_filter "{{ config_file }}" "static/gcp_config.js"; 3 | sub_filter "{{ mapbox_token }}" "pk.eyJ1IjoidGVycnlmODIiLCJhIjoiY2poOXlvc2NnMGdoNDM3cWc1bHVlejNtMSJ9.JPUsgcaeW0r12m5sBEcvVw"; 4 | listen 8080; 5 | server_name localhost; 6 | location / { 7 | root /usr/share/nginx/html; 8 | index index.html index.htm; 9 | } 10 | } -------------------------------------------------------------------------------- /src/showcase/run_all_cities.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import subprocess 4 | import argparse 5 | 6 | 7 | DATA_FP = os.path.dirname( 8 | os.path.dirname( 9 | os.path.dirname( 10 | os.path.abspath(__file__)))) + '/data/' 11 | 12 | 13 | if __name__ == '__main__': 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--forceupdate', action='store_true', 17 | help='Whether to force update the maps') 18 | # Can also choose which steps of the process to run 19 | parser.add_argument('--onlysteps', 20 | help="Give list of steps to run, as comma-separated " + 21 | "string. 
def parse_addresses(directory, filename, city, addressfield,
                    mapboxtoken=None):
    """
    Geocode every address in a csv file and update the geocode cache.

    Each row's address is built as '<address column> <city>' and looked up
    with lookup_address; the outcome is stored in the cache under that
    address. Counts per status are printed, and the cache is written back
    to <directory>/processed/geocoded_addresses.csv.

    Args:
        directory - directory containing the 'processed' subdirectory
        filename - csv file with the addresses to geocode
        city - city string appended to every address
        addressfield - name of the csv column holding the address
        mapboxtoken - optional token passed through to lookup_address
    Returns:
        results - a list that is currently always empty
    """
    cache_path = os.path.join(
        directory, 'processed', 'geocoded_addresses.csv')
    cached = read_geocode_cache(filename=cache_path)

    # NOTE(review): nothing is ever appended to results, so callers always
    # get an empty list; kept as-is to avoid changing the return value
    results = []
    succeeded = 0
    failed = 0
    other = 0

    # Read in the csv file; the csv module expects files opened with
    # newline='' so newlines inside quoted fields are handled correctly
    with open(filename, newline='') as f:
        for row in csv.DictReader(f):
            address = row[addressfield] + ' ' + city
            geocoded_add, lat, lng, status = lookup_address(
                address, cached, mapboxtoken=mapboxtoken, city=city,
                strict=True)
            cached[address] = [geocoded_add, lat, lng, status]

            if status == 'S':
                succeeded += 1
            elif status == 'F':
                failed += 1
            else:
                # Any status other than 'S'/'F' is reported as a timeout
                other += 1

    print('Number successfully geocoded: {}'.format(succeeded))
    print('Unable to geocode: {}'.format(failed))
    print('Timed out on {} addresses'.format(other))

    # Write out the cache (newline='' leaves the csv writer's own line
    # terminator untouched, exactly as newline='\n' did)
    with open(cache_path, 'w', newline='') as csvfile:

        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow([
            'Input Address',
            'Output Address',
            'Latitude',
            'Longitude',
            'Status'
        ])

        for name, value in cached.items():
            writer.writerow([name] + value)

    return results
-------------------------------------------------------------------------------- /src/tools/tests/data/make_map_multilinestring.geojson: -------------------------------------------------------------------------------- 1 | {"type": "FeatureCollection", "features": [{"type": "Feature", "id": "682", "geometry": {"type": "MultiLineString", "coordinates": [[[-71.10108933357631, 42.36919958642794], [-71.1010657, 42.3692321], [-71.1010043, 42.36931649999999]], [[-71.1008465100983, 42.36925311568192], [-71.10092959999999, 42.369286499999994], [-71.1010043, 42.36931649999999]], [[-71.10116315079033, 42.369378444080525], [-71.101119, 42.36936119999999], [-71.1010043, 42.36931649999999]], [[-71.10092027806371, 42.369433798702836], [-71.10094689999998, 42.36939659999999], [-71.1010043, 42.36931649999999]]]}, "properties": {"id": "682", "access": null, "bridge": null, "cycleway": null, "from": null, "highway": null, "junction": null, "key": null, "lanes": 3, "length": null, "maxspeed": null, "name": null, "oneway": 0, "osmid": null, "ref": null, "to": null, "tunnel": null, "width": 24, "hwy_type": null, "cycleway_type": 1, "osm_speed": null, "signal": null, "width_per_lane": 8, "segment_id": null, "dead_end": null, "streets": null, "intersection": null, "jam_percent": 24.80037576326914, "jam": 1, "avg_jam_speed": null, "avg_jam_level": 4, "alert_WEATHERHAZARD": null, "alert_JAM": null, "alert_ROAD_CLOSED": null, "alert_ACCIDENT": null, "orig_id": null, "inter": null, "display_name": "Broadway and Prospect Street", "center_y": 42.36931670366326, "center_x": -71.10100482259885, "intersection_segments": null, "parking_tickets": 69, "crosswalk": null}}]} -------------------------------------------------------------------------------- /src/tools/tests/data/to_geocode.csv: -------------------------------------------------------------------------------- 1 | Date,Location,ID 2 | 01/02/2016,"21 GREYCLIFF RD",1 3 | 01/03/2016,"216 SAVIN HILL AVE Boston, MA",2 
def mockreturn(address, cached, city, mapboxtoken, strict):
    """
    Stand-in for lookup_address that never touches the network.

    Returns the cached entry when the address is already in the cache;
    otherwise returns one fixed, successful geocoding result. The city,
    mapboxtoken and strict arguments are accepted but ignored.
    """
    if address not in cached:
        canned_result = [
            "216 Savin Hill Ave, Dorchester, MA 02125",
            42.3092288,
            -71.0480357,
            'S',
        ]
        return canned_result
    return cached[address]
def test_get_buffer():
    """
    Exercise make_map_subset.get_buffer on three cases: a center point that
    has features within 20 units of it, a center point that has none, and
    a map file whose only feature is a MultiLineString.
    """
    map_fp = os.path.join(TEST_FP, 'data', 'test_make_map.geojson')
    center_lat = 42.3693239
    center_lon = -71.10103649999999

    results = make_map_subset.get_buffer(map_fp, center_lat, center_lon, 20)
    features = results['features']
    assert len(features) == 5

    lines = [feat for feat in features
             if feat['geometry']['type'] == 'LineString']
    assert len(lines) == 4
    point = [feat for feat in features
             if feat['geometry']['type'] == 'Point']
    assert len(point) == 1

    # Make sure that all the resulting features are at least partially
    # within the buffer
    buff_poly = get_reproject_point(
        center_lat,
        center_lon,
        transformer_4326_to_3857
    ).buffer(20)

    # To do this, have to convert the points and linestrings back to 3857
    for record in reproject_records(lines):
        assert record['geometry'].intersects(buff_poly)

    point_coords = point[0]['geometry']['coordinates']
    point_3857 = get_reproject_point(
        point_coords[1],
        point_coords[0],
        transformer_4326_to_3857)
    assert point_3857.within(buff_poly)

    # A center with nothing nearby gives back an empty list
    empty_results = make_map_subset.get_buffer(map_fp, 42.3601, 71.0589, 20)
    assert empty_results == []

    # Test multilinestring
    multi_results = make_map_subset.get_buffer(
        os.path.join(TEST_FP, 'data', 'make_map_multilinestring.geojson'),
        42.3693167036633,
        -71.1010048225989,
        20
    )
    assert len(multi_results['features']) == 1
def add_feature(filename, feat_info):
    """
    Add a new feature to a config file

    The file is loaded and re-written in ruamel.yaml's round-trip mode so
    that existing comments and key order are kept.

    Args:
        filename - config file, updated in place
        feat_info - a list consisting of
            - feature set type (e.g. openstreetmap_features)
            - feature type (categorical or continuous)
            - feature (the name of the feature, e.g. width)
            - feature name (human readable feature name)
    If the feature set type doesn't exist, it will be added,
    but it needs to be in the set of feature set types possible:
    openstreetmap_features or waze_features

    If feat_info is invalid, a message is printed and the file is neither
    read nor written.
    """
    # Validate the request first so a bad one never touches the file
    if len(feat_info) != 4:
        print("Wrong number of args to -a")
        return

    feat_set, feat_type, feat, feat_name = feat_info
    if feat_set not in ('openstreetmap_features', 'waze_features'):
        print("feature set given is not valid")
        return

    # YAML() defaults to round-trip mode; the module-level
    # round_trip_load/round_trip_dump helpers are deprecated
    yaml = ruamel.yaml.YAML()
    with open(filename, 'r') as myfile:
        config = yaml.load(myfile)

    # An empty config file loads as None
    if config is None:
        config = ruamel.yaml.comments.CommentedMap()

    # If the feature set doesn't exist, add it at the end of the config
    if feat_set not in config:
        config.insert(
            len(config), feat_set, ruamel.yaml.comments.CommentedMap())

    # if the feat_type doesn't exist, add it
    if feat_type not in config[feat_set]:
        config[feat_set][feat_type] = ruamel.yaml.comments.CommentedMap()

    # if the feature does not exist, add it
    if feat not in config[feat_set][feat_type]:
        config[feat_set][feat_type][feat] = feat_name
    else:
        print("Feature already exists, skipping")

    with open(filename, "w") as f:
        yaml.dump(config, f)
if __name__ == '__main__':
    # Given a yml file mapping each city to its waze feed url, and a
    # directory to write to, gzip each feed's json and write it to
    # <dirname>/<city>/<utc timestamp>.json.gz
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', "--file", type=str, required=True,
                        help="yml file containing city and waze feed urls")
    parser.add_argument('-d', "--dirname", type=str, required=True,
                        help="directory to write results to")

    args = parser.parse_args()

    os.makedirs(args.dirname, exist_ok=True)

    with open(args.file) as feeds_file:
        # An empty yml file loads as None
        feeds = yaml.safe_load(feeds_file) or {}

    # Without a timeout a stalled feed would hang the job indefinitely
    request_timeout = 30
    failed_cities = []

    for city, url in feeds.items():
        # A broken feed must not stop the remaining cities from being saved
        try:
            response = requests.get(url, timeout=request_timeout)
            response.raise_for_status()
            feed = response.json()
        except (requests.RequestException, ValueError) as err:
            print("Could not fetch waze feed for {}: {}".format(city, err))
            failed_cities.append(city)
            continue

        dirname = os.path.join(args.dirname, city)
        os.makedirs(dirname, exist_ok=True)

        # Filename is the current minute, in utc time
        timestamp = datetime.datetime.now(
            datetime.timezone.utc).strftime("%Y-%m-%d-%H-%M")
        json_bytes = json.dumps(feed).encode('utf-8')

        outfile = os.path.join(dirname, timestamp + '.json.gz')
        with gzip.open(outfile, 'wb') as gz_file:
            gz_file.write(json_bytes)

    # Exit non-zero so a scheduler can tell that some feeds were missed
    if failed_cities:
        raise SystemExit(
            "Failed to fetch feeds for: " + ", ".join(failed_cities))
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/insight-lane/crash-model/39b9c6958582f158adce8f746cd3844e3a29eec8/src/visualization/.gitkeep -------------------------------------------------------------------------------- /src/visualization/README.md: -------------------------------------------------------------------------------- 1 | # Crash model visualization 2 | 3 | This directory contains the code relevant to the visualization efforts for this project. 4 | 5 | ## Visualization Products 6 | 7 | _risk_map.py_ - This script can be used to plot predictions generated from multiple models on a single Leaflet map of Boston. It color-codes each segment based on the magnitude of the predicted risk. 8 | 9 | To run this script, you need the following inputs: 10 | - inter_and_non_int.shp (created in create_segments.py) 11 | - csv files of predictions (each row should have 1 prediction per segment and be stored in the `data/processed/` directory) 12 | 13 | The script takes the following flag arguments on the command line: 14 | 15 | -m = model names (these will be the names of the layers on your map) 16 | 17 | -f = csv file names (one for each model and specified in the same order as the model names) 18 | 19 | -c = names of the predictions columns (one for each file and specified in the same order as the model names) 20 | 21 | -n = optional flag to indicate if predictions need to be normalized 22 | 23 | An example of how to run this script to plot the output from two models is as follows: 24 | ``` 25 | python risk_map.py -m model1 model2 -f model1_output.csv model2_output.csv -c risk_score preds 26 | ``` 27 | 28 | _plot_points.py_ - This script can be used to plot point-level data on a Leaflet map of Boston. 29 | 30 | To run this script, you need the following inputs: 31 | - csv files of point-level data (there should be separate columns named "X" and "Y" for the X and Y coordinates.
"""
Title: plot_points.py

Author: @andhint, @bpben, @alicefeng

This script visualizes point-level data on a map.

Usage:
    --name: name of the data to be plotted
            this will be used as the name of the layers in the map so they must be unique
    --filename: filename of the dataset
            must be csvs with separate columns named "X" and "Y" for the X and Y coordinates

Inputs:
    csv files of point-level data to be visualized

Output:
    plot_points.html - a Leaflet map with point-level data plotted on it
"""

import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import folium
from folium import FeatureGroup, CircleMarker
import argparse
import os


# all datasets must be stored in the "data/processed/" directory
BASE_DIR = os.path.dirname(
    os.path.dirname(
        os.path.dirname(
            os.path.abspath(__file__))))

DATA_FP = BASE_DIR + '/data/processed/'


# parse arguments
parser = argparse.ArgumentParser(description="Plot point-level data on a map")
# Both lists are required: they are zipped together below
parser.add_argument("-n", "--name", nargs="+", required=True,
                    help="name of the layer, must be unique")
parser.add_argument("-f", "--filename", nargs="+", required=True,
                    help="name of the dataset file to be plotted on the map, must specify at least 1")
parser.add_argument("-lat", "--latitude", type=float,
                    help="alternate latitude for the base map")
parser.add_argument("-lon", "--longitude", type=float,
                    help="alternate longitude for the base map")
parser.add_argument("-dir", "--datadir",
                    help="alternate data directory for the files")

args = parser.parse_args()

# zip layer names and filenames
if len(args.name) == len(args.filename):
    match = list(zip(args.name, args.filename))
else:
    raise ValueError("Number of layers and files must match")

# Compare against None so that a coordinate of 0 is not replaced by the
# Boston default
latitude = args.latitude if args.latitude is not None else 42.3601
longitude = args.longitude if args.longitude is not None else -71.0589

if args.datadir:
    DATA_FP = args.datadir


def process_data(filename):
    """Preps data for plotting on a map

    Reads in dataset with separate columns for X, Y coordinates and converts them into (lat, long) points

    Args:
        filename: name of the file with the data, relative to the data
            directory

    Returns:
        a dataframe with point geometries added to it
    """
    # os.path.join works whether or not the data directory ends in a slash
    df = pd.read_csv(os.path.join(DATA_FP, filename))
    geometry = [Point(xy) for xy in zip(df.X, df.Y)]

    geo_df = gpd.GeoDataFrame(df, crs='EPSG:4326', geometry=geometry)

    return geo_df


def add_layer(dataset, layername, mapname, color):
    """Plots points on a Leaflet map

    Creates a FeatureGroup to hold all of the points.
    FeatureGroup is added to the map as a layer.

    Args:
        dataset: a dataframe with the data to be plotted
        layername: name to be used for the layer
        mapname: name of the map to be plotted on
        color: color used for the points in the layer

    Returns:
        None; the layer of points is added to the map in place
    """
    feature_group = FeatureGroup(name=layername)
    for point in dataset['geometry']:
        CircleMarker(location=[point.y, point.x],
                     radius=4,
                     color=color,
                     fill_color=color).add_to(feature_group)

    feature_group.add_to(mapname)


### Make map

# First create basemap
boston_map = folium.Map(
    [latitude, longitude], tiles='Cartodb dark_matter', zoom_start=12)
folium.TileLayer('Cartodb Positron').add_to(boston_map)

# Create sequence of colors so different layers appear in different colors
colors = ['#66c2a5', '#fc8d62', '#8da0cb', '#e78ac3', '#a6d854']

# Plot data as separate layers
for i, (layername, filename) in enumerate(match):
    data = process_data(filename)
    # Reuse the palette from the start when there are more layers than colors
    add_layer(data, layername, boston_map, colors[i % len(colors)])

# Add control to toggle between model layers
folium.LayerControl(position='bottomright').add_to(boston_map)


# Save map as separate html file
boston_map.save('plot_points.html')
def test_process_data():
    """
    process_data on the single-segment fixture and its prediction csv should
    give one row whose mean prediction rounds to 0.12.
    """
    data_dir = os.path.join(TEST_FP, 'data')
    segment_fp = os.path.join(data_dir, 'single_segment.geojson')
    prediction_fp = os.path.join(data_dir, 'test_prediction.csv')

    streets = gpd.read_file(segment_fp)
    streets_w_risk = process_data(streets, prediction_fp, 'prediction')

    n_rows = streets_w_risk.shape[0]
    assert n_rows == 1

    mean_prediction = streets_w_risk['prediction'].mean()
    assert mean_prediction.round(2) == 0.12
13 | }, 14 | { 15 | "id": 14809, 16 | "source": "visionzero", 17 | "dateCreated": "2016-01-19T14:57:03.000Z", 18 | "status": "Unassigned", 19 | "category": "people don't yield while going straight", 20 | "location": { 21 | "latitude": 42.33938397670106, 22 | "longitude": -71.0994798889095 23 | }, 24 | "summary": "It's terrifying to walk over here. It seems like it's impossible to get the cars to stop stop, even at the crosswalks." 25 | }, 26 | { 27 | "id": 14810, 28 | "source": "visionzero", 29 | "dateCreated": "2016-01-19T15:36:25.000Z", 30 | "status": "Unassigned", 31 | "category": "it’s hard to see / low visibility", 32 | "location": { 33 | "latitude": 42.349364649630935, 34 | "longitude": -71.06656509857143 35 | }, 36 | "summary": "cars coming around the corner of this wide one street are speeding and not visible for persons on the crosswalk" 37 | } 38 | ] 39 | -------------------------------------------------------------------------------- /standards/concerns-schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-06/schema#", 3 | "title": "Concerns", 4 | "description": "Defines the structure of a set of concerns", 5 | "type": "array", 6 | "items": { 7 | "title": "Concern", 8 | "description": "Defines the structure of a concern", 9 | "type": "object", 10 | "properties": { 11 | "id": { 12 | "description": "Unique identifier of concern", 13 | "type": ["string", "number"] 14 | }, 15 | "source": { 16 | "description": "Source of concern", 17 | "type": "string", 18 | "enum": ["seeclickfix", "visionzero"] 19 | }, 20 | "dateCreated": { 21 | "description": "Date concern was created, ISO8601 formatted", 22 | "type": "string", 23 | "format": "date-time" 24 | }, 25 | "dateResolved": { 26 | "description": "Date concern was resolved, ISO8601 formatted", 27 | "format": "date-time" 28 | }, 29 | "status": { 30 | "description": "Status of concern", 31 | "type": "string" 32 | }, 33 | 
"category": { 34 | "description": "Primary category of concern", 35 | "type": "string" 36 | }, 37 | "subCategories": { 38 | "description": "Subcategories of concern", 39 | "type": "array", 40 | "items": { 41 | "type": "string" 42 | }, 43 | "uniqueItems": true 44 | }, 45 | "location": { 46 | "description": "Coordinates of concern, WGS84 formatted", 47 | "type": "object", 48 | "properties": { 49 | "latitude": { 50 | "description": "Latitude of concern", 51 | "type": "number" 52 | }, 53 | "longitude": { 54 | "description": "Longitude of concern", 55 | "type": "number" 56 | } 57 | } 58 | }, 59 | "address": { 60 | "description": "Address of concern", 61 | "type": "string" 62 | }, 63 | "summary": { 64 | "description": "Summary of concern", 65 | "type": "string" 66 | } 67 | }, 68 | "required": ["id", "source", "dateCreated", "status", "category", "location", "summary"] 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /standards/crashes-instance.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 1403832, 4 | "dateOccurred": "2016-01-01T00:56:45-05:00", 5 | "location": { 6 | "latitude": 42.300864811284534, 7 | "longitude": -71.0713167869833 8 | }, 9 | "vehicles": [ 10 | { 11 | "category": "car" 12 | } 13 | ], 14 | "summary": "REPORTED INJURIES (P) (E) (F)" 15 | }, 16 | { 17 | "id": 1404159, 18 | "dateOccurred": "2016-01-01T02:30:23-05:00", 19 | "location": { 20 | "latitude": 42.317987926802246, 21 | "longitude": -71.06188127008645 22 | }, 23 | "vehicles": [ 24 | { 25 | "category": "car" 26 | } 27 | ], 28 | "summary": "REPORTED INJURIES (P) (E) (F)" 29 | }, 30 | { 31 | "id": 1404194, 32 | "dateOccurred": "2016-01-01T02:49:56-05:00", 33 | "location": { 34 | "latitude": 42.356046190978454, 35 | "longitude": -71.13132169601725 36 | }, 37 | "vehicles": [ 38 | { 39 | "category": "car" 40 | } 41 | ], 42 | "summary": "PEDESTRIAN STRUCK (P) (E) (F)" 43 | } 44 | ] 45 | 
-------------------------------------------------------------------------------- /standards/crashes-schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-06/schema#", 3 | "title": "Crashes", 4 | "description": "Defines the structure of a set of crashes", 5 | "type": "array", 6 | "items": { 7 | "title": "Crash", 8 | "description": "Defines the structure of a crash", 9 | "type": "object", 10 | "properties": { 11 | "id": { 12 | "description": "Unique identifier of crash", 13 | "type": ["string", "number"] 14 | }, 15 | "dateOccurred": { 16 | "description": "Date crash occurred, ISO8601 formatted", 17 | "type": "string", 18 | "format": "date-time" 19 | }, 20 | "location": { 21 | "description": "Coordinates of crash, WGS84 formatted", 22 | "type": "object", 23 | "properties": { 24 | "latitude": { 25 | "description": "Latitude of crash", 26 | "type": "number" 27 | }, 28 | "longitude": { 29 | "description": "Longitude of crash", 30 | "type": "number" 31 | } 32 | } 33 | }, 34 | "vehicles": { 35 | "description": "Vehicles involved in crash", 36 | "type": "array", 37 | "items": { 38 | "title": "Vehicle", 39 | "description": "Defines the structure of a vehicle", 40 | "type": "object", 41 | "properties": { 42 | "category": { 43 | "description": "Category of vehicle", 44 | "type": "string", 45 | "enum": ["car", "bike", "taxi", "bus", "truck"] 46 | }, 47 | "quantity": { 48 | "description": "Quantity of vehicles of this category", 49 | "type": "number" 50 | } 51 | }, 52 | "required": ["category"] 53 | }, 54 | "uniqueItems": true 55 | }, 56 | "persons": { 57 | "description": "Persons involved in crash", 58 | "type": "array", 59 | "items": { 60 | "title": "Person", 61 | "description": "Defines the structure of a person", 62 | "type": "object", 63 | "properties": { 64 | "category": { 65 | "description": "Category of person", 66 | "type": "string", 67 | "enum": ["driver", "pedestrian", 
"cyclist", "other"] 68 | }, 69 | "quantity": { 70 | "description": "Quantity of persons", 71 | "type": "number" 72 | }, 73 | "injuryType": { 74 | "description": "Type of injury", 75 | "type": "string", 76 | "enum": ["minor", "major", "fatal", "unknown"] 77 | } 78 | }, 79 | "required": ["category"] 80 | }, 81 | "uniqueItems": true 82 | }, 83 | "address": { 84 | "description": "Address of crash", 85 | "type": "string" 86 | }, 87 | "summary": { 88 | "description": "Summary of crash", 89 | "type": "string" 90 | } 91 | }, 92 | "required": ["id", "dateOccurred", "location"] 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /standards/points-instance.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "feature": "parking_ticket", 4 | "date": "2016-01-19T14:48:45.000Z", 5 | "category": "RESIDENT PERMIT ONLY", 6 | "location": { 7 | "latitude": 42.354167552594284, 8 | "longitude": -71.05414378860903 9 | } 10 | }, 11 | { 12 | "feature": "taxi_stands", 13 | "location": { 14 | "latitude": 42.33938397670106, 15 | "longitude": -71.0994798889095 16 | }, 17 | }, 18 | { 19 | "feature": "crime_report", 20 | "date": "2016-01-19T15:36:25.000Z", 21 | "category": "Auto Theft", 22 | "location": { 23 | "latitude": 42.349364649630935, 24 | "longitude": -71.06656509857143 25 | }, 26 | "notes": "West Cambridge" 27 | } 28 | ] 29 | -------------------------------------------------------------------------------- /standards/points-schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-06/schema#", 3 | "title": "Concerns", 4 | "description": "Defines the structure of a set of point-based features", 5 | "type": "array", 6 | "items": { 7 | "title": "Point Feature", 8 | "description": "Defines the structure of a point-based feature", 9 | "type": "object", 10 | "properties": { 11 | "feature": { 12 | "description": 
"Type of feature, e.g. traffic tickets", 13 | "type": "string" 14 | }, 15 | "date": { 16 | "description": "Date for feature, ISO8601 formatted", 17 | "type": "string", 18 | "format": "date-time" 19 | }, 20 | "category": { 21 | "description": "Category of entry", 22 | "type": "string" 23 | }, 24 | "notes": { 25 | "description": "Notes for entry", 26 | "type": "string" 27 | }, 28 | "location": { 29 | "description": "Coordinates of concern, WGS84 formatted", 30 | "type": "object", 31 | "properties": { 32 | "latitude": { 33 | "description": "Latitude of entry", 34 | "type": "number" 35 | }, 36 | "longitude": { 37 | "description": "Longitude of entry", 38 | "type": "number" 39 | } 40 | } 41 | }, 42 | "feat_agg": { 43 | "description": "Type of Feature Aggregation", 44 | "type": "string" 45 | }, 46 | "value": { 47 | "description": "Value of entry", 48 | "type": "number" 49 | } 50 | }, 51 | "required": ["feature", "location"] 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /standards/volumes-instance.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "startDateTime": "2014-03-12", 4 | "location": { 5 | "latitude": 42.3408948, 6 | "longitude": -71.16084219999999, 7 | "address": "21 Greycliff Rd, Brighton, MA 02135, USA" 8 | }, 9 | "speed": { 10 | "averageSpeed": 25 11 | }, 12 | "volume": { 13 | "totalVolume": 518, 14 | "totalLightVehicles": 508, 15 | "totalHeavyVehicles": 8, 16 | "bikes": 2, 17 | "hourlyVolume": [6, 6, 0, 1, 2, 11, 18, 47, 22, 21, 17, 11, 10, 25, 38, 33, 44, 42, 30, 30, 31, 43, 20, 10] 18 | }, 19 | } 20 | ] 21 | -------------------------------------------------------------------------------- /standards/volumes-schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-06/schema#", 3 | "title": "Traffic", 4 | "description": "Defines the structure of traffic studies", 5 | 
"type": "array", 6 | "items": { 7 | "startDateTime": { 8 | "description": "DateTime of traffic count start, ISO8601 formatted", 9 | "type": "string", 10 | "format": "date" 11 | }, 12 | "endDateTime": { 13 | "description": "DateTime of traffic count end, ISO8601 formatted. Can be left blank if the traffic count is 24 hours", 14 | "type": "string", 15 | "format": "date" 16 | }, 17 | "location": { 18 | "description": "Coordinates of traffic count, WGS84 formatted", 19 | "type": "object", 20 | "properties": { 21 | "latitude": { 22 | "description": "Latitude of traffic count", 23 | "type": "number" 24 | }, 25 | "longitude": { 26 | "description": "Longitude of traffic count", 27 | "type": "number" 28 | }, 29 | "address": { 30 | "description": "Address of traffic count", 31 | "type": "string" 32 | } 33 | } 34 | }, 35 | "speed": { 36 | "description": "Traffic speed information", 37 | "type": "object", 38 | "properties": { 39 | "averageSpeed": { 40 | "description": "Average speed in miles per hour", 41 | "type": "number" 42 | }, 43 | "85thPercentileSpeed": { 44 | "description": "85th percentile speed in miles per hour", 45 | "type": "number" 46 | } 47 | } 48 | }, 49 | "volume": { 50 | "description": "Traffic speed information", 51 | "type": "object", 52 | "properties": { 53 | "hourlyVolume": { 54 | "description": "Hourly total vehicle count", 55 | "type": "list" 56 | }, 57 | "totalVolume": { 58 | "description": "Total vehicle count, averaged over a 24 hour period", 59 | "type": "number" 60 | }, 61 | "totalCars": { 62 | "description": "Total car count, averaged over a 24 hour period", 63 | "type": "number" 64 | }, 65 | "totalHeavyVehicles": { 66 | "description": "Total heavy vehicle count, averaged over a 24 hour period", 67 | "type": "number" 68 | }, 69 | "totalCars": { 70 | "description": "Total car count, averaged over a 24 hour period", 71 | "type": "number" 72 | }, 73 | "bikes": { 74 | "description": "Total bicycle count, averaged over a 24 hour period", 75 | "type": 
"number" 76 | }, 77 | "AMPeak": { 78 | "description": "Average Total vehicle count during AM peak", 79 | "type": "number" 80 | }, 81 | "PMPeak": { 82 | "description": "Average Total vehicle count during PM peak", 83 | "type": "number" 84 | } 85 | } 86 | }, 87 | "turningMovements": { 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /test_environment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | REQUIRED_PYTHON = "python" 4 | 5 | 6 | def main(): 7 | system_major = sys.version_info.major 8 | if REQUIRED_PYTHON == "python": 9 | required_major = 2 10 | elif REQUIRED_PYTHON == "python3": 11 | required_major = 3 12 | else: 13 | raise ValueError("Unrecognized python interpreter: {}".format( 14 | REQUIRED_PYTHON)) 15 | 16 | if system_major != required_major: 17 | raise TypeError( 18 | "This project requires Python {}. Found: Python {}".format( 19 | required_major, sys.version)) 20 | else: 21 | print(">>> Development environment passes all tests!") 22 | 23 | 24 | if __name__ == '__main__': 25 | main() -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = test_service 3 | skipsdist = true 4 | 5 | [testenv] 6 | install_command = pip install {opts} {packages} 7 | basepython = python3.9 8 | changedir = src 9 | 10 | [testenv:test_service] 11 | deps = -r requirements.txt 12 | setenv = PYTHONPATH=. 13 | commands = pytest 14 | --------------------------------------------------------------------------------