electric flexible carshare vehicles in {{ displayed_city.display }}
4 |
6 |
7 |
10 |
11 | {% include "cities.html" %}
12 |
13 |
14 |
The {{ displayed_city.system.title() }} system in {{ displayed_city.display }}
15 | operates {{ all_car_models|join(' and ') }} electric cars as part of a mixed fleet.
16 |
{{all_cars|length|count("electric car")}} currently available.
17 |
18 | {% if all_cars|length > 1 %}
19 | {# Don't show map if there's no cars, or there is only one car. #}
20 |
22 | {% endif %}
23 |
24 |
25 | {% for car in all_cars %}
26 | {% include "car.html" %}
27 | {% endfor %}
28 |
29 |
40 |
41 |
44 |
45 | {{ block_endmatter }}
46 |
47 |
48 |
--------------------------------------------------------------------------------
/scripts/generate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | from __future__ import print_function
5 | import argparse
6 | import os
7 | import sys
8 |
9 | # ask script to look for the electric2go package in one directory up
10 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
11 |
12 | from electric2go.analysis import cmdline, generate
13 |
14 |
15 | # This does the inverse of normalize: given a result_dict on stdin,
16 | # generate files as they would have come out of the system's API.
17 |
18 | # This should be used rarely, mostly useful to generate test data
19 | # or test that normalization can be fully undone.
20 |
21 |
def process_commandline():
    """Read a result_dict from stdin, regenerate API files, optionally verify.

    With --check, compares generated files against the CHECK archive;
    with --check-only, skips generation and only compares.
    Raises RuntimeError on bad flag combinations or a failed comparison.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-c', '--check', type=str,
        help='optional: verify that the generated files '
             'have the same contents as the CHECK archive')
    arg_parser.add_argument(
        '--check-only', action='store_true',
        help="don't generate the files before checking; "
             "can be useful to check files already generated")
    opts = arg_parser.parse_args()

    if opts.check_only and not opts.check:
        raise RuntimeError('--check-only can only be used with --check')

    data = cmdline.read_json()

    # use the shell's current working directory
    output_dir = ''

    if not opts.check_only:
        generate.write_files(data, output_dir)

    if opts.check:
        try:
            generate.compare_files(data, opts.check, output_dir)
        except AssertionError as e:
            raise RuntimeError('Generated file at {} is not the same as original!'.format(e))
48 |
49 |
# Entry point when run as a script; reads result_dict JSON from stdin.
if __name__ == '__main__':
    process_commandline()
52 |
--------------------------------------------------------------------------------
/doc/crontab:
--------------------------------------------------------------------------------
1 | # sample crontab entry
2 |
3 | # Every minute download files for all car2go cities of interest and cache them
4 | # in files named "current_{city}" so they can be used by web tools.
5 | # Replace the ellipses with actual path, optionally replace "car2go" with
6 | # another supported system.
7 |
8 | */1 * * * * python3 .../scripts/download.py car2go all >> .../electric2go/data/cronlog-car2go
9 | */1 * * * * python3 .../scripts/download.py drivenow all >> .../electric2go/data/cronlog-drivenow
10 |
11 | # Optionally, archive the downloaded information, by adding "archive" param:
12 |
13 | */1 * * * * python3 .../scripts/download.py car2go all archive >> .../electric2go/data/cronlog-car2go
14 | */1 * * * * python3 .../scripts/download.py drivenow all archive >> .../electric2go/data/cronlog-drivenow
15 |
16 | # Daily, at 1 am server time, tarball+gzip up previous day's archived files
17 | # into .../electric2go/data/car2go-archives/.
18 | # A tarball is a lot easier to move around than 1440 individual files,
19 | # and normalize.py can process it directly.
# The remove step (&& rm {}*) saves a lot of disk space, but can be omitted
21 | # if you'd like to be super-sure you have all the data.
22 | # To adapt for other systems, just replace the three occurrences of "car2go".
23 | # This command is pretty hacky, can probably be done better,
24 | # but it seems to work well at least on Debian Wheezy.
25 |
26 | 0 1 * * * cd .../electric2go/data/car2go/ && find . -type f -name "`date -d 'yesterday 13:00' '+*\%Y-\%m-\%d*'`" | awk -F '--' '{print $1}' | sort -u | xargs -i sh -c "tar cfz ../car2go-archives/{}.tgz {}* && rm {}*" >> .../electric2go/data/cronlog-car2go-archives
27 | 10 1 * * * cd .../electric2go/data/drivenow/ && find . -type f -name "`date -d 'yesterday 13:00' '+*\%Y-\%m-\%d*'`" | awk -F '--' '{print $1}' | sort -u | xargs -i sh -c "tar cfz ../drivenow-archives/{}.tgz {}* && rm {}*" >> .../electric2go/data/cronlog-drivenow-archives
28 |
--------------------------------------------------------------------------------
/scripts/graph.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | from __future__ import print_function
5 | import argparse
6 | import os
7 | import sys
8 |
9 | # ask script to look for the electric2go package in one directory up
10 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
11 |
12 | from electric2go import output_file_name
13 | from electric2go.analysis import cmdline, graph
14 |
15 |
def process_commandline():
    """Read a result_dict from stdin and draw the requested graphs.

    Each requested image type is written to a timestamped PNG and its
    file name is printed to stdout.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-ap', '--all-positions-image', action='store_true',
        help='create image of all vehicle positions in the dataset')
    arg_parser.add_argument(
        '-atl', '--all-trips-lines-image', action='store_true',
        help='create image of all trips in the dataset')
    arg_parser.add_argument(
        '-atp', '--all-trips-points-image', action='store_true',
        help='create image of all trips in the dataset')
    arg_parser.add_argument(
        '--symbol', type=str, default='.',
        help="matplotlib symbol to indicate vehicles on the images"
             " (default '.', larger 'o')")

    opts = arg_parser.parse_args()

    data = cmdline.read_json()

    if opts.all_positions_image:
        image_name = output_file_name('all_positions', 'png')
        graph.make_positions_graph(data, image_name, opts.symbol)
        print(image_name)

    if opts.all_trips_lines_image:
        image_name = output_file_name('all_trips', 'png')
        graph.make_trips_graph(data, image_name)
        print(image_name)

    if opts.all_trips_points_image:
        image_name = output_file_name('all_trips_points', 'png')
        graph.make_trip_origin_destination_graph(data, image_name, opts.symbol)
        print(image_name)
50 |
51 |
# Entry point when run as a script; reads result_dict JSON from stdin.
if __name__ == '__main__':
    process_commandline()
54 |
--------------------------------------------------------------------------------
/electric2go/analysis/cmdline.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | import sys
4 | from datetime import datetime
5 |
6 | # This file will particularly be used with larger JSON files/objects
7 | # so try to get better-performing module first.
8 | # simplejson is usually slightly faster than json running my tests,
9 | # (simplejson=3.8.0, py2.7.8 & py3.4.2)
10 | # so load it if present. If not available, json is fine.
11 | try:
12 | import simplejson as json
13 | except ImportError:
14 | import json
15 |
16 |
def json_serializer(obj):
    """`default` hook for json.dump: serialize datetimes via isoformat().

    json's default serializer doesn't handle dates, so anything with an
    isoformat() method (datetime, date, time) is converted to its ISO 8601
    string. For any other unserializable object we raise TypeError, as the
    json module's contract requires -- returning the object unchanged (as
    this function previously did) makes json recurse on the same value and
    fail with a confusing RecursionError instead.
    """
    if hasattr(obj, 'isoformat'):
        return obj.isoformat()
    raise TypeError('Object of type %s is not JSON serializable'
                    % type(obj).__name__)
21 |
22 |
23 | def _strptime(t):
24 | return datetime.strptime(t, "%Y-%m-%dT%H:%M:%S")
25 |
26 |
27 | def json_deserializer(obj):
28 | # parse datetimes from JSON we wrote
29 | for (key, value) in obj.items():
30 |
31 | # json_deserializer is used as an object_hook. That only runs on objects,
32 | # that is, dicts. We are also storing datetimes as lists in the 'missing'
33 | # and 'changing_data' keys.
34 | # List items don't get passed into object_hook so we need to catch it separately. Sucks.
35 | if key == 'missing':
36 | datetimes_as_string_list = obj[key]
37 | obj[key] = [_strptime(t) for t in datetimes_as_string_list]
38 | elif key == 'changing_data':
39 | changing_data = obj[key]
40 | obj[key] = [(_strptime(item[0]), item[1]) for item in changing_data]
41 |
42 | try:
43 | # this is the format that isoformat outputs
44 | obj[key] = _strptime(value)
45 | except (TypeError, ValueError):
46 | pass
47 |
48 | return obj
49 |
50 |
def write_json(data, fp=sys.stdout, indent=0):
    """Dump `data` as JSON to `fp` (stdout by default), serializing
    datetimes via json_serializer."""
    json.dump(data, fp=fp, default=json_serializer, indent=indent)
53 |
54 |
def read_json(fp=sys.stdin):
    """Load JSON from `fp` (stdin by default), reviving datetimes
    via json_deserializer."""
    return json.load(fp=fp, object_hook=json_deserializer)
57 |
--------------------------------------------------------------------------------
/electric2go/systems/multicity/parse.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 |
def get_cars(system_data_dict):
    """Return only the markers in the API response that represent cars.

    The endpoint also returns other marker types (identified by their
    'objectname'), which are filtered out here.
    """
    markers = system_data_dict.get('marker', [])

    return [marker for marker in markers
            if marker['hal2option']['objectname'] == 'multicitymarker']
12 |
13 |
def get_license_plate(car):
    """Extract the license plate from the marker's tooltip text.

    The tooltip contains the plate in parentheses; the plate is the text
    between the first ' (' and the following ')'.
    """
    # NOTE(review): the first replace() argument is most likely a
    # non-breaking space (U+00A0) being normalized to a regular space,
    # but both render identically here -- confirm against the original
    # file bytes before editing this line.
    tooltip = car['hal2option']['tooltip'].replace(' ', ' ')

    start_string = ' ('
    start = tooltip.find(start_string)

    end = tooltip.find(')', start)

    plate = tooltip[start + len(start_string): end]

    return plate
25 |
26 |
def get_car_basics(car):
    """Return (vin, lat, lng) for a raw multicity marker dict."""
    vin = car['hal2option']['id']
    lat = float(car['lat'])
    lng = float(car['lng'])
    return vin, lat, lng
29 |
30 |
def get_car(car):
    """Map a raw multicity marker to the common electric2go car dict."""
    vin, lat, lng = get_car_basics(car)
    plate = get_license_plate(car)

    return {
        'vin': vin,
        'lat': lat,
        'lng': lng,
        'license_plate': plate,
        'name': plate,

        # defaults for all the cars in the system
        # TODO: no longer holds now!
        'model': 'Citroën C-Zero',
        'electric': True,
        'fuel_type': 'E',

        # AFAICT those are not available from the all-cars API endpoint,
        # would have to query for each car separately
        'address': '',
        'fuel': 0,
    }
55 |
56 |
def get_range(car):
    """Estimate remaining range in km from the car's charge percentage."""
    if 'fuel' not in car:
        car = get_car(car)

    # Multicity quotes a full charge range of 150 km (NEDC).
    # Multicity policy is that cars cannot be parked with less than 10 km range
    # (presumably unless they're plugged in?).
    # https://www.multicity-carsharing.de/en/faq/how-do-i-ensure-that-the-car-battery-charge-level-does-not-fall-below-the-minimum-at-the-end-of-the-journey/
    # Use 10 km = ~7% as indicator for minimum charge level.
    usable_charge = car['fuel'] - 7

    return int(1.5 * usable_charge) if usable_charge > 0 else 0
73 |
--------------------------------------------------------------------------------
/electric2go/analysis/filter.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from random import sample
4 |
5 |
def by_vehicle(result_dict, find_by):
    """
    Filter result_dict, in place, down to a single vehicle's data.

    :param find_by: accepts VINs, "random", "most_trips", "most_distance", and "most_duration"
    :return: result_dict only containing data for the requested car
    :raise KeyError: if the selected VIN has no finished trips in the dataset
    """

    all_known_vins = set()
    all_known_vins.update(result_dict['unfinished_trips'].keys())
    all_known_vins.update(result_dict['finished_trips'].keys())
    all_known_vins.update(result_dict['unfinished_parkings'].keys())
    all_known_vins.update(result_dict['finished_parkings'].keys())
    all_known_vins.update(result_dict['unstarted_trips'].keys())

    all_trips_by_vin = result_dict['finished_trips']

    vin = find_by  # allow finding by passing in VIN verbatim

    if find_by == 'random':
        # sample() requires a sequence, not a set, on Python 3.11+
        # (deprecated since 3.9); sort for a deterministic population order
        vin = sample(sorted(all_known_vins), 1)[0]
    elif find_by == 'most_trips':
        # pick the vehicle with most trips. in case of tie, pick first one
        vin = max(all_trips_by_vin,
                  key=lambda v: len(all_trips_by_vin[v]))
    elif find_by == 'most_distance':
        vin = max(all_trips_by_vin,
                  key=lambda v: sum(t['distance'] for t in all_trips_by_vin[v]))
    elif find_by == 'most_duration':
        vin = max(all_trips_by_vin,
                  key=lambda v: sum(t['duration'] for t in all_trips_by_vin[v]))

    if vin not in all_trips_by_vin:
        raise KeyError("VIN %s not found in result_dict" % vin)

    # keep only the selected vehicle's entry in each per-VIN mapping
    per_vin_categories = ('finished_trips', 'unfinished_trips',
                          'finished_parkings', 'unfinished_parkings',
                          'unstarted_trips')
    for category in per_vin_categories:
        result_dict[category] = {k: v for k, v in result_dict[category].items()
                                 if k == vin}

    return result_dict
48 |
--------------------------------------------------------------------------------
/electric2go/systems/evo/parse.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 |
def get_cars(system_data_dict):
    """Return the list of cars from an Evo API response (empty if missing)."""
    cars = system_data_dict.get('data', [])
    return cars
6 |
7 |
def get_everything_except_cars(system_data_dict):
    """Return a shallow copy of the API response without the car list.

    Uses pop() with a default so a missing 'data' key is tolerated,
    consistent with get_cars() above (which defaults to an empty list);
    the previous `del` raised KeyError in that case.
    """
    result = system_data_dict.copy()
    result.pop('data', None)
    return result
12 |
13 |
def get_car_basics(car):
    """Return the (vin, lat, lng) triple for a raw Evo car dict."""
    vin, lat, lng = car['Id'], car['Lat'], car['Lon']
    return vin, lat, lng
16 |
17 |
def get_car_unchanging_properties(car):
    """
    Gets car properties that are expected to not change at all
    for a given car VIN/ID during a reasonable timescale (1 week to 1 month)
    :param car: car info in original system JSON-dict format
    :return: dict with keys mapped to common electric2go format
    """

    result = {'vin': car['Id']}
    result['license_plate'] = car['Name']
    # the model is a constant for the whole Evo fleet
    result['model'] = 'Toyota Prius C'

    return result
31 |
32 |
def get_car_changing_properties(car):
    """
    Gets cars properties that change during a trip
    :param car: car info in original system JSON-dict format
    :return: dict with keys mapped to common electric2go format
    """

    # (electric2go key, Evo API key) pairs
    key_mapping = (('lat', 'Lat'), ('lng', 'Lon'),
                   ('address', 'Address'), ('fuel', 'Fuel'))

    return {ours: car[theirs] for ours, theirs in key_mapping}
46 |
47 |
def get_car(car):
    """Map a raw Evo car dict to the common electric2go car dict."""
    return {
        'vin': car['Id'],
        'license_plate': car['Name'],
        # the model is a constant for the whole Evo fleet
        'model': 'Toyota Prius C',
        'lat': car['Lat'],
        'lng': car['Lon'],
        'address': car['Address'],
        'fuel': car['Fuel'],
    }
66 |
67 |
def get_car_parking_drift(car):
    """
    Gets properties that can change during a parking period but aren't
    considered to interrupt the parking.
    These are things like a car charging while being parked.
    :return: a hashable object
    """

    # NOTE(review): returning None appears to mean drift is never tracked
    # for Evo cars -- confirm against the analysis code that consumes this.
    # TODO: implement
    return None
78 |
79 |
def put_car_parking_drift(car, d):
    """
    Update `car`'s properties that might have changed during a parking period.
    :param d: must be a result of get_car_parking_drift()
    """

    # Currently a no-op: `car` is returned unchanged, matching the
    # unimplemented get_car_parking_drift() above which always returns None.
    # TODO: implement
    return car
88 |
--------------------------------------------------------------------------------
/scripts/video.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | from __future__ import print_function
5 | import argparse
6 | import os
7 | import sys
8 |
9 | from tqdm import tqdm
10 |
11 | # ask script to look for the electric2go package in one directory up
12 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
13 |
14 | from electric2go import output_file_name
15 | from electric2go.analysis import cmdline, video
16 |
17 |
def process_commandline():
    """Read a result_dict from stdin, render video frames for it,
    and print the command that will animate them into a video."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-tz', '--tz-offset', type=float, default=0,
                        help='offset times by TZ_OFFSET hours')
    parser.add_argument('-d', '--distance', type=float, default=False,
                        help='highlight DISTANCE meters around each car on map')
    parser.add_argument('--trips', action='store_true',
                        help='show lines indicating vehicles\' trips')
    parser.add_argument('--speeds', action='store_true',
                        help='show vehicles\' speeds in addition to locations')
    parser.add_argument('--symbol', type=str, default='.',
                        help='matplotlib symbol to indicate vehicles on the images' +
                             ' (default \'.\', larger \'o\')')

    args = parser.parse_args()

    result_dict = cmdline.read_json()

    metadata = result_dict['metadata']

    output_filename_prefix = output_file_name(metadata['city'])

    # make_video_frames returns a lazy generator; no image is drawn yet
    images_generator = video.make_video_frames(
        result_dict, output_filename_prefix,
        args.distance, args.trips, args.speeds,
        args.symbol, args.tz_offset)

    # evaluate the generator to actually generate the images;
    # use tqdm to display a progress bar
    # (the expected frame count lets tqdm show progress for a generator)
    exp_timespan = metadata['ending_time'] - metadata['starting_time']
    exp_frames = exp_timespan.total_seconds() / metadata['time_step']
    generated_images = list(tqdm(images_generator,
                                 total=exp_frames, leave=False))

    # print animation information
    animate_command_text = video.make_animate_command(
        result_dict, output_filename_prefix, len(generated_images))
    print('\nto animate:')
    print(animate_command_text)
57 |
58 |
# Entry point when run as a script; reads result_dict JSON from stdin.
if __name__ == '__main__':
    process_commandline()
61 |
--------------------------------------------------------------------------------
/electric2go/systems/multicity/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 |
4 | # from https://www.multicity-carsharing.de/en/
5 | # needs additional requests per car to get fuel/charge and address,
6 | # could see if I can find a better API endpoint.
7 |
# Per-city configuration; Berlin is the only Multicity city defined here.
CITIES = {
    'berlin': {
        'of_interest': True,
        'electric': 'all',
        'API_AVAILABLE_VEHICLES_URL': 'https://kunden.multicity-carsharing.de/kundenbuchung/hal2ajax_process.php?searchmode=buchanfrage&lat=52.51&lng=13.39&instant_access=J&ajxmod=hal2map&callee=getMarker&objectname=multicitymarker',
        'BOUNDS': {
            # actual bounds based on operation area are
            # 52.55798, 52.449909, 13.48569, 13.26054

            # use slightly wider values to allow for GPS wobble
            'NORTH': 52.559,
            'SOUTH': 52.449,
            'EAST': 13.486,
            'WEST': 13.260
        },
        'DEGREE_LENGTHS': {
            # for latitude 52.52
            'LENGTH_OF_LATITUDE': 111277.17,
            'LENGTH_OF_LONGITUDE': 67879.39
        },
        'MAP_LIMITS': {
            # Use wider limits so that the generated image will look the same
            # as car2go and Drivenow images.
            # At 1920x1080 pixels, 16:9, the map is:
            # http://render.openstreetmap.org/cgi-bin/export?bbox=13.099773,52.38927,13.646893,52.576767&scale=113281&format=png
            'NORTH': 52.576767,
            'SOUTH': 52.38927,
            'EAST': 13.646893,
            'WEST': 13.099773
        }
    }
}
40 |
41 |
def get_latlng_extent():
    """Download Multicity's operation-area KML and return the result of the
    Drivenow get_max_latlng helper over its first Placemark's coordinates
    (presumably the lat/lng extremes of the home area -- confirm in
    drivenow.city).

    Imports requests/lxml locally so the module can be used without them.
    """
    import requests
    from lxml import etree
    from ..drivenow import city as drivenow_city

    # this URL is in https://www.multicity-carsharing.de/wp-content/plugins/multicity_map/multicity.js
    # which as of 2016-05-29 is loaded by https://www.multicity-carsharing.de/
    r = requests.get('https://www.multicity-carsharing.de/wp-content/plugins/multicity_map/geschaeftsbereich_07032014.kml')

    xml = etree.fromstring(r.content)

    ns = '{http://earth.google.com/kml/2.2}'
    pl = xml.findall('.//' + ns + 'Placemark')

    # reuse code from Drivenow to parse the KML
    coords = drivenow_city.get_details_from_kml(pl[0], ns)
    return drivenow_city.get_max_latlng(coords)
59 |
--------------------------------------------------------------------------------
/doc/analysis.md:
--------------------------------------------------------------------------------
1 | Analyzing data
2 | ==============
3 |
4 | The electric2go project has a fairly extensive library for analyzing
5 | carshare use data.
6 |
7 | Data is collected using `download.py` with an "archive" param.
8 | Cities indicated as `'of_interest': True` in a system's CITIES dictionary
9 | will have their information saved to a file named with a timestamp.
10 | The `doc/crontab` file has sample commands.
11 |
12 | A data archive is then loaded into `scripts/normalize.py` to convert it
to a JSON data dictionary that will have the same format for all
14 | supported systems. This is printed to stdout and can either be piped
15 | directly to another command or directed to a file for later use and reuse.
16 |
17 | `scripts/merge.py` merges two or more JSON data dictionaries that describe
18 | sequential time periods. For example, you can merge seven files, each with
19 | a day's worth of data, into one file containing the whole week's data.
20 |
21 | A number of other scripts read in JSON from stdin and process it:
22 |
`scripts/video.py` generates location maps of carshare vehicles at
24 | a given time. These maps are then animated into a video that shows
25 | car movement over time.
26 | Sample output: https://www.youtube.com/watch?v=UOqA-un8oeU
27 |
28 | `scripts/graph.py` generates single images, for instance a map of
29 | all positions where cars were parked during the dataset.
30 |
31 | Given a data dictionary, `scripts/stats.py` calculates statistics about
32 | properties like trip distance or duration.
33 | Keep in mind that the statistics are only as good as the data coming in.
34 | For instance, reserved car2go cars disappear off the available vehicles list,
35 | so any time reserved will be counted as trip time.
36 |
37 | The JSON data piping setup allows easy filtering of data to process.
38 | For instance you could get statistics for a week of data for only
39 | the morning rush hour. To do this, pipe to a filtering script between
40 | invocations of `normalize.py` and `video.py`/`graph.py`/`stats.py`.
41 |
42 | All of the above scripts are thin executable wrappers around modules in
43 | `electric2go/analysis` package, which you can also import directly.
44 |
45 | Note that all dates within the system are UTC. Provide a tz_offset param
46 | to `video` and `stats` to compensate for timezones.
47 |
48 | Statistics require numpy and graphing also requires matplotlib;
49 | install the requirements with `pip install -r requirements.txt`
50 |
--------------------------------------------------------------------------------
/electric2go/analysis/video.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from datetime import timedelta
4 |
5 | from . import generate, graph
6 | from ..systems import get_background_as_image
7 |
8 |
def make_graph_from_frame(result_dict, index, data, filename_prefix, symbol,
                          show_speeds, distance, tz_offset):
    """Render one video frame to a PNG and return its file name."""
    turn, current_positions, current_trips = data

    # zero-padded index so the frames sort correctly for the animation
    image_filename = '{file}_{i:05d}.png'.format(file=filename_prefix, i=index)

    # shift the displayed timestamp by the requested timezone offset
    printed_time = turn + timedelta(hours=tz_offset)

    graph.make_graph(result_dict, current_positions, current_trips,
                     image_filename, printed_time, show_speeds, distance,
                     symbol)

    return image_filename
21 |
22 |
def make_animate_command(result_dict, filename_prefix, frame_count):
    """Build the avconv shell command that assembles the frames into an mp4."""
    background_path = get_background_as_image(result_dict)
    png_filepaths = '{file}_%05d.png'.format(file=filename_prefix)
    mp4_path = '{file}.mp4'.format(file=filename_prefix)

    framerate = 30
    # to my best understanding, my "input" is the static background image
    # which avconv assumes to be "25 fps".
    # to get output at 30 fps to be correct length to include all frames,
    # I need to convert framecount from 25 fps to 30 fps
    frames = (frame_count / 25.0) * framerate

    command_template = "avconv -loop 1 -r %d -i %s -vf 'movie=%s [over], [in][over] overlay' -b 15360000 -frames %d %s"

    return command_template % (framerate, background_path, png_filepaths,
                               frames, mp4_path)
39 |
40 |
def make_video_frames(result_dict, filename_prefix, distance, include_trips,
                      show_speeds, symbol, tz_offset):
    """
    :return: Generator that knows how to create the images. It is not actually
    evaluated, so you must evaluate it (e.g. list(make_video_frames(...))
    to create the images.
    """

    # make_graph_from_frame is currently fairly slow (~2 seconds per frame).
    # The map can be fairly easily parallelized, e.g. http://stackoverflow.com/a/5237665/1265923
    # TODO: parallelize
    # It appears graph functions will be safe to parallelize, they
    # all ultimately go to matplotlib which is parallel-safe
    # according to http://stackoverflow.com/a/4662511/1265923

    indexed_frames = enumerate(
        generate.build_data_frames(result_dict, include_trips))

    return (
        make_graph_from_frame(result_dict, frame_index, frame_data,
                              filename_prefix, symbol, show_speeds,
                              distance, tz_offset)
        for frame_index, frame_data in indexed_frames
    )
62 |
--------------------------------------------------------------------------------
/web/frontend/homeareas/wrapper.js:
--------------------------------------------------------------------------------
1 | // requires Leaflet, tested with version 0.6.2
2 |
/**
 * Create a Leaflet map with a legend and helpers for drawing
 * carshare home areas.
 *
 * @param divId     id of the DOM element to attach the map to
 * @param tileLayer Leaflet tile layer providing the base map
 * @param colors    object mapping system name -> fill color
 * @returns the Leaflet map, extended with addGeoJson and
 *          addSystemMultiPolygon helper methods
 */
function createMap(divId, tileLayer, colors) {
    var opacity = 0.5;

    // create map and add provided tiles
    var map = L.map(divId);
    tileLayer.addTo(map);

    // create legend
    var legend = L.control({position: 'bottomright'});
    legend.onAdd = function (map) {
        var div = L.DomUtil.create('div', 'info legend');

        var ul = L.DomUtil.create('ul');

        div.appendChild(ul);

        map.legendList = ul; // save in map object for future use in addSystemMultiPolygon

        return div;
    };
    map.addControl(legend);

    // common processing for geoJson objects
    map.addGeoJson = function (geoJson) {
        var layer = L.geoJson([geoJson], {

            // use the style stored in the feature's properties, if any
            style: function (feature) {
                return feature.properties && feature.properties.style;
            },

            // attach a popup when the feature provides popup text
            onEachFeature: function onEachFeature(feature, layer) {
                if (feature.properties && feature.properties.popupContent) {
                    layer.bindPopup(feature.properties.popupContent);
                }
            }
        }).addTo(map);

        // zoom/pan the map to show the newly added layer
        map.fitBounds(layer.getBounds());
    }

    // common processing for systems with home area coordinates
    map.addSystemMultiPolygon = function (coordinates, systemName) {
        var color = systemName in colors ? colors[systemName] : "#eee";
        var title = systemName + ' home area';

        var geoJsonMultiPolygon = {
            "type": "Feature",
            "properties": {
                "popupContent": title,
                "style": {
                    weight: 1,
                    color: "#999",
                    opacity: 1,
                    fillColor: color,
                    fillOpacity: opacity
                }
            },
            "geometry": {
                "type": "MultiPolygon",
                "coordinates": coordinates
            }
        };

        map.addGeoJson(geoJsonMultiPolygon);

        // add a matching entry (color swatch + caption) to the legend
        if (map.legendList) {
            var li = document.createElement('li');
            li.setAttribute('data-name-system', systemName);
            li.setAttribute('data-name-caption', title);
            li.setAttribute('data-color', color);

            var span = document.createElement('span');
            span.style.background = color;
            span.style.opacity = opacity;
            li.appendChild(span);

            li.appendChild(document.createTextNode(title));

            map.legendList.appendChild(li);
        }
    };

    return map;
};
87 |
88 |
--------------------------------------------------------------------------------
/electric2go/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from datetime import datetime
4 | from math import radians, sin, cos, asin, sqrt
5 | from subprocess import Popen, PIPE
6 |
7 | from .files import root_dir
8 |
9 |
def current_git_revision():
    """
    Return the current git revision of the electric2go codebase,
    intended as metadata recording which version of the software
    generated a given output file.

    Runs `git rev-parse --verify HEAD` with the electric2go package root
    (root_dir) as the working directory. Edge case: if electric2go itself
    is not a git repository but a parent directory is, the parent
    repository's revision is returned instead.

    Raises RuntimeError when git reports an error.

    This will have to be changed if electric2go is to be available as
    a package or in other cases where the files would not be expected
    to be versioned (e.g. by hashing a source file instead).
    """

    git_process = Popen(["git", "rev-parse", "--verify", "HEAD"],
                        stdout=PIPE, stderr=PIPE, cwd=root_dir)

    out, err = git_process.communicate()

    # any stderr output is treated as failure
    if err:
        raise RuntimeError('Unable to get git revision of electric2go')

    return out.decode('utf-8').strip()
46 |
47 |
def output_file_name(description, extension=''):
    """Build a timestamped output file name like
    '20160529-134501_description' (plus '.extension' if one is given)."""
    timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
    file_name = '{date}_{desc}'.format(date=timestamp, desc=description)

    if extension:
        file_name = '{name}.{ext}'.format(name=file_name, ext=extension)

    return file_name
57 |
58 |
def dist(ll1, ll2):
    """Great-circle distance in km between two (lat, lng) points,
    using the haversine formula."""
    # adapted from http://www.movable-type.co.uk/scripts/latlong.html
    # see also http://stackoverflow.com/questions/27928/calculate-distance-between-two-ll-points
    # and http://stackoverflow.com/questions/4913349/haversine-formula-in-python

    # the js equivalent of this code is used in sort.js
    # - any changes should be reflected in both

    earth_radius = 6371  # Radius of the earth in km

    lat1, lng1 = ll1
    lat2, lng2 = ll2

    # Note: taking radians of the difference rather than differencing
    # the radians -- the two give marginally different results because
    # of floating point.
    half_d_lat = radians(lat2 - lat1) / 2
    half_d_lng = radians(lng2 - lng1) / 2

    a = (sin(half_d_lat) ** 2 +
         cos(radians(lat1)) * cos(radians(lat2)) * sin(half_d_lng) ** 2)
    c = 2 * asin(sqrt(a))

    return earth_radius * c
85 |
--------------------------------------------------------------------------------
/scripts/normalize.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | from __future__ import print_function
5 | import argparse
6 | import os
7 | import sys
8 |
9 | # ask script to look for the electric2go package in one directory up
10 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
11 |
12 | from electric2go import files
13 | from electric2go.analysis import cmdline
14 | from electric2go.analysis.normalize import batch_load_data
15 |
16 |
def process_commandline():
    """Parse command-line arguments, normalize the requested data archive,
    and write the resulting result_dict as JSON to stdout."""
    parser = argparse.ArgumentParser()
    parser.add_argument('system', type=str,
                        help='system to be used (e.g. car2go, drivenow, ...)')
    parser.add_argument('starting_filename', type=str,
                        help='name of archive of files or the first file')
    parser.add_argument('-st', '--starting-time', type=str,
                        help='optional: if using an archive, first data point '
                             'to process; format YYYY-mm-DD--HH-MM')
    parser.add_argument('-et', '--ending-time', type=str,
                        help='optional: if using an archive, data point '
                             'to stop at; format YYYY-mm-DD--HH-MM')
    parser.add_argument('-step', '--time-step', type=int, default=60,
                        help='each step is TIME_STEP seconds (default 60)')
    parser.add_argument('-i', '--indent', type=int, default=0,
                        help='indent for output JSON (default 0)')

    args = parser.parse_args()

    if not os.path.exists(args.starting_filename):
        sys.exit('file not found: ' + args.starting_filename)

    # TODO: also support more standard YYYY-mm-DDTHH-MM (ISO 8601)
    # in addition to YYYY-mm-DD--HH-MM when parsing dates here.
    # I guess changing the file naming to match would be a bit of a big
    # and breaking change... hmm.

    def parse_time_or_exit(raw_time):
        # shared handling for --starting-time and --ending-time
        try:
            return files.parse_date(raw_time)
        except ValueError:
            sys.exit('time format not recognized: ' + raw_time)

    if args.starting_time:
        args.starting_time = parse_time_or_exit(args.starting_time)

    if args.ending_time:
        args.ending_time = parse_time_or_exit(args.ending_time)

    try:
        result = batch_load_data(args.system, args.starting_filename,
                                 args.starting_time, args.ending_time,
                                 args.time_step)
    except ValueError as e:
        # raised when an invalid system is encountered
        # or the first data file is invalid
        sys.exit(e)

    found_ending_time = result['metadata']['ending_time']
    if args.ending_time and args.ending_time > found_ending_time:
        print('warning: requested ending_time was {et}, but only found data up to {at}; using {at}'.
              format(et=args.ending_time, at=found_ending_time),
              file=sys.stderr)

    cmdline.write_json(result, indent=args.indent)
71 |
72 |
# entry point when run directly as a command-line script
if __name__ == '__main__':
    process_commandline()
75 |
--------------------------------------------------------------------------------
/electric2go/systems/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | import os
4 | import importlib
5 |
6 |
7 | systems_dir = os.path.dirname(os.path.realpath(__file__))
8 |
9 |
def _fill_in_city_information(system, city_name, city_data):
    """Populate derived and default fields on a city's config dict.

    Mutates city_data in place and also returns it.
    """
    city_data['system'] = system
    city_data['name'] = city_name

    if 'display' not in city_data:
        city_data['display'] = city_name.title()

    if 'MAP_LIMITS' in city_data and 'MAP_SIZES' not in city_data:
        # default to 1920x1080 if we have other map data
        city_data['MAP_SIZES'] = {'MAP_X': 1920, 'MAP_Y': 1080}

    # conservative defaults for optional keys
    for key, default in (('electric', False),
                         ('of_interest', False),
                         ('number_first_address', False),
                         ('API_AVAILABLE_VEHICLES_HEADERS', None),
                         ('API_KNOCK_HEAD_URL', None)):
        city_data.setdefault(key, default)

    # expose the system parser's range estimator, if the parser defines one
    city_data['range_estimator'] = getattr(get_parser(system), 'get_range', None)

    return city_data
32 |
33 |
def _get_carshare_system_module(system_name, module_name=''):
    """Import and return a system package (e.g. ".car2go") or one of its
    submodules (e.g. ".car2go.parse") relative to this package."""
    lib_name = ('.{s}'.format(s=system_name)
                if module_name == ''
                else '.{s}.{m}'.format(s=system_name, m=module_name))

    return importlib.import_module(lib_name, __package__)
41 |
42 |
def _get_all_cities_raw(system):
    """Return the system's CITIES dict exactly as declared in its package."""
    return getattr(_get_carshare_system_module(system), 'CITIES')
47 |
48 |
def get_all_cities(system):
    """Return {city_name: filled-in city data dict} for every city
    the given system declares."""
    raw_cities = _get_all_cities_raw(system)

    return {name: _fill_in_city_information(system, name, data)
            for name, data in raw_cities.items()}
54 |
55 |
def get_city_by_name(system, city_name):
    """Look up a single city in a system and return its filled-in data dict.

    Raises KeyError when the city is not declared by the system.
    """
    raw_city = _get_all_cities_raw(system)[city_name]
    return _fill_in_city_information(system, city_name, raw_city)
60 |
61 |
def get_city_by_result_dict(result_dict):
    """Resolve the city data dict for the system/city recorded
    in a result_dict's metadata."""
    metadata = result_dict['metadata']
    return get_city_by_name(metadata['system'], metadata['city'])
65 |
66 |
# mini-cache of parser modules, keyed by system name
_parse_modules = {}


def get_parser(system):
    """Return the system's 'parse' module, memoized in _parse_modules.

    Importing modules can be slow and parsers may be requested often.
    Python 3 has @functools.lru_cache but Python 2 does not,
    so this keeps a simple module-level cache instead.
    """
    try:
        return _parse_modules[system]
    except KeyError:
        parser = _get_carshare_system_module(system, 'parse')
        _parse_modules[system] = parser
        return parser
77 |
78 |
def get_background_as_image(result_dict):
    """Return the relative path to the city's background map image.

    TODO: Eventually these should be generated automatically from
    result_dict rather than shipped as binary images. Even then, the
    generated image could be saved to a temp file and its path
    returned from this function.
    """
    background_name = '{city}-background.png'.format(
        city=result_dict['metadata']['city'])

    return os.path.relpath(
        os.path.join(systems_dir, 'backgrounds', background_name))
91 |
--------------------------------------------------------------------------------
/web/web_helper.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | import os
4 | import sys
5 | import cgi
6 | import json
7 |
8 | # ask script to look for the electric2go package in one directory up
9 | # you might want to hardcode a path instead
10 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
11 |
12 | from electric2go import dist, download, systems # dist used in api.py
13 |
14 |
# systems are loaded dynamically based on their name,
# so the easiest thing is to manually define a list
# of systems with mixed fleets to search
ALL_SYSTEMS = ['car2go', 'drivenow', 'communauto']

# fallback used by get_system_and_city() when the requested
# system or city is missing or invalid
DEFAULT_SYSTEM = 'drivenow'
DEFAULT_CITY = 'london'

CACHE_PERIOD = 60  # cache data for this many seconds at most

# NOTE(review): not referenced anywhere in this file — presumably appended
# to by importing scripts for request timing; confirm before removing.
request_timer = []
26 |
27 |
def get_param(param_name):
    """Return the named CGI parameter's value, or False when absent.

    Returns False (rather than None) so callers can fall back with `or`.
    """
    form = cgi.FieldStorage()

    if param_name not in form:
        return False

    return form[param_name].value
35 |
36 |
def get_arg(param_number):
    """Return the lowercased positional command-line argument,
    or False when that argument was not provided."""
    if len(sys.argv) > param_number:
        return sys.argv[param_number].lower()

    return False
39 |
40 |
def get_system_and_city(allow_any_city=True):
    """Resolve the requested system and city from CGI params or argv.

    Falls back to DEFAULT_SYSTEM/DEFAULT_CITY when the request is invalid,
    or (with allow_any_city=False) when the city has no electric cars.
    """
    requested_system = get_param('system') or get_arg(1)
    requested_city = get_param('city') or get_arg(2)

    if requested_system in ALL_SYSTEMS:
        try:
            city_data = systems.get_city_by_name(requested_system, requested_city)
            if allow_any_city or city_data['electric'] == 'some':
                return city_data
        except KeyError:
            # requested city isn't defined for the system; use the default
            pass

    # if city or system were incorrect, return default
    return systems.get_city_by_name(DEFAULT_SYSTEM, DEFAULT_CITY)
56 |
57 |
def get_electric_cars(city):
    """Fetch current data for a city and return (electric cars, cache).

    `cache` is whatever download.get_current reports: falsy when the data
    was freshly downloaded.
    """
    api_text, cache = download.get_current(city, CACHE_PERIOD)

    parser = systems.get_parser(city['system'])
    raw_cars = parser.get_cars(json.loads(api_text))

    parsed = (parser.get_car(raw_car) for raw_car in raw_cars)
    electric_cars = [car for car in parsed if car['electric']]

    return electric_cars, cache
68 |
69 |
def fill_in_car(car, city):
    """Augment a parsed car dict in place with its estimated range
    and a display-formatted address; returns the same dict."""
    estimate_range = city['range_estimator']
    car['range'] = estimate_range(car)
    car['address'] = format_address(car['address'], city)

    return car
75 |
76 |
def format_address(address, city):
    """Format a raw API address string for display.

    Strips the redundant " London" suffix for Drivenow London, and for
    cities flagged with 'number_first_address' converts German-style
    "Main St 100" street addresses to "100 Main St".

    :param address: address string as returned by the system's API
    :param city: city data dict (uses 'system', 'name', 'number_first_address')
    :return: formatted address string
    """
    if city['system'] == 'drivenow' and city['name'] == 'london':
        # London has an annoying scheme that includes "London" in
        # all geolocated address which is pretty useless
        # as all cars are in London.
        address = address.replace(' London', '')

    if not city['number_first_address']:
        return address

    # If possible and appropriate, try to reformat street address
    # to more usual form used in English-speaking areas.
    # Except for designated parking areas, API always returns
    # German-style "Main St 100", change it to "100 Main St"

    address_parts = address.split(',')

    street_parts = address_parts[0].split()

    # Guard against an empty street portion (empty or comma-leading
    # address), which would otherwise raise IndexError on street_parts[-1].
    if (street_parts
            and street_parts[-1].isdigit()
            and not street_parts[0].isdigit()):
        street_parts.insert(0, street_parts.pop())
        address_parts[0] = ' '.join(street_parts)

    return ','.join(address_parts)
101 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | electric2go
2 | ===========
3 |
4 | Collect and analyze data on roaming one-way carshare use.
5 |
6 | The project started out as a way to find nearest electric car2go vehicles,
7 | hence the name. I've since added caching the data, archiving it,
8 | and processing to make visualizations and collect statistics.
9 |
10 | **Much of the data-collection code no longer works as of 2018.** Drivenow API
11 | was turned off in April 2017 and car2go API in January 2018. The code is now
12 | mostly useful as historical record or for analyzing data collected earlier.
13 | It is mostly not maintained.
14 |
15 | There was once good support for various carshare systems: in December 2015
16 | we could handle car2go, Drivenow, Montréal's Communauto Automobile,
17 | Vancouver's Evo, Italy's Enjoy, Milan's Sharengo, and Berlin's Multicity.
18 | Communauto and Evo appear to still work, others are untested.
19 |
20 |
21 | Requirements
22 | ------------
23 |
24 | Known to work under Python 3.4.2 and 2.7.8.
25 | Scripts invoked from the command line specify `#!/usr/bin/env python3`.
26 |
27 | PyPI dependencies for whole project are in requirements.txt,
28 | for data archiver (run headless on a server) in requirements-download.txt,
29 | for web interface in requirements-web.txt.
30 |
31 |
32 | Web interface: finding electric cars
33 | ------------------------------------
34 |
35 | Simple web page listing currently available electric carshare vehicles
36 | in a number of cities where a system has both internal-combustion
37 | and electric vehicles available. A basic JSON API is also available.
38 |
39 | View it live at http://bin.piorkowski.ca/electric2go/
40 |
41 | See [doc/web.md](doc/web.md) for more information.
42 |
43 |
44 | Analyzing data
45 | --------------
46 |
47 | A carshare's data can be downloaded automatically and archived. An archive
48 | can be then analyzed to get usage statistics and generate visualizations.
49 |
50 | * [Example visualization video on Youtube](https://www.youtube.com/watch?v=UOqA-un8oeU)
51 | * [Example write-ups using statistics calculated with this code](http://piorkowski.ca/rev/tag/carshare/)
52 |
53 | See [doc/analysis.md](doc/analysis.md) for more information.
54 |
55 |
56 | Multisystem operation
57 | ---------------------
58 |
59 | Supported carshare systems are defined in packages in
60 | the `electric2go/systems/` directory.
61 |
62 | A system definition consists of a dictionary of cities a system supports
63 | and a "parser" that converts the system's API output to a standard format.
64 |
65 | More systems can be added fairly easily.
66 | See [doc/systems.md](doc/systems.md) for more information.
67 |
68 | If you add a new system, patches or pull requests are most welcome.
69 |
70 |
71 | Similar projects
72 | ----------------
73 |
74 | - https://github.com/mattsacks/disposable-cars/ is a visualization of
75 | car2go trips in Portland
76 | - http://www.comparecarshares.com/ incorporates car2go data to calculate
77 | how competitive the cost is compared with driving, cycling,
78 | and classic carshare systems in Calgary, Vancouver, and Toronto
79 | - http://labs.densitydesign.org/carsharing/ is an analysis of Enjoy service
80 | in Milan, it has a making-of write-up at
81 | http://www.densitydesign.org/2014/07/the-making-of-the-seven-days-of-carsharing-project/
82 | - http://mappable.info/one-week-of-carsharing was an analysis of
83 | car2go service in a number of cities, taken down by request of car2go
84 |
85 |
86 | Legal stuff
87 | -----------
88 |
89 | This product uses the car2go API but is not endorsed or certified by car2go.
90 |
91 | Released under the ISC license. Boilerplate:
92 |
93 | Copyright (c) 2012-2018, Jarek Piórkowski
94 |
95 | Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
96 |
97 | The software is provided "as is" and the author disclaims all warranties with regard to this software including all implied warranties of merchantability and fitness. In no event shall the author be liable for any special, direct, indirect, or consequential damages or any damages whatsoever resulting from loss of use,
98 | data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software.
99 |
--------------------------------------------------------------------------------
/electric2go/systems/car2go/weird trip notes:
--------------------------------------------------------------------------------
1 | Initial ideas
2 | =============
3 |
4 | Some categories of "weird" trips that might be of concern in analysis/stats.py:
5 | - Trips with really short distance but long duration and significant fuel use
6 | - These are likely to be round trips, treat them as normal except maybe collect separately for analysis
7 | - Trips with really short distance and zero fuel use
8 | - These are likely to be lapsed reservations, especially if duration is exactly 30 minutes
9 | (though note that you can cancel a booking so not necessarily 30 minutes)
10 | (added complication of fuel level wobble possibility)
11 | - Trips with really short distance and really short duration
12 | - These are GPS wobble
13 |
14 | The procedure was then to take a ~big dataset (ended up being a week of Vancouver data),
take short-distance trips from that dataset, get stats on duration and fuel use,
16 | see if any of the above categories are in fact present, if so isolate and report on them.
17 |
18 |
19 | Distances
20 | =========
21 |
22 | In a 7-day vancouver sample, going by distance only, as ratio of all trips:
23 |
24 | - under 0.01 km : 0.00630854060822
25 | - under 0.02 km : 0.00821073415039
26 | - under 0.05 km : 0.0106667308757
27 | - under 0.1 km : 0.0151453131396
28 |
29 | 100 m is definitely a valid trip (though a suspected round trip).
30 | Under 50 m, I examined further.
31 | Shortest "trip" in the dataset is 0.00111194926601 km, that is 1.1 m.
32 |
33 |
34 | Fuel use
35 | ========
36 |
37 | For all trips (n=41809), fuel use most common values are : [(0, 26105), (3, 11356), (6, 1621), (4, 762)]
38 | 1 and 2 aren't in top ten, suggesting that the measurement isn't accurate enough to give 1% resolution?
39 | - fuel use over 1 ratio : 0.3497811476
40 | - fuel use over 5 ratio : 0.0599392475304
41 | - fuel use over 10 ratio : 0.00674495921931
42 | - fuel use under 1 ratio : 0.6502188524
43 | - fuel use under 5 ratio : 0.94006075247
44 |
45 | So low fuel use is not a reliable indicator generally. Must be combined with other factors.
46 |
47 |
48 | Durations
49 | =========
50 |
51 | In the dataset of 41809:
52 | - Trips of 1 minute are 275 (0.67%). Median distance 6.2 m. 64% are under 10 m. 24% are over 20 m, 20% over 50 m. Fuel use is 90% 0.
53 | - Trips of 2 minutes are 58 (0.14%). Median distance 1.21 km (!). 27.5% are under 20 m, and 25th percentile is 15.6 m,
54 | but it goes really high after that: 75th percentile is 3.19 km, and max is 7.77 km. Fuel use is 84% 0, there are 7 3%s, one 4%, and -79%.
55 | Obviously 1.2 km in 2 minutes is pretty big (36 km/h just driving and without allowing time to start/finish trip),
so it's likely that these are mostly errors and weird things. But the prevalence (0.14%) is very low so perhaps it can just be ignored.
57 | We'll just do a best-effort exclusion of that bottom 25% of trips that can be definitely said to not be "real" trips.
58 | - Trips over 2 minutes but under 6 minutes are 616 (1.5%). Mean distance is 806 m.
59 | 90% of trips are over 50 m, only 7.5% under 20 m. 80% of fuel use is 0, next most common is 3 at 15%.
60 |
61 | Lapsed reservations (duration ~30 min) weren't a significant ratio of the short-distance trips as far as I can tell, so not tracking those further for now.
62 |
63 | Thoughts for the future: maybe subtract 2-3 minutes from duration of trip to account for trip start and end procedures
64 | and calculate speed outside those times? If it's something obviously high (>80 km/h perhaps?) there is a problem.
65 |
66 |
67 | Decision
68 | ========
69 |
70 | Decided to exclude:
71 |
72 | - Distance under 10 m and duration under 4 minutes and fuel use > -2:
73 | - Too short to be a real trip, too quick to be a round trip,
74 | fuel use criterion allows accidental wobbles while excluding possible refuels
75 | - Distance under 50 m and duration of 1 minute and fuel use > -2:
76 | - Too quick to be a real trip, so this is GPS wobble.
77 |
78 | These criteria find 245 trips in the set of 41809, or 0.585998%.
79 |
80 | For comparison, if the criterion was duration <= 5 minutes and distance < 50 m,
81 | it would match 294 trips, or 0.7031979%.
82 | But at the outer limit, 5 minutes and 50 m is a possible quick round trip, so I decided to be a bit more stringent.
83 |
84 | Because of the low prevalence this is really best-effort to try to find some of the obvious errors,
85 | and it's not a huge problem if we don't find everything.
86 |
--------------------------------------------------------------------------------
/electric2go/download.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import print_function
4 | import datetime
5 | import os
6 | import time
7 |
8 | import requests
9 |
10 | from . import files, systems
11 |
12 |
def head_url(url, session, extra_headers):
    """Issue a HEAD request against url, creating a requests session
    when none was supplied; returns the (possibly new) session for reuse."""
    active_session = requests.Session() if session is None else session

    active_session.head(url, headers=extra_headers)

    return active_session
20 |
21 |
def get_url(url, session, extra_headers):
    """GET url and return (response text, session), creating a
    requests session when none was supplied."""
    active_session = requests.Session() if session is None else session

    response = active_session.get(url, headers=extra_headers)

    return response.text, active_session
29 |
30 |
def download_one_city(city_data, session=None):
    """Download the current vehicle list for one city.

    Performs the optional "knock" HEAD request some APIs need before
    serving data, and unwraps a JSONP callback when the city declares one.

    :return: (api_text, session) — session can be reused for further cities
    """
    if city_data['API_KNOCK_HEAD_URL']:
        # some APIs require we hit another URL first to prepare session
        session = head_url(city_data['API_KNOCK_HEAD_URL'],
                           session,
                           city_data['API_AVAILABLE_VEHICLES_HEADERS'])

    api_text, session = get_url(city_data['API_AVAILABLE_VEHICLES_URL'],
                                session,
                                city_data['API_AVAILABLE_VEHICLES_HEADERS'])

    if 'JSONP_CALLBACK_NAME' in city_data:
        # strip the JSONP wrapper, e.g. "callback({...});" -> "{...}"
        prefix = '{callback}('.format(callback=city_data['JSONP_CALLBACK_NAME'])

        if api_text.startswith(prefix):
            for suffix in (');', ')'):
                if api_text.endswith(suffix):
                    api_text = api_text[len(prefix):-len(suffix)]
                    break

    return api_text, session
55 |
56 |
def save_one_city(city, timestamp_to_save, should_archive, session):
    """Download one city's data and write it to the city's current-data
    file, plus (when should_archive is set) a timestamped archive copy.
    Returns the HTTP session so the caller can reuse it for other cities."""
    api_text, session = download_one_city(city, session=session)

    # ensure data directory exists; writing a file would fail otherwise
    data_dir = files.get_data_dir(city)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    # write bytes explicitly so the saved files are UTF-8
    # regardless of platform default encoding
    api_bytes = api_text.encode('utf-8')

    with open(files.get_current_file_path(city), 'wb') as f:
        f.write(api_bytes)

    if should_archive:
        with open(files.get_file_path(city, timestamp_to_save), 'wb') as f:
            f.write(api_bytes)

    return session
75 |
76 |
def get_current(city_data, max_cache_age):
    """
    Gets current data for city. Returns data from local cache file
    if available, downloads data from API otherwise.

    :param max_cache_age: maximum acceptable cache age, in seconds
    :return: (api_text, cache) — cache is the cache file's mtime
        (a Unix timestamp) when cached data was used, False otherwise
    """
    api_text = None
    cache = False

    # see if it's already cached
    cached_data_filename = files.get_current_file_path(city_data)
    if os.path.exists(cached_data_filename):
        cached_data_timestamp = os.path.getmtime(cached_data_filename)
        cached_data_age = time.time() - cached_data_timestamp
        if cached_data_age < max_cache_age:
            cache = cached_data_timestamp
            with open(cached_data_filename, 'r') as f:
                api_text = f.read()

    # cache missing, stale, or empty: fetch fresh data from the API
    if not api_text:
        cache = False
        api_text, session = download_one_city(city_data)
        session.close()

    return api_text, cache
101 |
102 |
def save(requested_system, requested_city, should_archive):
    """Download and save current data for one city, or for every city
    of interest in the system.

    :param requested_system: system name, e.g. 'car2go'
    :param requested_city: city name, or 'all' for every city flagged
        as of_interest or having some electric cars
    :param should_archive: if True, also save a timestamped archive copy
    :return: (download timestamp, list of (system, city) tuples that failed)
    """
    failures = []

    if requested_city == 'all':
        all_cities = systems.get_all_cities(requested_system)
        cities_to_download_list = [city for key, city in all_cities.items()
                                   if city['of_interest'] or city['electric'] == 'some']
    else:
        cities_to_download_list = [systems.get_city_by_name(requested_system, requested_city)]

    t = datetime.datetime.utcnow()
    session = None
    for city in cities_to_download_list:
        try:
            session = save_one_city(city, t, should_archive, session)
        except Exception:
            # Bypass cities that fail (like Ulm did in 2015-01) without
            # killing the whole script. Catch Exception rather than using
            # a bare except, which would also have swallowed
            # KeyboardInterrupt and SystemExit.
            failures.append((city['system'], city['name']))
            continue
    if session:
        session.close()

    return t, failures
126 |
--------------------------------------------------------------------------------
/doc/adding new cities.howto:
--------------------------------------------------------------------------------
1 | Here is how to generate images and videos for new cities:
2 |
3 |
4 | 1. Get bounds of operational areas to determine map bounds
5 |
6 | car2go, Drivenow, and Multicity have helper functions to do this for you:
7 |
8 | jarek@x1:~/projects/electric2go$ python
9 | >>> from electric2go.systems.car2go import city
10 | >>> city.get_latlng_extent('milano')
11 | (9.27821, 9.066236, 45.535522, 45.398983)
12 |
13 | jarek@x1:~/projects/electric2go$ python
14 | >>> from electric2go.systems.drivenow import city
15 | >>> city.get_latlng_extent('berlin')
16 | (52.572632, 52.389571, 13.565218, 13.183849)
17 |
18 | >>> from electric2go.systems import multicity
19 | >>> multicity.get_latlng_extent()
20 | (52.55798, 52.449909, 13.48569, 13.26054)
21 |
22 | Some cities have operational 'islands' that would inconveniently stretch
23 | the visualization, particularly in the north-south direction.
24 | For car2go and Drivenow, city.print_operation_areas() can help you find them
25 | and you can exclude their bounds from consideration if you wish.
26 |
27 | For other systems, you have to get the bounds manually.
28 | I recommend checking the HTML/DOM source for the service's website and seeing
29 | how it builds the map. If the website uses KML, you might be able to reuse
30 | some of Drivenow and Multicity work - read through
31 | systems/drivenow/city.py and systems/multicity/__init__.py for details.
32 |
33 | I like to add padding to map by rounding up all latitudes/longitudes
34 | to 3 decimal digits, or by adding about 0.003 padding, so that points
35 | aren't on the very edge of the map.
36 |
37 | Paste the bounds into 'BOUNDS' key in the city info dict in city.py.
38 |
39 |
40 | 2. Get map limits / map dimensions
41 |
42 | For esthetics, I make the generated image in 16:9 ratio so that
43 | it will look good on Youtube (and in fullscreen playback on most screens).
44 | I usually go with 1920x1080 for precision.
45 |
46 | Finding desired map coordinates for this takes some trial and error.
47 |
48 | Start with this URL:
49 |
50 | http://render.openstreetmap.org/cgi-bin/export?bbox=WEST,SOUTH,EAST,NORTH&scale=69333&format=png
51 |
52 | Plug in values for WEST,SOUTH,EAST,NORTH from the BOUNDS dict from part 1.
53 | (The BOUNDS and MAP_LIMITS keys are independent - they don't have to be
54 | adjusted in sync or have to center on the same point.)
55 |
56 | Check the size of the image you get back, then adjust scale parameter to get
57 | desired size in the limiting dimension. (If you want to do 16:9 ratio,
58 | the north-south dimension will almost always be the limiting one,
59 | so adjust scale until the height of the exported image is 1080px)
60 |
61 | Then, symmetrically expand the limits in the other dimension to get
62 | its dimension correct. For example, assuming we're expanding east-west:
63 |
64 | - divide desired horizontal dimension of the image by the current one:
65 | e.g. you got an image 653px wide and you want 1920px: 1920/653 = 2.940275651
66 | - find the center longitude, and +/- to get to the east/west bounds:
67 | e.g. (-73.662 + -73.548)/2 = -73.605, so center is at -73.605 with +/- 0.057
68 | - scale the +/- value: 2.940275651×0.057 = 0.167595712
69 | - new dimensions will be -73.605 with +/- 0.167595712,
70 | so (-73.772595712, -73.437404288)
71 | - you might need to adjust these longitudes a tiny bit to get the right pixel
72 | dimension, e.g. I got 1922x1080 with the exact numbers and had to trim
73 | to -73.7725, -73.4375. Try to keep the centre in the same position, notice
74 | how (-73.7725 + -73.4375)/2 is -73.605.
75 |
76 | Put the adjusted limits into 'MAP_LIMITS' key in city info dict in city.py.
77 | Put the scale and the direct URL for the image in a comment for reference.
78 |
79 | If you don't specify a 'MAP_SIZES' key, systems._fill_in_city_information()
80 | will default it to 1920x1080.
81 |
82 |
83 | 3. Process image
84 |
85 | Greyscale the map image: in gimp, use hue/lightness/saturation and change
86 | lightness = 100, saturation = -100
87 |
88 | then pngcrush to make the file size smaller
89 |
90 |
91 | 4. Labels
92 |
93 | Label params are specified in 'LABELS' key in the city info dict.
94 | This is only required for graphs created in a loop (used to make videos
95 | automatically, this is done in analysis.graph.make_graph()).
96 |
97 | Labels are created with a `matplotlib.axes.Axes.text()` call, and you need
98 | to specify the following in a dict:
99 | - 'fontsizes' takes a list of 4 integers that are used for size of font
100 | to print city name, date, time, and number of currently available cars
101 | - 'lines' locates these 4 lines of text on the image, this is a list
102 | of 4 X,Y tuples, with 0,0 being at bottom left of image.
103 |
--------------------------------------------------------------------------------
/electric2go/analysis/merge.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import print_function
4 | from datetime import timedelta
5 |
6 | from . import cmdline, normalize
7 |
8 |
def merge_two_dicts(one, two):
    """
    Merge two result_dicts covering consecutive time periods:
    - second dict's unstarted_trips are merged with first dict's unfinished_parkings and unfinished_trips as appropriate
    - key merge on finished_trips, appending
    - key merge on finished_parkings, appending
    - merge metadata:
      - system, city, time_step stay the same
      - missing from second is appended to missing from first
      - starting_time from first dict
      - ending_time from second dict
    :param one: first result_dict. if None, `two` is returned immediately
    :param two: second result_dict
    :return: merged result_dict
    :raises ValueError: if the two dicts are not temporally consecutive
    """

    if not one:
        # first iteration so `one` doesn't have an existing result yet,
        # just return the `two`
        return two

    def merge(one_sub, two_sub, key):
        # Per-vin merge of a dict of lists: append two's lists onto one's,
        # adopting two's list wholesale for vins one hasn't seen.
        for vin_sub in two_sub[key]:
            if vin_sub in one_sub[key]:
                one_sub[key][vin_sub].extend(two_sub[key][vin_sub])
            else:
                one_sub[key][vin_sub] = two_sub[key][vin_sub]

        return one_sub

    one_ending_time = one['metadata']['ending_time']
    two_starting_time = two['metadata']['starting_time']

    # `two` must start exactly one time_step after `one` ends, otherwise
    # trips/parkings spanning the boundary cannot be reconstructed.
    time_step = timedelta(seconds=one['metadata']['time_step'])
    should_be_second_starting_time = one_ending_time + time_step
    if two_starting_time != should_be_second_starting_time:
        raise ValueError("Files don't appear to be in order. ending_time and starting_time "
                         "must be consecutive, but instead they are {} and {}"
                         .format(one_ending_time, two_starting_time))

    # Stitch together activity that spans the boundary between the dicts.
    for vin in two['unstarted_trips']:
        unstarted_trip = two['unstarted_trips'][vin]

        if (vin in one['unfinished_parkings']
            and unstarted_trip['end']['time'] == two_starting_time
            and one['unfinished_parkings'][vin]['lat'] == unstarted_trip['end']['lat']
            and one['unfinished_parkings'][vin]['lng'] == unstarted_trip['end']['lng']):

            # most common case, cars that were parked over the break

            if (vin in two['finished_parkings']
                and two['finished_parkings'][vin][0]['starting_time'] == two_starting_time):

                # merge unfinished parking with first one in two['finished_parkings']

                parking_info = two['finished_parkings'][vin][0]
                parking_info_start = one['unfinished_parkings'][vin]

                parking_info['starting_time'] = parking_info_start['starting_time']
                parking_info = normalize.calculate_parking(parking_info)

                two['finished_parkings'][vin][0] = parking_info
                one['unfinished_parkings'].pop(vin)  # delete

            else:
                # Cars were parked over the break but then didn't move at all the next day.
                # Keep it as unfinished parking, without deleting it from the list.
                # Because of code later on that updates one['unfinished_parkings'] with
                # two['unfinished_parkings'], update the latter, giving it correct starting_time.

                two['unfinished_parkings'][vin] = one['unfinished_parkings'][vin]

        elif vin in one['unfinished_trips']:
            # trip spanning the break, merge the information from unfinished_trips and unstarted_trips
            # then append to finished_trips

            trip_data = one['unfinished_trips'][vin]
            trip_data.update(unstarted_trip)

            trip_data = normalize.calculate_trip(trip_data)

            if vin in one['finished_trips']:
                one['finished_trips'][vin].append(trip_data)
            else:
                one['finished_trips'][vin] = [trip_data]

            one['unfinished_trips'].pop(vin)  # delete

        else:
            # could be a brand new car entering service, log it
            one['unstarted_trips'][vin] = unstarted_trip

    # append-merge the per-vin activity lists from `two` into `one`
    one = merge(one, two, 'finished_trips')
    one = merge(one, two, 'finished_parkings')

    # whatever is still open at the end of `two` stays open in the result
    one['unfinished_parkings'].update(two['unfinished_parkings'])
    one['unfinished_trips'].update(two['unfinished_trips'])

    one['metadata']['missing'].extend(two['metadata']['missing'])

    one['metadata']['ending_time'] = two['metadata']['ending_time']

    return one
112 |
113 |
def merge_all_dicts(dicts):
    """Fold an iterable of result_dicts into a single merged result_dict.

    Returns None when the iterable is empty.
    """
    merged = None
    for current_dict in dicts:
        merged = merge_two_dicts(merged, current_dict)

    return merged
120 |
121 |
def load_all_files(files):
    """Lazily open each named file and yield its parsed JSON contents."""
    for file_path in files:
        with open(file_path) as handle:
            yield cmdline.read_json(handle)
126 |
127 |
def merge_all_files(files):
    """Load the result_dicts stored in the given JSON files and merge them."""
    loaded_dicts = load_all_files(files)
    return merge_all_dicts(loaded_dicts)
130 |
--------------------------------------------------------------------------------
/electric2go/systems/car2go/parse.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 |
# Mapping of normalized electric2go key -> car2go API key,
# split by how often the value is expected to change for a given car.
# Used by get_car_*_properties() and put_car() below.
KEYS = {
    'changing': {
        # must be handled manually: coordinates = (lat, lng, 0), charging
        # in the API, 'charging' key is only set on electric cars

        'address': 'address',
        'cleanliness_interior': 'interior',
        'cleanliness_exterior': 'exterior',
        'fuel': 'fuel'
    },

    # things that are expected to not change at all for a given car VIN/ID
    # during a reasonable timescale (1 week to 1 month)
    'unchanging': {
        # must be handled manually: electric

        'vin': 'vin',

        'app_required': 'smartPhoneRequired',
        'fuel_type': 'engineType',
        'license_plate': 'name'
    }

    # TODO: web interface is expecting 'model'. that's not in v2.1 API and has to be guessed from VIN.
}
29 |
30 |
def get_cars(system_data_dict):
    """Return the list of raw car dicts from a car2go API response."""
    if 'placemarks' in system_data_dict:
        return system_data_dict['placemarks']
    return []
33 |
34 |
def get_cars_dict(system_data_dict):
    """Key each raw car dict by its VIN.

    The 'vin' key here must match the first item returned
    from get_car_basics().
    """
    cars_by_vin = {}
    for car in get_cars(system_data_dict):
        cars_by_vin[car['vin']] = car
    return cars_by_vin
39 |
40 |
def get_everything_except_cars(system_data_dict):
    """Return a shallow copy of the API response with the car list removed.

    :param system_data_dict: raw car2go API response dict
    :return: new dict; the input is not modified
    """
    result = system_data_dict.copy()
    # pop() instead of `del` so a response without 'placemarks'
    # (e.g. an error payload) doesn't raise KeyError; this matches
    # get_cars() above, which also tolerates a missing key.
    result.pop('placemarks', None)
    return result
45 |
46 |
def get_car_basics(car):
    """Return (vin, lat, lng) for a raw car2go car dict.

    The API stores position as a (lng, lat, elevation) triple,
    so indices 1 and 0 are swapped here.
    """
    coordinates = car['coordinates']
    lat = coordinates[1]
    lng = coordinates[0]
    return car['vin'], lat, lng
49 |
50 |
def get_car_unchanging_properties(car):
    """
    Gets car properties that are expected to not change at all
    for a given car VIN/ID during a reasonable timescale (1 week to 1 month)
    :param car: car info in original system JSON-dict format
    :return: dict with keys mapped to common electric2go format
    """

    result = {}
    for mapped_key, original_key in KEYS['unchanging'].items():
        result[mapped_key] = car[original_key]

    # derived field: 'ED' is car2go's engine-type code for electric drive
    result['electric'] = car['engineType'] == 'ED'

    return result
66 |
67 |
def get_car_changing_properties(car):
    """
    Gets car properties that change during a trip
    :param car: car info in original system JSON-dict format
    :return: dict with keys mapped to common electric2go format
    """

    result = {}
    for mapped_key, original_key in KEYS['changing'].items():
        result[mapped_key] = car[original_key]

    # position is stored as a triple, not flat keys; reuse the basics parser
    _, lat, lng = get_car_basics(car)
    result['lat'] = lat
    result['lng'] = lng

    # in the API, 'charging' is only present on electric cars;
    # default to False for the rest
    result['charging'] = car.get('charging', False)

    return result
87 |
88 |
def get_car(car):
    """Normalize a raw car2go car dict to the common electric2go format.

    TODO: this is only used by web-related things, see if they can/should be migrated
    """
    vin = get_car_basics(car)[0]

    normalized = {'vin': vin}
    normalized.update(get_car_changing_properties(car))
    normalized.update(get_car_unchanging_properties(car))

    return normalized
99 |
100 |
def get_range(car):
    """Estimate remaining driving range in km from the charge level.

    Wikipedia quotes full charge range 135 km (NEDC), car2go quotes 130 km;
    use 130 km. car2go policy is that less than 20% charge remaining
    requires ending the trip at a charging point, so 20% is treated as
    effectively empty.
    """
    if 'fuel' not in car:
        # raw API dict, not yet normalized to electric2go format
        car = get_car(car)

    usable_percent = car['fuel'] - 20
    return int(1.3 * usable_percent) if usable_percent > 0 else 0
116 |
117 |
def put_cars(cars, result_dict):
    """Inverse of get_cars: wrap a car list back into API shape.

    car2go responses contain nothing besides the car list,
    so `result_dict` is accepted for interface parity but unused.
    """
    return {'placemarks': cars}
124 |
125 |
def put_car(car):
    """Inverse of get_car: map a normalized car dict back to API format.

    :param car: car dict in common electric2go format
    :return: dict in car2go API format
    """

    # Build a fresh merged mapping. The previous code did
    # `mapped_keys = KEYS['unchanging']; mapped_keys.update(...)`, which
    # mutated the module-level KEYS constant in place, permanently mixing
    # 'changing' keys into KEYS['unchanging'] for every later caller.
    mapped_keys = dict(KEYS['unchanging'])
    mapped_keys.update(KEYS['changing'])

    formatted_car = {original_key: car[mapped_key]
                     for mapped_key, original_key in mapped_keys.items()}

    # the API stores position as a (lng, lat, elevation) triple
    formatted_car['coordinates'] = (car['lng'], car['lat'], 0)

    # in the API, 'charging' key is only present on electric cars
    if car['electric']:
        formatted_car['charging'] = car['charging']

    return formatted_car
143 |
144 |
def get_car_parking_drift(car):
    """
    Gets properties that can change during a parking period but aren't
    considered to interrupt the parking.
    These are things like a car charging while being parked.
    :return: a hashable object
    """

    # TODO: reported address can also change during the parking
    # see Austin 2016-07-27 07:05 to 07:10, WMEEJ3BA4FK802009

    # TODO: reported licence plate ('name' key) can *also* change
    # see Austin 2016-07-27 20:14, WMEEJ3BA3EK735465
    # so much for unchanging... will have to regen the whole Austin month

    # 'charging' is only present on electric cars, hence the default
    return car['fuel'], car.get('charging', None)
163 |
164 |
def put_car_parking_drift(car, d):
    """
    Update `car`'s properties that might have changed during a parking period.
    :param d: must be a result of get_car_parking_drift()
    :return: the same `car` dict, updated in place
    """

    fuel, charging = d

    car['fuel'] = fuel

    # TODO: needs testing with a system with electric cars. I don't think there are
    # any mixed systems anymore, so just test two systems separately
    if charging:
        car['charging'] = charging

    return car
179 |
--------------------------------------------------------------------------------
/web/index.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | from __future__ import unicode_literals
5 | from __future__ import print_function
6 | import time
7 |
8 | from jinja2 import Environment, PackageLoader
9 |
10 | import web_helper
11 | from web_helper import systems
12 |
13 |
# For zoom=15 and size 300x250, the map is less than 0.02 degrees across
# in both directions. In practice the observed value varies from
# roughly 0.007385 degrees latitude to roughly 0.013326 degrees longitude
# (both in Vancouver), with numbers in other cities both north and south
# of Vancouver's latitude (Austin, Berlin) being fairly close.
# If we change displayed map size, we might also need to update this value,
# or come up with a formula to estimate it based on map size and zoom level.
# Used by get_car_info() below to decide which other cars fit on a car's map.
MAP_SIZE_IN_DEGREES = 0.02
22 |
23 |
def google_api_key():
    """Read the Google Maps API key from the 'google_api_key' file
    in the working directory; return '' if the file can't be read."""
    try:
        with open('google_api_key', 'r') as key_file:
            return key_file.read().strip()
    except IOError:
        # no key file present - templates get an empty key
        return ''
32 |
33 |
def format_latlng(car):
    """Format a car's position as 'lat,lng' with six decimal places."""
    return '%f,%f' % (car['lat'], car['lng'])
36 |
37 |
def get_car_info(car, all_cars, city):
    """Extract information specific for web display for one car.

    :param car: normalized car dict
    :param all_cars: all cars in the city, scanned for nearby cars
    :param city: city dict, passed through to web_helper.fill_in_car
    :return: dict of template-ready values
    """

    car = web_helper.fill_in_car(car, city)

    coords = format_latlng(car)

    title = car['address']
    if title == '':
        # communauto doesn't provide the geocoded address. use license plate
        title = car['license_plate']

    # Show other nearby cars on map if they are within the map area.
    # Include only the cars that would actually fit on the map
    # (given zoom level and distance from this car's coords)
    # to avoid unnecessarily long image URLs.
    # We do this by simple subtraction of latitudes/longitudes and comparing
    # against a reference value (declared with comments above).
    # This has some error compared to proper Haversine distance calculation,
    # but at scales involved (~1 km) this shouldn't really matter, especially
    # given the roughly 50-100% margin of error in the reference
    # degree difference value.
    def fits_on_map(first, second):
        lat_delta = abs(first['lat'] - second['lat'])
        lng_delta = abs(first['lng'] - second['lng'])
        return lat_delta < MAP_SIZE_IN_DEGREES and lng_delta < MAP_SIZE_IN_DEGREES

    other_ll = []
    for other in all_cars:
        shares_a_coordinate = (other['lat'] == car['lat']
                               or other['lng'] == car['lng'])
        if not shares_a_coordinate and fits_on_map(car, other):
            other_ll.append(format_latlng(other))

    # provide a value that will have either address or coords.
    # some systems don't provide geocoded address.
    address_for_map = car['address'] if car['address'] != '' else coords

    return {
        'title': title,
        'license_plate': car['license_plate'],
        'charge': car['fuel'],
        'range': car['range'],
        'model': car['model'],
        'coords': coords,
        'vin': car['vin'],
        'address_or_coords': address_for_map,
        'other_cars_ll': other_ll,
        'cleanliness_interior': car.get('cleanliness_interior', ''),
        'cleanliness_exterior': car.get('cleanliness_exterior', '')
    }
86 |
87 |
def pluralize(count, string, end_ptr=None, rep_ptr=''):
    """Return '<count> <label>' with `string` pluralized when count != 1.

    :param end_ptr: optional suffix to strip before pluralizing
    :param rep_ptr: replacement for the stripped suffix
        (e.g. pluralize(2, 'city', 'y', 'ies') -> '2 cities')
    """
    if int(count) == 1:
        label = string
    elif end_ptr and string.endswith(end_ptr):
        # irregular plural: swap the given suffix
        label = string[:-len(end_ptr)] + rep_ptr
    else:
        # regular plural: just append an 's'
        label = string + 's'

    return '{:.0f} {}'.format(count, label)
97 |
98 |
def print_all_html():
    """Render the car-listing page and print it as a CGI HTTP response."""
    print('Content-type: text/html\n')

    # trim_blocks/lstrip_blocks keep Jinja control tags from leaving
    # stray whitespace in the rendered HTML
    env = Environment(loader=PackageLoader('frontend', '.'), trim_blocks=True, lstrip_blocks=True)
    # 'count' filter used by templates to pluralize labels
    env.filters['count'] = pluralize

    requested_city = web_helper.get_system_and_city(allow_any_city=False)
    electric_cars, cache = web_helper.get_electric_cars(requested_city)

    # get list of cities
    # NOTE(review): only cities flagged 'electric': 'some' are listed here -
    # presumably all-electric cities are handled elsewhere; confirm
    all_cities = (city for system in web_helper.ALL_SYSTEMS
                  for city in systems.get_all_cities(system).values()
                  if city['electric'] == 'some')
    all_cities = sorted(all_cities, key=lambda c: c['name'])

    # get car details
    car_infos = [get_car_info(car, electric_cars, requested_city) for car in electric_cars]

    car_models = set(car['model'] for car in car_infos)

    # supplementary information
    # `cache` appears to be a timestamp, or falsy when there was no cache hit
    cache_age = (time.time() - cache) if cache else cache
    cache_next_refresh = web_helper.CACHE_PERIOD - cache_age

    # render big template
    tmpl_layout = env.get_template('layout.html')
    full_html = tmpl_layout.render(displayed_city=requested_city,
                                   cities=all_cities,
                                   all_cars=car_infos,
                                   all_car_models=car_models,
                                   cache_age=cache_age,
                                   cache_next_refresh=cache_next_refresh,
                                   google_api_key=google_api_key())

    try:
        # this works straight-up on Python 3
        print(full_html)
    except UnicodeEncodeError:
        # Python 2 needs an explicit encode in some cases,
        # particularly when using BaseHTTPServer module
        print(full_html.encode('utf-8'))
140 |
141 |
# CGI entry point: emit the full HTML page when executed as a script
if __name__ == '__main__':
    print_all_html()
144 |
--------------------------------------------------------------------------------
/web/frontend/sort.js:
--------------------------------------------------------------------------------
/* sort.js: geolocate the user and reorder the car list by walking distance */
2 |
function get_location() {
    // Ask the browser for the user's position, then reorder the car
    // list via order_cars. Any failure falls through silently.
    try {
        // enableHighAccuracy is left to default to false;
        // timeout is 2 seconds, to reposition cars reasonably quickly;
        // maximum age is a minute, users are unlikely to move fast
        var geoOptions = {timeout: 2000, maximumAge: 60000};
        navigator.geolocation.getCurrentPosition(
            order_cars, handle_error, geoOptions);
    } catch (err) {
        // fail silently
    }
}
14 |
function handle_error() {
    // Intentionally empty: the fallback is the server-provided default
    // ordering, which is acceptable.
}
18 |
function order_cars(position) {
    // Sort the car entries in the DOM by distance from the user's
    // position and annotate each with its distance / walking time.
    // `position` is a Geolocation API Position object.
    try {
        var user_lat = position.coords.latitude;
        var user_lng = position.coords.longitude;

        // get a list of all car latlngs and calculate
        // distances from user's position
        var car_list = document.querySelectorAll(".sort");
        var cars = [];
        for (var i = 0; i < car_list.length; i++) {
            var car_latlng = car_list[i].getAttribute("data-loc")
                .split(",");
            var car_dist = calculate_distance(user_lat, user_lng,
                car_latlng[0], car_latlng[1]);

            cars.push([ car_dist, car_list[i] ]);
        }

        // FIX: declared with `var` - previously this assignment created
        // an implicit global (and would throw in strict mode).
        var nearby_cars = cars.filter(function (car_data) {
            return car_data[0] < 20;
        });
        if (nearby_cars.length == 0) {
            // don't reorder cars if there aren't any within 20 km.
            // no point showing distance for cars on another continent.
            return;
        }

        // sort based on distance - distance is stored in cars[i][0]
        cars.sort(function(a, b) {
            var dst_a = a[0];
            var dst_b = b[0];
            return dst_a < dst_b ? -1 : (dst_a > dst_b ? 1 : 0);
        });

        // if user has been geolocated as close by to at least one of the cars,
        // add a marker indicating the user's position to the overview map.
        // the use of the 2.4 km/30 min walk radius is a bit of a hack
        // since I currently don't define the limits of the overview map
        // and instead let google size it automatically based on the included
        // marker. if I add a marker in a city across a continent,
        // the overview map won't be terribly useful, so avoid doing that
        // with an "at least one car in walking distance" heuristic.
        if (cars[0][0] <= 2.4) { // distance of the closest car
            var mapImage = document.getElementById('multimap');
            if (mapImage) {
                var withLocMarker = mapImage.src;
                withLocMarker += "&markers=color:blue|size:small|";
                withLocMarker += user_lat + "," + user_lng;
                mapImage.src = withLocMarker;
            }
        }

        // sort list of cars in the DOM by approx distance,
        // and add it into the DOM using the template message
        for (var i = 0; i < cars.length; i++) {
            var dist = cars[i][0]; // the distance
            var in_minutes = "";
            var para = cars[i][1]; // the DOM object with car info
            var dist_span = para.querySelectorAll(".distance")[0];

            if (dist <= 2.4) {
                // for less than 30 min walk, show approx walk duration
                // (12.5 min/km, i.e. ~4.8 km/h walking speed)
                in_minutes = Math.floor(dist * 12.5);
                in_minutes = dist_span.getAttribute("data-template-minutes")
                    .replace("{min}", in_minutes);
            }

            if (cars.length > 1) {
                // if .length is 1 or 0, no sorting is required

                var parent = para.parentNode;
                var prev;

                // remove objects, wherever they are, and
                // append them in in new order.
                // first one (i === 0) is appended wherever,
                // as long as it's within the list (here,
                // after the second one), and the rest are
                // appended after it in order

                // doing it this way allows having the list
                // in DOM root, next to header/footer, without
                // requiring a wrapping element

                if (i === 0) {
                    // cars[1] exists since .length is > 1
                    prev = cars[1][1];
                } else {
                    prev = cars[i-1][1];
                }

                parent.removeChild(para);
                parent.insertBefore(para, prev.nextSibling);
            }

            // add distance information for each car
            var dist_str = dist_span.getAttribute("data-template");
            // trim distance to one decimal digit
            dist_str = dist_str.replace("{dist}", dist.toFixed(1));
            dist_str = dist_str.replace("{minutes}", in_minutes);
            dist_span.innerHTML = dist_str;
        }
    } catch (err) {
        // fail silently - this is only an enhancement
    }
}
125 |
function calculate_distance(lat1, lng1, lat2, lng2) {
    // Haversine great-circle distance between two points, in km.
    // from http://www.movable-type.co.uk/scripts/latlong.html
    // see also http://stackoverflow.com/questions/27928
    var toRadians = function (deg) {
        return deg * (Math.PI / 180);
    };

    var earthRadiusKm = 6371;
    var dLat = toRadians(lat2 - lat1);
    var dLng = toRadians(lng2 - lng1);

    var a = Math.sin(dLat / 2) * Math.sin(dLat / 2) +
            Math.cos(toRadians(lat1)) * Math.cos(toRadians(lat2)) *
            Math.sin(dLng / 2) * Math.sin(dLng / 2);

    // distance in km
    return earthRadiusKm * 2 * Math.asin(Math.sqrt(a));
}
144 |
// Run once the DOM is ready. The previous `document.onload = get_location()`
// invoked get_location immediately and assigned its undefined return value
// to `document.onload` (a property that never fires); listening for
// DOMContentLoaded ensures the car list exists before we query it.
document.addEventListener('DOMContentLoaded', get_location);
146 |
147 |
--------------------------------------------------------------------------------
/electric2go/systems/drivenow/city.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from __future__ import unicode_literals
4 |
5 | from os import path
6 |
7 |
# Load a personal Drivenow API key from an 'api_key' file next to this
# module if one exists; otherwise fall back to the public key.
try:
    with open(path.join(path.dirname(__file__), 'api_key'), 'r') as f:
        API_KEY = f.read().strip()
except IOError:
    API_KEY = 'adf51226795afbc4e7575ccc124face7'  # default key used by drivenow.com
13 |
14 |
# Per-city configuration. Only 'loc_key' is required; the loop below this
# constant fills in API URL and headers for every city. Optional keys seen
# here: 'electric' ('some'/'all'), 'display' (English display name),
# 'localized' (per-language names), plus map/graphing config
# ('BOUNDS', 'MAP_LIMITS', 'DEGREE_LENGTHS', 'MAP_SIZES', 'LABELS').
CITIES = {
    'berlin': {
        'loc_key': 6099,
        'electric': 'some',
        'BOUNDS': {
            # actual bounds based on operation areas are
            # 52.572632, 52.389571, 13.565218, 13.183849

            # use slightly wider values to allow for GPS wobble
            'NORTH': 52.573,
            'SOUTH': 52.389,
            'EAST': 13.566,
            'WEST': 13.183
        },
        'DEGREE_LENGTHS': {
            # for latitude 52.52
            'LENGTH_OF_LATITUDE': 111277.17,
            'LENGTH_OF_LONGITUDE': 67879.39
        },
        'MAP_SIZES': {
            'MAP_X': 1920,
            'MAP_Y': 1080
        },
        'MAP_LIMITS': {
            # Fit Berlin's car2go and Drivenow in one 16:9 image.
            # Before expanding to 16:9, the limits were 52.576767, 52.38927, 13.565218, 13.181448
            # At 1920x1080 pixels, 16:9, the map is:
            # http://render.openstreetmap.org/cgi-bin/export?bbox=13.099773,52.38927,13.646893,52.576767&scale=113281&format=png
            'NORTH': 52.576767,
            'SOUTH': 52.38927,
            'EAST': 13.646893,
            'WEST': 13.099773
        }
    },
    'brussels': {
        'loc_key': 42619
    },
    'kobenhavn': {
        'loc_key': 41369,
        'electric': 'all',
        'display': 'Copenhagen'
    },
    'duesseldorf': {
        'loc_key': 1293,
        'display': 'Düsseldorf',
    },
    'hamburg': {
        'loc_key': 40065,
        'electric': 'some'
    },
    'koeln': {
        'loc_key': 1774,
        'display': 'Cologne',
        'localized': {
            'de': 'Köln'
        }
    },
    'london': {
        'loc_key': 40758,
        'electric': 'some',
        'BOUNDS': {
            'NORTH': 51.612,  # exact value is 51.611141
            'SOUTH': 51.518,  # exact value is 51.518598
            'EAST': 0.022,  # exact value is 0.021994
            'WEST': -0.165  # exact value is -0.164666
        },
        'MAP_LIMITS': {
            # http://render.openstreetmap.org/cgi-bin/export?bbox=-0.20593,51.518,0.06293,51.612&scale=55659&format=png
            'NORTH': 51.612,
            'SOUTH': 51.518,
            'EAST': 0.06293,
            'WEST': -0.20593
        },
        'DEGREE_LENGTHS': {
            # for latitude 51.56
            'LENGTH_OF_LATITUDE': 111258.94,
            'LENGTH_OF_LONGITUDE': 69349.27
        },
        'MAP_SIZES': {
            'MAP_X': 1920,
            'MAP_Y': 1080
        },
        'LABELS': {
            'fontsizes': [35, 22, 30, 18],
            'lines': [
                (250, 210),
                (250, 170),
                (250, 130),
                (250, 95)
            ]
        }
    },
    'milano': {
        'loc_key': 42756,
    },
    'muenchen': {
        'loc_key': 4604,
        'electric': 'some',
        'display': 'Munich',
        'localized': {
            'de': 'München'
        }
    },
    'stockholm': {
        'loc_key': 42128
    },
    'wien': {
        'loc_key': 40468,
        'display': 'Vienna',
        'localized': {
            'de': 'Wien'
        }
    }
}
129 |
API_AVAILABLE_VEHICLES_URL = 'https://api2.drive-now.com/cities/{loc}?expand=cars'

# fill in city data that can be assumed and autogenerated
# (iterate values() directly: the key was previously bound but never used,
# and leaked a stray `city` name into the module namespace)
for city_data in CITIES.values():
    city_data['of_interest'] = True  # we want everything for now

    city_data['API_AVAILABLE_VEHICLES_URL'] = API_AVAILABLE_VEHICLES_URL.format(loc=city_data['loc_key'])
    # headers mimic the official web client - presumably required by the
    # Drivenow API; see API_KEY above
    city_data['API_AVAILABLE_VEHICLES_HEADERS'] = {
        'X-Api-Key': API_KEY,
        'User-Agent': 'electric2go',
        'Accept': 'application/json',
        'Referer': 'https://de.drive-now.com/',
        'Origin': 'https://de.drive-now.com'
    }
144 |
145 |
def get_operation_areas(city_key):
    """Fetch and parse the KML operation-area description for a city."""
    import requests

    kml_url_template = 'https://api2.drive-now.com/geodata/{loc_key}/{loc_key}.kml'
    loc_key = CITIES[city_key]['loc_key']

    response = requests.get(kml_url_template.format(loc_key=loc_key),
                            headers={'User-Agent': 'electric2go'})

    return get_areas_from_kml(response.content, '{http://www.opengis.net/kml/2.2}')
155 |
156 |
def get_areas_from_kml(kml_bytes, ns):
    """Extract parking-area Placemark details from a KML document.

    :param kml_bytes: raw KML document bytes
    :param ns: XML namespace prefix, e.g. '{http://www.opengis.net/kml/2.2}'
    """
    from lxml import etree

    root = etree.fromstring(kml_bytes)

    results = []
    for folder in root.findall('.//' + ns + 'Folder'):
        # only the allowedParkingAreas folder holds the zones of interest
        if folder.find(ns + 'name').text == 'allowedParkingAreas':
            placemarks = folder.findall(ns + 'Placemark')
            results.extend(get_details_from_kml(placemark, ns)
                           for placemark in placemarks)

    return results
174 |
175 |
def get_details_from_kml(area, ns):
    """Pull the interesting fields out of a single Placemark element."""
    name = area.find(ns + 'name').text

    # JSON-formatted description for the parking area
    description = area.find(ns + 'description').text

    # KML-string-formatted, process with get_max_latlng()
    coordinates = area.find('.//' + ns + 'coordinates').text

    return {
        'name': name,
        'description': description,
        'coordinates': coordinates
    }
186 |
187 |
def get_max_latlng(area):
    """Return (max_lat, min_lat, max_lng, min_lng) for a KML coordinate list.

    area['coordinates'] looks like:
    -0.121107,51.530953,0 -0.12156,51.53088,0 -0.122024,51.53085,0 ...
    i.e. whitespace-separated 'lng,lat,elevation' triples.
    Elevation is always 0 for Drivenow, so it is ignored.
    """

    latitudes = []
    longitudes = []
    for triple in area['coordinates'].split():
        lng_str, lat_str = triple.split(',')[:2]
        latitudes.append(float(lat_str))
        longitudes.append(float(lng_str))

    return max(latitudes), min(latitudes), max(longitudes), min(longitudes)
204 |
205 |
def print_operation_areas(city_key):
    """Print a human-readable summary of a city's parking zones."""
    import json

    for area in get_operation_areas(city_key):
        # the area description is a JSON blob with 'name' and 'type'
        description = json.loads(area['description'])
        print('{name}: {type} zone'.format(**description))
        print('border points: {len}, bounds: {bounds}'.format(
            len=len(area['coordinates']), bounds=get_max_latlng(area)))
216 |
217 |
def get_latlng_extent(city_key):
    """Return (max_lat, min_lat, max_lng, min_lng) across all of a city's
    operation areas."""
    areas = get_operation_areas(city_key)

    all_lats = []
    all_lngs = []

    # collect max lats and longs across all 'operation areas'
    for area in areas:
        max_lat, min_lat, max_lng, min_lng = get_max_latlng(area)
        all_lats.extend((max_lat, min_lat))
        all_lngs.extend((max_lng, min_lng))

    # return max/mins for all operation areas
    return max(all_lats), min(all_lats), max(all_lngs), min(all_lngs)
234 |
--------------------------------------------------------------------------------
/electric2go/systems/drivenow/parse.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from ...analysis.cmdline import json
4 |
5 |
# Mapping of normalized electric2go key -> Drivenow API key,
# split by how often the value is expected to change for a given car.
# Used by get_car_*_properties() and put_car() below.
KEYS = {
    'changing': {
        # must be handled manually: address, price_offer

        # properties that indicate start of new parking period:
        'lat': 'latitude',
        'lng': 'longitude',

        # properties that can change during a parking period:
        'charging': 'isCharging',
        'fuel': 'fuelLevelInPercent',
        'api_estimated_range': 'estimatedRange',
        # also price_offer: car['rentalPrice']['isOfferDrivePriceActive']
        # also car['rentalPrice']['offerDrivePrice'] dict is present or not, depending on if offer is active

        # properties that can only change during a drive:
        'cleanliness_interior': 'innerCleanliness',
        'parkingSpaceId': 'parkingSpaceId',
        'isInParkingSpace': 'isInParkingSpace'
    },

    # things that are expected to not change at all for a given car VIN/ID
    # during a reasonable timescale (1 week to 1 month)
    'unchanging': {
        # must be handled manually: electric

        'vin': 'id',

        'name': 'name',
        'license_plate': 'licensePlate',

        'model': 'modelName',
        'color': 'color',

        'fuel_type': 'fuelType',

        'transmission': 'transmission',

        # this is a dict and one of its properties can change, noted in a comment in 'changing'
        'rentalPrice': 'rentalPrice',

        # the below is extra info, not widely used, no keys are renamed
        'make': 'make',
        'group': 'group',
        'series': 'series',
        'modelIdentifier': 'modelIdentifier',
        'equipment': 'equipment',
        'carImageUrl': 'carImageUrl',
        'carImageBaseUrl': 'carImageBaseUrl',
        'routingModelName': 'routingModelName',
        'variant': 'variant',
        'isPreheatable': 'isPreheatable'
    }
}
60 |
61 |
def get_cars(system_data_dict):
    """Return the list of raw car dicts from a Drivenow API response."""
    # TODO: perhaps instead duck-type system_data_dict keys and raise "wrong system" exception in case of KeyError?
    cars_section = system_data_dict.get('cars', {})
    return cars_section.get('items', [])
69 |
70 |
def get_cars_dict(system_data_dict):
    """Key each raw car dict by its 'id'.

    The 'id' key here must match the first item returned
    from get_car_basics().
    """
    cars_by_id = {}
    for car in get_cars(system_data_dict):
        cars_by_id[car['id']] = car
    return cars_by_id
75 |
76 |
def get_everything_except_cars(system_data_dict):
    """Return a copy of the API response with the car list removed.

    :param system_data_dict: raw Drivenow API response dict
    :return: new dict; the input (including its nested 'cars' dict)
        is not modified
    """
    result = system_data_dict.copy()

    # The previous code popped 'items' from the shared nested dict, which
    # silently deleted the car list from the caller's system_data_dict too
    # (dict.copy() is shallow). Copy the nested 'cars' dict before editing.
    if 'cars' in result:
        cars_section = dict(result['cars'])
        cars_section.pop('items', None)
        result['cars'] = cars_section

    return result
86 |
87 |
def get_car_basics(car):
    """Return (id, lat, lng) for a raw Drivenow car dict."""
    return (car['id'],
            car['latitude'],
            car['longitude'])
90 |
91 |
def get_car_unchanging_properties(car):
    """
    Gets car properties that are expected to not change at all
    for a given car VIN/ID during a reasonable timescale (1 week to 1 month)
    :param car: car info in original system JSON-dict format
    :return: dict with keys mapped to common electric2go format
    """

    result = {mapped_key: car[original_key]
              for mapped_key, original_key in KEYS['unchanging'].items()}

    # derived field that can't be done automatically with a key mapping;
    # 'E' is Drivenow's fuel-type code for electric
    result['electric'] = car['fuelType'] == 'E'

    return result
107 |
108 |
def get_car_changing_properties(car):
    """
    Gets car properties that change during a trip
    :param car: car info in original system JSON-dict format
    :return: dict with keys mapped to common electric2go format
    """

    result = {}
    for mapped_key, original_key in KEYS['changing'].items():
        result[mapped_key] = car.get(original_key, None)

    # derived fields that can't be done automatically with a key mapping
    result['address'] = ', '.join(car['address'])

    rental_price = car['rentalPrice']
    result['price_offer'] = rental_price['isOfferDrivePriceActive']
    # 'offerDrivePrice' is only present while an offer is active
    result['price_offer_details'] = rental_price.get('offerDrivePrice', {})

    return result
126 |
127 |
def get_car(car):
    """Normalize a raw Drivenow car dict to the common electric2go format.

    TODO: this is only used by web-related things, see if they can/should be migrated
    """
    combined = get_car_unchanging_properties(car)
    combined.update(get_car_changing_properties(car))
    return combined
135 |
136 |
def get_range(car):
    """Estimate remaining driving range in km from the charge level.

    Wikipedia quotes 130-160 km range (NEDC), Drivenow claims up to 160 km;
    use 130 km exactly. Drivenow policy is that less than 10 miles range
    remaining requires ending the trip at a charging point, so
    10 mi = 16 km = ~12% is treated as effectively empty.

    TODO: could try using estimatedRange if included in API response,
    presumably Drivenow has a better estimate than we could calculate
    """
    if 'fuel' not in car:
        # means we got a verbatim JSON object, not yet parsed to common format
        car = get_car(car)

    usable_percent = car['fuel'] - 12
    return int(1.3 * usable_percent) if usable_percent > 0 else 0
157 |
158 |
def put_cars(cars, result_dict):
    """Inverse of get_cars: re-embed a car list into the saved API shape.

    :param result_dict: normalized result_dict whose 'system' key holds
        the rest of the original API response
    :return: new dict; result_dict is not modified
    """
    result = result_dict['system'].copy()
    # also copy the nested 'cars' dict - dict.copy() is shallow, so
    # assigning into the shared nested dict would silently modify
    # result_dict['system'] as well
    result['cars'] = dict(result['cars'])
    result['cars']['items'] = cars
    result['cars']['count'] = len(cars)
    return result
165 |
166 |
def put_car(car):
    """Inverse of get_car: map a normalized car dict back to API format.

    :param car: car dict in common electric2go format
    :return: dict in Drivenow API format
    """

    # Build a fresh merged mapping. The previous code did
    # `mapped_keys = KEYS['unchanging']; mapped_keys.update(...)`, which
    # mutated the module-level KEYS constant in place, permanently mixing
    # 'changing' keys into KEYS['unchanging'] for every later caller.
    mapped_keys = dict(KEYS['unchanging'])
    mapped_keys.update(KEYS['changing'])

    formatted_car = {original_key: car[mapped_key]
                     for mapped_key, original_key in mapped_keys.items()}

    # minor changes
    formatted_car['address'] = car['address'].split(', ')
    # NOTE(review): formatted_car['rentalPrice'] is the same object as
    # car['rentalPrice'], so the edits below show up through both names
    formatted_car['rentalPrice']['isOfferDrivePriceActive'] = car['price_offer']

    if car['price_offer_details']:
        car['rentalPrice']['offerDrivePrice'] = car['price_offer_details']
    else:
        # Delete offerDrivePrice if it is set when it shouldn't be.
        # It could be detected as part of the "static" vehicle information
        # if vehicle is on offer when first seen by the script.
        car['rentalPrice'].pop('offerDrivePrice', None)

    # special handling, data is duplicated in source API
    # note 100.0 to trigger float division in Python 2
    formatted_car['fuelLevel'] = formatted_car['fuelLevelInPercent'] / 100.0

    return formatted_car
193 |
194 |
def get_car_parking_drift(car):
    """
    Gets properties that can change during a parking period but aren't
    considered to interrupt the parking.
    These are things like a car charging while being parked.
    :param car: must be formatted in normalized electric2go dict format
    :return: a hashable object
    """

    # A dict is not hashable, so serialize the offer details to JSON.
    # sort_keys keeps the dumped string deterministic regardless of
    # dict key order (note: this prevents swapping in e.g. ujson).
    offer_details = json.dumps(car['price_offer_details'], sort_keys=True)

    return (car['api_estimated_range'], car['fuel'],
            car['charging'], car['price_offer'], offer_details)
211 |
212 |
def put_car_parking_drift(car, d):
    """
    Update `car`'s properties that might have changed during a parking period.
    :param car: must be formatted in normalized electric2go dict format
    :param d: must be a result of get_car_parking_drift()
    :return: the same `car` dict, updated in place
    """

    estimated_range, fuel, charging, price_offer, offer_json = d

    car['api_estimated_range'] = estimated_range
    car['fuel'] = fuel
    car['charging'] = charging
    car['price_offer'] = price_offer
    # offer details were JSON-serialized to make the tuple hashable
    car['price_offer_details'] = json.loads(offer_json)

    return car
229 |
--------------------------------------------------------------------------------
/doc/ideas:
--------------------------------------------------------------------------------
1 | Filtering
2 | =========
3 |
4 | With refactoring to store information about trips explicitly, we can
5 | extend filter functions to allow filtering by:
6 | - origin/destination latlng
7 | - starting time, ending time (e.g. get trips from 4 a.m. to noon
8 | over several days)
9 | - starting/ending cleanliness
10 | - starting/ending fuel level
11 | - possibly others that prove interesting
12 |
13 |
14 | Graphing
15 | ========
16 |
17 | Finish up refactor
18 | ------------------
19 |
20 | The graphing functions have largely been cleaned up as a result of changing
21 | how locations/trips are provided. There is a bit remaining, noted with TODOs,
22 | particularly involving matplotlib figure and axes setup. The whole matplotlib
23 | setup is hacky and might be worth replacing with straight-up bitmap use.
24 |
25 |
26 | Map subsets
27 | -----------
28 |
29 | Limit the generated map area to a given set of boundaries to essentially
30 | zoom in or focus on an area. I've done this manually for a blog post but
31 | it would be good to have it done automatically. Requires being able to
32 | generate maps from OSM source (described in "Mapping section").
33 |
34 |
35 | Visualization of accessibility/density
36 | --------------------------------------
37 |
38 | There is much that can be done in analysis.graph.make_accessibility_background.
39 |
40 | Extend the function to support gradual transparency and/or colour heatmap
41 | based on distance from nearest vehicle. Gradual transparency was not done
42 | due to performance problems but perhaps it can be improved. Could try to
43 | use matplotlib's hexbin or something.
44 |
45 |
46 | Causes of carshare use: population/business density, etc
47 | --------------------------------------------------------
48 |
49 | Does higher population density in home areas drive higher carshare use, when
50 | compared between cities? When compared within cities?
51 |
52 | Could also do something fancy like colour-analyze sat images, for instance
53 | in Vancouver low-density residential areas are much greener because of trees,
54 | while business and higher-density residential tends to be greyer. So this could
55 | work around having to find numerical density data and be a cool experiment.
56 |
57 |
58 | Further ideas
59 | -------------
60 |
61 | Maybe add street grid angle to city information, if specified, change
62 | accessibility mask shape to take this into account. For example, instead of
63 | using one circle, we could overlay two ellipses aligned with
64 | the street grid to get a closer approximation of walking distance.
65 | This will of course be useless for cities without a strong grid, but
66 | most of Vancouver, Toronto, Seattle could benefit.
67 |
68 | Perhaps highlight vehicles that have just moved into an area fairly lacking
69 | in cars. This would support an algorithm to calculate possible discount
70 | for moving vehicles in off-peak direction (until sign-off, it'll be
71 | estimated discount only, since we don't know if 10 other people are driving
72 | there), disguised as a map for time being. Dynamically price all the things.
73 |
74 |
75 | Mapping
76 | =======
77 |
78 | Automatically generate maps of a given area given OSM export. This would
79 | free me up from depending on hacky OSM website exports and allow me to
80 | customize what is included (no lesser roads, no city borders, etc).
81 | Need to research on existing OSM renderers I could reuse.
82 |
83 |
84 | Estimations
85 | ===========
86 |
87 | We could try to guesstimate actual trip distance more accurately, using
88 | reported battery levels on electric vehicles: by comparing api_estimated_range
89 | at trip start and end with the distance driven, we can see how they compare
90 | (with some asterisks on the accuracy of api_estimated_range).
91 |
92 | First plot charge level vs api_estimated_range to see if there is
93 | a useful trend or if it is too noisy.
94 |
95 | Example data from a quick look:
96 | WBY1Z41070VZ77282 in duesseldorf:
97 | start range 208 (charge 99)
98 | end range 191
99 | calculated distance 4.68 km
100 | duration 40 minutes
101 | difference in range 17 km
102 |
103 | probably unlikely that they drove 17 km in 40 minutes...
104 |
105 | might get more accurate results with mid-charge data...
106 |
107 | a 30-minute booking on same car went from fuel 93 to 89,
108 | range 191 to 176 (= 15 km), calculated distance 3.7
109 |
110 | next one 36 minutes, fuel 89 to 81, range 176 to 166 = 10 km, calculated 4.3
111 |
112 |
113 | Analysis angles
114 | ===============
115 |
116 | Based on start/end positions
117 | ----------------------------
118 |
119 | Transit competitiveness: get start&end lat long, trip time, run the start&end
120 | through transit journey planner and compare durations.
121 |
122 | Origin/destinations: where do cars leaving Vancouver's West End head to?
123 | Where do cars arriving in Seattle's South Lake Union come from? Particularly
124 | useful if combined with time analysis to show inbound/outbound commutes and
125 | the like.
126 |
127 | In grid-based cities, how close do cars park to the grid arterials? That is,
128 | is more usage nearer to grid, or inside the blocks, where the transit is
129 | farther away?
130 |
131 | Carshare availability analysis: given a position, map nearby fixed carshare
132 | stations and historical availability, and calculate/map historical availability
133 | of floating carshare vehicles nearby. Big project but pretty interesting.
134 | Might need to get data from Zipcar/Modo/Communauto/etc for full effect.
135 |
136 | New kind of visualization: show cars as they're moving, with a trail behind
137 | them for, say, 30 minutes, then disappear it. Can show trips as they're
138 | happening throughout the day. Easier to do now with the new structure.
139 |
140 | Also, for cities with multiple systems, it might be pretty fun to map
141 | multiple systems in one video. Even more moving cars, and potential differences
142 | between the usage of the different systems!
143 |
144 |
145 | Based on time
146 | -------------
147 |
148 | Isolate by time: show trips, start/end points. split at 4 am / noon / 8 pm,
149 | show commutes. Maybe do a sub-graph for only 2 am to 6 am to show night
150 | activity that might be replacing inadequate nighttime transit?
151 |
152 | Get ratio/percentage of cars that enter/exit an area with given boundaries
153 | during given timeframe, e.g.: between 9 am and 3 pm, how many cars are
154 | there in downtown Seattle or Calgary, how many arrive, how many leave?
155 |
156 | Show when a destination is popular (day/week), or which destinations
157 | are strongly popular to enter / leave at given time.
158 |
159 | For cities with a central downtown, calculate radial distance from downtown
160 | for start and end of trip, average and collect over 24 hours, graph.
161 | This might be an easy way of showing most trips starting in the suburbs
162 | and ending downtown in the morning rush, and starting downtown and ending
163 | in the suburbs in the evening rush. Play around, see what comes up.
164 | - suggested in https://www.reddit.com/r/Austin/comments/208ivi/#cg0t0l0
165 |
166 | Collect idle times between trips.
167 | I suspect heavily binomial distribution - most are grabbed right away or
168 | wait a long time - or might show up as a fat, long tail.
169 | Calculate and show idle times by times of day. 11 pm - 7 am, 7 am - 6 pm,
170 | 6 pm - 11 pm, or something. Check every hour to find the transition points.
171 |
172 | For cars that hadn't moved for a while, find day of week/time of first trip.
173 | If it's during work hours, particularly on a specific day, it might indicate
174 | an administrative move rather than a customer trip - and analysing a subset
175 | of only administrative moves would be pretty interesting.
176 |
177 |
178 | Based on reported fuel level
179 | ----------------------------
180 |
181 | Figure out whether people are more likely to use a car with more fuel/charge.
182 | This might be particularly interesting for electric vehicles and cities with
183 | all-electric fleets (e.g., do cars with 25-30% charge get orphaned until service
184 | brings them in to charge?)
185 |
186 | Also analyze when and where cars are refueled.
187 |
188 |
189 | Based on cleanliness
190 | --------------------
191 |
192 | Show cars colour-coded by their reported cleanliness status to answer
193 | the important question: do Kits people mark cars as dirty more frequently
194 | than east van people?
195 |
196 | Also, find and visualize cars that went from indicated 'unacceptable'
197 | cleanliness rating back to 'good'. See how often and where that happens!
198 |
199 |
200 | Other
201 | -----
202 |
203 | Get historical weather info and crunch basic stats to see if car2go
204 | is more frequently used during bad weather. For bonus points, get weather
205 | info automatically from city name and date/time of files being processed.
206 | Wunderground can be used as basic source of data - historical info is free
207 | on developer plan (500 calls per day, 10 per minute).
208 |
209 | Visualize around special events - Canucks game, Whitecaps game, etc, etc?
210 | See if we can pick up any additional events or activity.
211 |
212 | Try to quantify environmental impact: how many cars does the service replace?
213 | E.g.: one commute trip in the morning, one errand trip midday, one commute trip
214 | in the evening, one nightlife trip during a given day would be replacing
215 | one car, essentially. But if it had a second commute trip in the evening,
216 | it might be replacing 1.5 cars. Just try to classify trips and
217 | see how that breaks down, how many commute-like trips are made,
218 | see if the data says any interesting things.
219 |
220 |
221 |
222 | Traditional carshare visualizations
223 | ===================================
224 |
225 | Current formats won't be very interesting with traditional model data.
226 | Think of what would be good for this - perhaps a video/animation
227 | that emphasizes cars being picked up, to stress usage?
228 |
229 |
230 | Bus/transit visualizations
231 | ==========================
232 |
233 | Not sure if this should be part of this codebase in general. I used to have
234 | some support but it's been unused and untested for a while and some has been
235 | removed. Improving it or removing it wholesale might be better.
236 |
237 | If I get into this again, I will probably have to parse route info KMLs and
238 | match bus positions to nearest stretch of route. A naive way might be to loop
239 | through each pixel along route (generated from KML) and find closest bus.
240 | Then colour the pixel with appropriate colour from the bus data.
241 |
242 |
243 | HTML display
244 | ============
245 |
246 | I can do some neat things given an interactive HTML page - switch time period
247 | being displayed, switch display positions of trip start or end, etc.
248 |
249 | If feeling fancy, can autogenerate list files on the hour or something.
250 |
251 | HTML 5? Canvas? Just need to draw lines, shouldn't be demanding performance.
252 |
253 |
--------------------------------------------------------------------------------
/web/map2go/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | map2go: car2go use in Austin during SXSW 2013
6 |
7 |
8 |
63 |
64 |
66 |
67 |
68 |
69 |
70 |
72 |
73 |
131 |
132 |
133 |
134 |
342 |
343 |
--------------------------------------------------------------------------------
/electric2go/analysis/generate.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from collections import defaultdict
4 | from datetime import timedelta
5 | import os
6 |
7 | from . import cmdline, normalize
8 | from .. import files, systems
9 |
10 |
11 | # This is basically the inverse of normalize.py
12 | # - it generates per-minute / per-moment state
13 | # from a result_dict.
14 |
15 |
def build_data_frame(result_dict, turn, include_trips):
    """
    Build the state of the system at one point in time.

    :param result_dict: normalized result dict with finished/unfinished data
    :param turn: the timestamp being reconstructed
    :param include_trips: when falsy, the trips element is None
    :return: tuple(turn, current parking positions, trips ending at turn or None)
    """

    finished_parkings = result_dict['finished_parkings']
    finished_trips = result_dict['finished_trips']
    unfinished_parkings = result_dict['unfinished_parkings']

    # Boundary note: the `<=` on *both* ends of the parking window is correct.
    # Parking periods are defined in normalize.process_data as
    # "a parking period starts on data_time and ends on prev_data_time",
    # which initially suggested `turn < p['ending_time']` here. However,
    # process_data already does the logical filtering of when a parking
    # starts and ends: in its output, cars are still available when
    # `turn == p['ending_time']`. A strict less-than would double-filter.
    # (Confirmed by looking at actual source data.)
    current_positions = [parking
                         for parkings in finished_parkings.values()
                         for parking in parkings
                         if parking['starting_time'] <= turn <= parking['ending_time']]

    # parkings whose end we have not yet observed are also current
    current_positions.extend(
        parking for parking in unfinished_parkings.values()
        if parking['starting_time'] <= turn)

    if include_trips:
        current_trips = [trip
                         for trips in finished_trips.values()
                         for trip in trips
                         if trip['end']['time'] == turn]
    else:
        current_trips = None

    return turn, current_positions, current_trips
54 |
55 |
def build_data_frames(result_dict, include_trips=True):
    """
    Generate one data frame per time step, from the archive's starting
    time to its ending time inclusive.
    """

    metadata = result_dict['metadata']
    step = timedelta(seconds=metadata['time_step'])

    current = metadata['starting_time']
    while current <= metadata['ending_time']:
        yield build_data_frame(result_dict, current, include_trips)
        current += step
66 |
67 |
def build_obj(data_frame, parser, result_dict):
    """
    Convert one data frame back into a system API object.

    :param data_frame: tuple as produced by build_data_frame
    :param parser: system parser with put_car / put_cars / put_car_parking_drift
    :param result_dict: the normalized archive the frame came from
    :return: tuple(turn, system API object for that moment)
    """

    turn, current_positions, _ = data_frame

    def undo_normalize(car_data):
        # inverse of normalize.process_data.process_car

        # 'duration' is added by normalize's end_parking and does not
        # exist in original API output
        if 'duration' in car_data:
            del car_data['duration']

        # merge back per-vehicle data that doesn't change between frames;
        # it is stored once under the 'vehicles' key
        car_data.update(result_dict['vehicles'].get(car_data['vin'], {}))

        return car_data

    def roll_out_changing_data(car_data, changing_data):
        if changing_data:
            # apply the latest update at or before this turn, if any
            applicable = [change[1] for change in changing_data
                          if change[0] <= turn]
            if applicable:
                car_data = parser.put_car_parking_drift(car_data, applicable[-1])

        return car_data

    # This implicitly assumes the system always returns cars as a list
    # rather than e.g. a dict - same assumption as elsewhere in the code
    # (e.g. normalize.process_data's "for car in available_cars").
    # Verified manually in August 2016 that the list assumption held for:
    # car2go, drivenow, communauto, evo, multicity, sharengo, translink
    # (enjoy was broken at the time so unknown).

    system_cars = []
    for car in current_positions:
        # `car` ultimately comes from a result_dict that could still be used
        # for other purposes, so copy it first to keep undo_normalize and
        # roll_out_changing_data free of side-effects.
        # NOTE: not a deep copy, so nested dicts (e.g. drivenow) might break :(
        rebuilt = undo_normalize(dict.copy(car))
        rebuilt = roll_out_changing_data(rebuilt, car.get('changing_data', None))
        system_cars.append(parser.put_car(rebuilt))

    # pass a list, not a generator - otherwise json cannot serialize, lame
    system_obj = parser.put_cars(system_cars, result_dict)

    return turn, system_obj
126 |
127 |
def build_objs(result_dict):
    """
    Yield (data_time, system API object) pairs for the whole archive,
    as a generator.
    """

    parser = systems.get_parser(result_dict['metadata']['system'])

    # source files never include trip info, so skip computing it
    frames = build_data_frames(result_dict, False)

    return (build_obj(frame, parser, result_dict) for frame in frames)
138 |
139 |
def write_files(result_dict, location):
    """
    Write out API-format data files, one per time step, under `location`.

    Files that were missing in the original archive are skipped.
    """

    # TODO: depending on how it's being used, this function might not belong here
    city = result_dict['metadata']['city']
    for data_time, data_dict in build_objs(result_dict):
        # If file was missing in the original, don't write it out.
        # Strictly speaking, this doesn't always perfectly recreate the original files.
        # For instance, if the server returned an HTML "503 Service Unavailable" response,
        # this will be treated as unparseable, recorded as missing, and its contents information not saved.
        # When generated, the file will not be written at all.
        # But I am already not recreating the originals *perfectly* due to being unable
        # to preserve list order, and recreating error data isn't high on my priority list...
        if data_time in result_dict['metadata']['missing']:
            continue

        file_name = files.get_file_name(city, data_time)
        file_path = os.path.join(location, file_name)

        # TODO: it would be good to parallelize this, but a quick attempt in f39bb45c5b
        # resulted in test failures due to incorrect data being written... hrm
        with open(file_path, 'w') as f:
            cmdline.write_json(data_dict, f)
161 |
162 |
163 | # TODO: this duplicates tests.py GenerateTest except with worse error reporting - factor out somehow?
def compare_files(result_dict, expected_location, actual_location):
    """
    Compare generated files against the archive described by result_dict,
    using its metadata for system, city, and the time range to walk.
    """

    meta = result_dict['metadata']

    return compare_files_for_system(
        meta['system'], meta['city'],
        expected_location, actual_location,
        meta['starting_time'], meta['ending_time'], meta['time_step'])
171 |
172 |
def compare_files_for_system(system, city, expected_location, actual_location,
                             start_time, end_time, time_step):
    """
    Walk every time step between start_time and end_time (inclusive),
    comparing the two archives and printing any differences found.

    NOTE(review): always returns True even when differences were found;
    callers have to read the printed output. Looks intentional, but
    worth confirming.
    """

    # The generated location might be a tempdir name like '/tmp/tmp25l2ba19',
    # while Electric2goDataArchive expects a trailing slash when given
    # a directory - so ensure there is one.
    actual_location = os.path.join(actual_location, '')

    expected_data_archive = normalize.Electric2goDataArchive(city, expected_location)
    actual_data_archive = normalize.Electric2goDataArchive(city, actual_location)

    # each maps an offending vin/key to the list of times it differed at
    differing_vins = defaultdict(list)
    differing_keys = defaultdict(list)
    differing_remainder_keys = defaultdict(list)

    step = timedelta(seconds=time_step)
    comparison_time = start_time
    while comparison_time <= end_time:
        step_vins, step_keys, step_remainder = _compare_system_independent(
            system, expected_data_archive, actual_data_archive, comparison_time)

        for vin in step_vins:
            differing_vins[vin].append(comparison_time)
        for key in step_keys:
            differing_keys[key].append(comparison_time)
        for key in step_remainder:
            differing_remainder_keys[key].append(comparison_time)

        comparison_time += step

    if differing_vins:
        print("======================")
        print("=== differing VINs: {}".format(differing_vins))

    if differing_keys:
        print("======================")
        print("=== differing keys for cars: {}".format(differing_keys))

    if differing_remainder_keys:
        print("======================")
        print("=== differing keys in remainder info: {}".format(differing_remainder_keys))

    return True
216 |
217 |
def _compare_system_independent(system, expected_data_archive, actual_data_archive, comparison_time):
    """
    Compare one data point between two archives, printing any differences.

    :param system: system name, used to look up the API parser
    :param expected_data_archive: normalize.Electric2goDataArchive with reference data
    :param actual_data_archive: normalize.Electric2goDataArchive with generated data
    :param comparison_time: the timestamp of the data point to compare
    :return: tuple of sets (differing_vins, differing_keys, differing_remainder_keys)
    """

    parser = systems.get_parser(system)

    expected_file = expected_data_archive.load_data_point(comparison_time)
    actual_file = actual_data_archive.load_data_point(comparison_time)

    # load_data_point can return False when the file is missing or malformed.
    # When that happens, expect it on both archives.
    if expected_file is False:
        if actual_file is False:
            print("expected_file and actual_file are both False")
            return set(), set(), set()
        else:
            # bug fix: this used to be set("expected_file is ..."), which
            # builds a set of the string's individual *characters*;
            # a one-element set literal is what was intended
            return set(), set(), {"expected_file is False, but actual_file is not"}

    # Test cars for equivalency. We have to do it separately because cars
    # come from the API as a list, but we don't store the list order.
    expected_cars = parser.get_cars_dict(expected_file)
    actual_cars = parser.get_cars_dict(actual_file)

    differing_vins = set()
    differing_keys = set()

    if expected_cars != actual_cars:
        print("{}: cars are not equiv".format(comparison_time))
        for vin, car in expected_cars.items():
            # robustness fix: a VIN present only in expected used to raise
            # KeyError; report it as a difference instead
            if vin not in actual_cars:
                print(vin + " is missing from actual cars")
                differing_vins.add(vin)
                continue

            if car != actual_cars[vin]:
                print(vin + " is the first offender")
                differing_vins.add(vin)
                for key in car:
                    # .get() so a key missing from the actual car is reported
                    # as differing rather than raising KeyError
                    if car[key] != actual_cars[vin].get(key):
                        print(key + ": in expected: " + repr(car[key]) + ", in actual: " + repr(actual_cars[vin].get(key)))
                        differing_keys.add(key)
        # note: cars present only in the *actual* archive are not reported here

    # test exact equivalency of everything but the cars list
    expected_remainder = parser.get_everything_except_cars(expected_file)
    actual_remainder = parser.get_everything_except_cars(actual_file)

    differing_remainder_keys = set()

    if expected_remainder != actual_remainder:
        if expected_remainder.get('code', '') == 500:
            # this happens sometimes, ignore it
            print("{}: expected_remainder was a 500 JSON, returning valid".format(comparison_time))
            return differing_vins, differing_keys, differing_remainder_keys

        print("{}: remainders are wrong".format(comparison_time))
        for key, value in expected_remainder.items():
            if key not in actual_remainder:
                print("key missing from generated: {}".format(key))
                print("!!!!!!!!!!!!!!!!!!!!!!!!!")
                print("!!!!!!!!!!!!!!!!!!! unrecognized key, pay attention!!")
                print("!!!!!!!!!!!!!!!!!!!!!!!!!")
                differing_remainder_keys.add(key)

            elif value != actual_remainder[key]:
                print(key + ": in expected: " + repr(value) + ", in actual: " + repr(actual_remainder[key]))
                differing_remainder_keys.add(key)
                # these drivenow keys are expected to differ between
                # otherwise-identical responses, so skip the loud warning
                if not (system == "drivenow" and key in ("emergencyStatus", "marketingMessage", "message")):
                    print("!!!!!!!!!!!!!!!!!!!!!!!!!")
                    print("!!!!!!!!!!!!!!!!!!! unrecognized key, pay attention!!")
                    print("!!!!!!!!!!!!!!!!!!!!!!!!!")

    return differing_vins, differing_keys, differing_remainder_keys
287 |
--------------------------------------------------------------------------------
/electric2go/analysis/graph.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from collections import defaultdict, OrderedDict
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 |
7 | from ..systems import get_city_by_result_dict
8 |
9 |
# speed ranges are designated as: 0-5; 5-15; 15-30; 30+
# each entry is (exclusive upper bound, matplotlib colour code); the last
# entry's colour ('b') also serves as the default when speed is unknown
# (see create_points_speed_colour / create_points_default_colour)
SPEEDS = [(5, 'r'), (15, 'y'), (30, 'g'), (float('inf'), 'b')]
12 |
13 |
14 | # strictly not correct as lat/lng isn't a grid, but close enough at city scales
def map_latitude(city_data, latitudes):
    """Linearly map latitude values to pixel y-coordinates on the city map."""
    limits = city_data['MAP_LIMITS']
    fraction = (latitudes - limits['SOUTH']) / (limits['NORTH'] - limits['SOUTH'])
    return fraction * city_data['MAP_SIZES']['MAP_Y']
19 |
20 |
def map_longitude(city_data, longitudes):
    """Linearly map longitude values to pixel x-coordinates on the city map."""
    # same caveat as map_latitude: treats lat/lng as a flat grid,
    # which is close enough at city scales
    limits = city_data['MAP_LIMITS']
    fraction = (longitudes - limits['WEST']) / (limits['EAST'] - limits['WEST'])
    return fraction * city_data['MAP_SIZES']['MAP_X']
25 |
26 |
def is_latlng_in_bounds(city_data, latlng):
    """Return True when the (lat, lng) pair falls inside the city's BOUNDS box."""
    lat, lng = latlng
    bounds = city_data['BOUNDS']

    return (bounds['SOUTH'] <= lat <= bounds['NORTH']
            and bounds['WEST'] <= lng <= bounds['EAST'])
35 |
36 |
def get_pixel_size(city_data):
    """
    Find the length in metres represented by one map pixel.
    :return: tuple(metres per pixel along latitude, metres per pixel along longitude)
    """

    # TODO: calculate DEGREE_LENGTHS here from formula rather than needing it
    # provided in city_data - it is only ever used here (so far? hm, that
    # distance-from-subway-station idea could use it as well)

    limits = city_data['MAP_LIMITS']
    degree_lengths = city_data['DEGREE_LENGTHS']
    sizes = city_data['MAP_SIZES']

    lat_metres = (limits['NORTH'] - limits['SOUTH']) * degree_lengths['LENGTH_OF_LATITUDE']
    lng_metres = (limits['EAST'] - limits['WEST']) * degree_lengths['LENGTH_OF_LONGITUDE']

    return lat_metres / sizes['MAP_Y'], lng_metres / sizes['MAP_X']
52 |
53 |
def get_mean_pixel_size(city_data):
    """Return approximate metres per map pixel, averaged over both axes."""

    # mean of the latitude- and longitude-based values: not quite correct,
    # but more than close enough for most uses
    lat_metres, lng_metres = get_pixel_size(city_data)

    return (lat_metres + lng_metres) / 2
63 |
64 |
def make_graph_axes(city_data, background=None):
    """
    Sets up figure area and axes for a city to be graphed.
    :param city_data: city definition dict; MAP_SIZES gives pixel dimensions
    :param background: path to an image file to load,
    or a matplotlib.imshow()-compatible value, or None
    :return: tuple(matplotlib_fig, matplotlib_ax)
    """

    # set up figure area

    dpi = 80
    # i actually have no idea why this is necessary, but the
    # figure sizes are wrong otherwise. ???
    dpi_adj_x = 0.775
    dpi_adj_y = 0.8

    # TODO: see if it is possible to reuse figure or axes rather than
    # creating new ones every time
    f = plt.figure(dpi=dpi)
    f.set_size_inches(city_data['MAP_SIZES']['MAP_X']/dpi_adj_x/dpi,
                      city_data['MAP_SIZES']['MAP_Y']/dpi_adj_y/dpi)

    # axes span the full pixel extent of the map
    ax = f.add_subplot(111)
    ax.axis([0, city_data['MAP_SIZES']['MAP_X'], 0, city_data['MAP_SIZES']['MAP_Y']])

    # remove visible axes and figure frame
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)

    if background is not None:
        # origin='lower' keeps the image consistent with map_latitude's
        # mapping, which puts south at y=0
        ax.imshow(background, origin='lower', aspect='auto')

    return f, ax
99 |
100 |
def plot_points(ax, points, colour, symbol):
    """Plot an iterable of (y, x) points on ax with the given colour and symbol."""

    # points may be a generator (see plot_geopoints), so consume it once
    pts = list(points)
    xs = [x for _, x in pts]
    ys = [y for y, _ in pts]

    ax.plot(xs, ys, colour + symbol)

    return ax
107 |
108 |
def plot_geopoints(ax, city_data, geopoints_dict, symbol):
    """
    Plot a dict of colour -> [(lat, lng), ...] onto ax, converting
    geographic coordinates to map pixel coordinates first.
    """

    for colour, latlngs in geopoints_dict.items():
        if not latlngs:
            continue

        lats, lngs = zip(*latlngs)

        ys = map_latitude(city_data, np.array(lats))
        xs = map_longitude(city_data, np.array(lngs))

        ax = plot_points(ax, zip(ys, xs), colour, symbol)

    return ax
120 |
121 |
def plot_lines(ax, lines_start_y, lines_start_x, lines_end_y, lines_end_x, colour='#aaaaaa'):
    """Draw a straight line between each corresponding start/end pixel coordinate."""

    # the four inputs are parallel sequences of equal length
    for y0, x0, y1, x1 in zip(lines_start_y, lines_start_x, lines_end_y, lines_end_x):
        ax.add_line(plt.Line2D([x0, x1], [y0, y1], color=colour))

    return ax
130 |
131 |
def plot_geolines(ax, city_data, lines_start_lat, lines_start_lng, lines_end_lat, lines_end_lng, colour='#aaaaaa'):
    """Convert lat/lng line endpoints into map pixel coordinates and draw them."""

    start_ys = map_latitude(city_data, np.array(lines_start_lat))
    start_xs = map_longitude(city_data, np.array(lines_start_lng))
    end_ys = map_latitude(city_data, np.array(lines_end_lat))
    end_xs = map_longitude(city_data, np.array(lines_end_lng))

    return plot_lines(ax, start_ys, start_xs, end_ys, end_xs, colour)
140 |
141 |
def plot_trips(ax, city_data, trips, colour='#aaaaaa'):
    """Draw one line per trip, from its start position to its end position."""

    starts = [trip['start'] for trip in trips]
    ends = [trip['end'] for trip in trips]

    return plot_geolines(ax, city_data,
                         [p['lat'] for p in starts], [p['lng'] for p in starts],
                         [p['lat'] for p in ends], [p['lng'] for p in ends],
                         colour)
149 |
150 |
def filter_positions_to_bounds(city_data, positions):
    """
    Keep only positions whose coords fall inside the city's graphing bounds.
    """

    # TODO: I don't think this function is actually necessary - if a point
    # is outside of bounds, I think matplotlib will just draw it offscreen
    # and not include it in the final render.
    # Confirm and if so, remove this function.

    return [position for position in positions
            if is_latlng_in_bounds(city_data, position['coords'])]
161 |
162 |
def create_points_default_colour(positions):
    """
    Assign the default colour (last entry in SPEEDS) to every position.
    :returns a dict of lists formatted suitably for passing to plot_geopoints()
    """

    default_colour = SPEEDS[-1][1]

    return {default_colour: [position['coords'] for position in positions]}
172 |
173 |
def create_points_electric_colour(positions, electric_colour='r', standard_colour='b'):
    """
    Split positions by engine type: electric engines get electric_colour,
    all other engines get standard_colour.
    :returns a dict of lists formatted suitably for passing to plot_geopoints()
    """

    standard = [position['coords'] for position in positions
                if not position['electric']]
    electric = [position['coords'] for position in positions
                if position['electric']]

    # Insert standard_colour first so electric points end up drawn on top:
    # there are typically many more standard cars than electric ones, and
    # drawing electric last keeps them visible.
    split = OrderedDict()
    split[standard_colour] = standard
    split[electric_colour] = electric
    return split
187 |
188 |
def create_points_speed_colour(positions):
    """
    Group position coords by colour according to vehicle speed,
    using the bins defined in SPEEDS.
    :returns a dict of lists formatted suitably for passing to plot_geopoints()
    """

    grouped = defaultdict(list)

    for position in positions:
        # default to the last colour; used when no speed is recorded
        # or when no bin matches
        colour = SPEEDS[-1][1]

        try:
            speed = position['metadata']['speed']
        except KeyError:
            # position doesn't have 'speed' defined - keep the default
            pass
        else:
            for upper_bound, bin_colour in SPEEDS:
                if speed < upper_bound:
                    colour = bin_colour
                    break

        grouped[colour].append(position['coords'])

    return grouped
213 |
214 |
def create_points_trip_start_end(trips, from_colour='b', to_colour='r'):
    """
    Collect start and end coordinates for the provided trips.
    :returns a dict of lists formatted suitably for passing to plot_geopoints()
    """

    # Use an OrderedDict so trip-end points are always inserted last and
    # therefore drawn on top in plot_geopoints. plot_geopoints plots in
    # dictionary order, and with an unordered dict either "from" or "to"
    # could end up on top depending on the hash function in use -
    # OrderedDict makes the order explicit.
    points = OrderedDict()
    points[from_colour] = [(trip['start']['lat'], trip['start']['lng'])
                           for trip in trips]
    points[to_colour] = [(trip['end']['lat'], trip['end']['lng'])
                         for trip in trips]
    return points
234 |
235 |
def graph_wrapper(city_data, plot_function, image_name, background=None):
    """
    Handles creating the figure, saving it as an image, and closing the figure.
    :param plot_function: callable taking (f, ax) that does the actual drawing
    :param image_name: the rendered image is saved under this name
    :param background: background for the figure (accessibility snapshot, etc)
    :return: none
    """

    fig, ax = make_graph_axes(city_data, background)

    # hand the axes to the caller to do the actual plotting
    plot_function(fig, ax)

    # TODO: could see if saving to a file type other than a png is faster
    # (ps was ~4 times faster, svg and pdf ~2 times faster when tried
    # a long time ago), but any alternative must still work as avconv input.
    fig.savefig(image_name, bbox_inches='tight', pad_inches=0, dpi=80,
                transparent=True)

    # close the plot to free its memory - it is never freed otherwise
    # until the script is killed or exits
    plt.close(fig)
262 |
263 |
def convert_positions_to_legacy(positions):
    """Return copies of the position dicts with an added legacy 'coords' key
    holding a (lat, lng) tuple, as expected by the older plotting helpers."""
    return [{**position, 'coords': (position['lat'], position['lng'])}
            for position in positions]
267 |
268 |
def make_graph(result_dict, positions, trips, image_filename, printed_time,
               show_speeds, highlight_distance, symbol):
    """ Creates and saves matplotlib figure for provided positions and trips.

    :param result_dict: normalized result dict, used to look up city data
    :param positions: vehicle positions (dicts with 'lat'/'lng' keys)
    :param trips: trips in progress, drawn as lines; falsy to draw none
    :param image_filename: file the rendered figure is saved to
    :param printed_time: datetime printed onto the image labels
    :param show_speeds: if True, colour positions by speed instead of default colour
    :param highlight_distance: if truthy, draw an accessibility background
        highlighting areas within this distance of a car
        (presumably in metres - see make_accessibility_background)
    :param symbol: matplotlib marker symbol used for position points
    """

    city_data = get_city_by_result_dict(result_dict)

    # older plotting helpers expect a 'coords' (lat, lng) tuple on each position
    positions = convert_positions_to_legacy(positions)

    # filter to only vehicles that are in city's graphing bounds
    filtered_positions = filter_positions_to_bounds(city_data, positions)

    if highlight_distance:
        # background needs bare (lat, lng) tuples, not full position dicts
        positions_without_metadata = [p['coords'] for p in filtered_positions]
        graph_background = make_accessibility_background(city_data, positions_without_metadata, highlight_distance)
    else:
        graph_background = None

    # mark with either speed, or default colour
    if show_speeds:
        positions_by_colour = create_points_speed_colour(filtered_positions)
    else:
        positions_by_colour = create_points_default_colour(filtered_positions)

    # define what to add to the graph; closes over the variables computed above
    def plotter(f, ax):
        # plot points for vehicles
        ax = plot_geopoints(ax, city_data, positions_by_colour, symbol)

        # add in lines for moving vehicles
        if trips:
            ax = plot_trips(ax, city_data, trips)

        # add labels at city-configured positions and font sizes
        coords = city_data['LABELS']['lines']
        fontsizes = city_data['LABELS']['fontsizes']

        ax.text(coords[0][0], coords[0][1],
                city_data['display'], fontsize=fontsizes[0])
        # prints something like "December 10, 2014"
        ax.text(coords[1][0], coords[1][1],
                '{d:%B} {d.day}, {d.year}'.format(d=printed_time),
                fontsize=fontsizes[1])
        # prints something like "Wednesday, 04:02"
        ax.text(coords[2][0], coords[2][1],
                '{d:%A}, {d:%H}:{d:%M}'.format(d=printed_time),
                fontsize=fontsizes[2])
        ax.text(coords[3][0], coords[3][1],
                'available cars: %d' % len(filtered_positions),
                fontsize=fontsizes[3])

    # create and save plot
    graph_wrapper(city_data, plotter, image_filename, graph_background)
321 |
322 |
def make_positions_graph(result_dict, image_name, symbol, colour_electric=False):
    """Plot every parking position in the dataset and save to image_name.
    :param colour_electric: if True, colour points with the electric-car scheme
    """
    city_data = get_city_by_result_dict(result_dict)

    # all positions seen: cars still parked at the end of the dataset
    # ("unfinished parkings") plus every parking that has since ended
    # ("finished parkings")
    positions = list(result_dict['unfinished_parkings'].values())
    for vehicle_parkings in result_dict['finished_parkings'].values():
        positions.extend(vehicle_parkings)

    positions = convert_positions_to_legacy(positions)

    in_bounds = filter_positions_to_bounds(city_data, positions)

    if colour_electric:
        by_colour = create_points_electric_colour(in_bounds)
    else:
        by_colour = create_points_default_colour(in_bounds)

    def draw(f, ax):
        plot_geopoints(ax, city_data, by_colour, symbol)

    graph_wrapper(city_data, draw, image_name, background=None)
345 |
346 |
347 | def _get_trips(result_dict):
348 | return [trip
349 | for vin in result_dict['finished_trips']
350 | for trip in result_dict['finished_trips'][vin]]
351 |
352 |
def make_trips_graph(result_dict, image_name):
    """Draw all finished trips as lines and save the figure to image_name."""
    city_data = get_city_by_result_dict(result_dict)
    all_trips = _get_trips(result_dict)

    def draw(f, ax):
        # nothing to draw if the dataset contains no trips
        if all_trips:
            plot_trips(ax, city_data, all_trips)

    graph_wrapper(city_data, draw, image_name, background=None)
363 |
364 |
def make_trip_origin_destination_graph(result_dict, image_name, symbol):
    """Plot the start and end point of every finished trip and save to image_name."""
    city_data = get_city_by_result_dict(result_dict)
    all_trips = _get_trips(result_dict)

    # TODO: use hexbin instead of plain points, to avoid unexpected results
    # when one trip ends at a point and the next trip starts at the same
    # point (described in a comment in create_points_trip_start_end()).
    # Could weight trip starts +1 and trip ends -1, then hexbin the sum or
    # mean of the values to show where vehicles mostly arrive, mostly
    # depart, or balance out.

    def draw(f, ax):
        origin_destination_points = create_points_trip_start_end(all_trips)
        plot_geopoints(ax, city_data, origin_destination_points, symbol)

    graph_wrapper(city_data, draw, image_name, background=None)
383 |
384 |
def make_accessibility_background(city_data, positions, distance):
    """
    Build an RGBA background image marking areas within `distance` of any
    car position as accessible (fully transparent) and everything else as
    inaccessible (light grey, mostly transparent).

    :param positions: iterable of (lat, lng) tuples
    :param distance: accessibility radius (presumably in metres, given
        get_mean_pixel_size - confirm with callers)
    :return: numpy uint8 array of shape (MAP_Y, MAP_X, 4)
    """
    latitudes, longitudes = zip(*positions)
    # Map to graph pixel space. Cast to int: these values are used as array
    # indices / slice bounds below, and modern numpy rejects floats there.
    latitudes = np.round(map_latitude(city_data, np.array(latitudes))).astype(int)
    longitudes = np.round(map_longitude(city_data, np.array(longitudes))).astype(int)

    # The below is based off http://stackoverflow.com/questions/8647024/how-to-apply-a-disc-shaped-mask-to-a-numpy-array
    # Basically, we build a True/False mask (master_mask) the same size
    # as the map. Each 'pixel' within the mask indicates whether the point
    # is within provided distance from a car.
    # To build this, iterate over all cars and apply a circular mask of Trues
    # (circle_mask) around the point indicating each car. We'll need to shift
    # things around near the borders of the map, but this is relatively
    # straightforward.

    accessible_colour = (255, 255, 255, 0)  # white, fully transparent
    inaccessible_colour = (239, 239, 239, 100)  # #efefef, mostly transparent

    # not using accessible_multiplier currently because it's too slow
    # accessible_multiplier = (1, 1, 1, 0.6)
    # if using accessible_multiplier, 160 alpha for inaccessible looks better

    # generate basic background, for now uniformly indicating no cars available
    markers = np.empty(
        (city_data['MAP_SIZES']['MAP_Y'], city_data['MAP_SIZES']['MAP_X'], 4),
        dtype=np.uint8)
    markers[:] = inaccessible_colour  # can't use fill since it isn't a scalar

    # find distance radius, in pixels; int so it can be used in slices below
    pixel_in_m = get_mean_pixel_size(city_data)
    radius = int(round(distance / pixel_in_m))

    # generate master availability mask.
    # dtype=bool, not np.bool: the np.bool alias was deprecated in
    # numpy 1.20 and removed in 1.24.
    master_mask = np.zeros(
        (city_data['MAP_SIZES']['MAP_Y'], city_data['MAP_SIZES']['MAP_X']),
        dtype=bool)
    m_m_shape = master_mask.shape

    # generate basic circle mask
    y, x = np.ogrid[-radius: radius+1, -radius: radius+1]
    circle_mask = x**2 + y**2 <= radius**2
    c_m_shape = circle_mask.shape

    for i in range(len(latitudes)):
        # to just crudely mark a square area around lat/lng:
        # markers[(lat - radius): (lat+radius), (lng-radius): (lng+radius)] = accessible_colour

        # mask is drawn from top-left corner. to center mask around the point:
        x = latitudes[i] - radius
        y = longitudes[i] - radius

        # find various relevant locations within the matrix...

        # cannot give a negative number as first param in slice
        master_x_start = max(x, 0)
        master_y_start = max(y, 0)
        # but going over boundaries is ok, will trim automatically
        master_x_end = x + c_m_shape[0]
        master_y_end = y + c_m_shape[1]

        circle_x_start = 0
        circle_y_start = 0
        circle_x_end = c_m_shape[0]
        circle_y_end = c_m_shape[1]

        if x < 0:  # trim off left side
            circle_x_start = x * -1
        if y < 0:  # trim off top
            circle_y_start = y * -1
        if master_x_end > m_m_shape[0]:  # trim off right side
            # negative end index trims the circle mask to the overlap
            circle_x_end = (m_m_shape[0] - master_x_end)
        if master_y_end > m_m_shape[1]:  # trim off bottom
            circle_y_end = (m_m_shape[1] - master_y_end)

        # make sure to OR the masks so that earlier circles' Trues
        # aren't overwritten by later circles' Falses
        master_mask[
            master_x_start: master_x_end,
            master_y_start: master_y_end
        ] |= circle_mask[
            circle_x_start: circle_x_end,
            circle_y_start: circle_y_end]

    # not using accessible_multiplier currently because it's too slow
    # markers[master_mask] *= accessible_multiplier

    # note: can also do something like this: markers[mask] *= (1, 1, 1, 0.5)
    # and it updates everything - should be useful for relative values.
    # except it has to happen within the iteration as shown above, and is also
    # pretty slow. like, adds 1.2 seconds per image slow. see if I can
    # optimize it somehow, but multiplying a million-item array, even masked,
    # by a vector 200 times might just be inherently a bit slow :(

    markers[master_mask] = accessible_colour

    return markers
481 |
--------------------------------------------------------------------------------
/electric2go/analysis/stats.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from collections import Counter, OrderedDict
4 | from datetime import timedelta
5 | import csv
6 | import numpy as np
7 |
8 |
def write_csv(f, items):
    """
    Write `items` as CSV (header row included) to the open file-like
    object `f` and return `f`. Column order follows the key order of the
    first item, which is deterministic because items are OrderedDicts.

    :type items: list[OrderedDict]
    """

    if not items:
        # nothing to write
        return f

    writer = csv.DictWriter(f, items[0].keys())
    writer.writeheader()
    writer.writerows(items)

    return f
26 |
27 |
def write_csv_to_file(output_file, items):
    """
    Write `items` as CSV to a file at path `output_file`.

    :type items: list[OrderedDict]
    """

    # newline='' is required by the csv module so that it controls line
    # endings itself; without it, rows get '\r\r\n' endings on Windows.
    with open(output_file, 'w', newline='') as f:
        write_csv(f, items)
35 |
36 |
def is_trip_weird(trip):
    """
    Heuristically detect trips that are likely system errors (e.g. GPS
    misreads) rather than real trips: very short duration and distance
    with no refuelling recorded.
    """
    # TODO: these criteria are fairly car2go specific. They need to be tested on other systems.

    # TODO: car2go appears to have a significant peak at 32 and 33 minute durations, likely
    # from lapsed reservations - try to filter those.
    # Check directly - and try to guess if it's a lapsed reservation (fuel use?
    # but check similar duration trips to see if their fuel use isn't usually 0 either)

    trip_duration = trip['duration']
    trip_distance = trip['distance']

    # NOTE: 'fuel_use' is only read once the cheap conditions match,
    # because not every trip dict carries that key.
    if trip_duration < 4*60 and trip_distance <= 0.01:
        # trips under 4 minutes and under 10 metres are likely to be errors
        return trip['fuel_use'] > -2
    if trip_duration == 1*60 and trip_distance <= 0.05:
        # trips exactly 1 minute long and under 50 metres are likely to be errors
        return trip['fuel_use'] > -2

    return False
53 |
54 |
def stats_dict(data_dict):
    """
    Compute summary statistics for a normalized dataset.

    :returns: an OrderedDict of scalar stats (counts, ratios, means,
        medians, quartiles, threshold ratios) for trips, parkings and
        fuel use, suitable for CSV export with write_csv().
    """
    starting_time = data_dict['metadata']['starting_time']
    ending_time = data_dict['metadata']['ending_time']

    all_trips = [trip for vin in data_dict['finished_trips'] for trip in data_dict['finished_trips'][vin]]

    # every vehicle seen anywhere in the dataset, including ones with no trips
    all_known_vins = set()
    all_known_vins.update(data_dict['unfinished_trips'].keys())
    all_known_vins.update(data_dict['finished_trips'].keys())
    all_known_vins.update(data_dict['unfinished_parkings'].keys())
    all_known_vins.update(data_dict['finished_parkings'].keys())
    all_known_vins.update(data_dict['unstarted_trips'].keys())

    def stats_for_collection(collection, collection_binned, days=1.0, over=False, under=False, most_common_count=10):
        """
        Compute count/mean/std/median/quartile stats for a numeric collection.
        :param collection_binned: same values rounded into bins, used for
            the "most common binned values" stat
        :param days: if not 1.0, per-day variants are also computed
        :type over: list
        :type under: list
        """

        def dataset_count_over(trips, thresholds, is_over=True):
            # count how many values fall over (or under) each threshold
            results = []
            for threshold in thresholds:
                if is_over:
                    trip_count = len([x for x in trips if x > threshold])
                else:
                    trip_count = len([x for x in trips if x < threshold])

                results.append((threshold, trip_count))

            return results

        def quartiles(quartiles_collection, quartiles_days):
            quartiles_dict = {}
            for i in range(0, 101, 25):
                quartiles_dict[i] = np.percentile(quartiles_collection, i) / quartiles_days
            return quartiles_dict

        result = OrderedDict()
        result['count all'] = len(collection)
        result['mean'] = np.mean(collection)
        result['std'] = np.std(collection)
        quartiles_overall = quartiles(collection, 1.0)
        result['median'] = quartiles_overall[50]
        result['quartiles'] = quartiles_overall
        result['most common binned values'] = Counter(collection_binned).most_common(most_common_count)

        if days != 1.0:
            days *= 1.0  # make sure it's a decimal
            result['mean per day'] = result['mean'] / days
            quartiles_per_day = quartiles(collection, days)
            result['median per day'] = quartiles_per_day[50]
            result['quartiles per day'] = quartiles_per_day

        if over and result['count all'] > 0:
            result['thresholds over'] = dataset_count_over(collection, over)

        if under and result['count all'] > 0:
            result['thresholds under'] = dataset_count_over(collection, under, is_over=False)

        return result

    def format_stats(name, input_data):
        """
        Edit the stats dictionary slightly, formatting data to have less dicts/tuples
        and more strings so it is easier to export.
        Also prefix all keys with "name"
        :param input_data: if 'thresholds over' or 'thresholds under' keys are included,
            'count all' key must also be included
        """
        result = OrderedDict()

        # format quartiles
        if 'quartiles' in input_data:
            for threshold, amount in input_data['quartiles'].items():
                input_data['quartile {}'.format(threshold)] = amount
            del input_data['quartiles']

        if 'quartiles per day' in input_data:
            for threshold, amount in input_data['quartiles per day'].items():
                input_data['per day quartile {}'.format(threshold)] = amount
            del input_data['quartiles per day']

        # format thresholds
        if 'thresholds over' in input_data:
            for threshold, count_over in input_data['thresholds over']:
                input_data['over {} ratio'.format(threshold)] = count_over * 1.0 / input_data['count all']
            del input_data['thresholds over']

        if 'thresholds under' in input_data:
            for threshold, count_over in input_data['thresholds under']:
                input_data['under {} ratio'.format(threshold)] = count_over * 1.0 / input_data['count all']
            del input_data['thresholds under']

        # prefix with name
        for input_key, input_value in input_data.items():
            result['%s %s' % (name, input_key)] = input_value

        return result

    def duration(collection):
        # seconds -> minutes
        return [coll_trip['duration']/60 for coll_trip in collection]

    def distance(collection):
        return [coll_trip['distance'] for coll_trip in collection]

    def fuel(collection):
        return [coll_trip['fuel_use'] for coll_trip in collection]

    def collection_round(collection, round_to):
        # bin each value to the nearest lower multiple of round_to
        return [round_to * int(coll_value * (1.0 / round_to)) for coll_value in collection]

    trips_weird = []
    trips_good = []
    trips_refueled = []
    trip_counts_by_vin = {}
    for trip in all_trips:
        # Find and exclude "weird" trips, that are likely to be system errors caused by things like GPS misreads
        # rather than actual trips.
        # Not all errors will be caught - sometimes it is impossible to tell. Consequently,
        # this operates on a best-effort basis, catching some of the most common and obvious problems.
        # Various "weird" trips like that are somewhat less than 1% of a test dataset (Vancouver, Jan 27 - Feb 3)
        # and the conditions below catch roughly 50-80% of them.
        if is_trip_weird(trip):
            trips_weird.append(trip)
        else:
            trips_good.append(trip)

        trip_counts_by_vin[trip['vin']] = trip_counts_by_vin.get(trip['vin'], 0) + 1

        # TODO: also collect short distance but long duration and/or fuel use - these are likely to be round trips.
        # Some sort of heuristic might have to be developed that establishes ratios of duration/fuel use
        # that make a trip likely a round trip. Complicating matters is the fact that fuel use is quite unreliable.

        if 'fuel_use' in trip and trip['fuel_use'] < 0:
            # collect trips that have included a refuel, for use in stats to be added later
            trips_refueled.append(trip)

    for vin in all_known_vins:
        # fill in trip count for cars with 0 trips, if any
        if vin not in trip_counts_by_vin:
            trip_counts_by_vin[vin] = 0

    time_elapsed_seconds = (ending_time - starting_time).total_seconds()
    time_elapsed_days = time_elapsed_seconds * 1.0 / (24*60*60)

    time_missing_seconds = len(data_dict['metadata']['missing']) * data_dict['metadata']['time_step']
    time_missing_ratio = time_missing_seconds * 1.0 / time_elapsed_seconds

    trips_per_car = list(trip_counts_by_vin.values())

    stats = OrderedDict()
    stats['starting time'] = starting_time
    stats['ending time'] = ending_time

    stats['missing data ratio'] = time_missing_ratio

    stats['total vehicles'] = len(trip_counts_by_vin)
    stats['total trips'] = len(trips_good)
    stats['total trips per day'] = len(trips_good) / time_elapsed_days

    stats['time elapsed seconds'] = time_elapsed_seconds
    stats['time elapsed days'] = time_elapsed_days

    stats['utilization ratio'] = sum(duration(trips_good)) / len(trip_counts_by_vin) / (time_elapsed_seconds/60)

    stats.update(format_stats('trips per car',
                              stats_for_collection(trips_per_car,
                                                   trips_per_car,
                                                   time_elapsed_days)))

    stats.update(format_stats('distance per trip',
                              stats_for_collection(distance(trips_good),
                                                   collection_round(distance(trips_good), 0.5),
                                                   over=[5, 10])))

    stats.update(format_stats('duration per trip',
                              stats_for_collection(duration(trips_good),
                                                   collection_round(duration(trips_good), 5),
                                                   over=[2*60, 5*60, 10*60])))

    # bugfix: these stats describe parkings, so iterate finished_parkings -
    # this previously read finished_trips, making "duration per parking"
    # a duplicate of the trip-duration statistics.
    parking_durations = duration(park for vin in data_dict['finished_parkings']
                                 for park in data_dict['finished_parkings'][vin])
    stats.update(format_stats('duration per parking',
                              stats_for_collection(parking_durations,
                                                   collection_round(parking_durations, 5),
                                                   over=[2*60, 6*60, 12*60, 36*60])))
    # TODO: It might be more informative to calculate
    # (total length of parkings over 2 hours) / (total length of all parkings) instead of
    # (number of parkings over 2 hours) / (number of all parkings) as we're doing now
    # - or at least in addition to

    stats.update(format_stats('fuel use stats',
                              stats_for_collection(fuel(trips_good),
                                                   fuel(trips_good),
                                                   under=[1, 5],
                                                   over=[1, 5, 10])))

    # get some stats on weird trips as outlined above
    if len(trips_weird) > 0:
        stats['weird trip count'] = len(trips_weird)
        stats['weird trips per day'] = len(trips_weird) * 1.0 / time_elapsed_days
        stats['weird trip ratio'] = len(trips_weird) * 1.0 / len(all_trips)

        stats.update(format_stats('weird trips duration',
                                  stats_for_collection(duration(trips_weird),
                                                       duration(trips_weird))))
        stats.update(format_stats('weird trips distance',
                                  stats_for_collection(distance(trips_weird),
                                                       collection_round(distance(trips_weird), 0.002),
                                                       under=[0.01, 0.02])))

    return stats
267 |
268 |
def stats_slice(data_dict, from_time, to_time):
    """
    Get a slice of data_dict containing only activity between
    from_time and to_time.

    This is intended for stats purposes, to split larger datasets to get
    use stats by day or week.

    Trips that straddle the cut-off datetimes are truncated to start/end
    on from_time and to_time in attempt to make utilization ratio calculation
    more accurate. This makes trip duration data less accurate.

    This is necessarily slightly imprecise, for instance cut-off
    parts of trips can be getting classified as mini weird trips.
    However, accuracy for utilization ratio is well under 1%.

    :returns: a new dict with the same structure as data_dict;
        trips and parkings that straddle the boundaries are copied
        before being trimmed, so data_dict itself is not modified
    """

    result_dict = {
        'finished_trips': {},
        'finished_parkings': {},
        'unfinished_trips': {},
        'unfinished_parkings': {},
        'unstarted_trips': {},
        # shallow-copy metadata so the slice's times can be adjusted below
        # without touching the original dataset's metadata
        'metadata': dict.copy(data_dict['metadata'])
    }

    for vin in data_dict['finished_trips']:
        # first do the rough filtering
        # use dict.copy to avoid changing trip durations in the passed-by-reference data_dict
        trips = [dict.copy(trip) for trip in data_dict['finished_trips'][vin]
                 # normal trips, fully within the window
                 if (from_time <= trip['start']['time'] <= trip['end']['time'] <= to_time)

                 # trips straddling from_time
                 or (trip['start']['time'] < from_time < trip['end']['time'] < to_time)

                 # trips straddling to_time
                 or (from_time < trip['start']['time'] < to_time < trip['end']['time'])

                 # trips spanning whole day from from_time to to_time
                 or (trip['start']['time'] < from_time and trip['end']['time'] > to_time)]

        # then trim off ends of trips that straddle dataset borders (either from_time
        # or to_time).
        # this will hit on accuracy of trip duration statistics, but improve
        # accuracy of utilization ratio calculation.
        # need to only look at first_trip and last_trip in the newly filtered list
        # because by definition only one trip each will straddle from_time and to_time.
        if len(trips):
            first_trip = trips[0]
            if first_trip['start']['time'] < from_time:
                first_trip['start']['time'] = from_time
                first_trip['duration'] = (first_trip['end']['time'] - from_time).total_seconds()
                # not recalculating speed since it'll be pretty meaningless on the changed duration

            last_trip = trips[-1]
            if last_trip['end']['time'] > to_time:
                last_trip['end']['time'] = to_time
                last_trip['duration'] = (to_time - last_trip['start']['time']).total_seconds()

            result_dict['finished_trips'][vin] = trips

    # filter out cars with no trips
    result_dict['finished_trips'] = {vin: trips for vin, trips in result_dict['finished_trips'].items()
                                     if len(trips) > 0}

    for vin in data_dict['finished_parkings']:
        # first do the rough filtering
        # see comments for finished_trips filter above for reasoning
        parks = [dict.copy(park) for park in data_dict['finished_parkings'][vin]
                 if (from_time <= park['starting_time'] <= park['ending_time'] <= to_time)
                 or (park['starting_time'] < from_time < park['ending_time'] < to_time)
                 or (from_time < park['starting_time'] < to_time < park['ending_time'])
                 or (park['starting_time'] < from_time and park['ending_time'] > to_time)]

        # trim off ends as for finished_trips
        if len(parks):
            first_park = parks[0]
            if first_park['starting_time'] < from_time:
                first_park['starting_time'] = from_time
                first_park['duration'] = (first_park['ending_time'] - from_time).total_seconds()

            last_park = parks[-1]
            if last_park['ending_time'] > to_time:
                last_park['ending_time'] = to_time
                last_park['duration'] = (to_time - last_park['starting_time']).total_seconds()

            result_dict['finished_parkings'][vin] = parks

    # filter out cars with no parkings
    result_dict['finished_parkings'] = {vin: parkings for vin, parkings in result_dict['finished_parkings'].items()
                                        if len(parkings) > 0}

    # TODO: should we add unfinished into finished, trimming them?
    # to_time is already non-inclusive (e.g. from_time being 04:00, to_time will be 03:59),
    # so we use less-than-or-equal, from_time <= data <= to_time
    unfi_parkings = data_dict['unfinished_parkings']
    result_dict['unfinished_parkings'] = {vin: unfi_parkings[vin] for vin in unfi_parkings
                                          if from_time <= unfi_parkings[vin]['starting_time'] <= to_time}

    unfi_trips = data_dict['unfinished_trips']
    result_dict['unfinished_trips'] = {vin: unfi_trips[vin] for vin in unfi_trips
                                       if from_time <= unfi_trips[vin]['start']['time'] <= to_time}

    unst_trips = data_dict['unstarted_trips']
    result_dict['unstarted_trips'] = {vin: unst_trips[vin] for vin in unst_trips
                                      if from_time <= unst_trips[vin]['end']['time'] <= to_time}

    # Now we just need to adjust metadata
    result_dict['metadata']['starting_time'] = from_time
    result_dict['metadata']['ending_time'] = to_time

    # adjust missing data points
    # note that the list comp implicitly does a copy of the `missing` list, which is good,
    # because we need the original missing list for next iterations
    result_dict['metadata']['missing'] = [missing_datetime for missing_datetime
                                          in data_dict['metadata']['missing']
                                          if from_time <= missing_datetime < to_time]

    # TODO: there is a bug here somewhere:
    # analysing the same time period from two differently-cut datasets gives different results.
    # e.g., a dataset from 2015-07-20 to 2015-08-09 and another dataset from 2015-08-01 to 2015-08-19
    # both contain a period from 2015-08-01 to 2015-08-08 but the resulting statistics
    # are slightly different. This is bad.

    # I think I should rewrite how the unstarted/unfinished trips are treated in general.
    # Rather than trimming ends off but keeping the trips in trips dicts, should convert them
    # to unstarted/unfinished trips instead.

    # TODO: need to create tests after "correct" logic is finalized

    # TODO: split this code into a "split.py" or "splice.py" to complement merge.py

    # TODO: make a basic visualiser? using the slice function and output into json,
    # and a HTML/js to take in that json and display it in little lines with tooltips identifying cars
    # Would be useful for debugging - and also for future visualisations

    return result_dict
407 |
408 |
def repr_floats(result):
    """
    Replace every float value in `result` with a '.10g'-formatted string,
    in place, and return the same dict.

    Pinning floats to 10 significant digits keeps CSV output identical
    across Python 2 and 3 and across separate Python 3 runs; floats have
    to be stringified for CSV anyway, and this makes it consistent.
    10 digits is plenty for stats while staying well below where float
    representation issues appear.
    """

    for key, value in result.items():
        if isinstance(value, float):
            result[key] = '{:.10g}'.format(value)

    return result
422 |
423 |
def stats(data_dict, output_file, tz_offset):
    """
    Compute statistics for the whole dataset plus rolling one-day and
    seven-day slices (where enough data is available), and write all
    result rows to output_file as CSV.

    :param tz_offset: hours (from process.py's -tz param) used to offset
        the slice boundaries; times in data_dict are expected to be UTC,
        so this corrects for the city's timezone.
    """

    metadata = data_dict['metadata']
    step = timedelta(seconds=metadata['time_step'])

    # first, stats over the full dataset
    all_results = [repr_floats(stats_dict(data_dict))]

    # Slice boundaries land on 4 a.m. local time (tz_offset hours from UTC)
    # for a better logical split of "days" of carshare activity.
    cut_time = metadata['starting_time'] - step
    cut_time -= timedelta(hours=tz_offset)
    cut_time += timedelta(hours=4)

    # Note: trips_per_car and similar stats will be inaccurate if cars are added or removed
    # during the data period, as the highest car count during the data period will be used
    # for all slices
    while cut_time <= metadata['ending_time']:
        for span_days in (1, 7):
            span_start = cut_time - timedelta(days=span_days) + step

            # only emit a slice once enough data has accumulated to fill it
            if span_start >= metadata['starting_time']:
                sliced = stats_slice(data_dict, span_start, cut_time)
                all_results.append(repr_floats(stats_dict(sliced)))

        cut_time += timedelta(days=1)

    write_csv_to_file(output_file, all_results)
468 |
--------------------------------------------------------------------------------