├── dask-worker-space ├── purge.lock └── global.lock ├── README.md ├── Data ├── Template.html └── PTV_Locations.csv ├── requirements.yml ├── Export ├── PTV_Locations.html └── Trip_lengths.html ├── PyConDe 2018 - Interactive Visualization of Traffic Data.ipynb └── .ipynb_checkpoints └── PyConDe 2018 - Interactive Visualization of Traffic Data-checkpoint.ipynb /dask-worker-space/purge.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dask-worker-space/global.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyConDe 2018 - Interactive Visualization of Traffic Data using Bokeh 2 | 3 | ## Set Up 4 | For this tutorial, you will need a setup with some additional Python packages. On my machine, I had issues with installing a Python3 environment with GeoPandas, therefore I use legacy Python. If you use Anaconda, you can use the Anaconda bash shell to setup an environment with name **Python27_TrafficVisualization** and all necessary dependencies via: 5 | 6 | 1. Download or clone the content of this repository to your local machine. 7 | 8 | 2. Change your working directory to the folder of the cloned repository: 9 | 10 | cd /path/to/your/directory 11 | 12 | 3. Open the (Anaconda) Terminal and create virtual environment via: 13 | 14 | conda env create -f requirements.yml 15 | 16 | 4. Activate environment via: 17 | 18 | conda activate Python27_TrafficVisualization 19 | 20 | 5. Start Jupyter Notebook Server via: 21 | 22 | jupyter notebook 23 | 24 | 6. 
Open Notebook "PyConDe 2018 - Interactive Visualization of Traffic Data.ipynb" 25 | 26 | 27 | All necessary data for the execution of the notebook can be downloaded by evaluating the cell in the "Download Data" Section of the Notebook (~ 12 GB). 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /Data/Template.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | OSM PGX Overview 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 |
19 |

Traffic Report

20 |
21 |
22 | 23 | 24 | 25 | 38 | 39 | 40 | 41 | 42 | 43 |
44 | 45 |

Overview

46 |
47 |
48 |
49 |
50 | 51 | 52 | {% for item in overview_table %} 53 | 54 | 55 | 56 | 57 | {% endfor %} 58 | 59 |
{{item.key}}{{item.value}}
60 |
61 |
62 |
63 | 64 | 65 | 66 |
67 | 68 |

Basic Plots

69 | 70 |
71 |
72 | {{p_trips}} 73 |
74 |
75 | {{p_lengths}} 76 |
77 |
78 |
79 | 80 | 81 | 82 | 83 |
84 | 85 |

Heatmap

86 | 87 |
88 | 89 | 90 | {{p_heatmap}} 91 | 92 | 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /Data/PTV_Locations.csv: -------------------------------------------------------------------------------- 1 | ,Name,City,Address,Country,Telefone,Fax,Email,Latitude,Longitude,X,Y 2 | 0,PTV Headquarter Germany,Karlsruhe,Haid-und-Neu-Straße 15,Germany,+49 721 9651-0,+49 721 9651-699,info@ptvgroup.com,49.01334,8.427800000000001,938178.4045075525,6277125.216101138 3 | 1,"PTV Austria, Vienna",Vienna,Siegelgasse 1/2/1 15 ,Austria,+43 (0) 1 710 2147 ,+43 (0) 1 710 2147 ,info.at@ptvgroup.com,48.2050867,16.3877432,1824275.2282749317,6141042.025760664 4 | 2,"PTV Benelux, Utrecht",Utrecht,Energieweg 1 ,Netherlands,+31 (0) 346 581 600 ,+31 (0) 346 581 616,info.nl@ptvgroup.com,52.1246513,5.043253,561412.3559016498,6822695.46488728 5 | 3,"PTV Benelux, Ieper",Ypres,Albert Dehemlaam 31 ,Belgium,+32 (0) 57 224 522 ,+32 (0) 57 224 536 ,info.be@ptvgroup.com,50.8670375,2.8876555,321452.33984639606,6597807.811138984 6 | 4,"PTV Benelux, Mechelen",Mechelen,De Regenboog 11 ,Belgium,+32 (0) 57 224 522 ,+32 (0) 57 224 536 ,info.be@ptvgroup.com,51.0066408,4.4628466,496801.81100049184,6622468.487448069 7 | 5,"PTV Nordics, Göteborg",Gothenburg,Gullbergs Strandgata 36D ,Sweden,+46 31 687280 ,,info-se@routeoptimiser.com,57.7188307,11.9874373,1334435.4161522945,7908483.131814899 8 | 6,"PTV Loxane France Logistics, Paris-Cergy","Cergy-Pontoise, Paris",10-12 boulevard de l'Oise ,France,+33 (0) 1 30 75 75 75,+33 (0) 1 30 75 75 76,info.fr@ptvgroup.com,49.0359102,2.0822361,231793.4623633727,6280956.801655616 9 | 7,"PTV France Traffic, Straßbourg",Strasbourg,"14, Rue du Général Gouraud",France,+33 3 90221260,+33 3 88357293,contact.fr@ptvgroup.com,48.587748,7.7572091,863528.5669889486,6205198.163060933 10 | 8,"PTV France Traffic, Lyon",Lyon,47 rue Maurice Flandin,France,+33 4 
72121466,,contact.fr@ptvgroup.com,45.7578294,4.8629028,541335.8634731855,5741625.778335779 11 | 9,"PTV Italia Logistics, Perugia",Perugia,via Settevalli 133c,Italy,+39 075 500 17 22,,info.it@ptvgroup.com,43.0952157,12.3693429,1376948.953075393,5326475.895267268 12 | 10,"PTV Sistema, Rom",Roma,Via R. Bonghi 11/b,Italy,+39 06 99344415,,info.it@ptvgroup.com,41.8900502,12.5011695,1391623.8230604015,5144523.699686472 13 | 11,"PTV UK, Birmingham",Halesowen ,"4-5 Centre Court, Vine Lane ",United Kingdom,+44 121 585 6633 ,,sales.uk.logistics@ptvgroup.com,52.4482616,-2.0459783,-227757.26253008674,6881586.284429609 14 | 12,"PTV UK, London",London,8 Doncannon Street,United Kingdom,,,info@ptvgroup.com,51.5084349,-0.1266827,-14102.253656318402,6711727.570929194 15 | 13,"PTV CEE Sp. z o. o., Warszawa",Warsaw,Aleje Jerozolimskie 142A ,Poland,+48 697 902 158 ,,info.pl@ptvgroup.com,52.218995,20.966843,2334018.286302513,6839819.831071421 16 | 14,"PTV North America, Portland","Portland, OR",9755 SW Barnes Road ,USA,+1 (503) 297-2556 ,+1 (503) 297-2230 ,info.us@ptvgroup.com,45.511817,-122.777965,-13667580.544434363,5702459.7235654835 17 | 15,"PTV North America, Arlington","Arlington, VA",1530 Wilson Boulevard ,USA,+1 (571) 645-5800 ,+1 (503) 297-2230 ,info.us@ptvgroup.com,38.8942815,-77.0751683,-8579968.48796186,4706539.597968547 18 | 16,"PTV América Latina, México City",Mexico City,Av. Revolución 1877,Mexico,+52 55 6385 0900 ,,info.al@ptvgroup.com,19.3401409,-99.19189539999999,-11041991.286747653,2195023.2156554125 19 | 17,"PTV Brasil, São Paulo",São Paulo,"Av. 
Angélica, 2503",Brasil,,,info@ptvgroup.com,-23.5539673,-46.6621038,-5194401.634358875,-2699150.509586599 20 | 18,"PTV Asia Pacific, Sydney ",Sydney,61-89 Buckingham Street ,Australia,+61 2 9698 1292,,info@ptvgroup.com,-33.8890708,151.206943,16832279.89916754,-4013916.6528285295 21 | 19,"PTV Asia Pacific, Singapur",Singapore,7 Temasek Boulevard ,Singapore,+65 6323 2746 ,+65 6884 3511,info@ptvgroup.com,1.2954194,103.8584778,11561472.863260506,144217.71541120106 22 | 20,"PTV Asia Pacific, Hong Kong ","Wan Chai, Hong Kong",160 Gloucester Road ,Hong Kong,+852 9154 6007,,info@ptvgroup.com,22.279645,114.1778873,12710224.274087775,2545133.236500539 23 | 21,"PTV China, Shanghai",Shanghai,"East Nanjing Road, No.800 ",China,+86-21-63288206 ,+86-21-63288236 ,sales.cn@ptvgroup.com,31.239921,121.48533,13523685.074452804,3663946.8074628413 24 | 22,PTV Japan,Tokyo,AD-O Shibuya Dogenzaka 11F ,Japan,+81-3-6416-5885 ,,sales.jp@ptvgroup.com,35.6593968,139.6964402,15550936.588696994,4253855.541026517 25 | 23,"PTV Middle East, Dubai",Dubai,"Fortune Tower, Plot No. 
C1 ",UAE,+971 567 00 00 95 ,+971 4 454 1034 ,info.me@ptvgroup.com,25.0690597,55.138396,6137978.165877872,2884229.439171727 26 | 24,"PTV Africa, Johannesburg",Johannesburg ,27 Owl Street ,South Africa,+27 (0) 11-482-6412 ,,sales.africa@ptvgroup.com,-26.18377,28.01965,3119133.170205748,-3021859.505310397 27 | -------------------------------------------------------------------------------- /requirements.yml: -------------------------------------------------------------------------------- 1 | name: Python27_TrafficVisualization 2 | channels: 3 | - anaconda 4 | - ioam 5 | - bokeh 6 | - conda-forge 7 | - defaults 8 | dependencies: 9 | - colorcet=1.0.0=py27_0 10 | - bokeh=0.13.0=pyhed0053b_0 11 | - boost=1.66.0=py27_vc9_1 12 | - boost-cpp=1.66.0=vc9_1 13 | - bzip2=1.0.6=vc9_1 14 | - ca-certificates=2018.8.13=ha4d7672_0 15 | - certifi=2018.8.13=py27_0 16 | - cftime=1.0.1=py27hc997a72_0 17 | - click-plugins=1.0.3=py_1 18 | - cligj=0.4.0=py27_0 19 | - curl=7.60.0=vc9_0 20 | - cyordereddict=1.0.0=py27h0c8e037_2 21 | - datashader=0.6.6=0 22 | - descartes=1.1.0=py_2 23 | - expat=2.2.5=vc9_0 24 | - fastparquet=0.1.6=py27_0 25 | - fiona=1.7.13=py27h64536ba_3 26 | - freetype=2.8.1=vc9_0 27 | - freexl=1.0.2=vc9_2 28 | - gdal=2.2.4=py27_vc9_0 29 | - geopandas=0.4.0=py_1 30 | - geos=3.6.2=h2880e7c_2 31 | - geotiff=1.4.2=vc9_1 32 | - gpxpy=1.3.2=py_0 33 | - h5netcdf=0.6.2=py_0 34 | - h5py=2.8.0=py27h0c8e037_0 35 | - hdf4=4.2.13=vc9_0 36 | - hdf5=1.10.1=vc9_2 37 | - icu=58.2=vc9_0 38 | - jpeg=9b=vc9_2 39 | - kealib=1.4.7=vc9_4 40 | - krb5=1.14.6=vc9_0 41 | - libgdal=2.2.4=vc9_5 42 | - libiconv=1.14=vc9_4 43 | - libnetcdf=4.4.1.1=vc9_10 44 | - libpng=1.6.34=vc9_0 45 | - libpq=9.6.3=vc9_0 46 | - libsodium=1.0.16=vc9_0 47 | - libspatialindex=1.8.5=vc9_1 48 | - libspatialite=4.3.0a=vc9_19 49 | - libtiff=4.0.9=vc9_0 50 | - libxml2=2.9.5=vc9_1 51 | - libxslt=1.1.32=vc9_0 52 | - linecache2=1.0.0=py27_0 53 | - lxml=4.2.2=py27h803002f_0 54 | - lzo=2.10=vc9_0 55 | - munch=2.3.2=py_0 56 | - 
netcdf4=1.4.0=py27_0 57 | - openjpeg=2.3.0=vc9_2 58 | - openssl=1.0.2o=vc9_0 59 | - pillow=5.1.0=py27_0 60 | - proj4=4.9.3=vc9_5 61 | - psycopg2=2.7.5=py27hc64555f_1 62 | - pyct=0.4.3=py_0 63 | - pyproj=1.9.5.1=py27_0 64 | - pyreadline=2.1=py27_0 65 | - pysal=1.14.4.post2=py27_0 66 | - pytables=3.4.4=py27_8 67 | - qt=5.6.2=vc9_1 68 | - rise=5.3.0=py27_0 69 | - rtree=0.8.3=py27_0 70 | - shapely=1.6.4=py27hc31c106_1 71 | - snappy=1.1.7=vc9_1 72 | - sqlite=3.20.1=vc9_2 73 | - thrift=0.11.0=py27hc56fc5f_1 74 | - tk=8.6.8=vc9_0 75 | - traceback2=1.4.0=py27_0 76 | - unittest2=1.1.0=py_0 77 | - xarray=0.10.8=py27_0 78 | - xerces-c=3.2.0=vc9_0 79 | - xz=5.2.4=h3cc03e0_1 80 | - yaml=0.1.7=vc9_0 81 | - zeromq=4.2.5=vc9_2 82 | - zlib=1.2.11=vc9_0 83 | - alabaster=0.7.10=py27h2cab13d_0 84 | - anaconda-client=1.6.14=py27_0 85 | - anaconda=custom=py27h689e5c3_0 86 | - anaconda-project=0.8.2=py27he89e3ca_0 87 | - asn1crypto=0.24.0=py27_0 88 | - astroid=1.6.3=py27_0 89 | - astropy=2.0.6=py27hc997a72_1 90 | - attrs=18.1.0=py27_0 91 | - babel=2.5.3=py27_0 92 | - backports=1.0=py27h6492d98_1 93 | - backports.functools_lru_cache=1.5=py27_1 94 | - backports.shutil_get_terminal_size=1.0.0=py27h1657125_2 95 | - backports.shutil_which=3.5.1=py27_2 96 | - backports_abc=0.5=py27h0ec6b72_0 97 | - beautifulsoup4=4.6.0=py27hc287451_1 98 | - bitarray=0.8.1=py27h0c8e037_1 99 | - bkcharts=0.2=py27h92b6de3_0 100 | - blas=1.0=mkl 101 | - blaze=0.11.3=py27h97e5449_0 102 | - bleach=2.1.3=py27_0 103 | - blosc=1.14.3=h95ae18b_0 104 | - boto=2.48.0=py27h1ccb131_1 105 | - bottleneck=1.2.1=py27hde90680_0 106 | - cartopy=0.16.0=py27h6ef83e3_0 107 | - cdecimal=2.3=py27h0c8e037_3 108 | - cffi=1.11.5=py27hdb016f4_0 109 | - chardet=3.0.4=py27h56c3b73_1 110 | - click=6.7=py27hb6defca_0 111 | - cloudpickle=0.5.3=py27_0 112 | - clyent=1.2.2=py27h4424948_1 113 | - colorama=0.3.9=py27hdfe4ae1_0 114 | - comtypes=1.1.4=py27_0 115 | - configparser=3.5.0=py27h2fa79a8_0 116 | - console_shortcut=0.1.1=h6bb2dd7_3 117 | - 
contextlib2=0.5.5=py27h42efda5_0 118 | - cryptography=2.2.2=py27h0c8e037_0 119 | - cycler=0.10.0=py27h59acbbf_0 120 | - cython=0.28.2=py27h0c8e037_0 121 | - cytoolz=0.9.0.1=py27h0c8e037_0 122 | - dask=0.17.5=py27_0 123 | - dask-core=0.17.5=py27_0 124 | - datashape=0.5.4=py27h3d6e61b_0 125 | - decorator=4.3.0=py27_0 126 | - distributed=1.21.8=py27_0 127 | - docutils=0.14=py27h8652d09_0 128 | - entrypoints=0.2.3=py27h0271f2b_2 129 | - enum34=1.1.6=py27h2aa175b_1 130 | - et_xmlfile=1.0.1=py27h1de5d23_0 131 | - fastcache=1.0.2=py27h0c8e037_2 132 | - filelock=3.0.4=py27_0 133 | - flask=1.0.2=py27_1 134 | - flask-cors=3.0.4=py27_0 135 | - funcsigs=1.0.2=py27h8885ae1_0 136 | - functools32=3.2.3.2=py27h0cdbcdb_1 137 | - futures=3.2.0=py27h8b2aecd_0 138 | - futures-compat=1.0=py2_0 139 | - get_terminal_size=1.0.0=h38e98db_0 140 | - gevent=1.3.0=py27h0c8e037_0 141 | - glob2=0.6=py27hd4eee8c_0 142 | - greenlet=0.4.13=py27h0c8e037_0 143 | - grin=1.2.1=py27_4 144 | - heapdict=1.0.0=py27_2 145 | - html5lib=1.0.1=py27h5a33001_0 146 | - icc_rt=2017.0.4=h97af966_0 147 | - idna=2.6=py27h1ea29d3_1 148 | - imageio=2.3.0=py27_0 149 | - imagesize=1.0.0=py27_0 150 | - intel-openmp=2018.0.0=8 151 | - ipaddress=1.0.22=py27_0 152 | - ipykernel=4.8.2=py27_0 153 | - ipython=5.7.0=py27_0 154 | - ipython_genutils=0.2.0=py27hbe997df_0 155 | - ipywidgets=7.2.1=py27_0 156 | - isort=4.3.4=py27_0 157 | - itsdangerous=0.24=py27hcf63135_1 158 | - jdcal=1.4=py27_0 159 | - jedi=0.12.0=py27_1 160 | - jinja2=2.10=py27hba1794b_0 161 | - jsonschema=2.6.0=py27haaf3834_0 162 | - jupyter=1.0.0=py27_4 163 | - jupyter_client=5.2.3=py27_0 164 | - jupyter_console=5.2.0=py27h6ed736b_1 165 | - jupyter_core=4.4.0=py27h1619e65_0 166 | - jupyterlab=0.32.1=py27_0 167 | - jupyterlab_launcher=0.10.5=py27_0 168 | - kiwisolver=1.0.1=py27h9815e84_0 169 | - lazy-object-proxy=1.3.1=py27ha5c8080_0 170 | - libcurl=7.60.0=h0990ea7_0 171 | - libssh2=1.8.0=h77a7533_4 172 | - llvmlite=0.23.1=py27hf224fbf_0 173 | - 
locket=0.2.0=py27h1ca288a_1 174 | - m2w64-gcc-libgfortran=5.3.0=6 175 | - m2w64-gcc-libs=5.3.0=7 176 | - m2w64-gcc-libs-core=5.3.0=7 177 | - m2w64-gmp=6.1.0=2 178 | - m2w64-libwinpthread-git=5.0.0.4634.697f757=2 179 | - markupsafe=1.0=py27h9d4480d_1 180 | - matplotlib=2.2.2=py27h8803d4e_1 181 | - mccabe=0.6.1=py27hde0bf6e_1 182 | - menuinst=1.4.14=py27h0c8e037_0 183 | - mistune=0.8.3=py27h0c8e037_1 184 | - mkl=2018.0.2=1 185 | - mkl-service=1.1.2=py27h3c6b6b0_4 186 | - mkl_fft=1.0.1=py27hc997a72_0 187 | - more-itertools=4.1.0=py27_0 188 | - mpmath=1.0.0=py27h0d59bc2_2 189 | - msgpack-python=0.5.6=py27hdc96acc_0 190 | - msys2-conda-epoch=20160418=1 191 | - multipledispatch=0.5.0=py27_0 192 | - nbconvert=5.3.1=py27h7a573cf_0 193 | - nbformat=4.4.0=py27hf49b375_0 194 | - networkx=2.1=py27_0 195 | - nltk=3.3.0=py27_0 196 | - nose=1.3.7=py27h84c72c6_2 197 | - notebook=5.5.0=py27_0 198 | - numba=0.38.0=py27h39f3610_0 199 | - numexpr=2.6.5=py27h3b259a0_0 200 | - numpy=1.14.3=py27h911edcf_1 201 | - numpy-base=1.14.3=py27h917549b_1 202 | - numpydoc=0.8.0=py27_0 203 | - odo=0.5.1=py27h64810b2_0 204 | - olefile=0.45.1=py27_0 205 | - openpyxl=2.5.3=py27_0 206 | - owslib=0.16.0=py27_0 207 | - packaging=17.1=py27_0 208 | - pandas=0.23.0=py27h39f3610_0 209 | - pandoc=1.19.2.1=hb2460c7_1 210 | - pandocfilters=1.4.2=py27h76461d3_1 211 | - parso=0.2.0=py27_0 212 | - partd=0.3.8=py27h1e0692f_0 213 | - path.py=11.0.1=py27_0 214 | - pathlib2=2.3.2=py27_0 215 | - patsy=0.5.0=py27_0 216 | - pep8=1.7.1=py27_0 217 | - pickleshare=0.7.4=py27hb5f6335_0 218 | - pip=10.0.1=py27_0 219 | - pkginfo=1.4.2=py27_1 220 | - pluggy=0.6.0=py27h89dc50b_0 221 | - ply=3.11=py27_0 222 | - prompt_toolkit=1.0.15=py27h3a8ec6a_0 223 | - psutil=5.4.5=py27h0c8e037_0 224 | - py=1.5.3=py27_0 225 | - pycodestyle=2.4.0=py27_0 226 | - pycosat=0.6.3=py27hcd410c5_0 227 | - pycparser=2.18=py27hb43d16c_1 228 | - pycrypto=2.6.1=py27h0c8e037_8 229 | - pycurl=7.43.0.1=py27hc64555f_0 230 | - pyepsg=0.3.2=py27_0 231 | - 
pyflakes=1.6.0=py27h34e7826_0 232 | - pygments=2.2.0=py27ha50f84f_0 233 | - pylint=1.8.4=py27_0 234 | - pyodbc=4.0.23=py27hc56fc5f_0 235 | - pyopenssl=18.0.0=py27_0 236 | - pyparsing=2.2.0=py27hc7d9fa6_1 237 | - pyqt=5.6.0=py27h224ed30_5 238 | - pyshp=1.2.12=py27_0 239 | - pysocks=1.6.8=py27_0 240 | - pytest=3.5.1=py27_0 241 | - python=2.7.15=he216670_0 242 | - python-dateutil=2.7.3=py27_0 243 | - python-snappy=0.5.2=py27hc56fc5f_0 244 | - pytz=2018.4=py27_0 245 | - pyviz_comms=0.1.1=py27_0 246 | - pywavelets=0.5.2=py27h0dc3f48_0 247 | - pywin32=223=py27h0c8e037_1 248 | - pywinpty=0.5.1=py27_0 249 | - pyyaml=3.12=py27ha287073_1 250 | - pyzmq=17.0.0=py27h0c8e037_1 251 | - qtawesome=0.4.4=py27h766b13d_0 252 | - qtconsole=4.3.1=py27h77d40ac_0 253 | - qtpy=1.4.1=py27_0 254 | - requests=2.18.4=py27h3159eba_1 255 | - rope=0.10.7=py27hb65afb6_0 256 | - ruamel_yaml=0.15.35=py27h0c8e037_1 257 | - scandir=1.7=py27h0c8e037_0 258 | - scikit-image=0.13.1=py27h0c8e037_1 259 | - scikit-learn=0.19.1=py27he00e316_0 260 | - scipy=1.1.0=py27ha6b90e7_0 261 | - seaborn=0.8.1=py27hab56d54_0 262 | - send2trash=1.5.0=py27_0 263 | - setuptools=39.1.0=py27_0 264 | - simplegeneric=0.8.1=py27_2 265 | - singledispatch=3.4.0.3=py27h3f9d112_0 266 | - sip=4.18.1=py27h5ec1c1a_2 267 | - six=1.11.0=py27ha5e1701_1 268 | - snowballstemmer=1.2.1=py27h28d3bf7_0 269 | - sortedcollections=0.6.1=py27_0 270 | - sortedcontainers=1.5.10=py27_0 271 | - sphinx=1.7.4=py27_0 272 | - sphinxcontrib=1.0=py27h0e2fb95_1 273 | - sphinxcontrib-websupport=1.0.1=py27h0d0f901_1 274 | - spyder=3.2.8=py27_0 275 | - sqlalchemy=1.2.7=py27h9dc6b9e_0 276 | - ssl_match_hostname=3.5.0.1=py27hea8a0f4_2 277 | - statsmodels=0.9.0=py27hc997a72_0 278 | - subprocess32=3.5.0=py27h0c8e037_0 279 | - sympy=1.1.1=py27hde44fae_0 280 | - tblib=1.3.2=py27h8ae915c_0 281 | - terminado=0.8.1=py27_1 282 | - testpath=0.3.1=py27h1cd488d_0 283 | - toolz=0.9.0=py27_0 284 | - tornado=5.0.2=py27_0 285 | - traitlets=4.3.2=py27h1b1b3a5_0 286 | - 
typing=3.6.4=py27_0 287 | - unicodecsv=0.14.1=py27h0bf7bb0_0 288 | - urllib3=1.22=py27hb9f5a07_0 289 | - vc=9=h7299396_1 290 | - vs2008_runtime=9.00.30729.1=hfaea7d5_1 291 | - vs2015_runtime=14.0.25123=3 292 | - wcwidth=0.1.7=py27hb1a0d82_0 293 | - webencodings=0.5.1=py27h4e224a2_1 294 | - werkzeug=0.14.1=py27_0 295 | - wheel=0.31.1=py27_0 296 | - widgetsnbextension=3.2.1=py27_0 297 | - win_inet_pton=1.0.1=py27hf41312a_1 298 | - win_unicode_console=0.5=py27hc037021_0 299 | - wincertstore=0.2=py27hf04cefb_0 300 | - winpty=0.4.3=4 301 | - wrapt=1.10.11=py27hcd2b27d_0 302 | - xlrd=1.1.0=py27h2b87a7f_1 303 | - xlsxwriter=1.0.4=py27_0 304 | - xlwings=0.11.8=py27_0 305 | - xlwt=1.3.0=py27h2271735_0 306 | - zict=0.1.3=py27h0171463_0 307 | - geoviews=1.4.3=py27_0 308 | - holoviews=1.10.7=py_0 309 | - param=1.6.1=py_0 310 | - pip: 311 | - backports.ssl-match-hostname==3.5.0.1 312 | - tables==3.4.4 313 | prefix: c:\Users\Pat\AppData\Local\Continuum\Anaconda2\envs\Python27 314 | 315 | -------------------------------------------------------------------------------- /Export/PTV_Locations.html: -------------------------------------------------------------------------------- 1 | 4 | 7 | 10 | 11 | 12 | 13 | 14 | 49 |
-------------------------------------------------------------------------------- /PyConDe 2018 - Interactive Visualization of Traffic Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "**Basic Imports**" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "ExecuteTime": { 15 | "end_time": "2018-10-17T13:10:57.106000Z", 16 | "start_time": "2018-10-17T13:10:55.355000Z" 17 | } 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "import geopandas as gpd\n", 22 | "import pandas as pd\n", 23 | "import dask.dataframe as dd\n", 24 | "\n", 25 | "import numpy as np\n", 26 | "from shapely.geometry import Polygon, MultiPolygon\n", 27 | "from IPython.display import display, display_html\n", 28 | "%matplotlib inline\n", 29 | "\n", 30 | "def display_side_by_side(*args):\n", 31 | " html_str=''\n", 32 | " for df in args:\n", 33 | " html_str+= \" \" + df.to_html()\n", 34 | " display_html(html_str.replace('table','table style=\"display:inline\"'),raw=True)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "# Download Data\n", 42 | "\n", 43 | "To download all neccessary data for this notebook, please run the following cell:" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "ExecuteTime": { 51 | "end_time": "2018-10-17T13:01:30.497000Z", 52 | "start_time": "2018-10-17T12:41:03.166000Z" 53 | } 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "import os\n", 58 | "import requests\n", 59 | "import os\n", 60 | "from multiprocessing.dummy import Pool\n", 61 | "from functools import partial\n", 62 | "import zipfile\n", 63 | "\n", 64 | "def download_file(url, export_directory=\"\", filename=None):\n", 65 | " print(\"Download file %s to directory %s\"%(url, export_directory))\n", 66 | " if filename == None:\n", 67 
| " local_filename = os.path.join(export_directory,url.split('/')[-1])\n", 68 | " else:\n", 69 | " local_filename = os.path.join(export_directory,filename)\n", 70 | " # NOTE the stream=True parameter\n", 71 | " r = requests.get(url, stream=True)\n", 72 | " with open(local_filename, 'wb') as f:\n", 73 | " for chunk in r.iter_content(chunk_size=1024):\n", 74 | " if chunk: # filter out keep-alive new chunks\n", 75 | " f.write(chunk)\n", 76 | " #f.flush() commented by recommendation from J.F.Sebastian\n", 77 | " return local_filename\n", 78 | "\n", 79 | "#Create Folders for Data:\n", 80 | "if not os.path.exists(\"Data\"):\n", 81 | " os.mkdir(\"Data\")\n", 82 | "\n", 83 | "if not os.path.exists(os.path.join(\"Data\",\"New York Taxi\")):\n", 84 | " os.mkdir(os.path.join(\"Data\",\"New York Taxi\"))\n", 85 | "\n", 86 | "if not os.path.exists(os.path.join(\"Data\",\"OSM GPX\")):\n", 87 | " os.mkdir(os.path.join(\"Data\",\"OSM GPX\"))\n", 88 | "\n", 89 | "#Download New York Taxi Data for Yellow Cabs in 2017:\n", 90 | "download_taxi_data = partial(download_file, export_directory=os.path.join(\"Data\",\"New York Taxi\"))\n", 91 | "request_strings = [r\"https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2017-%02d.csv\"%i for i in range(1,13)]\n", 92 | "pool = Pool(processes=4)\n", 93 | "pool.map(download_taxi_data, request_strings)\n", 94 | "\n", 95 | "#Download New York Taxi Data for Green Cabs in 2017:\n", 96 | "download_taxi_data = partial(download_file, export_directory=os.path.join(\"Data\",\"New York Taxi\"))\n", 97 | "request_strings = [r\"https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2017-%02d.csv\"%i for i in range(1,13)]\n", 98 | "pool = Pool(processes=4)\n", 99 | "pool.map(download_taxi_data, request_strings)\n", 100 | "\n", 101 | "#Download shapefile of New York Taxi Zones:\n", 102 | "print(\"Download Taxi Zones Shapefiles.\")\n", 103 | "zip_path = download_taxi_data(r\"https://s3.amazonaws.com/nyc-tlc/misc/taxi_zones.zip\")\n", 104 | 
"print(\"Unzip Taxi Zones Shapefiles.\")\n", 105 | "with zipfile.ZipFile(zip_path, \"r\") as z:\n", 106 | " z.extractall(os.path.dirname(zip_path))\n", 107 | "os.remove(zip_path)\n", 108 | "\n", 109 | "#Download OSM path data:\n", 110 | "osm_path = download_file(r\"https://ptv2box.ptvgroup.com/index.php/s/9sTUmxdF80NU2nr/download\", export_directory=os.path.join(\"Data\",\"OSM GPX\"), filename=\"OSM_GPX.parquet\")" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "# Interactive Maps for New York Taxi Data 2017" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## New York Taxi zones" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": { 130 | "ExecuteTime": { 131 | "end_time": "2018-08-16T18:44:00.299000Z", 132 | "start_time": "2018-08-16T18:44:00.292000Z" 133 | } 134 | }, 135 | "source": [ 136 | "In this section, we will load an process the shapefile **\"taxi_zones.shp\"** containing the New York Taxis Zones, such that we can use them as a basis for the **Bokeh** plot. 
In the next cell, we use GeoPandas to load the shapefile of Taxi zones and transform coordinate system to Web Mercador (EPSG-Code of Web Mercador Projection = 3785 ):" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "ExecuteTime": { 144 | "end_time": "2018-10-17T13:03:10.140000Z", 145 | "start_time": "2018-10-17T13:03:09.309000Z" 146 | } 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "df_taxizones = gpd.read_file(r\"Data\\New York Taxi\\taxi_zones.shp\")\n", 151 | "df_taxizones.to_crs(epsg=3785, inplace=True) #EPSG-Code of Web Mercador\n", 152 | "display(df_taxizones.head())\n", 153 | "print(\"Number of Polygons: %d\"%len(df_taxizones))\n", 154 | "df_taxizones.plot()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "Simplify Shape of Zones (otherwise slow peformance of plot):" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": { 168 | "ExecuteTime": { 169 | "end_time": "2018-10-17T13:03:10.610000Z", 170 | "start_time": "2018-10-17T13:03:10.327000Z" 171 | } 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "df_taxizones[\"geometry\"] = df_taxizones[\"geometry\"].simplify(100)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "Convert WKT Polygons to X and Y arrays with corresponding coordinates. 
Take into account **multipolygons** and separate them into individual shapes:" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "ExecuteTime": { 190 | "end_time": "2018-10-17T13:03:11.224000Z", 191 | "start_time": "2018-10-17T13:03:11.149000Z" 192 | } 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | "data = []\n", 197 | "for zonename, LocationID, borough, shape in df_taxizones[[\"zone\", \"LocationID\", \"borough\", \"geometry\"]].values:\n", 198 | " #If shape is polygon, extract X and Y coordinates of boundary line:\n", 199 | " if isinstance(shape, Polygon):\n", 200 | " X, Y = shape.boundary.xy\n", 201 | " X = [int(x) for x in X]\n", 202 | " Y = [int(y) for y in Y]\n", 203 | " data.append([LocationID, zonename, borough, X, Y])\n", 204 | " \n", 205 | " #If shape is Multipolygon, extract X and Y coordinates of each sub-Polygon:\n", 206 | " if isinstance(shape, MultiPolygon):\n", 207 | " for poly in shape:\n", 208 | " X, Y = poly.boundary.xy\n", 209 | " X = [int(x) for x in X]\n", 210 | " Y = [int(y) for y in Y]\n", 211 | " data.append([LocationID, zonename, borough, X, Y])\n", 212 | "\n", 213 | "#Create new DataFrame with X an Y coordinates separated:\n", 214 | "df_taxizones = pd.DataFrame(data, columns=[\"LocationID\", \"ZoneName\", \"Borough\", \"X\", \"Y\"])\n", 215 | "display(df_taxizones.head())\n", 216 | "print(\"Number of Polygons: %d\"%len(df_taxizones))" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "## New York Taxi Data" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "Here, we read in the data from New York Taxis and aggregate them to show us information about how frequent each taxi zone is visited. We will use Dask.DataFrame, such that the whole dataset can be loaded at once without blowing up our memory and to get a nice speedup due to the parallelizm of Dask. 
Let us start a Dask Client and Local Cluster (after the execution of the cell, click on the **Dashboard Link** to view the Dask Dashboard, where you can see the resource consumption of our computation" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": { 237 | "ExecuteTime": { 238 | "end_time": "2018-10-17T13:03:18.902000Z", 239 | "start_time": "2018-10-17T13:03:12.732000Z" 240 | } 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "from dask.distributed import Client, LocalCluster\n", 245 | "import dask.dataframe as dd\n", 246 | "from dask import compute\n", 247 | "\n", 248 | "cluster = LocalCluster()\n", 249 | "print(cluster)\n", 250 | "client = Client(cluster)\n", 251 | "client" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "Using Dask, we now read in the New York Taxi Data for the yellow and green cabs into distributed DataFrames. Note: A **Dask.DataFrame** is a *delayed object* and to calculate results, one has to trigger the computation via the **.compute()** method." 
259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": { 265 | "ExecuteTime": { 266 | "end_time": "2018-10-17T13:03:22.022000Z", 267 | "start_time": "2018-10-17T13:03:18.924000Z" 268 | }, 269 | "code_folding": [] 270 | }, 271 | "outputs": [], 272 | "source": [ 273 | "df_taxis_yellow = dd.read_csv(r\"Data\\New York Taxi\\yellow_tripdata_2017-*.csv\", \n", 274 | " usecols=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\", \"passenger_count\",\n", 275 | " \"PULocationID\", \"DOLocationID\"],\n", 276 | " parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"])\n", 277 | "df_taxis_green = dd.read_csv(r\"Data\\New York Taxi\\green_tripdata_2017-*.csv\", \n", 278 | " usecols=[\"lpep_pickup_datetime\", \"lpep_dropoff_datetime\", \"passenger_count\",\n", 279 | " \"PULocationID\", \"DOLocationID\"],\n", 280 | " parse_dates=[\"lpep_pickup_datetime\", \"lpep_dropoff_datetime\"]).rename(\n", 281 | " columns = {\"lpep_pickup_datetime\": \"tpep_pickup_datetime\", \n", 282 | " \"lpep_dropoff_datetime\": \"tpep_dropoff_datetime\"})\n", 283 | "df_taxis = dd.concat([df_taxis_yellow, df_taxis_green])\n", 284 | "\n", 285 | "#Filter data for correct year :\n", 286 | "df_taxis = df_taxis[(df_taxis[\"tpep_pickup_datetime\"].dt.year == 2017)&(df_taxis[\"tpep_dropoff_datetime\"].dt.year == 2017)]\n", 287 | "\n", 288 | "df_taxis.head()" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "Now, we calculate the number of boarding and alighting passengers for each zone and every hour and every day using the **GroupBy** Method. For this, we first create two columns specifying the hour and daytype of Pickup and Dropoff. 
Then, we define the groupby operations for Pickups and Dropoffs: and finally we trigger the parallelized computation using **dask.compute**:" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "ExecuteTime": { 303 | "end_time": "2018-10-17T13:05:52.951000Z", 304 | "start_time": "2018-10-17T13:03:22.026000Z" 305 | } 306 | }, 307 | "outputs": [], 308 | "source": [ 309 | "df_taxis[\"Pickup_Hour\"] = df_taxis[\"tpep_pickup_datetime\"].dt.hour\n", 310 | "df_taxis[\"Dropoff_Hour\"] = df_taxis[\"tpep_dropoff_datetime\"].dt.hour\n", 311 | "df_taxis[\"weekday\"] = df_taxis[\"tpep_dropoff_datetime\"].dt.weekday\n", 312 | "pickups = df_taxis.groupby(by=[\"Pickup_Hour\", \"weekday\", \"PULocationID\"])[\"passenger_count\"].sum()\n", 313 | "dropoffs = df_taxis.groupby(by=[\"Dropoff_Hour\", \"weekday\", \"DOLocationID\"])[\"passenger_count\"].sum()\n", 314 | "pickups, dropoffs = compute(pickups, dropoffs)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "**Time Series of Pickups and Dropoffs**" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": { 327 | "ExecuteTime": { 328 | "end_time": "2018-08-17T07:49:12.765000Z", 329 | "start_time": "2018-08-17T07:49:12.758000Z" 330 | } 331 | }, 332 | "source": [ 333 | "Aggregate Pickups and Dropoffs hourly:" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": { 340 | "ExecuteTime": { 341 | "end_time": "2018-10-17T13:05:53.019000Z", 342 | "start_time": "2018-10-17T13:05:52.959000Z" 343 | } 344 | }, 345 | "outputs": [], 346 | "source": [ 347 | "df_pudo = pd.DataFrame(pickups.groupby(level=0).sum())\n", 348 | "df_pudo[\"Dropoff\"] = dropoffs.groupby(level=0).sum()\n", 349 | "df_pudo.columns = [\"P\", \"D\"]\n", 350 | "df_pudo.index.rename(\"Hour\", inplace=True)\n", 351 | "df_pudo.head(3)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": 
{ 357 | "ExecuteTime": { 358 | "end_time": "2018-08-17T07:49:27.619000Z", 359 | "start_time": "2018-08-17T07:49:27.613000Z" 360 | } 361 | }, 362 | "source": [ 363 | "Plot with Holoviews (Backend: Bokeh)" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": { 370 | "ExecuteTime": { 371 | "end_time": "2018-10-17T13:05:55.310000Z", 372 | "start_time": "2018-10-17T13:05:53.027000Z" 373 | } 374 | }, 375 | "outputs": [], 376 | "source": [ 377 | "import holoviews as hv\n", 378 | "hv.extension('bokeh')" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": { 385 | "ExecuteTime": { 386 | "end_time": "2018-10-17T13:05:55.642000Z", 387 | "start_time": "2018-10-17T13:05:55.316000Z" 388 | } 389 | }, 390 | "outputs": [], 391 | "source": [ 392 | "%%opts Bars.Grouped [group_index='Group' toolbar='above' tools=['hover'] width=800]\n", 393 | "from itertools import product\n", 394 | "hours, groups = df_pudo.index.values, ['P', 'D']\n", 395 | "keys = product(hours, groups)\n", 396 | "bars = hv.Bars([(hour, pudo, df_pudo.loc[hour, pudo]) for hour, pudo in keys],\n", 397 | " ['Hour', \"Group\"], \"Passengers\")\n", 398 | "bars.relabel(group='Grouped')" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": { 404 | "ExecuteTime": { 405 | "end_time": "2018-08-17T03:26:27.375000Z", 406 | "start_time": "2018-08-17T03:26:27.329000Z" 407 | } 408 | }, 409 | "source": [ 410 | "**Finally, join the Taxi Zones DataFrame with the information about the Pickups and Dropoffs for every hour and weekday:**" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": { 417 | "ExecuteTime": { 418 | "end_time": "2018-10-17T13:05:55.680000Z", 419 | "start_time": "2018-10-17T13:05:55.646000Z" 420 | } 421 | }, 422 | "outputs": [], 423 | "source": [ 424 | "display_side_by_side(pd.DataFrame(pickups).head(), df_taxizones[[\"LocationID\", \"ZoneName\", 
\"X\"]].head())" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": { 431 | "ExecuteTime": { 432 | "end_time": "2018-10-17T13:06:00.090000Z", 433 | "start_time": "2018-10-17T13:05:55.685000Z" 434 | } 435 | }, 436 | "outputs": [], 437 | "source": [ 438 | "pickups = pd.DataFrame(pickups)\n", 439 | "dropoffs = pd.DataFrame(dropoffs)\n", 440 | "\n", 441 | "for hour in range(24):\n", 442 | " \n", 443 | " for weekday in range(7):\n", 444 | " \n", 445 | " #Get pickups and dropoff for this hour and weekday:\n", 446 | " p = pd.DataFrame(pickups.loc[(hour, weekday)]).reset_index().rename(columns={\"PULocationID\" : \"LocationID\"})\n", 447 | " d = pd.DataFrame(dropoffs.loc[(hour, weekday)]).reset_index().rename(columns={\"DOLocationID\" : \"LocationID\"})\n", 448 | "\n", 449 | " #Add information of pickups and dropoff to the New York Taxi Zone DataFrame:\n", 450 | " df_taxizones = pd.merge(df_taxizones, p, on=\"LocationID\", how=\"left\").fillna(0)\n", 451 | " df_taxizones.rename(columns={\"passenger_count\" : \"PU_Passenger_%d_%d\"%(weekday, hour)}, inplace=True) \n", 452 | " df_taxizones = pd.merge(df_taxizones, d, on=\"LocationID\", how=\"left\").fillna(0)\n", 453 | " df_taxizones.rename(columns={\"passenger_count\" : \"DO_Passenger_%d_%d\"%(weekday, hour)}, inplace=True)\n", 454 | " \n", 455 | "df_taxizones.head(2) " 456 | ] 457 | }, 458 | { 459 | "cell_type": "markdown", 460 | "metadata": { 461 | "ExecuteTime": { 462 | "end_time": "2018-08-17T03:42:13.741000Z", 463 | "start_time": "2018-08-17T03:42:13.094000Z" 464 | } 465 | }, 466 | "source": [ 467 | "## Plot Interactive Demand Map using Bokeh" 468 | ] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [ 474 | "### Draw Taxi Zones on Map" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": {}, 480 | "source": [ 481 | "Bokeh Imports" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 
487 | "metadata": { 488 | "ExecuteTime": { 489 | "end_time": "2018-10-17T13:06:14.479000Z", 490 | "start_time": "2018-10-17T13:06:14.455000Z" 491 | } 492 | }, 493 | "outputs": [], 494 | "source": [ 495 | "from bokeh.io import output_notebook, output_file, show\n", 496 | "from bokeh.plotting import figure\n", 497 | "from bokeh.models import HoverTool, Select, ColumnDataSource, WheelZoomTool, LogColorMapper, LinearColorMapper\n", 498 | "from bokeh.palettes import OrRd9 as palette\n", 499 | "output_notebook()" 500 | ] 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "metadata": {}, 505 | "source": [ 506 | "Define Source for Plot. Bokeh, like its high-level API Holoviews, can convert dicts and DataFrames to a ColumnDataSource. Its columns can than be used to specify, what should be plotted." 507 | ] 508 | }, 509 | { 510 | "cell_type": "code", 511 | "execution_count": null, 512 | "metadata": { 513 | "ExecuteTime": { 514 | "end_time": "2018-10-17T13:06:15.514000Z", 515 | "start_time": "2018-10-17T13:06:15.373000Z" 516 | } 517 | }, 518 | "outputs": [], 519 | "source": [ 520 | "df_taxizones[\"Passengers\"] = df_taxizones[\"PU_Passenger_0_7\"]\n", 521 | "source = ColumnDataSource(df_taxizones)" 522 | ] 523 | }, 524 | { 525 | "cell_type": "markdown", 526 | "metadata": { 527 | "ExecuteTime": { 528 | "end_time": "2018-08-17T05:40:44.239000Z", 529 | "start_time": "2018-08-17T05:40:44.235000Z" 530 | } 531 | }, 532 | "source": [ 533 | "Define Colormapper for zones" 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": null, 539 | "metadata": { 540 | "ExecuteTime": { 541 | "end_time": "2018-10-17T13:06:16.274000Z", 542 | "start_time": "2018-10-17T13:06:16.263000Z" 543 | } 544 | }, 545 | "outputs": [], 546 | "source": [ 547 | "max_passengers_per_hour = df_taxizones[filter(lambda x: \"Passenger_\" in x, df_taxizones.columns)].max().max()\n", 548 | "color_mapper = LinearColorMapper(palette=palette[::-1], high=max_passengers_per_hour, low=0)" 549 | ] 550 | 
}, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": {}, 554 | "source": [ 555 | "Define Figure" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": null, 561 | "metadata": { 562 | "ExecuteTime": { 563 | "end_time": "2018-10-17T13:06:17.188000Z", 564 | "start_time": "2018-10-17T13:06:17.172000Z" 565 | } 566 | }, 567 | "outputs": [], 568 | "source": [ 569 | "p = figure(title=\"Titel\",\n", 570 | " plot_width=900, plot_height=450,\n", 571 | " toolbar_location=None,\n", 572 | " tools=\"pan,wheel_zoom,box_zoom,reset,save\",\n", 573 | " active_scroll=\"wheel_zoom\")\n", 574 | "p.xaxis.visible = False\n", 575 | "p.yaxis.visible = False\n", 576 | "\n", 577 | "#Get rid of zoom on axes:\n", 578 | "for t in p.tools:\n", 579 | " if type(t) == WheelZoomTool:\n", 580 | " t.zoom_on_axis = False" 581 | ] 582 | }, 583 | { 584 | "cell_type": "markdown", 585 | "metadata": {}, 586 | "source": [ 587 | "Add Background Map (Custom Tile-Maps: http://geo.holoviews.org/Working_with_Bokeh.html)" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": null, 593 | "metadata": { 594 | "ExecuteTime": { 595 | "end_time": "2018-10-17T13:06:18.125000Z", 596 | "start_time": "2018-10-17T13:06:18.116000Z" 597 | } 598 | }, 599 | "outputs": [], 600 | "source": [ 601 | "from bokeh.models import WMTSTileSource\n", 602 | "\n", 603 | "#Use OpenStreetMap Tiles:\n", 604 | "tiles = WMTSTileSource(url='http://c.tile.openstreetmap.org/{Z}/{X}/{Y}.png')\n", 605 | "\n", 606 | "#Add Tile Layer and set alpha-value:\n", 607 | "tile_layer = p.add_tile(tiles)\n", 608 | "tile_layer.alpha = 0.6" 609 | ] 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "metadata": {}, 614 | "source": [ 615 | "Draw Taxi Zone Polygons on the Map. Pass the ColumnDataSource as **source**, such you can use the column names to pass data to the renderer. We use the **Passengers** column to draw a Choropleth map. 
" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": null, 621 | "metadata": { 622 | "ExecuteTime": { 623 | "end_time": "2018-10-17T13:06:19.284000Z", 624 | "start_time": "2018-10-17T13:06:19.091000Z" 625 | } 626 | }, 627 | "outputs": [], 628 | "source": [ 629 | "patches = p.patches(xs=\"X\", ys=\"Y\", source=source,\n", 630 | " fill_color={'field': 'Passengers', 'transform': color_mapper},\n", 631 | " line_color=\"black\", alpha=0.5)\n", 632 | "show(p)" 633 | ] 634 | }, 635 | { 636 | "cell_type": "markdown", 637 | "metadata": {}, 638 | "source": [ 639 | "Add the Hovertool to show data of each zone (the attributes of the selected zone can be accessed by the **@** key): " 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": null, 645 | "metadata": { 646 | "ExecuteTime": { 647 | "end_time": "2018-10-17T13:06:20.274000Z", 648 | "start_time": "2018-10-17T13:06:20.076000Z" 649 | } 650 | }, 651 | "outputs": [], 652 | "source": [ 653 | "#Add Hover Tool:\n", 654 | "hovertool = HoverTool(tooltips=[(\"Passengers:\", \"@Passengers\")])\n", 655 | "p.add_tools(hovertool)\n", 656 | "\n", 657 | "show(p)" 658 | ] 659 | }, 660 | { 661 | "cell_type": "markdown", 662 | "metadata": {}, 663 | "source": [ 664 | "Add more advanced Hover Tools via HTML:" 665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "execution_count": null, 670 | "metadata": { 671 | "ExecuteTime": { 672 | "end_time": "2018-10-17T13:06:21.543000Z", 673 | "start_time": "2018-10-17T13:06:21.303000Z" 674 | } 675 | }, 676 | "outputs": [], 677 | "source": [ 678 | "#Add Hovertool via HTML:\n", 679 | "hovertool = HoverTool(tooltips=\"\"\"\n", 680 | "\n", 681 | "\n", 692 | "\n", 693 | "\n", 694 | "\n", 695 | "
\n", 696 | "

@ZoneName

\n", 697 | "

@Borough

\n", 698 | "

@Passengers Passengers

\n", 699 | "

\"Country

\n", 700 | "
\"\"\")\n", 701 | "p.add_tools(hovertool)\n", 702 | "\n", 703 | "show(p)" 704 | ] 705 | }, 706 | { 707 | "cell_type": "markdown", 708 | "metadata": { 709 | "ExecuteTime": { 710 | "end_time": "2018-08-17T06:56:44.888000Z", 711 | "start_time": "2018-08-17T06:56:44.204000Z" 712 | } 713 | }, 714 | "source": [ 715 | "### Add Interactivity" 716 | ] 717 | }, 718 | { 719 | "cell_type": "markdown", 720 | "metadata": {}, 721 | "source": [ 722 | "Add Slider widget for selecting the hour of the day:" 723 | ] 724 | }, 725 | { 726 | "cell_type": "code", 727 | "execution_count": null, 728 | "metadata": { 729 | "ExecuteTime": { 730 | "end_time": "2018-10-17T13:06:23.436000Z", 731 | "start_time": "2018-10-17T13:06:23.388000Z" 732 | } 733 | }, 734 | "outputs": [], 735 | "source": [ 736 | "from bokeh.models.widgets import Slider\n", 737 | "\n", 738 | "slider = Slider(start=0, end=23, value=7, step=1, title=\"Hour\", width=600)\n", 739 | "\n", 740 | "show(slider)" 741 | ] 742 | }, 743 | { 744 | "cell_type": "markdown", 745 | "metadata": {}, 746 | "source": [ 747 | "Add RadioButton widgets for selecting (Pickups/Dropoffs) and the weekday:" 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": null, 753 | "metadata": { 754 | "ExecuteTime": { 755 | "end_time": "2018-10-17T13:06:24.291000Z", 756 | "start_time": "2018-10-17T13:06:24.261000Z" 757 | } 758 | }, 759 | "outputs": [], 760 | "source": [ 761 | "from bokeh.models.widgets import RadioButtonGroup, Div\n", 762 | "from bokeh.layouts import column, row\n", 763 | "\n", 764 | "radiobuttons_weekday = RadioButtonGroup(\n", 765 | " labels=[\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\", \"Sun\"],\n", 766 | " active=0,\n", 767 | " width=400)\n", 768 | "\n", 769 | "radiobuttons_pudo = RadioButtonGroup(\n", 770 | " labels=[\"Pickups\", \"Dropoff\"], active=0)\n", 771 | "\n", 772 | "layout_widgets = column(slider, row(radiobuttons_weekday, Div(width=80), radiobuttons_pudo))\n", 773 | "\n", 774 | 
"show(layout_widgets)" 775 | ] 776 | }, 777 | { 778 | "cell_type": "markdown", 779 | "metadata": {}, 780 | "source": [ 781 | "Add Interaction via JavaScript Callback:" 782 | ] 783 | }, 784 | { 785 | "cell_type": "code", 786 | "execution_count": null, 787 | "metadata": { 788 | "ExecuteTime": { 789 | "end_time": "2018-10-17T13:06:25.460000Z", 790 | "start_time": "2018-10-17T13:06:25.211000Z" 791 | } 792 | }, 793 | "outputs": [], 794 | "source": [ 795 | "from bokeh.models.callbacks import CustomJS\n", 796 | "\n", 797 | "#Define callback-function with JavaScript Code:\n", 798 | "callback = CustomJS(args=dict(p=p, source=source, slider=slider, \n", 799 | " radiobuttons_pudo=radiobuttons_pudo, \n", 800 | " radiobuttons_weekday=radiobuttons_weekday),\n", 801 | " code=\"\"\"\n", 802 | " \n", 803 | "//Get value of slider for hour:\n", 804 | "var hour = slider.value;\n", 805 | "\n", 806 | "//Get value of weekday:\n", 807 | "var weekday = radiobuttons_weekday.active;\n", 808 | "\n", 809 | "//Get value of Pickups/Dropoffs RadioButtons:\n", 810 | "if (radiobuttons_pudo.active == 0)\n", 811 | " var pudo = \"PU\"\n", 812 | "else\n", 813 | " var pudo = \"DO\"\n", 814 | "\n", 815 | "//Change data of \"Passengers\" column in data source to passenger data of the selected hour:\n", 816 | "source.data[\"Passengers\"] = source.data[pudo + \"_Passenger_\" + weekday + \"_\" + hour];\n", 817 | "source.change.emit();\n", 818 | " \n", 819 | " \"\"\")\n", 820 | "\n", 821 | "#Bind Callback to value change of slider and radiobuttons:\n", 822 | "slider.js_on_change(\"value\", callback)\n", 823 | "radiobuttons_pudo.js_on_change(\"active\", callback)\n", 824 | "radiobuttons_weekday.js_on_change(\"active\", callback)\n", 825 | "\n", 826 | "#Show all elements:\n", 827 | "layout = column( layout_widgets , p)\n", 828 | "show(layout)" 829 | ] 830 | }, 831 | { 832 | "cell_type": "markdown", 833 | "metadata": {}, 834 | "source": [ 835 | "Change to logarithmic Colormapper (to see patterns in zones with 
low demand):" 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": null, 841 | "metadata": { 842 | "ExecuteTime": { 843 | "end_time": "2018-10-17T13:06:27.104000Z", 844 | "start_time": "2018-10-17T13:06:26.861000Z" 845 | } 846 | }, 847 | "outputs": [], 848 | "source": [ 849 | "color_mapper_log = LogColorMapper(palette=palette[::-1], high=max_passengers_per_hour, low=0)\n", 850 | "patches.glyph.fill_color[\"transform\"] = color_mapper_log\n", 851 | "show(layout)" 852 | ] 853 | }, 854 | { 855 | "cell_type": "markdown", 856 | "metadata": {}, 857 | "source": [ 858 | "### Dropoff and Pickup Zones" 859 | ] 860 | }, 861 | { 862 | "cell_type": "markdown", 863 | "metadata": {}, 864 | "source": [ 865 | "To clearly see the structure of the Taxi transport flow for each cell, let us look at the difference between Pickups and Dropoffs in a zone. If this is a positive number, more people leave the zone than enter it:" 866 | ] 867 | }, 868 | { 869 | "cell_type": "code", 870 | "execution_count": null, 871 | "metadata": { 872 | "ExecuteTime": { 873 | "end_time": "2018-10-17T13:06:29.382000Z", 874 | "start_time": "2018-10-17T13:06:29.008000Z" 875 | } 876 | }, 877 | "outputs": [], 878 | "source": [ 879 | "df_pudo = df_taxizones.copy()\n", 880 | "\n", 881 | "for hour in range(24):\n", 882 | " for weekday in range(7):\n", 883 | " df_pudo[\"PUDO_%d_%d\"%(weekday, hour)] = df_pudo[\"PU_Passenger_%d_%d\"%(weekday, hour)] - df_pudo[\"DO_Passenger_%d_%d\"%(weekday, hour)]\n", 884 | " df_pudo[\"PUDO_%d_%d\"%(weekday, hour)] = df_pudo[\"PUDO_%d_%d\"%(weekday, hour)].apply(lambda x: \"PU > DO\" if x>0 else \"PU < DO\")\n", 885 | "df_pudo.drop(columns=filter(lambda x: \"Passenger\" in x, df_pudo.columns), inplace=True)\n", 886 | "df_pudo[\"PUDO\"] = df_pudo[\"PUDO_0_7\"]\n", 887 | "df_pudo.head(3)" 888 | ] 889 | }, 890 | { 891 | "cell_type": "code", 892 | "execution_count": null, 893 | "metadata": { 894 | "ExecuteTime": { 895 | "end_time": "2018-10-17T13:06:30.464000Z", 
896 | "start_time": "2018-10-17T13:06:29.941000Z" 897 | } 898 | }, 899 | "outputs": [], 900 | "source": [ 901 | "from bokeh.transform import factor_cmap\n", 902 | "\n", 903 | "#Define Categorical Color Map for Plot (red=\"PU > DO\", blue=\"PU < DO\"):\n", 904 | "categorical_cmap = factor_cmap(\"PUDO\", palette=[\"red\", \"blue\"], factors=[\"PU > DO\", \"PU < DO\"] )\n", 905 | "\n", 906 | "#Define Source for Plot:\n", 907 | "source = ColumnDataSource(df_pudo)\n", 908 | "\n", 909 | "#Define Figure for Plot:\n", 910 | "p = figure(title=\"Titel\",\n", 911 | " plot_width=900, plot_height=450,\n", 912 | " toolbar_location=None,\n", 913 | " tools=\"pan,wheel_zoom,box_zoom,reset,save\",\n", 914 | " active_scroll=\"wheel_zoom\")\n", 915 | "p.xaxis.visible = False\n", 916 | "p.yaxis.visible = False\n", 917 | "\n", 918 | "#Get rid of zoom on axes:\n", 919 | "for t in p.tools:\n", 920 | " if type(t) == WheelZoomTool:\n", 921 | " t.zoom_on_axis = False\n", 922 | "\n", 923 | "#Use OpenStreetMap Tiles:\n", 924 | "tiles = WMTSTileSource(url='http://c.tile.openstreetmap.org/{Z}/{X}/{Y}.png')\n", 925 | "\n", 926 | "#Add Tile Layer and set alpha-value:\n", 927 | "tile_layer = p.add_tile(tiles)\n", 928 | "tile_layer.alpha = 0.6\n", 929 | "\n", 930 | "patches = p.patches(xs=\"X\", ys=\"Y\", source=source,\n", 931 | " fill_color=categorical_cmap,\n", 932 | " line_color=\"black\", alpha=0.5,\n", 933 | " legend=\"PUDO\")\n", 934 | "\n", 935 | "\n", 936 | "slider = Slider(start=0, end=23, value=7, step=1, title=\"Hour\", width=350)\n", 937 | "\n", 938 | "radiobuttons_weekday = RadioButtonGroup(\n", 939 | " labels=[\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\", \"Sun\"],\n", 940 | " active=0,\n", 941 | " width=350)\n", 942 | "\n", 943 | "#Define callback-function with JavaScript Code:\n", 944 | "callback = CustomJS(args=dict(p=p, source=source, slider=slider,\n", 945 | " radiobuttons_weekday=radiobuttons_weekday),\n", 946 | " code=\"\"\"\n", 947 | " \n", 948 | "//Get value of slider 
for hour:\n", 949 | "var hour = slider.value;\n", 950 | "\n", 951 | "//Get value of weekday:\n", 952 | "var weekday = radiobuttons_weekday.active;\n", 953 | "\n", 954 | "//Change data of \"PUDO\" column in data source to passenger data of the selected hour:\n", 955 | "source.data[\"PUDO\"] = source.data[\"PUDO_\" + weekday + \"_\" + hour];\n", 956 | "source.change.emit();\n", 957 | "\n", 958 | " \"\"\")\n", 959 | "\n", 960 | "#Bind Callback to value change of slider and radiobuttons:\n", 961 | "slider.js_on_change(\"value\", callback)\n", 962 | "radiobuttons_weekday.js_on_change(\"active\", callback)\n", 963 | "\n", 964 | "show(column(row(slider, Div(width=100), radiobuttons_weekday), p))" 965 | ] 966 | }, 967 | { 968 | "cell_type": "markdown", 969 | "metadata": { 970 | "ExecuteTime": { 971 | "end_time": "2018-08-20T12:07:54.448000Z", 972 | "start_time": "2018-08-20T12:07:54.400000Z" 973 | }, 974 | "collapsed": true 975 | }, 976 | "source": [ 977 | "# OSM GPS Data and Datashader" 978 | ] 979 | }, 980 | { 981 | "cell_type": "markdown", 982 | "metadata": {}, 983 | "source": [ 984 | "## Load and process data" 985 | ] 986 | }, 987 | { 988 | "cell_type": "markdown", 989 | "metadata": {}, 990 | "source": [ 991 | "Use GPX Dataset for Germany (only identifiable trips) from OpenStreetMap (**© OpenStreetMap-Mitwirkende**). From the GPX files a final DataFrame (only containing Latitude and Longitude) was created. Let us load the data into the DataFrame: \n", 992 | "\n", 993 | "**Note**: The OSM Licence Conditions also apply to the provided data. 
" 994 | ] 995 | }, 996 | { 997 | "cell_type": "code", 998 | "execution_count": null, 999 | "metadata": { 1000 | "ExecuteTime": { 1001 | "end_time": "2018-10-17T13:06:44.773000Z", 1002 | "start_time": "2018-10-17T13:06:36.007000Z" 1003 | } 1004 | }, 1005 | "outputs": [], 1006 | "source": [ 1007 | "df_osm = pd.read_parquet(\"Data\\OSM GPX\\OSM_GPX.parquet\")\n", 1008 | "display_side_by_side(df_osm.query(\"TripId==1\").head(), df_osm.query(\"TripId==2\").head())" 1009 | ] 1010 | }, 1011 | { 1012 | "cell_type": "markdown", 1013 | "metadata": {}, 1014 | "source": [ 1015 | "Calculate distances between the GPS points of a trip:" 1016 | ] 1017 | }, 1018 | { 1019 | "cell_type": "code", 1020 | "execution_count": null, 1021 | "metadata": { 1022 | "ExecuteTime": { 1023 | "end_time": "2018-10-17T13:09:02.566000Z", 1024 | "start_time": "2018-10-17T13:08:45.693000Z" 1025 | } 1026 | }, 1027 | "outputs": [], 1028 | "source": [ 1029 | "def calc_distance_from_lonlat(x1, y1, x2, y2):\n", 1030 | " \"\"\"Calculates the distance between two points in WGS84 projection (Lat, Lon)\n", 1031 | " based on a linear approximation around the origin of the first point. Works \n", 1032 | " well for smaller distances < 100km. 
\n", 1033 | " \"\"\"\n", 1034 | "\n", 1035 | " return np.sqrt((np.cos(y1 * 2 * np.pi / 360) * (x2 - x1))**2 +\n", 1036 | " (y2 - y1)**2) * 6371 * 2 * np.pi / 360\n", 1037 | "\n", 1038 | "#Calculate postlength of every GPS point:\n", 1039 | "df_osm[\"Postlength\"] = calc_distance_from_lonlat(df_osm[\"longitude\"], df_osm[\"latitude\"], df_osm[\"longitude\"].shift(-1), df_osm[\"latitude\"].shift(-1))\n", 1040 | "#Set Postlength of every last element of each trip to NaN:\n", 1041 | "df_osm.loc[df_osm[\"TripId\"]-df_osm[\"TripId\"].shift(-1)!=0, \"Postlength\"] = np.NaN\n", 1042 | "\n", 1043 | "display_side_by_side(df_osm.query(\"TripId==1\").head(), df_osm.query(\"TripId==1\").tail())" 1044 | ] 1045 | }, 1046 | { 1047 | "cell_type": "markdown", 1048 | "metadata": {}, 1049 | "source": [ 1050 | "## Basic Plots" 1051 | ] 1052 | }, 1053 | { 1054 | "cell_type": "markdown", 1055 | "metadata": {}, 1056 | "source": [ 1057 | "**Let us quickly create some easy plots with basic statistics about the GPS data**" 1058 | ] 1059 | }, 1060 | { 1061 | "cell_type": "markdown", 1062 | "metadata": {}, 1063 | "source": [ 1064 | "Histogram with number of points per trip:" 1065 | ] 1066 | }, 1067 | { 1068 | "cell_type": "code", 1069 | "execution_count": null, 1070 | "metadata": { 1071 | "ExecuteTime": { 1072 | "end_time": "2018-10-17T13:09:06.185000Z", 1073 | "start_time": "2018-10-17T13:09:02.571000Z" 1074 | } 1075 | }, 1076 | "outputs": [], 1077 | "source": [ 1078 | "%%output filename=\"Export/Trip_points\" fig=\"html\"\n", 1079 | "%%opts Histogram [width=500 tools=['hover']]\n", 1080 | "%%opts Histogram (fill_color='red')\n", 1081 | "\n", 1082 | "#Count Number of Points for every trip:\n", 1083 | "trip_points = df_osm.groupby(\"TripId\")[\"latitude\"].count().values\n", 1084 | "#Calculate 90 percent quantile for setting plotting range:\n", 1085 | "quantile_90 = np.percentile(trip_points, 90)\n", 1086 | "\n", 1087 | "#Plot Histogram via Holoviews (output to Data/Trip_points due to the 
%%magic command in the top of the cell):\n", 1088 | "frequencies, edges = np.histogram(trip_points, np.arange(0, trip_points.max()+100, 100))\n", 1089 | "p_trip_points = hv.Histogram((edges, frequencies), label=\"Number of GPS Points per Trip\")\n", 1090 | "p_trip_points = p_trip_points.redim.label(x=\"Number of Points\", Frequency=\"Trips\")\n", 1091 | "p_trip_points = p_trip_points.redim.range(x=(0,quantile_90))\n", 1092 | "p_trip_points" 1093 | ] 1094 | }, 1095 | { 1096 | "cell_type": "markdown", 1097 | "metadata": {}, 1098 | "source": [ 1099 | "Trip length distribution " 1100 | ] 1101 | }, 1102 | { 1103 | "cell_type": "code", 1104 | "execution_count": null, 1105 | "metadata": { 1106 | "ExecuteTime": { 1107 | "end_time": "2018-10-17T13:09:09.302000Z", 1108 | "start_time": "2018-10-17T13:09:06.192000Z" 1109 | } 1110 | }, 1111 | "outputs": [], 1112 | "source": [ 1113 | "%%output filename=\"Export/Trip_lengths\" fig=\"html\"\n", 1114 | "%%opts Histogram [width=500 tools=['hover']]\n", 1115 | "%%opts Histogram (fill_color='red')\n", 1116 | "\n", 1117 | "#Count Number of Points for every trip:\n", 1118 | "trip_length = df_osm.groupby(\"TripId\")[\"Postlength\"].sum().values\n", 1119 | "#Calculate 90 percent quantile for setting plotting range:\n", 1120 | "quantile_90 = np.percentile(trip_length, 90)\n", 1121 | "\n", 1122 | "#Plot Histogram via Holoviews (output to Data/Trip_points due to the %%magic command in the top of the cell):\n", 1123 | "frequencies, edges = np.histogram(trip_length, np.arange(0, 1000, 1))\n", 1124 | "p_trip_length = hv.Histogram((edges, frequencies), label=\"Trip Distances\")\n", 1125 | "p_trip_length = p_trip_length.redim.label(x=\"Length [km]\", Frequency=\"Trips\")\n", 1126 | "p_trip_length = p_trip_length.redim.range(x=(0,quantile_90))\n", 1127 | "\n", 1128 | "p_trip_length" 1129 | ] 1130 | }, 1131 | { 1132 | "cell_type": "markdown", 1133 | "metadata": {}, 1134 | "source": [ 1135 | "## Heatmap with DataShader" 1136 | ] 1137 | }, 1138 | { 
1139 | "cell_type": "markdown", 1140 | "metadata": {}, 1141 | "source": [ 1142 | "Add coordinates in Web Mercador:" 1143 | ] 1144 | }, 1145 | { 1146 | "cell_type": "code", 1147 | "execution_count": null, 1148 | "metadata": { 1149 | "ExecuteTime": { 1150 | "end_time": "2018-10-17T13:09:31.824000Z", 1151 | "start_time": "2018-10-17T13:09:09.314000Z" 1152 | } 1153 | }, 1154 | "outputs": [], 1155 | "source": [ 1156 | "from pyproj import Proj, transform\n", 1157 | "\n", 1158 | "#Define initial and output Projection (WGS84 and Web-Mercador):\n", 1159 | "inProj = Proj(init='epsg:4326')\n", 1160 | "outProj = Proj(init='epsg:3857')\n", 1161 | "\n", 1162 | "#Add Web Mercador coordinates to columns X, Y\n", 1163 | "df_osm[\"X\"], df_osm[\"Y\"] = transform(inProj, outProj, df_osm[\"longitude\"].values, df_osm[\"latitude\"].values)\n", 1164 | "\n", 1165 | "df_osm.head()" 1166 | ] 1167 | }, 1168 | { 1169 | "cell_type": "markdown", 1170 | "metadata": {}, 1171 | "source": [ 1172 | "Filter data with Germany Bounding-Box:" 1173 | ] 1174 | }, 1175 | { 1176 | "cell_type": "code", 1177 | "execution_count": null, 1178 | "metadata": { 1179 | "ExecuteTime": { 1180 | "end_time": "2018-10-17T13:09:45.602000Z", 1181 | "start_time": "2018-10-17T13:09:31.828000Z" 1182 | } 1183 | }, 1184 | "outputs": [], 1185 | "source": [ 1186 | "minlat, maxlat, minlon, maxlon = 47.3, 55, 5.9, 15.1 #Bounding Box Germany\n", 1187 | "#maxlat, minlon, minlat, maxlon = 52.57, 13.25, 52.45, 13.5 #Bounding Box Berlin City\n", 1188 | "df_osm_filtered = df_osm[(df_osm[\"latitude\"]>minlat)&(df_osm[\"latitude\"]minlon)&(df_osm[\"longitude\"]\n", 1443 | "#hover {\n", 1444 | " font-family: \"Trebuchet MS\", Arial, Helvetica, sans-serif;\n", 1445 | " border-collapse: collapse;\n", 1446 | " width = 250px;\n", 1447 | "}\n", 1448 | "\n", 1449 | "#hover td, #hover th {\n", 1450 | " border: 1px solid #ddd;\n", 1451 | " text-align: center;\n", 1452 | " padding: 5px;\n", 1453 | " width:125px;\n", 1454 | "}\n", 1455 | "\n", 1456 
| "#hover tr:nth-child(even){background-color: #f2f2f2;}\n", 1457 | "\n", 1458 | "\n", 1459 | "#hover th {\n", 1460 | " padding-top: 5px;\n", 1461 | " padding-bottom: 5px;\n", 1462 | " text-align: center;\n", 1463 | " background-color: #b50000;\n", 1464 | " color: white;\n", 1465 | " width:125px;\n", 1466 | "}\n", 1467 | "\n", 1468 | "#hover ex {\n", 1469 | " padding-top: 5px;\n", 1470 | " padding-bottom: 5px;\n", 1471 | " text-align: center;\n", 1472 | " background-color: #b50000;\n", 1473 | " color: black;\n", 1474 | " width:125px;\n", 1475 | "}\n", 1476 | "\"\"\"\n", 1477 | "my_hover.tooltips = css_style + \"\"\"\n", 1478 | "

@Name \"\"

\n", 1479 | "

@Country

\n", 1480 | "

@City, @Address

\n", 1481 | "
Telefone: @Telefone
\n", 1482 | "
Email: @Email
\n", 1483 | "\"\"\"\n", 1484 | "p.add_tools(my_hover)\n", 1485 | "\n", 1486 | "\n", 1487 | "show(p)" 1488 | ] 1489 | }, 1490 | { 1491 | "cell_type": "markdown", 1492 | "metadata": {}, 1493 | "source": [ 1494 | "Export components to create HTML content that can be embedded into another DIV:" 1495 | ] 1496 | }, 1497 | { 1498 | "cell_type": "code", 1499 | "execution_count": null, 1500 | "metadata": { 1501 | "ExecuteTime": { 1502 | "end_time": "2018-10-17T13:14:38.359000Z", 1503 | "start_time": "2018-10-17T13:14:38.323000Z" 1504 | } 1505 | }, 1506 | "outputs": [], 1507 | "source": [ 1508 | "#Get components of plot:\n", 1509 | "from bokeh.embed import components\n", 1510 | "script, div = components(p)\n", 1511 | "\n", 1512 | "#Define source for Bokeh CSS and JS:\n", 1513 | "import bokeh\n", 1514 | "version = bokeh.__version__\n", 1515 | "source = \"\"\".min.css\"\n", 1517 | " rel=\"stylesheet\" type=\"text/css\">\n", 1518 | ".min.css\"\n", 1520 | " rel=\"stylesheet\" type=\"text/css\">\n", 1521 | ".min.css\"\n", 1523 | " rel=\"stylesheet\" type=\"text/css\">\n", 1524 | "\n", 1525 | "\n", 1526 | "\n", 1527 | "\"\"\".replace(\"\", version)\n", 1528 | "\n", 1529 | "#Export final HTML content:\n", 1530 | "html_content = source + script + div\n", 1531 | "\n", 1532 | "with open(\"Export//PTV_Locations.html\", \"w\") as f:\n", 1533 | " f.write(html_content)" 1534 | ] 1535 | } 1536 | ], 1537 | "metadata": { 1538 | "kernelspec": { 1539 | "display_name": "Python 2", 1540 | "language": "python", 1541 | "name": "python2" 1542 | }, 1543 | "language_info": { 1544 | "codemirror_mode": { 1545 | "name": "ipython", 1546 | "version": 2 1547 | }, 1548 | "file_extension": ".py", 1549 | "mimetype": "text/x-python", 1550 | "name": "python", 1551 | "nbconvert_exporter": "python", 1552 | "pygments_lexer": "ipython2", 1553 | "version": "2.7.15" 1554 | }, 1555 | "toc": { 1556 | "nav_menu": {}, 1557 | "number_sections": true, 1558 | "sideBar": true, 1559 | "skip_h1_title": false, 1560 | 
"title_cell": "Table of Contents", 1561 | "title_sidebar": "Contents", 1562 | "toc_cell": false, 1563 | "toc_position": { 1564 | "height": "calc(100% - 180px)", 1565 | "left": "10px", 1566 | "top": "150px", 1567 | "width": "246px" 1568 | }, 1569 | "toc_section_display": true, 1570 | "toc_window_display": true 1571 | } 1572 | }, 1573 | "nbformat": 4, 1574 | "nbformat_minor": 2 1575 | } 1576 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/PyConDe 2018 - Interactive Visualization of Traffic Data-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "**Basic Imports**" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "ExecuteTime": { 15 | "end_time": "2018-10-17T13:10:57.106000Z", 16 | "start_time": "2018-10-17T13:10:55.355000Z" 17 | } 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "import geopandas as gpd\n", 22 | "import pandas as pd\n", 23 | "import dask.dataframe as dd\n", 24 | "\n", 25 | "import numpy as np\n", 26 | "from shapely.geometry import Polygon, MultiPolygon\n", 27 | "from IPython.display import display, display_html\n", 28 | "%matplotlib inline\n", 29 | "\n", 30 | "def display_side_by_side(*args):\n", 31 | " html_str=''\n", 32 | " for df in args:\n", 33 | " html_str+= \" \" + df.to_html()\n", 34 | " display_html(html_str.replace('table','table style=\"display:inline\"'),raw=True)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "# Download Data\n", 42 | "\n", 43 | "To download all neccessary data for this notebook, please run the following cell:" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "ExecuteTime": { 51 | "end_time": "2018-10-17T13:01:30.497000Z", 52 | "start_time": "2018-10-17T12:41:03.166000Z" 53 | 
} 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "import os\n", 58 | "import requests\n", 59 | "import os\n", 60 | "from multiprocessing.dummy import Pool\n", 61 | "from functools import partial\n", 62 | "import zipfile\n", 63 | "\n", 64 | "def download_file(url, export_directory=\"\", filename=None):\n", 65 | " print(\"Download file %s to directory %s\"%(url, export_directory))\n", 66 | " if filename == None:\n", 67 | " local_filename = os.path.join(export_directory,url.split('/')[-1])\n", 68 | " else:\n", 69 | " local_filename = os.path.join(export_directory,filename)\n", 70 | " # NOTE the stream=True parameter\n", 71 | " r = requests.get(url, stream=True)\n", 72 | " with open(local_filename, 'wb') as f:\n", 73 | " for chunk in r.iter_content(chunk_size=1024):\n", 74 | " if chunk: # filter out keep-alive new chunks\n", 75 | " f.write(chunk)\n", 76 | " #f.flush() commented by recommendation from J.F.Sebastian\n", 77 | " return local_filename\n", 78 | "\n", 79 | "#Create Folders for Data:\n", 80 | "if not os.path.exists(\"Data\"):\n", 81 | " os.mkdir(\"Data\")\n", 82 | "\n", 83 | "if not os.path.exists(os.path.join(\"Data\",\"New York Taxi\")):\n", 84 | " os.mkdir(os.path.join(\"Data\",\"New York Taxi\"))\n", 85 | "\n", 86 | "if not os.path.exists(os.path.join(\"Data\",\"OSM GPX\")):\n", 87 | " os.mkdir(os.path.join(\"Data\",\"OSM GPX\"))\n", 88 | "\n", 89 | "#Download New York Taxi Data for Yellow Cabs in 2017:\n", 90 | "download_taxi_data = partial(download_file, export_directory=os.path.join(\"Data\",\"New York Taxi\"))\n", 91 | "request_strings = [r\"https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2017-%02d.csv\"%i for i in range(1,13)]\n", 92 | "pool = Pool(processes=4)\n", 93 | "pool.map(download_taxi_data, request_strings)\n", 94 | "\n", 95 | "#Download New York Taxi Data for Green Cabs in 2017:\n", 96 | "download_taxi_data = partial(download_file, export_directory=os.path.join(\"Data\",\"New York Taxi\"))\n", 97 | "request_strings = 
[r\"https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2017-%02d.csv\"%i for i in range(1,13)]\n", 98 | "pool = Pool(processes=4)\n", 99 | "pool.map(download_taxi_data, request_strings)\n", 100 | "\n", 101 | "#Download shapefile of New York Taxi Zones:\n", 102 | "print(\"Download Taxi Zones Shapefiles.\")\n", 103 | "zip_path = download_taxi_data(r\"https://s3.amazonaws.com/nyc-tlc/misc/taxi_zones.zip\")\n", 104 | "print(\"Unzip Taxi Zones Shapefiles.\")\n", 105 | "with zipfile.ZipFile(zip_path, \"r\") as z:\n", 106 | " z.extractall(os.path.dirname(zip_path))\n", 107 | "os.remove(zip_path)\n", 108 | "\n", 109 | "#Download OSM path data:\n", 110 | "osm_path = download_file(r\"https://ptv2box.ptvgroup.com/index.php/s/9sTUmxdF80NU2nr/download\", export_directory=os.path.join(\"Data\",\"OSM GPX\"), filename=\"OSM_GPX.parquet\")" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "# Interactive Maps for New York Taxi Data 2017" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## New York Taxi zones" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": { 130 | "ExecuteTime": { 131 | "end_time": "2018-08-16T18:44:00.299000Z", 132 | "start_time": "2018-08-16T18:44:00.292000Z" 133 | } 134 | }, 135 | "source": [ 136 | "In this section, we will load an process the shapefile **\"taxi_zones.shp\"** containing the New York Taxis Zones, such that we can use them as a basis for the **Bokeh** plot. 
In the next cell, we use GeoPandas to load the shapefile of Taxi zones and transform coordinate system to Web Mercator (EPSG-Code of Web Mercator Projection = 3785 ):" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "ExecuteTime": { 144 | "end_time": "2018-10-17T13:03:10.140000Z", 145 | "start_time": "2018-10-17T13:03:09.309000Z" 146 | } 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "df_taxizones = gpd.read_file(r\"Data\\New York Taxi\\taxi_zones.shp\")\n", 151 | "df_taxizones.to_crs(epsg=3785, inplace=True) #EPSG-Code of Web Mercador\n", 152 | "display(df_taxizones.head())\n", 153 | "print(\"Number of Polygons: %d\"%len(df_taxizones))\n", 154 | "df_taxizones.plot()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "Simplify Shape of Zones (otherwise slow performance of plot):" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": { 168 | "ExecuteTime": { 169 | "end_time": "2018-10-17T13:03:10.610000Z", 170 | "start_time": "2018-10-17T13:03:10.327000Z" 171 | } 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "df_taxizones[\"geometry\"] = df_taxizones[\"geometry\"].simplify(100)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "Convert WKT Polygons to X and Y arrays with corresponding coordinates. 
Take into account **multipolygons** and separate them into individual shapes:" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "ExecuteTime": { 190 | "end_time": "2018-10-17T13:03:11.224000Z", 191 | "start_time": "2018-10-17T13:03:11.149000Z" 192 | } 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | "data = []\n", 197 | "for zonename, LocationID, borough, shape in df_taxizones[[\"zone\", \"LocationID\", \"borough\", \"geometry\"]].values:\n", 198 | " #If shape is polygon, extract X and Y coordinates of boundary line:\n", 199 | " if isinstance(shape, Polygon):\n", 200 | " X, Y = shape.boundary.xy\n", 201 | " X = [int(x) for x in X]\n", 202 | " Y = [int(y) for y in Y]\n", 203 | " data.append([LocationID, zonename, borough, X, Y])\n", 204 | " \n", 205 | " #If shape is Multipolygon, extract X and Y coordinates of each sub-Polygon:\n", 206 | " if isinstance(shape, MultiPolygon):\n", 207 | " for poly in shape:\n", 208 | " X, Y = poly.boundary.xy\n", 209 | " X = [int(x) for x in X]\n", 210 | " Y = [int(y) for y in Y]\n", 211 | " data.append([LocationID, zonename, borough, X, Y])\n", 212 | "\n", 213 | "#Create new DataFrame with X an Y coordinates separated:\n", 214 | "df_taxizones = pd.DataFrame(data, columns=[\"LocationID\", \"ZoneName\", \"Borough\", \"X\", \"Y\"])\n", 215 | "display(df_taxizones.head())\n", 216 | "print(\"Number of Polygons: %d\"%len(df_taxizones))" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "## New York Taxi Data" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "Here, we read in the data from New York Taxis and aggregate them to show us information about how frequent each taxi zone is visited. We will use Dask.DataFrame, such that the whole dataset can be loaded at once without blowing up our memory and to get a nice speedup due to the parallelizm of Dask. 
Let us start a Dask Client and Local Cluster (after the execution of the cell, click on the **Dashboard Link** to view the Dask Dashboard, where you can see the resource consumption of our computation" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": { 237 | "ExecuteTime": { 238 | "end_time": "2018-10-17T13:03:18.902000Z", 239 | "start_time": "2018-10-17T13:03:12.732000Z" 240 | } 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "from dask.distributed import Client, LocalCluster\n", 245 | "import dask.dataframe as dd\n", 246 | "from dask import compute\n", 247 | "\n", 248 | "cluster = LocalCluster()\n", 249 | "print(cluster)\n", 250 | "client = Client(cluster)\n", 251 | "client" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "Using Dask, we now read in the New York Taxi Data for the yellow and green cabs into distributed DataFrames. Note: A **Dask.DataFrame** is a *delayed object* and to calculate results, one has to trigger the computation via the **.compute()** method." 
259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": { 265 | "ExecuteTime": { 266 | "end_time": "2018-10-17T13:03:22.022000Z", 267 | "start_time": "2018-10-17T13:03:18.924000Z" 268 | }, 269 | "code_folding": [] 270 | }, 271 | "outputs": [], 272 | "source": [ 273 | "df_taxis_yellow = dd.read_csv(r\"Data\\New York Taxi\\yellow_tripdata_2017-*.csv\", \n", 274 | " usecols=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\", \"passenger_count\",\n", 275 | " \"PULocationID\", \"DOLocationID\"],\n", 276 | " parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"])\n", 277 | "df_taxis_green = dd.read_csv(r\"Data\\New York Taxi\\green_tripdata_2017-*.csv\", \n", 278 | " usecols=[\"lpep_pickup_datetime\", \"lpep_dropoff_datetime\", \"passenger_count\",\n", 279 | " \"PULocationID\", \"DOLocationID\"],\n", 280 | " parse_dates=[\"lpep_pickup_datetime\", \"lpep_dropoff_datetime\"]).rename(\n", 281 | " columns = {\"lpep_pickup_datetime\": \"tpep_pickup_datetime\", \n", 282 | " \"lpep_dropoff_datetime\": \"tpep_dropoff_datetime\"})\n", 283 | "df_taxis = dd.concat([df_taxis_yellow, df_taxis_green])\n", 284 | "\n", 285 | "#Filter data for correct year :\n", 286 | "df_taxis = df_taxis[(df_taxis[\"tpep_pickup_datetime\"].dt.year == 2017)&(df_taxis[\"tpep_dropoff_datetime\"].dt.year == 2017)]\n", 287 | "\n", 288 | "df_taxis.head()" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "Now, we calculate the number of boarding and alighting passengers for each zone and every hour and every day using the **GroupBy** Method. For this, we first create two columns specifying the hour and daytype of Pickup and Dropoff. 
Then, we define the groupby operations for Pickups and Dropoffs: and finally we trigger the parallelized computation using **dask.compute**:" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "ExecuteTime": { 303 | "end_time": "2018-10-17T13:05:52.951000Z", 304 | "start_time": "2018-10-17T13:03:22.026000Z" 305 | } 306 | }, 307 | "outputs": [], 308 | "source": [ 309 | "df_taxis[\"Pickup_Hour\"] = df_taxis[\"tpep_pickup_datetime\"].dt.hour\n", 310 | "df_taxis[\"Dropoff_Hour\"] = df_taxis[\"tpep_dropoff_datetime\"].dt.hour\n", 311 | "df_taxis[\"weekday\"] = df_taxis[\"tpep_dropoff_datetime\"].dt.weekday\n", 312 | "pickups = df_taxis.groupby(by=[\"Pickup_Hour\", \"weekday\", \"PULocationID\"])[\"passenger_count\"].sum()\n", 313 | "dropoffs = df_taxis.groupby(by=[\"Dropoff_Hour\", \"weekday\", \"DOLocationID\"])[\"passenger_count\"].sum()\n", 314 | "pickups, dropoffs = compute(pickups, dropoffs)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "**Time Series of Pickups and Dropoffs**" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": { 327 | "ExecuteTime": { 328 | "end_time": "2018-08-17T07:49:12.765000Z", 329 | "start_time": "2018-08-17T07:49:12.758000Z" 330 | } 331 | }, 332 | "source": [ 333 | "Aggregate Pickups and Dropoffs hourly:" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": { 340 | "ExecuteTime": { 341 | "end_time": "2018-10-17T13:05:53.019000Z", 342 | "start_time": "2018-10-17T13:05:52.959000Z" 343 | } 344 | }, 345 | "outputs": [], 346 | "source": [ 347 | "df_pudo = pd.DataFrame(pickups.groupby(level=0).sum())\n", 348 | "df_pudo[\"Dropoff\"] = dropoffs.groupby(level=0).sum()\n", 349 | "df_pudo.columns = [\"P\", \"D\"]\n", 350 | "df_pudo.index.rename(\"Hour\", inplace=True)\n", 351 | "df_pudo.head(3)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": 
{ 357 | "ExecuteTime": { 358 | "end_time": "2018-08-17T07:49:27.619000Z", 359 | "start_time": "2018-08-17T07:49:27.613000Z" 360 | } 361 | }, 362 | "source": [ 363 | "Plot with Holoviews (Backend: Bokeh)" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": { 370 | "ExecuteTime": { 371 | "end_time": "2018-10-17T13:05:55.310000Z", 372 | "start_time": "2018-10-17T13:05:53.027000Z" 373 | } 374 | }, 375 | "outputs": [], 376 | "source": [ 377 | "import holoviews as hv\n", 378 | "hv.extension('bokeh')" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": { 385 | "ExecuteTime": { 386 | "end_time": "2018-10-17T13:05:55.642000Z", 387 | "start_time": "2018-10-17T13:05:55.316000Z" 388 | } 389 | }, 390 | "outputs": [], 391 | "source": [ 392 | "%%opts Bars.Grouped [group_index='Group' toolbar='above' tools=['hover'] width=800]\n", 393 | "from itertools import product\n", 394 | "hours, groups = df_pudo.index.values, ['P', 'D']\n", 395 | "keys = product(hours, groups)\n", 396 | "bars = hv.Bars([(hour, pudo, df_pudo.loc[hour, pudo]) for hour, pudo in keys],\n", 397 | " ['Hour', \"Group\"], \"Passengers\")\n", 398 | "bars.relabel(group='Grouped')" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": { 404 | "ExecuteTime": { 405 | "end_time": "2018-08-17T03:26:27.375000Z", 406 | "start_time": "2018-08-17T03:26:27.329000Z" 407 | } 408 | }, 409 | "source": [ 410 | "**Finally, join the Taxi Zones DataFrame with the information about the Pickups and Dropoffs for every hour and weekday:**" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": { 417 | "ExecuteTime": { 418 | "end_time": "2018-10-17T13:05:55.680000Z", 419 | "start_time": "2018-10-17T13:05:55.646000Z" 420 | } 421 | }, 422 | "outputs": [], 423 | "source": [ 424 | "display_side_by_side(pd.DataFrame(pickups).head(), df_taxizones[[\"LocationID\", \"ZoneName\", 
\"X\"]].head())" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": { 431 | "ExecuteTime": { 432 | "end_time": "2018-10-17T13:06:00.090000Z", 433 | "start_time": "2018-10-17T13:05:55.685000Z" 434 | } 435 | }, 436 | "outputs": [], 437 | "source": [ 438 | "pickups = pd.DataFrame(pickups)\n", 439 | "dropoffs = pd.DataFrame(dropoffs)\n", 440 | "\n", 441 | "for hour in range(24):\n", 442 | " \n", 443 | " for weekday in range(7):\n", 444 | " \n", 445 | " #Get pickups and dropoff for this hour and weekday:\n", 446 | " p = pd.DataFrame(pickups.loc[(hour, weekday)]).reset_index().rename(columns={\"PULocationID\" : \"LocationID\"})\n", 447 | " d = pd.DataFrame(dropoffs.loc[(hour, weekday)]).reset_index().rename(columns={\"DOLocationID\" : \"LocationID\"})\n", 448 | "\n", 449 | " #Add information of pickups and dropoff to the New York Taxi Zone DataFrame:\n", 450 | " df_taxizones = pd.merge(df_taxizones, p, on=\"LocationID\", how=\"left\").fillna(0)\n", 451 | " df_taxizones.rename(columns={\"passenger_count\" : \"PU_Passenger_%d_%d\"%(weekday, hour)}, inplace=True) \n", 452 | " df_taxizones = pd.merge(df_taxizones, d, on=\"LocationID\", how=\"left\").fillna(0)\n", 453 | " df_taxizones.rename(columns={\"passenger_count\" : \"DO_Passenger_%d_%d\"%(weekday, hour)}, inplace=True)\n", 454 | " \n", 455 | "df_taxizones.head(2) " 456 | ] 457 | }, 458 | { 459 | "cell_type": "markdown", 460 | "metadata": { 461 | "ExecuteTime": { 462 | "end_time": "2018-08-17T03:42:13.741000Z", 463 | "start_time": "2018-08-17T03:42:13.094000Z" 464 | } 465 | }, 466 | "source": [ 467 | "## Plot Interactive Demand Map using Bokeh" 468 | ] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [ 474 | "### Draw Taxi Zones on Map" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": {}, 480 | "source": [ 481 | "Bokeh Imports" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 
487 | "metadata": { 488 | "ExecuteTime": { 489 | "end_time": "2018-10-17T13:06:14.479000Z", 490 | "start_time": "2018-10-17T13:06:14.455000Z" 491 | } 492 | }, 493 | "outputs": [], 494 | "source": [ 495 | "from bokeh.io import output_notebook, output_file, show\n", 496 | "from bokeh.plotting import figure\n", 497 | "from bokeh.models import HoverTool, Select, ColumnDataSource, WheelZoomTool, LogColorMapper, LinearColorMapper\n", 498 | "from bokeh.palettes import OrRd9 as palette\n", 499 | "output_notebook()" 500 | ] 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "metadata": {}, 505 | "source": [ 506 | "Define Source for Plot. Bokeh, like its high-level API Holoviews, can convert dicts and DataFrames to a ColumnDataSource. Its columns can than be used to specify, what should be plotted." 507 | ] 508 | }, 509 | { 510 | "cell_type": "code", 511 | "execution_count": null, 512 | "metadata": { 513 | "ExecuteTime": { 514 | "end_time": "2018-10-17T13:06:15.514000Z", 515 | "start_time": "2018-10-17T13:06:15.373000Z" 516 | } 517 | }, 518 | "outputs": [], 519 | "source": [ 520 | "df_taxizones[\"Passengers\"] = df_taxizones[\"PU_Passenger_0_7\"]\n", 521 | "source = ColumnDataSource(df_taxizones)" 522 | ] 523 | }, 524 | { 525 | "cell_type": "markdown", 526 | "metadata": { 527 | "ExecuteTime": { 528 | "end_time": "2018-08-17T05:40:44.239000Z", 529 | "start_time": "2018-08-17T05:40:44.235000Z" 530 | } 531 | }, 532 | "source": [ 533 | "Define Colormapper for zones" 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": null, 539 | "metadata": { 540 | "ExecuteTime": { 541 | "end_time": "2018-10-17T13:06:16.274000Z", 542 | "start_time": "2018-10-17T13:06:16.263000Z" 543 | } 544 | }, 545 | "outputs": [], 546 | "source": [ 547 | "max_passengers_per_hour = df_taxizones[filter(lambda x: \"Passenger_\" in x, df_taxizones.columns)].max().max()\n", 548 | "color_mapper = LinearColorMapper(palette=palette[::-1], high=max_passengers_per_hour, low=0)" 549 | ] 550 | 
}, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": {}, 554 | "source": [ 555 | "Define Figure" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": null, 561 | "metadata": { 562 | "ExecuteTime": { 563 | "end_time": "2018-10-17T13:06:17.188000Z", 564 | "start_time": "2018-10-17T13:06:17.172000Z" 565 | } 566 | }, 567 | "outputs": [], 568 | "source": [ 569 | "p = figure(title=\"Titel\",\n", 570 | " plot_width=900, plot_height=450,\n", 571 | " toolbar_location=None,\n", 572 | " tools=\"pan,wheel_zoom,box_zoom,reset,save\",\n", 573 | " active_scroll=\"wheel_zoom\")\n", 574 | "p.xaxis.visible = False\n", 575 | "p.yaxis.visible = False\n", 576 | "\n", 577 | "#Get rid of zoom on axes:\n", 578 | "for t in p.tools:\n", 579 | " if type(t) == WheelZoomTool:\n", 580 | " t.zoom_on_axis = False" 581 | ] 582 | }, 583 | { 584 | "cell_type": "markdown", 585 | "metadata": {}, 586 | "source": [ 587 | "Add Background Map (Custom Tile-Maps: http://geo.holoviews.org/Working_with_Bokeh.html)" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": null, 593 | "metadata": { 594 | "ExecuteTime": { 595 | "end_time": "2018-10-17T13:06:18.125000Z", 596 | "start_time": "2018-10-17T13:06:18.116000Z" 597 | } 598 | }, 599 | "outputs": [], 600 | "source": [ 601 | "from bokeh.models import WMTSTileSource\n", 602 | "\n", 603 | "#Use OpenStreetMap Tiles:\n", 604 | "tiles = WMTSTileSource(url='http://c.tile.openstreetmap.org/{Z}/{X}/{Y}.png')\n", 605 | "\n", 606 | "#Add Tile Layer and set alpha-value:\n", 607 | "tile_layer = p.add_tile(tiles)\n", 608 | "tile_layer.alpha = 0.6" 609 | ] 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "metadata": {}, 614 | "source": [ 615 | "Draw Taxi Zone Polygons on the Map. Pass the ColumnDataSource as **source**, such you can use the column names to pass data to the renderer. We use the **Passengers** column to draw a Choropleth map. 
" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": null, 621 | "metadata": { 622 | "ExecuteTime": { 623 | "end_time": "2018-10-17T13:06:19.284000Z", 624 | "start_time": "2018-10-17T13:06:19.091000Z" 625 | } 626 | }, 627 | "outputs": [], 628 | "source": [ 629 | "patches = p.patches(xs=\"X\", ys=\"Y\", source=source,\n", 630 | " fill_color={'field': 'Passengers', 'transform': color_mapper},\n", 631 | " line_color=\"black\", alpha=0.5)\n", 632 | "show(p)" 633 | ] 634 | }, 635 | { 636 | "cell_type": "markdown", 637 | "metadata": {}, 638 | "source": [ 639 | "Add the Hovertool to show data of each zone (the attributes of the selected zone can be accessed by the **@** key): " 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": null, 645 | "metadata": { 646 | "ExecuteTime": { 647 | "end_time": "2018-10-17T13:06:20.274000Z", 648 | "start_time": "2018-10-17T13:06:20.076000Z" 649 | } 650 | }, 651 | "outputs": [], 652 | "source": [ 653 | "#Add Hover Tool:\n", 654 | "hovertool = HoverTool(tooltips=[(\"Passengers:\", \"@Passengers\")])\n", 655 | "p.add_tools(hovertool)\n", 656 | "\n", 657 | "show(p)" 658 | ] 659 | }, 660 | { 661 | "cell_type": "markdown", 662 | "metadata": {}, 663 | "source": [ 664 | "Add more advanced Hover Tools via HTML:" 665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "execution_count": null, 670 | "metadata": { 671 | "ExecuteTime": { 672 | "end_time": "2018-10-17T13:06:21.543000Z", 673 | "start_time": "2018-10-17T13:06:21.303000Z" 674 | } 675 | }, 676 | "outputs": [], 677 | "source": [ 678 | "#Add Hovertool via HTML:\n", 679 | "hovertool = HoverTool(tooltips=\"\"\"\n", 680 | "\n", 681 | "\n", 692 | "\n", 693 | "\n", 694 | "\n", 695 | "
\n", 696 | "

@ZoneName

\n", 697 | "

@Borough

\n", 698 | "

@Passengers Passengers

\n", 699 | "

\"Country

\n", 700 | "
\"\"\")\n", 701 | "p.add_tools(hovertool)\n", 702 | "\n", 703 | "show(p)" 704 | ] 705 | }, 706 | { 707 | "cell_type": "markdown", 708 | "metadata": { 709 | "ExecuteTime": { 710 | "end_time": "2018-08-17T06:56:44.888000Z", 711 | "start_time": "2018-08-17T06:56:44.204000Z" 712 | } 713 | }, 714 | "source": [ 715 | "### Add Interactivity" 716 | ] 717 | }, 718 | { 719 | "cell_type": "markdown", 720 | "metadata": {}, 721 | "source": [ 722 | "Add Slider widget for selecting the hour of the day:" 723 | ] 724 | }, 725 | { 726 | "cell_type": "code", 727 | "execution_count": null, 728 | "metadata": { 729 | "ExecuteTime": { 730 | "end_time": "2018-10-17T13:06:23.436000Z", 731 | "start_time": "2018-10-17T13:06:23.388000Z" 732 | } 733 | }, 734 | "outputs": [], 735 | "source": [ 736 | "from bokeh.models.widgets import Slider\n", 737 | "\n", 738 | "slider = Slider(start=0, end=23, value=7, step=1, title=\"Hour\", width=600)\n", 739 | "\n", 740 | "show(slider)" 741 | ] 742 | }, 743 | { 744 | "cell_type": "markdown", 745 | "metadata": {}, 746 | "source": [ 747 | "Add RadioButton widgets for selecting (Pickups/Dropoffs) and the weekday:" 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": null, 753 | "metadata": { 754 | "ExecuteTime": { 755 | "end_time": "2018-10-17T13:06:24.291000Z", 756 | "start_time": "2018-10-17T13:06:24.261000Z" 757 | } 758 | }, 759 | "outputs": [], 760 | "source": [ 761 | "from bokeh.models.widgets import RadioButtonGroup, Div\n", 762 | "from bokeh.layouts import column, row\n", 763 | "\n", 764 | "radiobuttons_weekday = RadioButtonGroup(\n", 765 | " labels=[\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\", \"Sun\"],\n", 766 | " active=0,\n", 767 | " width=400)\n", 768 | "\n", 769 | "radiobuttons_pudo = RadioButtonGroup(\n", 770 | " labels=[\"Pickups\", \"Dropoff\"], active=0)\n", 771 | "\n", 772 | "layout_widgets = column(slider, row(radiobuttons_weekday, Div(width=80), radiobuttons_pudo))\n", 773 | "\n", 774 | 
"show(layout_widgets)" 775 | ] 776 | }, 777 | { 778 | "cell_type": "markdown", 779 | "metadata": {}, 780 | "source": [ 781 | "Add Interaction via JavaScript Callback:" 782 | ] 783 | }, 784 | { 785 | "cell_type": "code", 786 | "execution_count": null, 787 | "metadata": { 788 | "ExecuteTime": { 789 | "end_time": "2018-10-17T13:06:25.460000Z", 790 | "start_time": "2018-10-17T13:06:25.211000Z" 791 | } 792 | }, 793 | "outputs": [], 794 | "source": [ 795 | "from bokeh.models.callbacks import CustomJS\n", 796 | "\n", 797 | "#Define callback-function with JavaScript Code:\n", 798 | "callback = CustomJS(args=dict(p=p, source=source, slider=slider, \n", 799 | " radiobuttons_pudo=radiobuttons_pudo, \n", 800 | " radiobuttons_weekday=radiobuttons_weekday),\n", 801 | " code=\"\"\"\n", 802 | " \n", 803 | "//Get value of slider for hour:\n", 804 | "var hour = slider.value;\n", 805 | "\n", 806 | "//Get value of weekday:\n", 807 | "var weekday = radiobuttons_weekday.active;\n", 808 | "\n", 809 | "//Get value of Pickups/Dropoffs RadioButtons:\n", 810 | "if (radiobuttons_pudo.active == 0)\n", 811 | " var pudo = \"PU\"\n", 812 | "else\n", 813 | " var pudo = \"DO\"\n", 814 | "\n", 815 | "//Change data of \"Passengers\" column in data source to passenger data of the selected hour:\n", 816 | "source.data[\"Passengers\"] = source.data[pudo + \"_Passenger_\" + weekday + \"_\" + hour];\n", 817 | "source.change.emit();\n", 818 | " \n", 819 | " \"\"\")\n", 820 | "\n", 821 | "#Bind Callback to value change of slider and radiobuttons:\n", 822 | "slider.js_on_change(\"value\", callback)\n", 823 | "radiobuttons_pudo.js_on_change(\"active\", callback)\n", 824 | "radiobuttons_weekday.js_on_change(\"active\", callback)\n", 825 | "\n", 826 | "#Show all elements:\n", 827 | "layout = column( layout_widgets , p)\n", 828 | "show(layout)" 829 | ] 830 | }, 831 | { 832 | "cell_type": "markdown", 833 | "metadata": {}, 834 | "source": [ 835 | "Change to logarithmic Colormapper (to see patterns in zones with 
low demand):" 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": null, 841 | "metadata": { 842 | "ExecuteTime": { 843 | "end_time": "2018-10-17T13:06:27.104000Z", 844 | "start_time": "2018-10-17T13:06:26.861000Z" 845 | } 846 | }, 847 | "outputs": [], 848 | "source": [ 849 | "color_mapper_log = LogColorMapper(palette=palette[::-1], high=max_passengers_per_hour, low=0)\n", 850 | "patches.glyph.fill_color[\"transform\"] = color_mapper_log\n", 851 | "show(layout)" 852 | ] 853 | }, 854 | { 855 | "cell_type": "markdown", 856 | "metadata": {}, 857 | "source": [ 858 | "### Dropoff and Pickup Zones" 859 | ] 860 | }, 861 | { 862 | "cell_type": "markdown", 863 | "metadata": {}, 864 | "source": [ 865 | "To clearly see the structure of the Taxi transport flow for each cell, let us look at the difference between Pickups and Dropoffs in a zone. If this is a positive number, more people leave the zone than enter it:" 866 | ] 867 | }, 868 | { 869 | "cell_type": "code", 870 | "execution_count": null, 871 | "metadata": { 872 | "ExecuteTime": { 873 | "end_time": "2018-10-17T13:06:29.382000Z", 874 | "start_time": "2018-10-17T13:06:29.008000Z" 875 | } 876 | }, 877 | "outputs": [], 878 | "source": [ 879 | "df_pudo = df_taxizones.copy()\n", 880 | "\n", 881 | "for hour in range(24):\n", 882 | " for weekday in range(7):\n", 883 | " df_pudo[\"PUDO_%d_%d\"%(weekday, hour)] = df_pudo[\"PU_Passenger_%d_%d\"%(weekday, hour)] - df_pudo[\"DO_Passenger_%d_%d\"%(weekday, hour)]\n", 884 | " df_pudo[\"PUDO_%d_%d\"%(weekday, hour)] = df_pudo[\"PUDO_%d_%d\"%(weekday, hour)].apply(lambda x: \"PU > DO\" if x>0 else \"PU < DO\")\n", 885 | "df_pudo.drop(columns=filter(lambda x: \"Passenger\" in x, df_pudo.columns), inplace=True)\n", 886 | "df_pudo[\"PUDO\"] = df_pudo[\"PUDO_0_7\"]\n", 887 | "df_pudo.head(3)" 888 | ] 889 | }, 890 | { 891 | "cell_type": "code", 892 | "execution_count": null, 893 | "metadata": { 894 | "ExecuteTime": { 895 | "end_time": "2018-10-17T13:06:30.464000Z", 
896 | "start_time": "2018-10-17T13:06:29.941000Z" 897 | } 898 | }, 899 | "outputs": [], 900 | "source": [ 901 | "from bokeh.transform import factor_cmap\n", 902 | "\n", 903 | "#Define Categorical Color Map for Plot (red=\"PU > DO\", blue=\"PU < DO\"):\n", 904 | "categorical_cmap = factor_cmap(\"PUDO\", palette=[\"red\", \"blue\"], factors=[\"PU > DO\", \"PU < DO\"] )\n", 905 | "\n", 906 | "#Define Source for Plot:\n", 907 | "source = ColumnDataSource(df_pudo)\n", 908 | "\n", 909 | "#Define Figure for Plot:\n", 910 | "p = figure(title=\"Titel\",\n", 911 | " plot_width=900, plot_height=450,\n", 912 | " toolbar_location=None,\n", 913 | " tools=\"pan,wheel_zoom,box_zoom,reset,save\",\n", 914 | " active_scroll=\"wheel_zoom\")\n", 915 | "p.xaxis.visible = False\n", 916 | "p.yaxis.visible = False\n", 917 | "\n", 918 | "#Get rid of zoom on axes:\n", 919 | "for t in p.tools:\n", 920 | " if type(t) == WheelZoomTool:\n", 921 | " t.zoom_on_axis = False\n", 922 | "\n", 923 | "#Use OpenStreetMap Tiles:\n", 924 | "tiles = WMTSTileSource(url='http://c.tile.openstreetmap.org/{Z}/{X}/{Y}.png')\n", 925 | "\n", 926 | "#Add Tile Layer and set alpha-value:\n", 927 | "tile_layer = p.add_tile(tiles)\n", 928 | "tile_layer.alpha = 0.6\n", 929 | "\n", 930 | "patches = p.patches(xs=\"X\", ys=\"Y\", source=source,\n", 931 | " fill_color=categorical_cmap,\n", 932 | " line_color=\"black\", alpha=0.5,\n", 933 | " legend=\"PUDO\")\n", 934 | "\n", 935 | "\n", 936 | "slider = Slider(start=0, end=23, value=7, step=1, title=\"Hour\", width=350)\n", 937 | "\n", 938 | "radiobuttons_weekday = RadioButtonGroup(\n", 939 | " labels=[\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\", \"Sun\"],\n", 940 | " active=0,\n", 941 | " width=350)\n", 942 | "\n", 943 | "#Define callback-function with JavaScript Code:\n", 944 | "callback = CustomJS(args=dict(p=p, source=source, slider=slider,\n", 945 | " radiobuttons_weekday=radiobuttons_weekday),\n", 946 | " code=\"\"\"\n", 947 | " \n", 948 | "//Get value of slider 
for hour:\n", 949 | "var hour = slider.value;\n", 950 | "\n", 951 | "//Get value of weekday:\n", 952 | "var weekday = radiobuttons_weekday.active;\n", 953 | "\n", 954 | "//Change data of \"PUDO\" column in data source to passenger data of the selected hour:\n", 955 | "source.data[\"PUDO\"] = source.data[\"PUDO_\" + weekday + \"_\" + hour];\n", 956 | "source.change.emit();\n", 957 | "\n", 958 | " \"\"\")\n", 959 | "\n", 960 | "#Bind Callback to value change of slider and radiobuttons:\n", 961 | "slider.js_on_change(\"value\", callback)\n", 962 | "radiobuttons_weekday.js_on_change(\"active\", callback)\n", 963 | "\n", 964 | "show(column(row(slider, Div(width=100), radiobuttons_weekday), p))" 965 | ] 966 | }, 967 | { 968 | "cell_type": "markdown", 969 | "metadata": { 970 | "ExecuteTime": { 971 | "end_time": "2018-08-20T12:07:54.448000Z", 972 | "start_time": "2018-08-20T12:07:54.400000Z" 973 | }, 974 | "collapsed": true 975 | }, 976 | "source": [ 977 | "# OSM GPS Data and Datashader" 978 | ] 979 | }, 980 | { 981 | "cell_type": "markdown", 982 | "metadata": {}, 983 | "source": [ 984 | "## Load and process data" 985 | ] 986 | }, 987 | { 988 | "cell_type": "markdown", 989 | "metadata": {}, 990 | "source": [ 991 | "Use GPX Dataset for Germany (only identifiable trips) from OpenStreetMap (**© OpenStreetMap-Mitwirkende**). From the GPX files a final DataFrame (only containing Latitude and Longitude) was created. Let us load the data into the DataFrame: \n", 992 | "\n", 993 | "**Note**: The OSM Licence Conditions also apply to the provided data. 
" 994 | ] 995 | }, 996 | { 997 | "cell_type": "code", 998 | "execution_count": null, 999 | "metadata": { 1000 | "ExecuteTime": { 1001 | "end_time": "2018-10-17T13:06:44.773000Z", 1002 | "start_time": "2018-10-17T13:06:36.007000Z" 1003 | } 1004 | }, 1005 | "outputs": [], 1006 | "source": [ 1007 | "df_osm = pd.read_parquet(\"Data\\OSM GPX\\OSM_GPX.parquet\")\n", 1008 | "display_side_by_side(df_osm.query(\"TripId==1\").head(), df_osm.query(\"TripId==2\").head())" 1009 | ] 1010 | }, 1011 | { 1012 | "cell_type": "markdown", 1013 | "metadata": {}, 1014 | "source": [ 1015 | "Calculate distances between the GPS points of a trip:" 1016 | ] 1017 | }, 1018 | { 1019 | "cell_type": "code", 1020 | "execution_count": null, 1021 | "metadata": { 1022 | "ExecuteTime": { 1023 | "end_time": "2018-10-17T13:09:02.566000Z", 1024 | "start_time": "2018-10-17T13:08:45.693000Z" 1025 | } 1026 | }, 1027 | "outputs": [], 1028 | "source": [ 1029 | "def calc_distance_from_lonlat(x1, y1, x2, y2):\n", 1030 | " \"\"\"Calculates the distance between two points in WGS84 projection (Lat, Lon)\n", 1031 | " based on a linear approximation around the origin of the first point. Works \n", 1032 | " well for smaller distances < 100km. 
\n", 1033 | " \"\"\"\n", 1034 | "\n", 1035 | " return np.sqrt((np.cos(y1 * 2 * np.pi / 360) * (x2 - x1))**2 +\n", 1036 | " (y2 - y1)**2) * 6371 * 2 * np.pi / 360\n", 1037 | "\n", 1038 | "#Calculate postlength of every GPS point:\n", 1039 | "df_osm[\"Postlength\"] = calc_distance_from_lonlat(df_osm[\"longitude\"], df_osm[\"latitude\"], df_osm[\"longitude\"].shift(-1), df_osm[\"latitude\"].shift(-1))\n", 1040 | "#Set Postlength of every last element of each trip to NaN:\n", 1041 | "df_osm.loc[df_osm[\"TripId\"]-df_osm[\"TripId\"].shift(-1)!=0, \"Postlength\"] = np.NaN\n", 1042 | "\n", 1043 | "display_side_by_side(df_osm.query(\"TripId==1\").head(), df_osm.query(\"TripId==1\").tail())" 1044 | ] 1045 | }, 1046 | { 1047 | "cell_type": "markdown", 1048 | "metadata": {}, 1049 | "source": [ 1050 | "## Basic Plots" 1051 | ] 1052 | }, 1053 | { 1054 | "cell_type": "markdown", 1055 | "metadata": {}, 1056 | "source": [ 1057 | "**Let us quickly create some easy plots with basic statistics about the GPS data**" 1058 | ] 1059 | }, 1060 | { 1061 | "cell_type": "markdown", 1062 | "metadata": {}, 1063 | "source": [ 1064 | "Histogram with number of points per trip:" 1065 | ] 1066 | }, 1067 | { 1068 | "cell_type": "code", 1069 | "execution_count": null, 1070 | "metadata": { 1071 | "ExecuteTime": { 1072 | "end_time": "2018-10-17T13:09:06.185000Z", 1073 | "start_time": "2018-10-17T13:09:02.571000Z" 1074 | } 1075 | }, 1076 | "outputs": [], 1077 | "source": [ 1078 | "%%output filename=\"Export/Trip_points\" fig=\"html\"\n", 1079 | "%%opts Histogram [width=500 tools=['hover']]\n", 1080 | "%%opts Histogram (fill_color='red')\n", 1081 | "\n", 1082 | "#Count Number of Points for every trip:\n", 1083 | "trip_points = df_osm.groupby(\"TripId\")[\"latitude\"].count().values\n", 1084 | "#Calculate 90 percent quantile for setting plotting range:\n", 1085 | "quantile_90 = np.percentile(trip_points, 90)\n", 1086 | "\n", 1087 | "#Plot Histogram via Holoviews (output to Data/Trip_points due to the 
%%magic command in the top of the cell):\n", 1088 | "frequencies, edges = np.histogram(trip_points, np.arange(0, trip_points.max()+100, 100))\n", 1089 | "p_trip_points = hv.Histogram((edges, frequencies), label=\"Number of GPS Points per Trip\")\n", 1090 | "p_trip_points = p_trip_points.redim.label(x=\"Number of Points\", Frequency=\"Trips\")\n", 1091 | "p_trip_points = p_trip_points.redim.range(x=(0,quantile_90))\n", 1092 | "p_trip_points" 1093 | ] 1094 | }, 1095 | { 1096 | "cell_type": "markdown", 1097 | "metadata": {}, 1098 | "source": [ 1099 | "Trip length distribution " 1100 | ] 1101 | }, 1102 | { 1103 | "cell_type": "code", 1104 | "execution_count": null, 1105 | "metadata": { 1106 | "ExecuteTime": { 1107 | "end_time": "2018-10-17T13:09:09.302000Z", 1108 | "start_time": "2018-10-17T13:09:06.192000Z" 1109 | } 1110 | }, 1111 | "outputs": [], 1112 | "source": [ 1113 | "%%output filename=\"Export/Trip_lengths\" fig=\"html\"\n", 1114 | "%%opts Histogram [width=500 tools=['hover']]\n", 1115 | "%%opts Histogram (fill_color='red')\n", 1116 | "\n", 1117 | "#Count Number of Points for every trip:\n", 1118 | "trip_length = df_osm.groupby(\"TripId\")[\"Postlength\"].sum().values\n", 1119 | "#Calculate 90 percent quantile for setting plotting range:\n", 1120 | "quantile_90 = np.percentile(trip_length, 90)\n", 1121 | "\n", 1122 | "#Plot Histogram via Holoviews (output to Data/Trip_points due to the %%magic command in the top of the cell):\n", 1123 | "frequencies, edges = np.histogram(trip_length, np.arange(0, 1000, 1))\n", 1124 | "p_trip_length = hv.Histogram((edges, frequencies), label=\"Trip Distances\")\n", 1125 | "p_trip_length = p_trip_length.redim.label(x=\"Length [km]\", Frequency=\"Trips\")\n", 1126 | "p_trip_length = p_trip_length.redim.range(x=(0,quantile_90))\n", 1127 | "\n", 1128 | "p_trip_length" 1129 | ] 1130 | }, 1131 | { 1132 | "cell_type": "markdown", 1133 | "metadata": {}, 1134 | "source": [ 1135 | "## Heatmap with DataShader" 1136 | ] 1137 | }, 1138 | { 
1139 | "cell_type": "markdown", 1140 | "metadata": {}, 1141 | "source": [ 1142 | "Add coordinates in Web Mercador:" 1143 | ] 1144 | }, 1145 | { 1146 | "cell_type": "code", 1147 | "execution_count": null, 1148 | "metadata": { 1149 | "ExecuteTime": { 1150 | "end_time": "2018-10-17T13:09:31.824000Z", 1151 | "start_time": "2018-10-17T13:09:09.314000Z" 1152 | } 1153 | }, 1154 | "outputs": [], 1155 | "source": [ 1156 | "from pyproj import Proj, transform\n", 1157 | "\n", 1158 | "#Define initial and output Projection (WGS84 and Web-Mercador):\n", 1159 | "inProj = Proj(init='epsg:4326')\n", 1160 | "outProj = Proj(init='epsg:3857')\n", 1161 | "\n", 1162 | "#Add Web Mercador coordinates to columns X, Y\n", 1163 | "df_osm[\"X\"], df_osm[\"Y\"] = transform(inProj, outProj, df_osm[\"longitude\"].values, df_osm[\"latitude\"].values)\n", 1164 | "\n", 1165 | "df_osm.head()" 1166 | ] 1167 | }, 1168 | { 1169 | "cell_type": "markdown", 1170 | "metadata": {}, 1171 | "source": [ 1172 | "Filter data with Germany Bounding-Box:" 1173 | ] 1174 | }, 1175 | { 1176 | "cell_type": "code", 1177 | "execution_count": null, 1178 | "metadata": { 1179 | "ExecuteTime": { 1180 | "end_time": "2018-10-17T13:09:45.602000Z", 1181 | "start_time": "2018-10-17T13:09:31.828000Z" 1182 | } 1183 | }, 1184 | "outputs": [], 1185 | "source": [ 1186 | "minlat, maxlat, minlon, maxlon = 47.3, 55, 5.9, 15.1 #Bounding Box Germany\n", 1187 | "#maxlat, minlon, minlat, maxlon = 52.57, 13.25, 52.45, 13.5 #Bounding Box Berlin City\n", 1188 | "df_osm_filtered = df_osm[(df_osm[\"latitude\"]>minlat)&(df_osm[\"latitude\"]minlon)&(df_osm[\"longitude\"]\n", 1443 | "#hover {\n", 1444 | " font-family: \"Trebuchet MS\", Arial, Helvetica, sans-serif;\n", 1445 | " border-collapse: collapse;\n", 1446 | " width = 250px;\n", 1447 | "}\n", 1448 | "\n", 1449 | "#hover td, #hover th {\n", 1450 | " border: 1px solid #ddd;\n", 1451 | " text-align: center;\n", 1452 | " padding: 5px;\n", 1453 | " width:125px;\n", 1454 | "}\n", 1455 | "\n", 1456 
| "#hover tr:nth-child(even){background-color: #f2f2f2;}\n", 1457 | "\n", 1458 | "\n", 1459 | "#hover th {\n", 1460 | " padding-top: 5px;\n", 1461 | " padding-bottom: 5px;\n", 1462 | " text-align: center;\n", 1463 | " background-color: #b50000;\n", 1464 | " color: white;\n", 1465 | " width:125px;\n", 1466 | "}\n", 1467 | "\n", 1468 | "#hover ex {\n", 1469 | " padding-top: 5px;\n", 1470 | " padding-bottom: 5px;\n", 1471 | " text-align: center;\n", 1472 | " background-color: #b50000;\n", 1473 | " color: black;\n", 1474 | " width:125px;\n", 1475 | "}\n", 1476 | "\"\"\"\n", 1477 | "my_hover.tooltips = css_style + \"\"\"\n", 1478 | "

@Name \"\"

\n", 1479 | "

@Country

\n", 1480 | "

@City, @Address

\n", 1481 | "
Telefone: @Telefone
\n", 1482 | "
Email: @Email
\n", 1483 | "\"\"\"\n", 1484 | "p.add_tools(my_hover)\n", 1485 | "\n", 1486 | "\n", 1487 | "show(p)" 1488 | ] 1489 | }, 1490 | { 1491 | "cell_type": "markdown", 1492 | "metadata": {}, 1493 | "source": [ 1494 | "Export components to create HTML content that can be embedded into another DIV:" 1495 | ] 1496 | }, 1497 | { 1498 | "cell_type": "code", 1499 | "execution_count": null, 1500 | "metadata": { 1501 | "ExecuteTime": { 1502 | "end_time": "2018-10-17T13:14:38.359000Z", 1503 | "start_time": "2018-10-17T13:14:38.323000Z" 1504 | } 1505 | }, 1506 | "outputs": [], 1507 | "source": [ 1508 | "#Get components of plot:\n", 1509 | "from bokeh.embed import components\n", 1510 | "script, div = components(p)\n", 1511 | "\n", 1512 | "#Define source for Bokeh CSS and JS:\n", 1513 | "import bokeh\n", 1514 | "version = bokeh.__version__\n", 1515 | "source = \"\"\".min.css\"\n", 1517 | " rel=\"stylesheet\" type=\"text/css\">\n", 1518 | ".min.css\"\n", 1520 | " rel=\"stylesheet\" type=\"text/css\">\n", 1521 | ".min.css\"\n", 1523 | " rel=\"stylesheet\" type=\"text/css\">\n", 1524 | "\n", 1525 | "\n", 1526 | "\n", 1527 | "\"\"\".replace(\"\", version)\n", 1528 | "\n", 1529 | "#Export final HTML content:\n", 1530 | "html_content = source + script + div\n", 1531 | "\n", 1532 | "with open(\"Export//PTV_Locations.html\", \"w\") as f:\n", 1533 | " f.write(html_content)" 1534 | ] 1535 | } 1536 | ], 1537 | "metadata": { 1538 | "kernelspec": { 1539 | "display_name": "Python 2", 1540 | "language": "python", 1541 | "name": "python2" 1542 | }, 1543 | "language_info": { 1544 | "codemirror_mode": { 1545 | "name": "ipython", 1546 | "version": 2 1547 | }, 1548 | "file_extension": ".py", 1549 | "mimetype": "text/x-python", 1550 | "name": "python", 1551 | "nbconvert_exporter": "python", 1552 | "pygments_lexer": "ipython2", 1553 | "version": "2.7.15" 1554 | }, 1555 | "toc": { 1556 | "nav_menu": {}, 1557 | "number_sections": true, 1558 | "sideBar": true, 1559 | "skip_h1_title": false, 1560 | 
"title_cell": "Table of Contents", 1561 | "title_sidebar": "Contents", 1562 | "toc_cell": false, 1563 | "toc_position": { 1564 | "height": "calc(100% - 180px)", 1565 | "left": "10px", 1566 | "top": "150px", 1567 | "width": "246px" 1568 | }, 1569 | "toc_section_display": true, 1570 | "toc_window_display": true 1571 | } 1572 | }, 1573 | "nbformat": 4, 1574 | "nbformat_minor": 2 1575 | } 1576 | -------------------------------------------------------------------------------- /Export/Trip_lengths.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 744 | 784 | 785 | 786 | 787 | 788 | 789 | 790 |
897 | 898 | 899 | 900 | 901 | 902 | 903 |
904 | 905 | 932 |
--------------------------------------------------------------------------------