├── .gitignore ├── LICENSE ├── README.md ├── assignments ├── assignment-00 │ ├── README.md │ └── assignment0-solution.ipynb ├── assignment-01 │ ├── README.md │ └── rents.csv ├── assignment-02 │ └── README.md ├── assignment-03 │ └── README.md ├── assignment-04 │ └── README.md ├── assignment-05 │ └── README.md └── final-project │ └── readme.md ├── docker ├── Dockerfile ├── environment.yml ├── readme.md ├── requirements.txt └── windows-env │ ├── create-windows-environment.bat │ └── environment-windows.yml ├── modules ├── module00 - welcome │ └── readme.md ├── module01 - intro to python 1 │ ├── module01.ipynb │ ├── readme.md │ └── simple_script.py ├── module02 - intro to python 2 │ ├── README.md │ └── module02.ipynb ├── module03 - intro to pandas 1 │ ├── README.md │ ├── data │ │ ├── cities.csv │ │ └── rain.csv │ └── module03.ipynb ├── module04 - intro to pandas 2 │ ├── README.md │ ├── data │ │ ├── gps-coords-reduced.csv │ │ ├── gps-coords.csv │ │ ├── tracts_pop_age.csv │ │ └── tracts_white_income.csv │ └── module04.ipynb ├── module05 - data visualization │ ├── README.md │ ├── data │ │ ├── gps-coords.csv │ │ └── tracts.csv │ └── module05.ipynb ├── module06 - intro to geopandas │ ├── README.md │ ├── data │ │ ├── listings.csv │ │ ├── states.geojson │ │ ├── states_21basic │ │ │ ├── states.dbf │ │ │ ├── states.prj │ │ │ ├── states.sbn │ │ │ ├── states.sbx │ │ │ ├── states.shp │ │ │ ├── states.shp.xml │ │ │ └── states.shx │ │ └── usa-latlong.csv │ └── module06.ipynb ├── module07 - maps and web mapping │ ├── README.md │ └── module07.ipynb ├── module08 - apis, geocoding, geolocation │ ├── README.md │ ├── data │ │ ├── parcels.geojson │ │ ├── tl_2018_25_place │ │ │ ├── tl_2018_25_place.cpg │ │ │ ├── tl_2018_25_place.dbf │ │ │ ├── tl_2018_25_place.prj │ │ │ ├── tl_2018_25_place.shp │ │ │ ├── tl_2018_25_place.shp.ea.iso.xml │ │ │ ├── tl_2018_25_place.shp.iso.xml │ │ │ └── tl_2018_25_place.shx │ │ └── tl_2018_25_tract │ │ │ ├── tl_2018_25_tract.cpg │ │ │ ├── tl_2018_25_tract.dbf │ │ │ ├── tl_2018_25_tract.prj │ │ │ ├── tl_2018_25_tract.shp │ │ │ ├── tl_2018_25_tract.shp.ea.iso.xml │ │ │ ├── tl_2018_25_tract.shp.iso.xml │ │ │ └── tl_2018_25_tract.shx │ └── module08.ipynb ├── module09 - spatial weights and esda │ ├── README.md │ ├── census │ │ ├── census_tracts_data.geojson │ │ ├── download-census-data.ipynb │ │ ├── getcensus.py │ │ └── tl_2018_25_tract │ │ │ ├── tl_2018_25_tract.cpg │ │ │ ├── tl_2018_25_tract.dbf │ │ │ ├── tl_2018_25_tract.prj │ │ │ ├── tl_2018_25_tract.shp │ │ │ ├── tl_2018_25_tract.shp.ea.iso.xml │ │ │ ├── tl_2018_25_tract.shp.iso.xml │ │ │ └── tl_2018_25_tract.shx │ └── module09.ipynb ├── module10 - inferential spatial models │ ├── README.md │ ├── data │ │ └── census_tracts_data.geojson │ ├── img │ │ ├── frequentists_vs_bayesians.png │ │ └── regression.png │ └── module10.ipynb ├── module11 - postgis │ ├── 01-store-data-in-postgis-db.ipynb │ ├── 02-postgis-spatial-join.ipynb │ └── README.md ├── module12 - spatial networks │ ├── README.md │ └── module12.ipynb ├── module13 - course wrap up │ ├── README.md │ ├── data │ │ └── census_tracts_data.geojson │ └── module13.ipynb └── module14 - final projects │ └── README.md ├── readings └── readme.md └── syllabus └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | keys.py 2 | *.gal 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 
| dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Geoff Boeing 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis of Urban Systems 2 | 3 | This course has been superseded by this [urban data science](https://github.com/gboeing/urban-data-science) course series. 4 | -------------------------------------------------------------------------------- /assignments/assignment-00/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Assignment 0 3 | 4 | Due Sunday 27 January by 23:59 via Blackboard submission. 5 | 6 | 1.
You will create your first complete Jupyter notebook in order to work with loops, conditionals, iteration, and functions. Review *Think Python* section 4.8 to develop a plan and section 6.2 on incremental development. Also review the code in the notebook from this class session. 7 | 2. You will write four functions to work with prime numbers. A prime number is any whole number greater than 1 that has no positive divisors besides 1 and itself. So, a prime number a) must be an integer, b) must be greater than 1, and c) must be divisible only by 1 and itself. 8 | 1. Write a function `is_prime(n)` that accepts an argument n and returns boolean True if n is a prime number and boolean False if n is not prime. For example, `is_prime(11)` should return True and `is_prime(12)` should return False. 9 | 2. Write a function `print_primes(n)` that accepts an argument n and prints all the prime numbers between 0 and n (inclusive). This function should call the `is_prime` function you developed earlier. For example, `print_primes(13)` should print out: 2 3 5 7 11 13 10 | 3. Write a function `count_primes(n)` that accepts an argument n and returns an integer count of how many prime numbers there are between 0 and n (inclusive). This function should also call the `is_prime` function you developed earlier. For example, `count_primes(19)` should return 8, because there are eight prime numbers between 0 and 19. 11 | 4. Write a function `first_primes(n)` that accepts an argument n and returns a Python list containing the first n prime numbers. (Hint: use a while loop and append each prime you find to a list.) This function should also call the `is_prime` function you developed earlier. For example, `first_primes(5)` should return the first five prime numbers as a list: `[2, 3, 5, 7, 11]` 12 | 3. Create a new, clean notebook to contain your work. The first cell in your notebook should be a markdown cell containing a plain-English narrative of the a) logic you will need to code and b) how your development process corresponds to *Think Python* sections 4.8 and 6.2. Use this narrative to think through how you will tackle each function. Each of the four functions should be in its own notebook cell. At the end of each of these cells, call the function by passing in a test value to demonstrate that it is working properly. 13 | 14 | Make sure your entire notebook runs properly and without any errors. Click Kernel > Restart > Clear all outputs and restart. This clears your memory and re-initializes your notebook. Then click Cell > Run all and make sure everything behaves properly. 15 | 16 | Give your notebook a meaningful filename like `your-lastname-assignment0.ipynb`. Submit your notebook file as (ungraded/practice) Assignment 0 on Blackboard by Sunday night. Before submitting, confirm the following to the best of your abilities: does your code fully run? Does it do what it's supposed to do the way it's supposed to do it? Is it well-commented and documented? Is your code clear, straightforward, and reasonably efficient? 
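For instance, following section 6.2's incremental development, you might begin with a stub that runs end to end before any real logic exists. Below is a minimal sketch of one possible starting point, not a required template:

```python
# incremental development, step 1: a stub that always returns False,
# so you can confirm the function runs and is callable before adding real logic
def is_prime(n):
    """Return True if n is a prime number, False otherwise."""
    return False  # placeholder: replace with trial-division logic next

print(is_prime(11))  # prints False for now; should print True once implemented
```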
-------------------------------------------------------------------------------- /assignments/assignment-00/assignment0-solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "def is_prime(n):\n", 10 | " \n", 11 | " if not isinstance(n, int):\n", 12 | " return False\n", 13 | " \n", 14 | " if n <= 1:\n", 15 | " return False\n", 16 | " \n", 17 | " for i in range(2, n):\n", 18 | " if n % i == 0:\n", 19 | " return False\n", 20 | " \n", 21 | " return True\n", 22 | "\n", 23 | "print(is_prime(11))\n", 24 | "print(is_prime(12))" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "def print_primes(n):\n", 34 | " for i in range(n + 1):\n", 35 | " if is_prime(i):\n", 36 | " print(i, end=' ')\n", 37 | " \n", 38 | "print_primes(13)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "def count_primes(n):\n", 48 | " counter = 0\n", 49 | " for i in range(n + 1):\n", 50 | " if is_prime(i):\n", 51 | " counter += 1\n", 52 | " return counter\n", 53 | "\n", 54 | "count_primes(19)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "def first_primes(n):\n", 64 | " primes = []\n", 65 | " i = 0\n", 66 | " while len(primes) < n:\n", 67 | " if is_prime(i):\n", 68 | " primes.append(i)\n", 69 | " i += 1\n", 70 | " return primes\n", 71 | " \n", 72 | "first_primes(5)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [] 81 | } 82 | ], 83 | "metadata": { 84 | "kernelspec": { 85 | "display_name": "Python 3", 86 | "language": "python", 87 | "name": "python3" 88 | }, 89 | "language_info": { 90 | "codemirror_mode": { 91 | "name": "ipython", 92 | "version": 3 93 | }, 94 | "file_extension": ".py", 95 | "mimetype": "text/x-python", 96 | "name": "python", 97 | "nbconvert_exporter": "python", 98 | "pygments_lexer": "ipython3", 99 | "version": "3.7.1" 100 | } 101 | }, 102 | "nbformat": 4, 103 | "nbformat_minor": 2 104 | } 105 | -------------------------------------------------------------------------------- /assignments/assignment-01/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | 3 | ## Assignment 1 4 | 5 | Due Sunday 3 February by 23:59 via Blackboard submission. 6 | 7 | 1. Create a new, clean Jupyter notebook to contain your submission. The first cell in your notebook should be a markdown cell containing a plain-English narrative of the a) logic you will need to code and b) how your development process corresponds to *Think Python* sections 4.8 and 6.2. Use this narrative to think through how you will tackle each piece of the assignment in straightforward language. 8 | 2. Download the rents.csv and save it to your working directory (i.e., the same directory your notebook is in). 9 | 3. Using the rents.csv file and the pandas techniques we've learned: 10 | 1. Clean the rent, bedrooms and sqft fields, rename each to a new name (your choice), and make each a float type 11 | 2. Calculate the mean, minimum, and maximum rent, bedrooms, and square footage 12 | 3. 
Create a subset dataframe containing only listings with rents between $1,000 and $4,000, and excluding listings with missing values for bedrooms or square footage 13 | 4. Calculate the mean, minimum, and maximum rent, bedrooms, and square footage on this filtered subset 14 | 15 | Use comments to document all of your logic throughout the notebook. Make sure your entire notebook runs properly and without any errors. Click Kernel > Restart > Clear all outputs and restart. This clears your memory and re-initializes your notebook. Then click Cell > Run all and make sure everything behaves properly. 16 | 17 | Submit your notebook file as Assignment 1 on Blackboard by Sunday night. Before submitting, confirm the following: does your code fully run? Does it do what it's supposed to do the way it's supposed to do it? Is it well-commented and documented? Is your code clear, straightforward, and reasonably efficient? 18 | -------------------------------------------------------------------------------- /assignments/assignment-02/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | 3 | ## Assignment 2 4 | 5 | Due Sunday 17 February by 23:59 via Blackboard submission. 6 | 7 | First, get your hands on a data set. You can download one as a file, connect to an API to download data, or use some data set you're working with elsewhere in life. Whatever you'd like. But perhaps consider a data set that helps you move toward the final project. 8 | 9 | Create a new Jupyter notebook. Use pandas to load your data set and clean/process it as needed. 10 | 11 | Using the matplotlib techniques we learned in class, visualize interesting aspects of your data set. Create at least 4 (in total) visualizations using at least 3 different visualization types (such as bar charts, line charts, scatter plots, or anything else). You can optionally use other Python visualization libraries like seaborn or bokeh for any visualizations beyond the minimum required. Each visualization should be followed by a markdown cell that succinctly explains what you are visualizing and why it is interesting. 12 | 13 | Make sure your notebook runs from the top without any errors and that all the visuals can be seen inline (without me having to re-run your notebook). Then submit your notebook and your data file, zipped, via Blackboard. 14 | -------------------------------------------------------------------------------- /assignments/assignment-03/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | 3 | ## Assignment 3 4 | 5 | Due Sunday 3 March by 23:59 via Blackboard submission. 6 | 7 | First, get your hands on a data set. You can download one as a file, connect to an API to download data, or use some data set you're working with elsewhere in life. Whatever you'd like. But perhaps consider a data set that helps you move toward the final project. You may use the same data set as in the previous assignment, but preferably choose a new one. 8 | 9 | Create a new Jupyter notebook. Use geopandas to load your data set and clean/process it as needed. 10 | 11 | Using the mapping techniques we learned in class, visualize interesting aspects of your data. Create at least 3 static maps and explain both the color map and the projection you chose to use (in markdown). These maps can be plotted with geopandas directly or with cartopy.
Each map should be followed by a markdown cell that succinctly explains what you are visualizing and why it is interesting. Finally, create at least 1 interactive web map, either using folium, carto, or leaflet directly. 12 | 13 | Make sure your notebook runs from the top without any errors and that all the visuals can be seen inline (without me having to re-run your notebook). Then submit your notebook, web map file, and your data file, zipped, via Blackboard. 14 | -------------------------------------------------------------------------------- /assignments/assignment-04/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | 3 | ## Assignment 4 4 | 5 | Due Tuesday 2 April by 23:59 via Blackboard submission. 6 | 7 | First, get your hands on a spatial data set. You can download one as a file, connect to an API to download data, or use some data set you're working with elsewhere in life. Whatever you'd like. But perhaps consider a data set that helps you move toward the final project. If necessary, you may use the same data set as in the previous assignment, but preferably choose a new one. 8 | 9 | Create a new Jupyter notebook. Use geopandas to load your data set and clean/process it as needed. Using the techniques we learned in class, do the following: 10 | 11 | 1. conduct a nonspatial inferential analysis (such as difference-in-means or estimate a population parameter from a sample) 12 | 1. estimate a nonspatial regression model 13 | 1. compute spatial diagnostics and estimate an appropriate spatial regression model 14 | 15 | Each step above should be followed by a markdown cell that succinctly explains your analysis, why you set it up the way you did, and how you interpret its results. 16 | 17 | Make sure your notebook runs from the top without any errors and that all the visuals/output can be seen inline (without me having to re-run it). Restart your kernel, run all cells, then save/submit your notebook and any necessary data files, zipped, via Blackboard. 18 | -------------------------------------------------------------------------------- /assignments/assignment-05/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | 3 | ## Assignment 5 4 | 5 | Due Tuesday 16 April by 23:59 via Blackboard submission. 6 | 7 | Choose two US cities. Create a new Jupyter notebook. Use OSMnx to download their street networks and then conduct the following analyses to compare each to the other: 8 | 9 | 1. How many intersections and street segments does the network have? 10 | 1. What is the network's overall intersection density per square mile? 11 | 1. What is the average street segment length? 12 | 1. What is the average street circuity? 13 | 14 | Finally, choose two points in each network, and calculate and visualize the shortest path between them. Make sure your notebook runs from the top without any errors and that all the visuals/output can be seen inline (without me having to re-run it). Restart your kernel, run all cells, then save/submit your notebook and any necessary data files, zipped, via Blackboard. 
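A minimal sketch of one way to structure this workflow, assuming the OSMnx 0.x API pinned in the course environment (the city, the coordinates, and the stats-dictionary key names below are illustrative assumptions; verify them against your installed OSMnx version):

```python
import networkx as nx
import osmnx as ox

# download one city's drivable street network (repeat for your second city)
G = ox.graph_from_place('Cambridge, Massachusetts, USA', network_type='drive')

# count intersections/segments and compute length and circuity stats
stats = ox.basic_stats(G)
print(stats['intersection_count'])     # number of intersections
print(stats['street_segments_count'])  # number of street segments
print(stats['street_length_avg'])      # average street segment length, meters
print(stats['circuity_avg'])           # average street circuity

# intersection density requires a land area: pass area (in square meters) to
# basic_stats, then convert its per-square-km figure to per square mile

# shortest path between two (hypothetical) lat-long points in the network
orig = ox.get_nearest_node(G, (42.3736, -71.1189))
dest = ox.get_nearest_node(G, (42.3656, -71.1040))
route = nx.shortest_path(G, orig, dest, weight='length')
fig, ax = ox.plot_graph_route(G, route)
```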
15 | -------------------------------------------------------------------------------- /assignments/final-project/readme.md: -------------------------------------------------------------------------------- 1 | # Final Project 2 | 3 | ## Overview 4 | 5 | The final project is a cumulative and applied assignment that requires you to use the skills you developed over the entire semester. I want you to develop an interesting research question that can be answered using the spatial data analysis, modeling, and visualization methods we have learned in this course. 6 | 7 | ## Instructions 8 | 9 | Develop a spatial research question that interests you. 10 | 11 | Collect data from two or more different sources, including but not limited to: data APIs, web scraping, public data portals, FTP servers, or directly from an organization. 12 | 13 | Clean, organize, merge, and process the data using pandas/geopandas into a nice analyzable format 14 | 15 | Conduct a statistical analysis. This shall include, at a minimum, a set of descriptive statistics, some exploratory spatial analysis, and a model, such as a spatial multiple regression model. 16 | 17 | Create 3 or more static data visualizations, such as scatter plots, bar charts, line graphs, etc. Feel free to use matplotlib or a similar visualization library. 18 | 19 | Create 3 or more maps, including at least 1 choropleth map and 1 point map. Optionally consider creating an interactive web map. 20 | 21 | Write a data narrative telling the story of your analysis in 2500+ words (not including tables, figures, captions, or references). Incorporate the visualizations and analytical results into your narrative. Organize it into five sections: 1, introduction (provide an overview of your entire project); 2, background (explain the context, prior work, and motivation leading to your research question). 3, methods (explain your data and your analytical process). 4, results (lay out your findings and visuals). 5, discussion (circle back to your research question and what your analysis tells you about it; what is the big picture; how are these findings useful?). At the top of the first page, include your name, the date, your project title, and the word count (not including tables, figures, captions, or references). 22 | 23 | ## Submission 24 | 25 | Project proposals are due Feb 27: this is a 1 page paper explaining your project idea, the motivation, where you will acquire your data, how you will process it, and how you will visualize it. What sort of findings do you expect? What new insights will they provide? Write up as a PDF and submit via Blackboard. 26 | 27 | Final projects are due Apr 24, via Blackboard submission. Please submit a zip file containing the narrative (as PDF), your visualization and map image files, web maps (if any), and the Jupyter notebook(s) and data files used to complete this project. Make sure everything is runnable and reproducible. 28 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ######################################################################## 2 | # ASA course Dockerfile 3 | # Web: https://github.com/gboeing/asa 4 | # 5 | # Build an image from the dockerfile: 6 | # >>> docker build -t gboeing/asa . 
7 | # 8 | # Push the built image to hub so others can pull/run it: 9 | # >>> docker tag gboeing/asa gboeing/asa:latest 10 | # >>> docker login 11 | # >>> docker push gboeing/asa 12 | # 13 | # Run bash in this container and export final conda environment to a yml file: 14 | # >>> docker run --rm -it -u 0 --name asa -v %cd%:/home/jovyan/work gboeing/asa /bin/bash 15 | # >>> conda env export -n base > /home/jovyan/work/environment.yml 16 | # 17 | # Run jupyter lab in this container: 18 | # >>> docker run --rm -it --name asa -p 8888:8888 -v %cd%:/home/jovyan/work gboeing/asa 19 | # 20 | # Stop/delete all local docker containers/images: 21 | # >>> docker stop $(docker ps -aq) 22 | # >>> docker rm $(docker ps -aq) 23 | # >>> docker rmi $(docker images -q) 24 | ######################################################################## 25 | 26 | FROM continuumio/miniconda3 27 | LABEL maintainer="Geoff Boeing " 28 | LABEL url="https://github.com/gboeing/asa" 29 | 30 | COPY requirements.txt /tmp/ 31 | 32 | # configure conda and install packages in one RUN to keep image tidy 33 | RUN conda config --set show_channel_urls true && \ 34 | conda config --set channel_priority strict && \ 35 | conda config --prepend channels conda-forge && \ 36 | conda update --yes -n base conda && \ 37 | conda install --update-all --force-reinstall --yes --file /tmp/requirements.txt && \ 38 | jupyter labextension install @jupyter-widgets/jupyterlab-manager && \ 39 | jupyter labextension install jupyter-leaflet && \ 40 | conda clean --all --yes && \ 41 | conda env export -n base > environment.yml && \ 42 | conda info --all && \ 43 | conda list 44 | 45 | # launch jupyter in the local working directory that we mount 46 | WORKDIR /home/jovyan/work 47 | 48 | # set default command to launch when container is run 49 | CMD ["jupyter", "lab", "--ip='0.0.0.0'", "--port=8888", "--no-browser", "--allow-root", "--NotebookApp.token=''", "--NotebookApp.password=''"] 50 | 51 | # to test, import OSMnx and print its version 52 | RUN ipython -c "import osmnx; print(osmnx.__version__)" 53 | -------------------------------------------------------------------------------- /docker/environment.yml: -------------------------------------------------------------------------------- 1 | name: base 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - altair=3.2.0=py37_0 8 | - asn1crypto=0.24.0=py37_1003 9 | - attrs=19.1.0=py_0 10 | - backcall=0.1.0=py_0 11 | - beautifulsoup4=4.8.0=py37_0 12 | - bleach=3.1.0=py_0 13 | - bokeh=1.3.4=py37_0 14 | - boost-cpp=1.70.0=h8e57a91_2 15 | - bottleneck=1.2.1=py37h3010b51_1001 16 | - branca=0.3.1=py_0 17 | - bzip2=1.0.8=h516909a_0 18 | - ca-certificates=2019.6.16=hecc5488_0 19 | - cairo=1.16.0=hfb77d84_1002 20 | - cartopy=0.17.0=py37he1be148_1005 21 | - certifi=2019.6.16=py37_1 22 | - cffi=1.12.3=py37h8022711_0 23 | - cfitsio=3.470=hb60a0a2_1 24 | - chardet=3.0.4=py37_1003 25 | - click=7.0=py_0 26 | - click-plugins=1.1.1=py_0 27 | - cligj=0.5.0=py_0 28 | - conda=4.7.11=py37_0 29 | - conda-package-handling=1.4.1=py37_0 30 | - cryptography=2.7=py37h72c5cf5_0 31 | - curl=7.65.3=hf8cf82a_0 32 | - cycler=0.10.0=py_1 33 | - cython=0.29.13=py37he1b5a44_0 34 | - dbus=1.13.6=he372182_0 35 | - decorator=4.4.0=py_0 36 | - defusedxml=0.5.0=py_1 37 | - deprecated=1.2.6=py_0 38 | - descartes=1.1.0=py_3 39 | - entrypoints=0.3=py37_1000 40 | - expat=2.2.5=he1b5a44_1003 41 | - fiona=1.8.6=py37h5f563d9_4 42 | - folium=0.10.0=py_0 43 | - fontconfig=2.13.1=h86ecdb6_1001 44 | - 
freetype=2.10.0=he983fc9_1 45 | - freexl=1.0.5=h14c3975_1002 46 | - gdal=2.4.2=py37h5f563d9_4 47 | - geographiclib=1.49=py_0 48 | - geopandas=0.5.1=py_0 49 | - geopy=1.20.0=py_0 50 | - geos=3.7.2=he1b5a44_1 51 | - geotiff=1.5.1=h560c3f3_2 52 | - gettext=0.19.8.1=hc5be6a0_1002 53 | - giflib=5.1.7=h516909a_1 54 | - glib=2.58.3=h6f030ca_1002 55 | - gst-plugins-base=1.14.5=h0935bb2_0 56 | - gstreamer=1.14.5=h36ae1b5_0 57 | - hdf4=4.2.13=h9a582f1_1002 58 | - hdf5=1.10.5=nompi_h3c11f04_1100 59 | - icu=64.2=he1b5a44_0 60 | - idna=2.8=py37_1000 61 | - ipykernel=5.1.2=py37h5ca1d4c_0 62 | - ipyleaflet=0.11.1=py37_1 63 | - ipython=7.7.0=py37h5ca1d4c_0 64 | - ipython_genutils=0.2.0=py_1 65 | - ipywidgets=7.5.1=py_0 66 | - jedi=0.15.1=py37_0 67 | - jinja2=2.10.1=py_0 68 | - joblib=0.13.2=py_0 69 | - jpeg=9c=h14c3975_1001 70 | - json-c=0.13.1=h14c3975_1001 71 | - json5=0.8.5=py_0 72 | - jsonschema=3.0.2=py37_0 73 | - jupyter_client=5.3.1=py_0 74 | - jupyter_core=4.4.0=py_0 75 | - jupyterlab=1.0.5=py37_0 76 | - jupyterlab_server=1.0.2=py_0 77 | - kealib=1.4.10=h58c409b_1005 78 | - kiwisolver=1.1.0=py37hc9558a2_0 79 | - krb5=1.16.3=h05b26f9_1001 80 | - libarchive=3.3.3=hb44662c_1005 81 | - libblas=3.8.0=12_openblas 82 | - libcblas=3.8.0=12_openblas 83 | - libcurl=7.65.3=hda55be3_0 84 | - libdap4=3.20.2=hd48c02d_1000 85 | - libedit=3.1.20170329=hf8c457e_1001 86 | - libffi=3.2.1=he1b5a44_1006 87 | - libgcc-ng=9.1.0=hdf63c60_0 88 | - libgdal=2.4.2=hbef8c27_4 89 | - libgfortran-ng=7.3.0=hdf63c60_0 90 | - libiconv=1.15=h516909a_1005 91 | - libkml=1.3.0=h4fcabce_1010 92 | - liblapack=3.8.0=12_openblas 93 | - libnetcdf=4.6.2=h056eaf5_1002 94 | - libopenblas=0.3.7=h6e990d7_0 95 | - libpng=1.6.37=hed695b0_0 96 | - libpq=11.5=hd9ab2ff_0 97 | - libsodium=1.0.17=h516909a_0 98 | - libspatialindex=1.9.0=he1b5a44_1 99 | - libspatialite=4.3.0a=he1bb1e1_1029 100 | - libssh2=1.8.2=h22169c7_2 101 | - libstdcxx-ng=9.1.0=hdf63c60_0 102 | - libtiff=4.0.10=h57b8799_1003 103 | - libuuid=2.32.1=h14c3975_1000 104 | - libuv=1.30.1=h516909a_0 105 | - libxcb=1.13=h14c3975_1002 106 | - libxml2=2.9.9=hee79883_2 107 | - lz4-c=1.8.3=he1b5a44_1001 108 | - lzo=2.10=h14c3975_1000 109 | - markupsafe=1.1.1=py37h14c3975_0 110 | - matplotlib=3.1.1=py37_1 111 | - matplotlib-base=3.1.1=py37he7580a8_1 112 | - mistune=0.8.4=py37h14c3975_1000 113 | - munch=2.3.2=py_0 114 | - nbconvert=5.6.0=py_0 115 | - nbformat=4.4.0=py_1 116 | - ncurses=6.1=hf484d3e_1002 117 | - networkx=2.3=py_0 118 | - nodejs=12.8.0=h10a4023_1 119 | - notebook=6.0.0=py37_0 120 | - numexpr=2.7.0=py37hb3f55d8_0 121 | - numpy=1.17.0=py37h95a1406_0 122 | - olefile=0.46=py_0 123 | - openjpeg=2.3.1=h58a6597_0 124 | - openssl=1.1.1c=h516909a_0 125 | - osmnx=0.10=py_1 126 | - owslib=0.18.0=py_0 127 | - packaging=19.0=py_0 128 | - palettable=3.2.0=py_0 129 | - pandas=0.25.0=py37hb3f55d8_0 130 | - pandoc=2.7.3=0 131 | - pandocfilters=1.4.2=py_1 132 | - parso=0.5.1=py_0 133 | - patsy=0.5.1=py_0 134 | - pcre=8.41=hf484d3e_1003 135 | - pexpect=4.7.0=py37_0 136 | - pickleshare=0.7.5=py37_1000 137 | - pillow=6.1.0=py37h6b7be26_1 138 | - pip=19.2.2=py37_0 139 | - pixman=0.38.0=h516909a_1003 140 | - poppler=0.67.0=ha967d66_7 141 | - poppler-data=0.4.9=1 142 | - postgresql=11.5=hc63931a_0 143 | - proj4=6.1.0=he751ad9_2 144 | - prometheus_client=0.7.1=py_0 145 | - prompt_toolkit=2.0.9=py_0 146 | - psycopg2=2.8.3=py37h72c5cf5_0 147 | - pthread-stubs=0.4=h14c3975_1001 148 | - ptyprocess=0.6.0=py_1001 149 | - pycosat=0.6.3=py37h14c3975_1001 150 | - pycparser=2.19=py37_1 151 | - pyepsg=0.4.0=py_0 152 
| - pygments=2.4.2=py_0 153 | - pykdtree=1.3.1=py37h3010b51_1002 154 | - pyopenssl=19.0.0=py37_0 155 | - pyparsing=2.4.2=py_0 156 | - pyproj=2.2.1=py37hc44880f_0 157 | - pyqt=5.9.2=py37hcca6a23_2 158 | - pyrsistent=0.15.4=py37h516909a_0 159 | - pysal=2.1.0=py_0 160 | - pyshp=2.1.0=py_0 161 | - pysocks=1.7.0=py37_0 162 | - python=3.7.3=h33d41f4_1 163 | - python-dateutil=2.8.0=py_0 164 | - python-libarchive-c=2.8=py37_1004 165 | - pytz=2019.2=py_0 166 | - pyyaml=5.1.2=py37h516909a_0 167 | - pyzmq=18.0.2=py37h1768529_2 168 | - qt=5.9.7=h0c104cb_3 169 | - readline=8.0=hf8c457e_0 170 | - requests=2.22.0=py37_1 171 | - rtree=0.8.3=py37h666c49c_1002 172 | - ruamel_yaml=0.15.71=py37h14c3975_1000 173 | - scikit-learn=0.21.3=py37hcdab131_0 174 | - scipy=1.3.1=py37h921218d_1 175 | - seaborn=0.9.0=py_1 176 | - send2trash=1.5.0=py_0 177 | - setuptools=41.0.1=py37_0 178 | - shapely=1.6.4=py37hec07ddf_1006 179 | - sip=4.19.8=py37hf484d3e_1000 180 | - six=1.12.0=py37_1000 181 | - soupsieve=1.9.2=py37_0 182 | - sqlite=3.29.0=hcee41ef_0 183 | - statsmodels=0.10.1=py37hc1659b7_0 184 | - terminado=0.8.2=py37_0 185 | - testpath=0.4.2=py_1001 186 | - tk=8.6.9=hed695b0_1002 187 | - toolz=0.10.0=py_0 188 | - tornado=6.0.3=py37h516909a_0 189 | - tqdm=4.33.0=py_0 190 | - traitlets=4.3.2=py37_1000 191 | - traittypes=0.2.1=py_1 192 | - tzcode=2019a=h516909a_1002 193 | - urllib3=1.25.3=py37_0 194 | - vincent=0.4.4=py_1 195 | - wcwidth=0.1.7=py_1 196 | - webencodings=0.5.1=py_1 197 | - wheel=0.33.4=py37_0 198 | - widgetsnbextension=3.5.1=py37_0 199 | - wrapt=1.11.2=py37h516909a_0 200 | - xarray=0.12.3=py_0 201 | - xerces-c=3.2.2=h8412b87_1004 202 | - xlrd=1.2.0=py_0 203 | - xlwt=1.3.0=py_1 204 | - xorg-kbproto=1.0.7=h14c3975_1002 205 | - xorg-libice=1.0.10=h516909a_0 206 | - xorg-libsm=1.2.3=h84519dc_1000 207 | - xorg-libx11=1.6.8=h516909a_0 208 | - xorg-libxau=1.0.9=h14c3975_0 209 | - xorg-libxdmcp=1.1.3=h516909a_0 210 | - xorg-libxext=1.3.4=h516909a_0 211 | - xorg-libxrender=0.9.10=h516909a_1002 212 | - xorg-renderproto=0.11.1=h14c3975_1002 213 | - xorg-xextproto=7.3.0=h14c3975_1002 214 | - xorg-xproto=7.0.31=h14c3975_1007 215 | - xz=5.2.4=h14c3975_1001 216 | - yaml=0.1.7=h14c3975_1001 217 | - zeromq=4.3.2=he1b5a44_2 218 | - zlib=1.2.11=h516909a_1005 219 | - zstd=1.4.0=h3b9ef0a_0 220 | prefix: /opt/conda 221 | 222 | -------------------------------------------------------------------------------- /docker/readme.md: -------------------------------------------------------------------------------- 1 | # Docker Container 2 | 3 | You need to install [docker](https://www.docker.com/) first. If you're on Windows, you must have Microsoft Windows 10 Professional or Enterprise 64-bit. If you don't, you'll need to [upgrade](https://support.microsoft.com/en-us/help/12384/windows-10-upgrading-home-to-pro) your OS. 4 | 5 | The image is available from https://hub.docker.com/u/gboeing 6 | 7 | ## Pull image, run container 8 | 9 | ### On Windows: 10 | 11 | Open a command prompt, change directory to location of notebook file, and run: 12 | 13 | ``` 14 | docker run --rm -it --name asa -p 8888:8888 -v %cd%:/home/jovyan/work gboeing/asa 15 | ``` 16 | 17 | This runs the container ephemerally, interactively, with friendly name "asa", exposing container's port 8888 on host machine's port 8888, mounting the current directory on the host machine as the container's working directory, and launching jupyter lab. 
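Note that `%cd%` expands only in the classic Windows command prompt. If you use PowerShell instead, `${PWD}` is the usual substitute (an assumption about your shell configuration; adjust as needed):

```
docker run --rm -it --name asa -p 8888:8888 -v ${PWD}:/home/jovyan/work gboeing/asa
```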
18 | 19 | ### On Mac/Linux: 20 | 21 | ``` 22 | docker run --rm -it --name asa -p 8888:8888 -v "$PWD":/home/jovyan/work gboeing/asa 23 | ``` 24 | 25 | ### To access jupyter running in this container 26 | 27 | Open web browser and visit [http://localhost:8888](http://localhost:8888) 28 | 29 | ### To access bash in this container: 30 | 31 | ``` 32 | docker run --rm -it -u 0 --name asa -v %cd%:/home/jovyan/work gboeing/asa /bin/bash 33 | ``` 34 | 35 | Replace `%cd%` with `"$PWD"` if you're on Mac/Linux. 36 | 37 | ## Pull image down from registry without running 38 | 39 | ``` 40 | docker pull gboeing/asa 41 | ``` 42 | -------------------------------------------------------------------------------- /docker/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 2 | bokeh 3 | bottleneck 4 | cartopy 5 | cython 6 | folium 7 | geopandas 8 | ipyleaflet 9 | ipywidgets 10 | jupyterlab 11 | networkx 12 | nodejs 13 | numexpr 14 | numpy 15 | osmnx 16 | pillow 17 | pip 18 | psycopg2 19 | pysal 20 | python == 3.* 21 | scikit-learn 22 | scipy 23 | seaborn 24 | statsmodels 25 | xlrd 26 | xlwt 27 | -------------------------------------------------------------------------------- /docker/windows-env/create-windows-environment.bat: -------------------------------------------------------------------------------- 1 | CALL conda update --strict-channel-priority conda -n base --yes 2 | CALL conda config --prepend channels conda-forge 3 | CALL conda deactivate 4 | CALL conda env remove -n asa --yes 5 | CALL conda create -n asa -c conda-forge --file "../requirements.txt" --yes 6 | CALL conda activate asa 7 | CALL python -m ipykernel install --user --name asa --display-name "Python (asa)" 8 | CALL jupyter labextension install @jupyter-widgets/jupyterlab-manager 9 | CALL jupyter labextension install jupyter-leaflet 10 | CALL conda clean --all --yes 11 | CALL conda env export > environment-windows.yml 12 | CALL conda list 13 | CALL jupyter kernelspec list 14 | CALL python -c "import osmnx; print(osmnx.__version__)" 15 | -------------------------------------------------------------------------------- /docker/windows-env/environment-windows.yml: -------------------------------------------------------------------------------- 1 | name: asa 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - altair=2.4.1=py37_0 7 | - asn1crypto=0.24.0=py37_1003 8 | - attrs=19.1.0=py_0 9 | - backcall=0.1.0=py_0 10 | - beautifulsoup4=4.7.1=py37_1001 11 | - blas=1.0=mkl 12 | - bleach=3.1.0=py_0 13 | - bokeh=1.1.0=py37_0 14 | - boost-cpp=1.68.0=h6a4c333_1000 15 | - bottleneck=1.2.1=py37h452e1ab_1001 16 | - branca=0.3.1=py_0 17 | - bzip2=1.0.6=hfa6e2cd_1002 18 | - ca-certificates=2019.3.9=hecc5488_0 19 | - cartopy=0.17.0=py37h814cdbc_1004 20 | - certifi=2019.3.9=py37_0 21 | - cffi=1.12.3=py37hb32ad35_0 22 | - chardet=3.0.4=py37_1003 23 | - click=7.0=py_0 24 | - click-plugins=1.1.1=py_0 25 | - cligj=0.5.0=py_0 26 | - colorama=0.4.1=py_0 27 | - cryptography=2.6.1=py37hb32ad35_0 28 | - curl=7.64.1=h4496350_0 29 | - cycler=0.10.0=py_1 30 | - cython=0.29.7=py37h6538335_0 31 | - decorator=4.4.0=py_0 32 | - defusedxml=0.5.0=py_1 33 | - descartes=1.1.0=py_3 34 | - entrypoints=0.3=py37_1000 35 | - expat=2.2.5=he025d50_1002 36 | - fiona=1.8.6=py37hce0be61_3 37 | - folium=0.8.3=py_0 38 | - freetype=2.10.0=h5db478b_0 39 | - freexl=1.0.5=hd288d7e_1002 40 | - gdal=2.4.1=py37hdf5ee75_0 41 | - geographiclib=1.49=py_0 42 | - geopandas=0.4.1=py_1 43 | - geopy=1.19.0=py_0 44 | - 
geos=3.7.1=he025d50_1000 45 | - geotiff=1.4.3=h8408f58_1000 46 | - gettext=0.19.8.1=hb01d8f6_1002 47 | - glib=2.58.3=hc0c2ac7_1001 48 | - hdf4=4.2.13=hf8e6fe8_1002 49 | - hdf5=1.10.4=nompi_hcc15c50_1106 50 | - icc_rt=2019.0.0=h0cc432a_1 51 | - icu=58.2=ha66f8fd_1 52 | - idna=2.8=py37_1000 53 | - intel-openmp=2019.3=203 54 | - ipykernel=5.1.0=py37h39e3cac_1002 55 | - ipyleaflet=0.10.1=py37_0 56 | - ipython=7.4.0=py37h39e3cac_0 57 | - ipython_genutils=0.2.0=py_1 58 | - ipywidgets=7.4.2=py_0 59 | - jedi=0.13.3=py37_0 60 | - jinja2=2.10.1=py_0 61 | - jpeg=9c=hfa6e2cd_1001 62 | - jsonschema=3.0.1=py37_0 63 | - jupyter_client=5.2.4=py_3 64 | - jupyter_core=4.4.0=py_0 65 | - jupyterlab=0.35.4=py37_0 66 | - jupyterlab_server=0.2.0=py_0 67 | - kealib=1.4.10=heacb130_1003 68 | - kiwisolver=1.0.1=py37he980bc4_1002 69 | - krb5=1.16.3=hdd46e55_1001 70 | - libblas=3.8.0=6_mkl 71 | - libcblas=3.8.0=6_mkl 72 | - libcurl=7.64.1=h4496350_0 73 | - libffi=3.2.1=h6538335_1006 74 | - libgdal=2.4.1=he26aab1_0 75 | - libiconv=1.15=hfa6e2cd_1005 76 | - libkml=1.3.0=h4fd0f3b_1009 77 | - liblapack=3.8.0=6_mkl 78 | - libnetcdf=4.6.2=h396784b_1001 79 | - libpng=1.6.37=h7602738_0 80 | - libpq=11.2=hb0bdaea_1 81 | - libsodium=1.0.16=h2fa13f4_1001 82 | - libspatialindex=1.8.5=he025d50_4 83 | - libspatialite=4.3.0a=h6a0152f_1026 84 | - libssh2=1.8.2=h642c060_2 85 | - libtiff=4.0.10=h36446d0_1001 86 | - libxml2=2.9.9=h9ce36c8_0 87 | - libxslt=1.1.32=heafd4d3_1002 88 | - lxml=4.3.3=py37heafd4d3_0 89 | - m2w64-expat=2.1.1=2 90 | - m2w64-gcc-libgfortran=5.3.0=6 91 | - m2w64-gcc-libs=5.3.0=7 92 | - m2w64-gcc-libs-core=5.3.0=7 93 | - m2w64-gettext=0.19.7=2 94 | - m2w64-gmp=6.1.0=2 95 | - m2w64-libiconv=1.14=6 96 | - m2w64-libwinpthread-git=5.0.0.4634.697f757=2 97 | - m2w64-xz=5.2.2=2 98 | - mapclassify=2.0.1=py_0 99 | - markupsafe=1.1.1=py37hfa6e2cd_0 100 | - matplotlib=3.0.3=py37_1 101 | - matplotlib-base=3.0.3=py37h3e3dc42_1 102 | - mistune=0.8.4=py37hfa6e2cd_1000 103 | - mkl=2019.3=203 104 | - msys2-conda-epoch=20160418=1 105 | - munch=2.3.2=py_0 106 | - nbconvert=5.4.1=py_2 107 | - nbformat=4.4.0=py_1 108 | - networkx=2.3=py_0 109 | - nodejs=11.11.0=0 110 | - notebook=5.7.8=py37_0 111 | - numexpr=2.6.9=py37h6538335_1000 112 | - numpy=1.16.3=py37h873a0b8_0 113 | - olefile=0.46=py_0 114 | - openjpeg=2.3.1=ha922770_0 115 | - openssl=1.1.1b=hfa6e2cd_2 116 | - osmnx=0.9=py_1 117 | - owslib=0.17.1=py_0 118 | - packaging=19.0=py_0 119 | - palettable=3.1.1=py_0 120 | - pandas=0.24.2=py37h6538335_0 121 | - pandoc=2.7.2=0 122 | - pandocfilters=1.4.2=py_1 123 | - parso=0.4.0=py_0 124 | - patsy=0.5.1=py_0 125 | - pcre=8.41=h6538335_1003 126 | - pickleshare=0.7.5=py37_1000 127 | - pillow=6.0.0=py37h9a613e6_0 128 | - pip=19.0.3=py37_0 129 | - poppler=0.67.0=heddaa77_6 130 | - poppler-data=0.4.9=1 131 | - postgresql=11.2=h06f7779_1 132 | - proj4=5.2.0=ha925a31_1 133 | - prometheus_client=0.6.0=py_0 134 | - prompt_toolkit=2.0.9=py_0 135 | - psycopg2=2.8.2=py37hb32ad35_0 136 | - pycparser=2.19=py37_1 137 | - pyepsg=0.4.0=py_0 138 | - pygments=2.3.1=py_0 139 | - pykdtree=1.3.1=py37h452e1ab_1002 140 | - pyopenssl=19.0.0=py37_0 141 | - pyparsing=2.4.0=py_0 142 | - pyproj=1.9.6=py37h1fcc0e4_1000 143 | - pyqt=5.9.2=py37h6538335_2 144 | - pyrsistent=0.14.11=py37hfa6e2cd_0 145 | - pysal=2.0.0=py_0 146 | - pyshp=2.1.0=py_0 147 | - pysocks=1.6.8=py37_1002 148 | - python=3.7.3=hb12ca83_0 149 | - python-dateutil=2.8.0=py_0 150 | - pytz=2019.1=py_0 151 | - pywinpty=0.5.5=py37_1000 152 | - pyyaml=5.1=py37hfa6e2cd_0 153 | - pyzmq=18.0.1=py37he7828b0_1 
154 | - qt=5.9.7=vc14h73c81de_0 155 | - requests=2.21.0=py37_1000 156 | - rtree=0.8.3=py37h21ff451_1002 157 | - scikit-learn=0.20.3=py37h3d241f0_1 158 | - scipy=1.2.1=py37h29ff71c_0 159 | - seaborn=0.9.0=py_0 160 | - send2trash=1.5.0=py_0 161 | - setuptools=41.0.1=py37_0 162 | - shapely=1.6.4=py37h8921fb9_1003 163 | - sip=4.19.8=py37h6538335_1000 164 | - six=1.12.0=py37_1000 165 | - soupsieve=1.9.1=py37_0 166 | - sqlite=3.26.0=hfa6e2cd_1001 167 | - statsmodels=0.9.0=py37hfa6e2cd_1000 168 | - terminado=0.8.2=py37_0 169 | - testpath=0.4.2=py_1001 170 | - tk=8.6.9=hfa6e2cd_1001 171 | - toolz=0.9.0=py_1 172 | - tornado=6.0.2=py37hfa6e2cd_0 173 | - traitlets=4.3.2=py37_1000 174 | - traittypes=0.2.1=py_1 175 | - urllib3=1.24.2=py37_0 176 | - vc=14.1=h0510ff6_4 177 | - vincent=0.4.4=py_1 178 | - vs2015_runtime=14.15.26706=h3a45250_0 179 | - wcwidth=0.1.7=py_1 180 | - webencodings=0.5.1=py_1 181 | - wheel=0.33.1=py37_0 182 | - widgetsnbextension=3.4.2=py37_1000 183 | - win_inet_pton=1.1.0=py37_0 184 | - wincertstore=0.2=py37_1002 185 | - winpty=0.4.3=4 186 | - xarray=0.12.1=py_0 187 | - xerces-c=3.2.2=h6538335_1001 188 | - xlrd=1.2.0=py_0 189 | - xlwt=1.3.0=py_1 190 | - xz=5.2.4=h2fa13f4_1001 191 | - yaml=0.1.7=hfa6e2cd_1001 192 | - zeromq=4.3.1=he025d50_1000 193 | - zlib=1.2.11=h2fa13f4_1004 194 | prefix: C:\Anaconda\envs\asa 195 | 196 | -------------------------------------------------------------------------------- /modules/module00 - welcome/readme.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 00: Getting Started 3 | 4 | ## Overview 5 | 6 | In this module we will go over the syllabus and introduce Sublime, Docker, Git, and Python. 7 | 8 | ## Reading Materials 9 | 10 | Please set aside time early this week to complete these readings *before class*. It'll be a lot to process initially but it's essential to have familiarity with these tools and concepts as we begin the semester. 11 | 12 | 1. read the 'basics' and 'folders' sections of this short [DOS prompt guide](http://dosprompt.info/) (*optional*: Mac users, read this short [terminal guide](http://guides.macrumors.com/Terminal)) 13 | 1. read this short [markdown tutorial](https://commonmark.org/help/) 14 | 1. read [getting started with conda](https://conda.io/docs/user-guide/getting-started.html), all about [conda-forge](https://conda-forge.org/), and the [JupyterLab](https://jupyterlab.readthedocs.io) 'getting started' and 'user guide' sections. (budget about 75 minutes) 15 | 1. read the short [Git handbook](https://guides.github.com/introduction/git-handbook/) 16 | 1. read [getting started with docker](https://docs.docker.com/get-started/) 17 | 18 | If you want to install the course software on your own computer (see below for caveats and instructions), do so before the readings so you can complete the tutorials within the readings. If you are not installing the software on your own computer (i.e., if you'll be using a lab computer for this course instead), then just read through the tutorials within the readings without completing the tutorial steps. 19 | 20 | ## Before Class This Week 21 | 22 | Read the reading materials above. 23 | 24 | Lab computers are available to students in this course. They are already set up with the software we will need. This is the easiest way to get going if you're a beginner. 
*Please note: the instructor cannot troubleshoot installation/execution issues on your own personal computer.* 25 | 26 | However, if (and only if) you want to use your own computer in this course, you must complete the following steps in this section *before* class begins and verify that everything is fully working properly. 27 | 28 | ### 1. verify your hardware 29 | 30 | Feel free to use your own computer in this course. But be aware that if you choose to do so, it must meet minimal hardware specifications. These are always evolving, but expect to need at least 8 GB RAM, Intel i5 or better processor, and 50+ GB of free hard disk space. If your computer does not meet the minimal requirements, you must use a lab computer. 31 | 32 | ### 2. install sublime text 33 | 34 | First we need a full-featured text editor. Download and install [Sublime Text](https://www.sublimetext.com/). We will install its Package Control and MarkdownLivePreview plugins together in class. 35 | 36 | ### 3. install docker 37 | 38 | If you're on Windows, you must have Microsoft Windows 10 Professional or Enterprise 64-bit. If you don't, you'll need to [upgrade](https://support.microsoft.com/en-us/help/12384/windows-10-upgrading-home-to-pro) your OS. 39 | 40 | 1. Download and install the version of [Docker Desktop](https://www.docker.com/products/docker-desktop) for your operating system. 41 | 2. When it's finished, restart your computer. 42 | 3. Open a terminal window and run `docker login` to sign in to docker hub 43 | 4. Run `docker run hello-world` to check that everything works 44 | 45 | ### 4. install git 46 | 47 | Download and install [Git](https://git-scm.com/downloads). 48 | 49 | ### 5. install miniconda 50 | 51 | We will install Python via the Miniconda distribution - a lightweight version of Anaconda that lets us install all of our environment's packages ourselves. 52 | 53 | - Download and install the Python 3.7 64-bit [Miniconda](https://conda.io/miniconda.html) installer. 54 | - Install it into `C:\Anaconda` (rather than the default), and when prompted, make it the system default python and add it to the PATH 55 | - Open a terminal window and run `conda config --add channels conda-forge --force` 56 | - Update default packages `conda update --all` 57 | - Test your installation by running `conda info --all` 58 | 59 | Create a virtual environment for the packages we'll use in this course: 60 | 61 | - In the terminal, run `conda create -n ppua7237 -c conda-forge --yes python=3 beautifulsoup4 bokeh bottleneck cartopy cython folium geopandas ipyleaflet ipywidgets jupyterlab networkx nodejs notebook numexpr numpy osmnx pillow scikit-learn scipy seaborn statsmodels xlrd xlwt` 62 | - Then run `activate ppua7237` if you're on Windows or `source activate ppua7237` if you're on Mac. 63 | - Install this extension `jupyter labextension install @jupyter-widgets/jupyterlab-manager` 64 | - Then install this extension `jupyter labextension install jupyter-leaflet` 65 | - Then run `python` to launch the python interpreter. 66 | - In the python interpreter, run `import osmnx` and verify it runs without error. 67 | - Create any OS environment variables as needed (this can change from time to time and could require some platform-specific research on your part). 68 | 69 | ## In Class This Week 70 | 71 | We'll have a short class session this week. First we'll go over the syllabus. Then we'll discuss the course's approach to coding and spatial data analysis and introduce some of the course software. 
In the remaining time I can answer any questions or concerns and then we'll do some in-class exercises to give you some initial familiarity with the course software, including https://learngitbranching.js.org/ 72 | -------------------------------------------------------------------------------- /modules/module01 - intro to python 1/readme.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 01: Intro to Python 3 | 4 | ## Overview 5 | 6 | In this module we will introduce the basics of Python, a powerful programming language for data analysis, visualization, and software development. It's a great starter language because its syntax is simple and straightforward. We will use Miniconda to install Python onto our computers, and we will work with Python via the Jupyter notebook. Jupyter notebooks let you use Python in an interactive coding environment that's easily packaged up and shared with others. 7 | 8 | ## Reading Materials 9 | 10 | Please complete these readings *before class begins*. 11 | 12 | 1. Make sure you have finished reading last week's [getting started with conda](https://conda.io/docs/user-guide/getting-started.html), all about [conda-forge](https://conda-forge.org/), and the [JupyterLab](https://jupyterlab.readthedocs.io) 'getting started' and 'user guide' sections. 13 | 1. McKinney chapters 1-3 (on blackboard > course material > textbook). 14 | 1. Optional: there's a lot to absorb in the first few weeks for newcomers to Python. If you need some clarification or extra practice, read through the relevant sections of Downey's Think Python. It's clear, straightforward, and tailored to absolute beginners. 15 | 16 | ## Getting Started 17 | 18 | 1. Using git, clone (or pull if you already have it) this repo. 19 | 1. Open a command prompt and change directory to this directory. 20 | 1. Activate the virtual environment. 21 | 1. Run the command: `jupyter lab` 22 | 1. A browser window will open. Click the notebook to launch it. 23 | -------------------------------------------------------------------------------- /modules/module01 - intro to python 1/simple_script.py: -------------------------------------------------------------------------------- 1 | print(1 + 2) 2 | print('hello world') -------------------------------------------------------------------------------- /modules/module02 - intro to python 2/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 02: Python loops, conditionals, and functions 3 | 4 | ### Overview: 5 | 6 | Today we'll introduce the basics of Python control: using loops, conditionals, and functions to control the logic and flow of your code's execution. 7 | 8 | ### Readings: 9 | 10 | Prior to the class session, please read: 11 | 12 | - Chapters 3-7 [Think Python](http://www.greenteapress.com/thinkpython/) (and optionally any preceding chapters in this book, as reinforcement) 13 | - http://lorenabarba.com/blog/why-i-push-for-python/ 14 | - https://www.codefellows.org/blog/5-reasons-why-python-is-powerful-enough-for-google 15 | 16 | ### Class Session: 17 | 18 | Run through the Jupyter notebook. 19 | 20 | ### Post-Lecture Workshop Exercise: 21 | 22 | 1. You will create your first complete Jupyter notebook in order to work with loops, conditionals, iteration, and functions. Review *Think Python* section 4.8 to develop a plan and section 6.2 on incremental development. 
Also review the code in the notebook from this class session. 23 | 2. You will write four functions to work with prime numbers. A prime number is any whole number greater than 1 that has no positive divisors besides 1 and itself. So, a prime number a) must be an integer, b) must be greater than 1, and c) must be divisible only by 1 and itself. 24 | 1. Write a function `is_prime(n)` that accepts an argument n and returns boolean True if n is a prime number and boolean False if n is not prime. For example, `is_prime(11)` should return True and `is_prime(12)` should return False. 25 | 2. Write a function `print_primes(n)` that accepts an argument n and prints all the prime numbers between 0 and n (inclusive). This function should call the `is_prime` function you developed earlier. For example, `print_primes(13)` should print out: 2 3 5 7 11 13 26 | 3. Write a function `count_primes(n)` that accepts an argument n and returns an integer count of how many prime numbers there are between 0 and n (inclusive). This function should also call the `is_prime` function you developed earlier. For example, `count_primes(19)` should return 8, because there are eight prime numbers between 0 and 19. 27 | 4. Write a function `first_primes(n)` that accepts an argument n and returns a Python list containing the first n prime numbers. (Hint: use a while loop and append each prime you find to a list.) This function should also call the `is_prime` function you developed earlier. For example, `first_primes(5)` should return the first five prime numbers as a list: `[2, 3, 5, 7, 11]` 28 | 3. Create a new, clean notebook to contain your work. The first cell in your notebook should be a markdown cell containing a plain-English narrative of the a) logic you will need to code and b) how your development process corresponds to *Think Python* sections 4.8 and 6.2. Use this narrative to think through how you will tackle each function. Each of the four functions should be in its own notebook cell. At the end of each of these cells, call the function by passing in a test value to demonstrate that it is working properly. 29 | 30 | Make sure your entire notebook runs properly and without any errors. Click Kernel > Restart > Clear all outputs and restart. This clears your memory and re-initializes your notebook. Then click Cell > Run all and make sure everything behaves properly. 31 | 32 | Give your notebook a meaningful filename like `your-lastname-assignment0.ipynb`. Submit your notebook file as (ungraded/practice) Assignment 0 on Blackboard by Sunday night. Before submitting, confirm the following to the best of your abilities: does your code fully run? Does it do what it's supposed to do the way it's supposed to do it? Is it well-commented and documented? Is your code clear, straightforward, and reasonably efficient? -------------------------------------------------------------------------------- /modules/module02 - intro to python 2/module02.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Module 02: Loops, Conditionals, Functions\n", 8 | "\n", 9 | "## 1. 
Review\n", 10 | "\n", 11 | "First, a quick review of the previous module's datatypes" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# integers \n", 21 | "x = 10\n", 22 | "type(x)" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# floating points\n", 32 | "x = 10.0\n", 33 | "type(x)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# strings\n", 43 | "x = 'Hello'\n", 44 | "type(x)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# tuples\n", 54 | "x = (1, 2, 3)\n", 55 | "type(x)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "# lists\n", 65 | "x = [4, 5, 6]\n", 66 | "type(x)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# dictionaries\n", 76 | "x = {'name' : 'Bob', 'id' : 1234}\n", 77 | "type(x)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "#### More string methods" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": true 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "# create a new string to work with\n", 96 | "sentence = 'This is PPUA7237!'" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "sentence" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "# what is the length of the string?\n", 115 | "len(sentence)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "# extract a substring\n", 125 | "sentence[3:9]" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "# use the strip method to remove characters from the beginning and end of a string\n", 135 | "sentence.strip('!')" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "# our string hasn't changed, because we didn't assign a new value to it\n", 145 | "sentence" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": true 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "# create a new string to contain the stripped version of our string\n", 157 | "new_sentence = sentence.strip('!')" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "# the variable new_sentence captured the result of the strip() method\n", 167 | "new_sentence" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "# you can create a string variable and pass it into the strip method as an argument\n", 177 | "to_strip = '7237!'\n", 178 | 
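    "# note: strip() treats its argument as a set of characters to remove from both ends, not an exact substring\n",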
"sentence.strip(to_strip)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "sentence.replace('is', 'XX')" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "# use split() to break a string into tokens\n", 197 | "sentence.split()" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "# default argument is ' ', but you can pass other substrings to split on\n", 207 | "sentence.split('i')" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "# use the string join method to turn a list into a string\n", 217 | "'-'.join(sentence.split())" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "# use the find method to return the index of the first instance of some substring within another string\n", 227 | "sentence.find('P')" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "to_find = 'is'\n", 237 | "sentence.find(to_find)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "sentence.rfind(to_find)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "import string\n", 256 | "string.ascii_lowercase" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "## 2. Loops\n", 264 | "\n", 265 | "Loops let us iterate over a container of elements, handling each element in sequence, one at a time." 
266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "# loop through a list\n", 275 | "letters = ['a', 'b', 'c', 'd'] #a list\n", 276 | "for letter in letters: #a for loop\n", 277 | " print(letter) #a set of actions" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "# loop through list of words in string\n", 287 | "for word in sentence.split():\n", 288 | " print(word)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "for word in sentence.split():\n", 298 | " print('s' in word)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "for count, letter in enumerate(letters):\n", 308 | " print(count, letter)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "alphabet = {}\n", 318 | "for count, letter in enumerate(string.ascii_lowercase):\n", 319 | " alphabet[letter] = count\n", 320 | "alphabet" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "# range produces a range of values\n", 330 | "range(9)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": {}, 337 | "outputs": [], 338 | "source": [ 339 | "# convert it to list to explicitly see what's in the range\n", 340 | "list(range(9))" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "# you can loop through a range\n", 350 | "for x in range(9):\n", 351 | " print(x, x ** 2)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [ 360 | "# because range goes up to but does not include the ending number, you can add 1 to include it\n", 361 | "n = 10\n", 362 | "list(range(n + 1))" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "# optionally use a start, end, and step\n", 372 | "list(range(10, 20, 2))" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "## 3. Booleans" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "A boolean expression is interpreted as True or False. The == operator means \"is equal?\" and it asks if what is on the left of it is equivalent to what is on its right. The >= operator is greater than or equal to, and != asserts that two objects are not the same. Don't be confused by the similarity of '=' and '==': one is an assignment of a value to a variable, and the other is a comparison operator." 
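, "\n", "\n", "For example, `x = 5` assigns the value 5 to the variable x, whereas `x == 5` asks whether x currently equals 5."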
387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "# use == to evaluate if two values are equal\n", 396 | "2 + 2 == 4" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": {}, 403 | "outputs": [], 404 | "source": [ 405 | "# remember to use double equals for \"is equal\"\n", 406 | "# everyone makes this mistake constantly when they're new\n", 407 | "2 + 2 = 4" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [ 416 | "# you can also compare variables\n", 417 | "a = 1\n", 418 | "b = 1\n", 419 | "a == b #a is equal to b" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": {}, 426 | "outputs": [], 427 | "source": [ 428 | "# > and < are greater than and less than\n", 429 | "a > 10" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": null, 435 | "metadata": {}, 436 | "outputs": [], 437 | "source": [ 438 | "# >= and <= are greater than or equal to, and less than or equal to\n", 439 | "a >= 1" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": null, 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [ 448 | "# != means does not equal\n", 449 | "a != b" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "# use and to return True if multiple conditions are satisfied\n", 459 | "c = 5\n", 460 | "c > 2 and c < 5" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": {}, 467 | "outputs": [], 468 | "source": [ 469 | "# use or to return True if either condition is satisfied\n", 470 | "c > 2 or c < 5" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": null, 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "c == 5" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": null, 485 | "metadata": {}, 486 | "outputs": [], 487 | "source": [ 488 | "# use not to negate some condition\n", 489 | "# use parentheses liberally to ensure the expected order of operations\n", 490 | "not (c == 5)" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [ 499 | "d = True\n", 500 | "not d" 501 | ] 502 | }, 503 | { 504 | "cell_type": "markdown", 505 | "metadata": {}, 506 | "source": [ 507 | "The modulo operator (%) tests whether or not we can divide two integers evenly. It gives the remainder from the division. That is, what is left over after dividing the numerator by the largest multiple of the denominator? For example, 10%5 is 0 since 5 goes into 10 twice, with nothing left over. 10%3 is 1, since 3 goes into 10 3 times, with one left over." 
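, "\n", "\n", "In general, `a % b` equals `a - (a // b) * b`, so `n % 2 == 0` is the standard test for whether a number n is even."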
508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ "# the remainder of 10 divided by 3 is 1\n", 516 | "10 % 3" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": null, 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [ "# 5 divides 10 evenly, so the remainder is 0\n", 525 | "10 % 5" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": null, 531 | "metadata": {}, 532 | "outputs": [], 533 | "source": [ 534 | "# check if x is evenly divisible by y\n", 535 | "x = 10\n", 536 | "y = 5\n", 537 | "x % y == 0" 538 | ] 539 | }, 540 | { 541 | "cell_type": "markdown", 542 | "metadata": {}, 543 | "source": [ 544 | "## 4. If-Then-Else\n", 545 | "\n", 546 | "Controlling the flow of program execution" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "# use an if statement to execute indented code only if some condition is true\n", 556 | "x = 9\n", 557 | "if x < 10:\n", 558 | " print(str(x) + ' is less than 10')" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": {}, 565 | "outputs": [], 566 | "source": [ 567 | "# you can chain conditions together with and/or\n", 568 | "# group conditions with parentheses for readability\n", 569 | "x = 3.5\n", 570 | "if (x >= 3) and (x <= 6):\n", 571 | " print('x is between 3 and 6')" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [ 580 | "# if/else statement to handle different branches of execution\n", 581 | "sentence = 'Today is Wednesday.'\n", 582 | "if 'Wed' in sentence:\n", 583 | " print('Yes')\n", 584 | "else:\n", 585 | " print('No')" 586 | ] 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": null, 591 | "metadata": {}, 592 | "outputs": [], 593 | "source": [ 594 | "# if the first if statement evaluates to false, elif (i.e., \"else if\") executes a code block if its condition is true\n", 595 | "# else executes a code block if no preceding if or elif evaluated to true\n", 596 | "x = 10\n", 597 | "if x < 10:\n", 598 | " print('x is less than 10')\n", 599 | "elif x == 10:\n", 600 | " print('x equals 10')\n", 601 | "else:\n", 602 | " print('x is greater than 10')" 603 | ] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "execution_count": null, 608 | "metadata": {}, 609 | "outputs": [], 610 | "source": [ 611 | "# now it's your turn\n", 612 | "# loop through the numbers 1 through 15, using modulo to print 'even' if each is evenly divisible by 2, and 'odd' if not\n" 613 | ] 614 | }, 615 | { 616 | "cell_type": "markdown", 617 | "metadata": {}, 618 | "source": [ 619 | "## 5. 
Functions" 620 | ] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": null, 625 | "metadata": { 626 | "collapsed": true 627 | }, 628 | "outputs": [], 629 | "source": [ 630 | "# encapsulation turns a handful of statements into a reusable function\n", 631 | "def my_function(value):\n", 632 | " if value < 10:\n", 633 | " print(value, 'is less than 10')\n", 634 | " elif value == 10:\n", 635 | " print(value, 'equals 10')\n", 636 | " else:\n", 637 | " print(value, 'is greater than 10')" 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": null, 643 | "metadata": {}, 644 | "outputs": [], 645 | "source": [ 646 | "# now call the function\n", 647 | "my_function(5)" 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [ 656 | "for x in [8, 9, 10, 11, 12]:\n", 657 | " my_function(x)" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "# your function can return results that you can use elsewhere in your code\n", 667 | "def calculate_value(x, y):\n", 668 | " value = x ** 2 + y - 7\n", 669 | " return round(value, 1)" 670 | ] 671 | }, 672 | { 673 | "cell_type": "code", 674 | "execution_count": null, 675 | "metadata": {}, 676 | "outputs": [], 677 | "source": [ 678 | "calculate_value(5, 2.7)" 679 | ] 680 | }, 681 | { 682 | "cell_type": "code", 683 | "execution_count": null, 684 | "metadata": {}, 685 | "outputs": [], 686 | "source": [ 687 | "data = [(3, 9),\n", 688 | " (7.1, 0.8),\n", 689 | " (22, 19),\n", 690 | " (-4, 98)]\n", 691 | "\n", 692 | "for x, y in data:\n", 693 | " result = calculate_value(x, y)\n", 694 | " print(result)" 695 | ] 696 | }, 697 | { 698 | "cell_type": "code", 699 | "execution_count": null, 700 | "metadata": {}, 701 | "outputs": [], 702 | "source": [ 703 | "# now it's your turn\n", 704 | "# write a function that accepts 2 arguments and uses modulo to return True if the first is evenly divisible by the second, and False if not\n" 705 | ] 706 | }, 707 | { 708 | "cell_type": "markdown", 709 | "metadata": {}, 710 | "source": [ 711 | "## 6. 
More Iteration: Miscellaneous" 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count": null, 717 | "metadata": {}, 718 | "outputs": [], 719 | "source": [ 720 | "# a while loop repeats as long as some condition is True\n", 721 | "x = 5\n", 722 | "while x > 0:\n", 723 | " print(x)\n", 724 | " x = x - 1\n", 725 | "print('blast off!')" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": null, 731 | "metadata": {}, 732 | "outputs": [], 733 | "source": [ 734 | "# add the numbers 1 through 9 to a list\n", 735 | "my_list = []\n", 736 | "x = 1\n", 737 | "while x <= 9:\n", 738 | " my_list.append(x)\n", 739 | " x = x + 1 \n", 740 | "my_list" 741 | ] 742 | }, 743 | { 744 | "cell_type": "code", 745 | "execution_count": null, 746 | "metadata": {}, 747 | "outputs": [], 748 | "source": [ 749 | "# print out only the integers in a list\n", 750 | "my_list = [3.3, 19.75, 6, 3.3, 8]\n", 751 | "for element in my_list:\n", 752 | " if isinstance(element, int):\n", 753 | " print(element)" 754 | ] 755 | }, 756 | { 757 | "cell_type": "code", 758 | "execution_count": null, 759 | "metadata": {}, 760 | "outputs": [], 761 | "source": [ 762 | "# how many times does the value 3.3 appear in my_list?\n", 763 | "def count_occurrences(my_list, value):\n", 764 | " count = 0 #initialize a counter to keep track\n", 765 | " for element in my_list:\n", 766 | " if element == value:\n", 767 | " # add one to the counter each time we find the value\n", 768 | " count = count + 1 \n", 769 | " return count\n", 770 | "\n", 771 | "count_occurrences(my_list, 3.3)" 772 | ] 773 | }, 774 | { 775 | "cell_type": "code", 776 | "execution_count": null, 777 | "metadata": {}, 778 | "outputs": [], 779 | "source": [ 780 | "# list comprehension lets you create a list based on some expression\n", 781 | "# it integrates for-loop and list-append operations\n", 782 | "new_list = [x * 2 for x in range(5)]\n", 783 | "new_list" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": null, 789 | "metadata": {}, 790 | "outputs": [], 791 | "source": [ 792 | "# you can use list comprehension to convert a list of ints to a new list of strings\n", 793 | "string_list = [str(x * 2) for x in range(2, 12, 2)]\n", 794 | "string_list" 795 | ] 796 | }, 797 | { 798 | "cell_type": "markdown", 799 | "metadata": {}, 800 | "source": [ 801 | "## Exercise\n", 802 | "\n", 803 | "See the module's [readme on GitHub](https://github.com/gboeing/asa/tree/master/modules/module02#post-lecture-workshop-exercise) for an exercise." 804 | ] 805 | } 806 | ], 807 | "metadata": { 808 | "kernelspec": { 809 | "display_name": "Python 3", 810 | "language": "python", 811 | "name": "python3" 812 | }, 813 | "language_info": { 814 | "codemirror_mode": { 815 | "name": "ipython", 816 | "version": 3 817 | }, 818 | "file_extension": ".py", 819 | "mimetype": "text/x-python", 820 | "name": "python", 821 | "nbconvert_exporter": "python", 822 | "pygments_lexer": "ipython3", 823 | "version": "3.7.1" 824 | } 825 | }, 826 | "nbformat": 4, 827 | "nbformat_minor": 2 828 | } 829 | -------------------------------------------------------------------------------- /modules/module03 - intro to pandas 1/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 03: Intro to pandas, Part I 3 | 4 | ### Overview: 5 | 6 | Today we will introduce pandas, a powerful Python data analysis package that expands Python's capabilities into 7 | and beyond the abilities of R and Stata. 
We'll learn about loading data, the basics of pandas data structures, 8 | how to subset and filter data sets, how to calculate summary statistics, how to deal with missing values, how to merge data 9 | sets, how to create new variables, and how to export data. 10 | 11 | ### Readings: 12 | 13 | Chapters 4, 5, 6 of McKinney's *Python for Data Analysis* 14 | 15 | ### Class Session: 16 | 17 | Run through the Jupyter notebook. 18 | 19 | ### After-Class Assignment: 20 | 21 | Complete assignment 1 (see the assignments folder on GitHub) by Sunday 23:59 and submit it via Blackboard. 22 | -------------------------------------------------------------------------------- /modules/module03 - intro to pandas 1/data/cities.csv: -------------------------------------------------------------------------------- 1 | city,state 2 | san francisco,california 3 | phoenix,arizona 4 | seattle,washington 5 | dallas,texas 6 | denver,colorado 7 | chicago,illinois 8 | portland,oregon 9 | miami,florida -------------------------------------------------------------------------------- /modules/module03 - intro to pandas 1/data/rain.csv: -------------------------------------------------------------------------------- 1 | month,rainfall_inches 2 | jan,5.3 3 | feb,5.4 4 | mar,4.8 5 | apr,4.7 6 | may,3.3 7 | jun,1.2 8 | jul,0.8 9 | aug,0.7 10 | sep, 11 | oct,3.9 12 | nov,4.5 13 | dec,5.9 -------------------------------------------------------------------------------- /modules/module04 - intro to pandas 2/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 04: Intro to pandas, Part II 3 | 4 | ### Overview: 5 | 6 | Today we will continue the previous module's introduction of pandas functionality. 7 | 8 | ### Readings: 9 | 10 | Chapters 7, 8, 10 of McKinney's *Python for Data Analysis* 11 | 12 | Consult this [blog post](https://engineering.upside.com/a-beginners-guide-to-optimizing-pandas-code-for-speed-c09ef2c6a4d6) on optimizing pandas speed. 13 | 14 | ### Class Session: 15 | 16 | Run through the Jupyter notebook. 17 | 18 | ### In-Class Exercise: 19 | 20 | 1. Load the tracts_pop_age.csv and the tracts_white_income.csv datasets as DataFrames (do not sample or filter them). 21 | 1. Merge them together on the geoid. 22 | 1. Map a function to create a new dummy variable with value of 1 if median income is greater than \$50,000 and 0 otherwise. 23 | 1. How many tracts have a non-hispanic white majority of the population? 24 | 1. What is the average tract-level median income in Boston? 25 | 1. Create a subset of the dataframe that only contains tracts in states with names that begin with the letter "N". 26 | 1. Create a subset of this subset that only contains tracts with median income below \$30,000. 
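If you get stuck, here is a minimal, hypothetical sketch of one way to structure this exercise. The column names it uses ('geoid', 'med_income', 'prop_nh_white', 'place_name', 'state_name') are illustrative assumptions only: check `df.columns` after loading and substitute the names that actually appear in these CSVs.

```python
import pandas as pd

# 1. load both tract-level datasets in full
pop_age = pd.read_csv('data/tracts_pop_age.csv')
white_income = pd.read_csv('data/tracts_white_income.csv')

# 2. merge them on the shared tract identifier (assumed column name)
tracts = pd.merge(pop_age, white_income, on='geoid')

# 3. map a function across the income column to create the dummy variable
tracts['high_income'] = tracts['med_income'].map(lambda income: 1 if income > 50000 else 0)

# 4. count the tracts with a non-hispanic white majority (assumed proportion column)
print((tracts['prop_nh_white'] > 0.5).sum())

# 5. average tract-level median income in Boston
print(tracts.loc[tracts['place_name'] == 'Boston', 'med_income'].mean())

# 6. subset: tracts in states whose names begin with "N"
n_states = tracts[tracts['state_name'].str.startswith('N')]

# 7. subset of that subset: tracts with median income below $30,000
low_income = n_states[n_states['med_income'] < 30000]
```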
27 | -------------------------------------------------------------------------------- /modules/module04 - intro to pandas 2/data/gps-coords-reduced.csv: -------------------------------------------------------------------------------- 1 | lat,lon,date,city,country 2 | 51.481291600000006,-0.4510112,05/14/2014 09:07,West Drayton,United Kingdom 3 | 38.781774600000006,-9.1375444,05/14/2014 15:11,Lisbon,Portugal 4 | 38.7110495,-9.1397388,05/14/2014 16:40,Lisbon,Portugal 5 | 38.715637,-9.1205578,05/14/2014 18:25,Lisbon,Portugal 6 | 38.711977000000005,-9.141787800000001,05/14/2014 19:26,Lisbon,Portugal 7 | 38.723107500000005,-9.1133736,05/15/2014 11:34,Lisbon,Portugal 8 | 38.6919442,-9.215560499999999,05/15/2014 14:50,Lisbon,Portugal 9 | 38.7084872,-9.1360221,05/15/2014 16:49,Lisbon,Portugal 10 | 38.797558200000005,-9.3409997,05/16/2014 10:58,Algueirão-Mem Martins,Portugal 11 | 38.801171200000006,-9.4251031,05/16/2014 13:43,Sintra,Portugal 12 | 38.793071000000005,-9.2858255,05/16/2014 15:13,Sintra,Portugal 13 | 38.816610700000005,-9.408499800000001,05/16/2014 16:13,Sintra,Portugal 14 | 38.7882049,-9.3880175,05/16/2014 16:28,Sintra,Portugal 15 | 38.711024200000004,-9.1373975,05/16/2014 23:24,Lisbon,Portugal 16 | 38.742017600000004,-9.1693804,05/17/2014 09:24,Lisbon,Portugal 17 | 39.1183833,-8.913737800000002,05/17/2014 11:12,Azambuja,Portugal 18 | 39.27287570000001,-8.7121869,05/17/2014 11:27,Santarém,Portugal 19 | 39.476521999999996,-8.634345300000001,05/17/2014 11:42,Zibreira,Portugal 20 | 39.6287723,-8.695136,05/17/2014 12:12,São Mamede,Portugal 21 | 39.841085799999995,-8.718608,05/17/2014 12:28,Leiria District,Portugal 22 | 40.2108693,-8.481383099999999,05/17/2014 12:58,Coimbra Municipality,Portugal 23 | 40.6091112,-8.5448192,05/17/2014 13:28,Aveiro District,Portugal 24 | 41.0621474,-8.5758203,05/17/2014 13:58,Vila Nova de Gaia,Portugal 25 | 41.23720529999999,-8.6721733,05/18/2014 19:55,Maia,Portugal 26 | 41.3039111,2.1059314,05/18/2014 22:35,El Prat de Llobregat,Spain 27 | 41.289946,2.0645898999999996,05/18/2014 23:04,Viladecans,Spain 28 | 41.3883249,2.1596929,05/19/2014 18:48,Barcelona,Spain 29 | 41.3846963,2.1867123,05/19/2014 19:18,Barcelona,Spain 30 | 41.389413299999994,2.1132551,05/20/2014 14:33,Barcelona,Spain 31 | 41.38948370000001,2.1881876,05/21/2014 14:46,Barcelona,Spain 32 | 41.3714486,2.1422407000000003,05/22/2014 11:22,Barcelona,Spain 33 | 41.3779089,2.1703523999999996,05/22/2014 15:07,Barcelona,Spain 34 | 41.39258220000001,2.1491597000000002,05/23/2014 16:13,Barcelona,Spain 35 | 41.380926200000005,2.1371982999999997,05/23/2014 16:28,Barcelona,Spain 36 | 41.3744729,2.1695484,05/24/2014 01:37,Barcelona,Spain 37 | 41.371348,2.141818,05/24/2014 13:53,Barcelona,Spain 38 | 41.390759100000004,2.187896,05/24/2014 14:23,Barcelona,Spain 39 | 41.3715467,2.1820937000000002,05/24/2014 15:07,Barcelona,Spain 40 | 41.3945166,2.1775312999999996,05/24/2014 15:52,Barcelona,Spain 41 | 41.3847765,2.1104196,05/26/2014 09:23,Barcelona,Spain 42 | 41.3743191,2.1892515,05/26/2014 13:38,Barcelona,Spain 43 | 41.387974,2.1114027,05/27/2014 10:10,Barcelona,Spain 44 | 41.370491799999996,2.1491767000000004,05/28/2014 15:10,Barcelona,Spain 45 | 41.3983691,2.1684677000000003,05/29/2014 10:55,Barcelona,Spain 46 | 41.3693464,2.1524444,05/29/2014 18:54,Barcelona,Spain 47 | 50.4893042,19.0983815,05/30/2014 16:10,Zendek,Poland 48 | 50.4487039,19.059497600000004,05/30/2014 16:55,Silesian Voivodeship,Poland 49 | 50.3284979,19.2424844,05/30/2014 17:10,Dabrowa Gornicza,Poland 50 | 50.3063515,19.475674800000004,05/30/2014 
17:25,Olkusz County,Poland 51 | 50.0689532,19.9463832,05/30/2014 18:25,Krakow,Poland 52 | 50.027998600000004,19.2016098,05/31/2014 08:24,Oswiecim,Poland 53 | 50.0514717,19.213068,05/31/2014 12:55,"Babice, Oświęcim County",Poland 54 | 50.072535200000004,19.4339893,05/31/2014 13:10,"Wygiełzów, Lesser Poland Voivodeship",Poland 55 | 50.0340482,19.6125103,05/31/2014 13:25,Chrzanów County,Poland 56 | 50.0549321,19.9504634,05/31/2014 18:40,Krakow,Poland 57 | 50.06345579999999,19.9335342,05/31/2014 22:55,Krakow,Poland 58 | 49.9848481,20.0524206,06/01/2014 10:25,Wieliczka,Poland 59 | 50.089150399999994,19.9089234,06/02/2014 05:10,Krakow,Poland 60 | 50.0718414,19.801448399999998,06/02/2014 05:25,"Balice, Lesser Poland Voivodeship",Poland 61 | 50.145956899999995,19.4649668,06/02/2014 05:54,Trzebinia,Poland 62 | 50.1543968,19.244136100000002,06/02/2014 06:10,Imielin,Poland 63 | 50.254720899999995,18.8870225,06/02/2014 06:25,Ruda Slaska,Poland 64 | 50.2736325,18.7294285,06/02/2014 06:39,Gliwice,Poland 65 | 50.4622715,19.0815141,06/02/2014 07:10,Tarnowskie Góry County,Poland 66 | 50.373689500000005,18.8892047,06/02/2014 07:25,Bytom,Poland 67 | 49.9005024,18.3465247,06/02/2014 08:10,Novy Bohumin,Czech Republic 68 | 49.559495,17.7181638,06/02/2014 09:25,Hranice,Czech Republic 69 | 49.450555,17.4190687,06/02/2014 09:40,Prerov,Czech Republic 70 | 49.966878799999996,16.4022121,06/02/2014 10:55,Ústí nad Orlicí,Czech Republic 71 | 50.0418467,15.3950116,06/02/2014 11:40,Kojice,Czech Republic 72 | 50.1212466,14.995289999999999,06/02/2014 11:55,Sadská,Czech Republic 73 | 50.074833500000004,14.444941,06/02/2014 13:10,Prague,Czech Republic 74 | 50.0929545,14.3923636,06/02/2014 15:39,Prague,Czech Republic 75 | 50.074339200000004,14.443799499999999,06/04/2014 09:42,Prague,Czech Republic 76 | 50.1015078,14.4977957,06/04/2014 10:57,Prague,Czech Republic 77 | 50.076828799999994,14.710014600000001,06/04/2014 11:12,Úvaly,Czech Republic 78 | 50.10882539999999,14.9214424,06/04/2014 11:27,Poříčany,Czech Republic 79 | 50.0257615,15.214198499999998,06/04/2014 11:42,Kolin,Czech Republic 80 | 49.9617593,15.298554000000001,06/04/2014 11:57,Kutna Hora,Czech Republic 81 | 49.9675512,15.2771381,06/04/2014 12:12,Kutna Hora,Czech Republic 82 | 49.9597606,15.2904163,06/04/2014 12:56,Kutna Hora,Czech Republic 83 | 49.9373516,15.2659212,06/04/2014 15:42,Kutna Hora,Czech Republic 84 | 49.9705236,15.3103109,06/04/2014 18:41,Nové Dvory (Kutná Hora District),Czech Republic 85 | 50.0280826,15.2108136,06/04/2014 19:12,Kolin,Czech Republic 86 | 50.071361100000004,14.8375136,06/04/2014 19:42,Český Brod,Czech Republic 87 | 50.07479910000001,14.4449386,06/05/2014 09:55,Prague,Czech Republic 88 | 50.094667,14.408625500000001,06/05/2014 14:37,Prague,Czech Republic 89 | 50.0639021,14.4419381,06/05/2014 19:08,Prague,Czech Republic 90 | 50.0808333,14.450710399999998,06/05/2014 23:22,Prague,Czech Republic 91 | 50.0861426,14.404133600000002,06/06/2014 11:07,Prague,Czech Republic 92 | 50.0524104,14.2921832,06/06/2014 13:07,Prague,Czech Republic 93 | 49.7863143,13.718423399999999,06/06/2014 13:36,Mýto,Czech Republic 94 | 49.698620299999995,13.423219699999999,06/06/2014 13:51,Plzen,Czech Republic 95 | 49.71359644,13.14887376,06/06/2014 14:07,Blatnice,Czech Republic 96 | 49.66003641,12.55782719,06/06/2014 14:25,Rozvadov,Czech Republic 97 | 49.3986061,11.8477583,06/06/2014 14:52,Kümmersbruck,Germany 98 | 49.4080691,11.2899446,06/06/2014 15:06,Winkelhaid,Germany 99 | 49.309917799999994,10.989308900000001,06/06/2014 15:21,Kammerstein,Germany 100 | 
49.16301872,9.37027094,06/06/2014 16:30,Ellhofen,Germany 101 | 49.157872999999995,9.3457177,06/06/2014 16:37,Obersulm,Germany 102 | 48.8263821,9.170884800000001,06/06/2014 17:06,Stuttgart,Germany 103 | 48.783639799999996,9.180591699999999,06/06/2014 17:21,Stuttgart,Germany 104 | 48.6936721,9.4128045,06/06/2014 18:21,Wernau,Germany 105 | 48.5563514,9.273472300000002,06/06/2014 18:37,Riederich,Germany 106 | 48.5093534,9.0359388,06/08/2014 14:05,Tübingen,Germany 107 | 48.5285424,9.063768199999998,06/09/2014 14:58,Tübingen,Germany 108 | 48.5744624,9.066455300000001,06/09/2014 15:57,Tübingen,Germany 109 | 48.5095063,9.0773955,06/09/2014 16:43,Tübingen,Germany 110 | 48.5239272,9.0567124,06/09/2014 21:57,Tübingen,Germany 111 | 48.472658700000004,8.935116699999998,06/10/2014 11:18,Rottenburg,Germany 112 | 48.442241700000004,8.6921915,06/10/2014 11:48,Horb am Neckar,Germany 113 | 48.4800011,8.7286457,06/10/2014 12:03,Eutingen im Gäu,Germany 114 | 48.4458391,8.4314088,06/10/2014 12:48,Freudenstadt,Germany 115 | 48.343188399999995,8.397952499999999,06/10/2014 13:03,Alpirsbach,Germany 116 | 48.282940100000005,8.1996304,06/10/2014 13:33,Gutach,Germany 117 | 48.3890293,8.0213421,06/10/2014 13:48,Gengenbach,Germany 118 | 48.4752249,7.946886599999999,06/10/2014 14:03,Offenburg,Germany 119 | 48.233450899999994,7.7536459,06/10/2014 14:33,Freiburg,Germany 120 | 47.995716200000004,7.8444118000000005,06/10/2014 15:03,Freiburg,Germany 121 | 47.9856227,7.8902617,06/11/2014 08:29,Freiburg,Germany 122 | 47.9992777,7.8392143,06/11/2014 19:44,Freiburg,Germany 123 | 47.905547799999994,8.223163000000001,06/12/2014 09:59,Titisee-Neustadt,Germany 124 | 48.0574273,8.5593472,06/12/2014 10:59,Villingen-Schwenningen,Germany 125 | 48.417789,8.7342731,06/12/2014 11:29,Horb am Neckar,Germany 126 | 48.5403852,9.148518600000001,06/12/2014 12:14,Kirchentellinsfurt,Germany 127 | 48.689611299999996,9.193634900000001,06/12/2014 13:00,Stuttgart,Germany 128 | 41.3067518,2.0976237999999996,06/12/2014 17:19,El Prat de Llobregat,Spain 129 | 41.3043329,2.0727284,06/12/2014 17:49,El Prat de Llobregat,Spain 130 | 41.3860186,2.1700736000000003,06/12/2014 18:34,Barcelona,Spain 131 | 41.39641820000001,2.1252823,06/12/2014 19:05,Barcelona,Spain 132 | 41.3770441,2.1516166,06/12/2014 23:04,Barcelona,Spain 133 | 41.3584596,2.1287013,06/13/2014 11:35,Hospitalet de Llobregat,Spain 134 | 41.3838555,2.1816792,06/15/2014 14:51,Barcelona,Spain 135 | 41.4117404,2.1501468,06/16/2014 17:07,Barcelona,Spain 136 | 41.3696825,2.1532646,06/16/2014 20:22,Barcelona,Spain 137 | 41.3865458,2.1674572999999997,06/17/2014 09:22,Barcelona,Spain 138 | 41.3736929,2.1902022,06/17/2014 15:22,Barcelona,Spain 139 | 41.3912195,2.166509,06/19/2014 15:21,Barcelona,Spain 140 | 41.294760499999995,2.0597215,06/20/2014 22:15,Viladecans,Spain 141 | 44.8211643,20.289820600000002,06/21/2014 01:59,Belgrade,Serbia 142 | 44.8204136,20.4634652,06/21/2014 18:44,Belgrade,Serbia 143 | 44.7615833,19.5779042,06/22/2014 07:58,Slepčević,Serbia 144 | 43.8594591,18.4259616,06/22/2014 21:12,Sarajevo,Bosnia and Herzegovina 145 | 43.8585619,18.3974783,06/23/2014 14:12,Sarajevo,Bosnia and Herzegovina 146 | 43.337604999999996,17.8132777,06/23/2014 19:12,Mostar,Bosnia and Herzegovina 147 | 43.0494048,17.4348518,06/24/2014 13:49,Ploče,Croatia 148 | 43.5076999,16.4341957,06/25/2014 16:56,Split,Croatia 149 | 42.660844899999994,18.0853275,06/27/2014 15:55,Dubrovnik,Croatia 150 | 42.423295,18.7716171,06/28/2014 05:09,Kotor,Montenegro 151 | 42.2096308,20.741421,06/30/2014 17:56,Prizren,Kosovo 152 | 
41.1143281,20.799085100000003,07/02/2014 16:12,Ohrid,Macedonia (FYROM) 153 | 40.7021838,19.946288199999998,07/04/2014 08:14,Berat,Albania 154 | 40.187824600000006,20.0793033,07/04/2014 17:42,Gjirokastër,Albania 155 | 39.9111658,20.3618013,07/04/2014 20:49,Kakavia,Greece 156 | 39.040021200000005,21.120076899999997,07/04/2014 23:58,Dytiki Ellada,Greece 157 | 38.55833199,21.26011286,07/05/2014 00:50,Dytiki Ellada,Greece 158 | 38.4890041,21.3400313,07/05/2014 00:52,Dytiki Ellada,Greece 159 | 38.54147934,21.28409073,07/05/2014 00:52,Dytiki Ellada,Greece 160 | 38.3747664,21.6339471,07/05/2014 01:33,Dytiki Ellada,Greece 161 | 38.1490189,22.351421300000002,07/05/2014 03:06,Dytiki Ellada,Greece 162 | 37.921659000000005,22.929848,07/05/2014 03:52,Peloponnese,Greece 163 | 37.9667664,23.7278716,07/05/2014 15:05,Athens,Greece 164 | 37.989224799999995,23.731597600000004,07/06/2014 10:42,Athens,Greece 165 | 37.9672018,23.7284295,07/06/2014 15:27,Athens,Greece 166 | 37.9336091,23.9460748,07/06/2014 18:57,Attica,Greece 167 | 40.9823519,28.815756099999998,07/06/2014 20:42,Istanbul,Turkey 168 | 41.0016292,28.949190500000004,07/06/2014 21:58,Istanbul,Turkey 169 | 41.0445078,28.983312899999994,07/07/2014 08:50,Istanbul,Turkey 170 | 41.0091726,28.9658489,07/07/2014 13:05,Istanbul,Turkey 171 | 41.0142474,29.011214600000002,07/07/2014 17:43,Istanbul,Turkey 172 | 41.01947427,28.98448986,07/07/2014 18:14,Istanbul,Turkey 173 | 41.0444898,28.983313,07/07/2014 22:00,Istanbul,Turkey 174 | 41.0155939,28.962900399999995,07/08/2014 11:45,Istanbul,Turkey 175 | 41.044555700000004,28.983285499999997,07/08/2014 16:44,Istanbul,Turkey 176 | 41.0089915,28.9682677,07/08/2014 20:03,Istanbul,Turkey 177 | 41.0434875,28.9854878,07/08/2014 22:18,Istanbul,Turkey 178 | 40.977637200000004,28.8238788,07/09/2014 09:03,Istanbul,Turkey 179 | 48.357110399999996,11.7913456,07/09/2014 13:20,Munich,Germany 180 | -------------------------------------------------------------------------------- /modules/module05 - data visualization/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 05: Data Visualization 3 | 4 | ### Overview: 5 | 6 | Today we will introduce Python data visualization with pandas and matplotlib. 7 | 8 | ### Readings: 9 | 10 | Chapter 9 of McKinney's *Python for Data Analysis*. Also make sure you're fluent in the material in chapters 6-8 and 10 assigned in the previous weeks. 11 | 12 | ### Class Session: 13 | 14 | Run through the Jupyter notebook. 15 | 16 | ### After-Class Assignment: 17 | 18 | Complete assignment 2 (see the assignments folder on GitHub) by Sunday 23:59 and submit it via Blackboard. 19 | -------------------------------------------------------------------------------- /modules/module05 - data visualization/module05.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Advanced Spatial Analysis\n", 8 | "# Module 05: Data visualization with matplotlib\n", 9 | "\n", 10 | "- documentation: https://matplotlib.org/api/api_overview.html\n", 11 | "- examples: https://matplotlib.org/gallery/index.html\n", 12 | "- anatomy of mpl: https://matplotlib.org/_images/anatomy.png\n", 13 | "\n", 14 | "Today we'll dissect matplotlib. Once you've learned how to do all of this, other Python visualization libraries are easy to pick up. 
There are several other visualization libraries out there, such as:\n", 15 | "\n", 16 | "- seaborn: http://seaborn.pydata.org/\n", 17 | "- bokeh: http://bokeh.pydata.org/" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import matplotlib.cm as cm\n", 27 | "import matplotlib.font_manager as fm\n", 28 | "import matplotlib.pyplot as plt\n", 29 | "import numpy as np\n", 30 | "import pandas as pd\n", 31 | "import seaborn as sns\n", 32 | "\n", 33 | "%matplotlib notebook" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## 1. matplotlib basics" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "# create a figure with a single axis\n", 50 | "fig, ax = plt.subplots()" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# create a figure with 4 axes and choose its size\n", 60 | "fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 8))" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "The anatomy of mpl: https://matplotlib.org/_images/anatomy.png" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## 2. Bar charts" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "# load some data\n", 84 | "df = pd.read_csv('data/tracts.csv')\n", 85 | "df.shape" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "# what variables are present?\n", 95 | "df.columns" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# top 10 cities by number of tracts\n", 105 | "cities = df['place_name'].value_counts().head(10)\n", 106 | "cities" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "# default matplotlib bar chart via pandas\n", 116 | "ax = cities.plot(kind='bar')" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# change the default font\n", 126 | "plt.rcParams['font.family'] = 'helvetica,arial'\n", 127 | "plt.rcParams['font.size'] = 12" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "# style the plot to make it look nicer\n", 137 | "ax = cities.plot(kind='bar', figsize=(8, 6), width=0.6, alpha=0.6, \n", 138 | " color='g', edgecolor='k', zorder=2)\n", 139 | "\n", 140 | "ax.yaxis.grid(True, ls=':')\n", 141 | "ax.set_xticklabels(cities.index, rotation=45, rotation_mode='anchor', ha='right')\n", 142 | "\n", 143 | "ax.set_title('Cities with the most tracts')\n", 144 | "ax.set_ylabel('Number of tracts')\n", 145 | "\n", 146 | "plt.show()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "# same thing, only instead of pandas directly, use the mpl object-oriented API directly\n", 156 | "fig, ax = plt.subplots(figsize=(8, 6))\n", 157 | 
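"# unlike the pandas wrapper, ax.bar needs the x positions and bar heights passed explicitly\n",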
"ax.bar(x=cities.index, height=cities, width=0.6, alpha=0.6,\n", 158 | " color='g', edgecolor='k', zorder=2)\n", 159 | "\n", 160 | "ax.yaxis.grid(True, ls=':')\n", 161 | "ax.set_xticklabels(cities.index, rotation=45, rotation_mode='anchor', ha='right')\n", 162 | "\n", 163 | "ax.set_title('Cities with the most tracts')\n", 164 | "ax.set_ylabel('Number of tracts')\n", 165 | "\n", 166 | "plt.show()" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "# now it's your turn\n", 176 | "# recreate the plot above, but give it an x-axis label and make the bars orange with maroon edges\n" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "# plot log data\n", 186 | "fig, ax = plt.subplots(figsize=(8, 6))\n", 187 | "ax.bar(x=cities.index, height=np.log(cities), width=0.6, alpha=0.6,\n", 188 | " color='g', edgecolor='k', zorder=2)\n", 189 | "\n", 190 | "ax.yaxis.grid(True, ls=':')\n", 191 | "ax.set_ylim((0, 8))\n", 192 | "\n", 193 | "ax.set_xticklabels(cities.index, rotation=45, rotation_mode='anchor', ha='right')\n", 194 | "ax.set_title('Cities with the most tracts')\n", 195 | "ax.set_ylabel('Number of tracts (log)')\n", 196 | "\n", 197 | "plt.show()" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "# now it's your turn\n", 207 | "# plot a bar chart of the top 10 cities by average tract median income (use df.groupby)\n" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "## 3. Histograms and KDE" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "# default histogram, via pandas\n", 224 | "ax = df['median_age'].hist()" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "# you can style your plot from pandas nicer\n", 234 | "ax = df['median_age'].hist(bins=50, edgecolor='w', alpha=0.8, zorder=2)\n", 235 | "ax.grid(ls=':')\n", 236 | "\n", 237 | "# rather than setting an axis range, you can set a single limit\n", 238 | "ax.set_xlim(left=0)\n", 239 | "ax.set_ylim(top=1300)\n", 240 | "\n", 241 | "ax.set_title('Tract median age histogram')\n", 242 | "plt.show()" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "# plot a simple kde function\n", 252 | "ax = df['median_age'].plot.kde()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "# make the KDE look nicer\n", 262 | "ax = df['median_age'].plot.kde(lw=4, alpha=0.6, bw_method=0.1)\n", 263 | "ax.grid(ls=':')\n", 264 | "\n", 265 | "ax.set_xlim((0, 80))\n", 266 | "ax.set_ylim(bottom=0)\n", 267 | "\n", 268 | "ax.set_title('Tract median age probability density')\n", 269 | "plt.show()" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "# plot the histogram and KDE together\n", 279 | "fig, ax = plt.subplots(figsize=(8, 6))\n", 280 | "ax = df['median_age'].hist(ax=ax, bins=50, edgecolor='w', alpha=0.8, zorder=2)\n", 281 | "ax = 
df['median_age'].plot.kde(ax=ax, lw=2, secondary_y=True, alpha=0.8)\n", 282 | "\n", 283 | "ax.grid(ls=':')\n", 284 | "ax.set_xlim((0, 75))\n", 285 | "ax.set_ylim(bottom=0)\n", 286 | "\n", 287 | "ax.set_title('Tract median age')\n", 288 | "plt.show()" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "# plot histograms of 4 separate variables as subplots of a single mpl figure\n", 298 | "cols = ['median_age', 'med_income_k', 'median_gross_rent_k', 'med_home_value_k']\n", 299 | "fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))\n", 300 | "\n", 301 | "# zip together the axes and the columns to plot them\n", 302 | "for ax, col in zip(axes.flatten(), cols):\n", 303 | " df[col].hist(ax=ax, bins=50, alpha=0.8, zorder=2)\n", 304 | " ax.grid(ls=':')\n", 305 | " ax.set_xlim(left=0)\n", 306 | " ax.set_title(col)\n", 307 | "\n", 308 | "# add a super title to the figure\n", 309 | "fig.suptitle('Histograms of tract-level variables', y=0.95, fontsize=16, weight='bold')\n", 310 | "plt.show()" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "# now it's your turn\n", 320 | "# identify 2 additional variables in the dataframe to plot 6 histograms along with their KDEs\n" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "# compare white vs black tracts\n", 330 | "# first plot kde of majority white tracts' median income\n", 331 | "white_income = df[df['prop_white'] > 0.5]['med_income_k']\n", 332 | "ax = white_income.plot.kde(c='k', lw=2, ls='--', alpha=0.8, bw_method=1, label='majority white')\n", 333 | "\n", 334 | "# next plot kde of majority black tracts' median income\n", 335 | "black_income = df[df['prop_black'] > 0.5]['med_income_k']\n", 336 | "ax = black_income.plot.kde(c='k', lw=2, alpha=0.8, bw_method=1, label='majority black')\n", 337 | "\n", 338 | "ax.grid(ls=':')\n", 339 | "ax.set_xlim((-30, 200))\n", 340 | "ax.set_ylim(bottom=0)\n", 341 | "\n", 342 | "ax.set_title('White vs Black Census Tracts')\n", 343 | "ax.set_xlabel('Median income (USD, thousands)')\n", 344 | "ax.set_ylabel('Probability density')\n", 345 | "\n", 346 | "ax.legend()\n", 347 | "plt.show()" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "# now it's your turn\n", 357 | "# plot contrasting KDEs comparing median home values in tracts with majority college degree or higher, vs not\n" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "## 4. 
Time series and line plots" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "# GPS coordinates\n", 374 | "dt = pd.read_csv('data/gps-coords.csv', index_col='date', parse_dates=True)" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "# processing same as last week\n", 384 | "weekend_mask = (dt.index.weekday==5) | (dt.index.weekday==6)\n", 385 | "weekends = dt[weekend_mask]\n", 386 | "weekdays = dt[~weekend_mask]\n", 387 | "weekday_hourly_share = weekdays.groupby(weekdays.index.hour).size() / weekdays.groupby(weekdays.index.hour).size().sum()\n", 388 | "weekend_hourly_share = weekends.groupby(weekends.index.hour).size() / weekends.groupby(weekends.index.hour).size().sum()\n", 389 | "hourly_share = pd.DataFrame([weekday_hourly_share, weekend_hourly_share], index=['weekday', 'weekend']).T\n", 390 | "hourly_share.index = [s + ':00' for s in hourly_share.index.astype(str)]\n", 391 | "hourly_share.head()" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": null, 397 | "metadata": {}, 398 | "outputs": [], 399 | "source": [ 400 | "# weekday vs weekend hourly observations as a bar chart\n", 401 | "ax = hourly_share.plot(figsize=(10, 6), kind='bar', alpha=0.7, \n", 402 | " title='Share of observations, by hour')" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": null, 408 | "metadata": {}, 409 | "outputs": [], 410 | "source": [ 411 | "# stacked bar chart\n", 412 | "ax = hourly_share.plot(figsize=(10, 6), kind='bar', stacked=True, \n", 413 | " alpha=0.7, title='Share of observations, by hour')" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": null, 419 | "metadata": {}, 420 | "outputs": [], 421 | "source": [ 422 | "ax = hourly_share.plot(figsize=(10, 6), kind='bar', stacked=False, width=0.5,\n", 423 | " alpha=0.7, color=['#336699', '#ff3366'], edgecolor='k')\n", 424 | "\n", 425 | "ax.yaxis.grid(True, ls=':')\n", 426 | "ax.set_xticklabels(hourly_share.index, rotation=60, rotation_mode='anchor', ha='right')\n", 427 | "ax.set_title('Share of observations, by hour')\n", 428 | "\n", 429 | "plt.show()" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": null, 435 | "metadata": {}, 436 | "outputs": [], 437 | "source": [ 438 | "# get the count of records by date\n", 439 | "countdata = dt.groupby(dt.index.date).size()\n", 440 | "countdata.head()" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": null, 446 | "metadata": {}, 447 | "outputs": [], 448 | "source": [ 449 | "# simple line plot via pandas\n", 450 | "ax = countdata.plot(kind='line', figsize=(10, 6))" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [ 459 | "# better-styled line plot\n", 460 | "ax = countdata.plot(kind='line', figsize=(10, 6), lw=2, c='m',\n", 461 | " marker='o', markerfacecolor='w', markeredgewidth=1.5)\n", 462 | "\n", 463 | "# only show ticks for the 1st and 15th of the month\n", 464 | "xticks = np.unique(dt.index.date[(dt.index.day == 15) | (dt.index.day == 1)])\n", 465 | "plt.xticks(ticks=xticks, rotation=45, rotation_mode='anchor', ha='right')\n", 466 | "\n", 467 | "ax.grid(ls=':')\n", 468 | "plt.show()" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": null, 474 | "metadata": {}, 475 
| "outputs": [], 476 | "source": [ 477 | "# now it's your turn\n", 478 | "# recreate the plot above, but make it a dashed red line with xticks at every day evenly divisible by 5\n" 479 | ] 480 | }, 481 | { 482 | "cell_type": "markdown", 483 | "metadata": {}, 484 | "source": [ 485 | "## 5. Scatterplots" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": null, 491 | "metadata": {}, 492 | "outputs": [], 493 | "source": [ 494 | "fig, ax = plt.subplots(figsize=(6, 6))\n", 495 | "ax.scatter(x=df['med_income_k'], y=df['med_home_value_k'], s=0.5, alpha=0.5)\n", 496 | "ax.set_xlabel('Median Income (USD, thousands)')\n", 497 | "ax.set_ylabel('Meidan Home Value (USD, thousands)')\n", 498 | "plt.show()" 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": null, 504 | "metadata": {}, 505 | "outputs": [], 506 | "source": [ 507 | "def scatter_plot(df, xcol, ycol):\n", 508 | " fig, ax = plt.subplots(figsize=(6, 6))\n", 509 | " ax.scatter(x=df[xcol], y=df[ycol], s=0.5, alpha=0.5)\n", 510 | " ax.set_xlabel(xcol)\n", 511 | " ax.set_ylabel(ycol)\n", 512 | " plt.show()" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": {}, 519 | "outputs": [], 520 | "source": [ 521 | "# does distance to center co-vary with commute time?\n", 522 | "scatter_plot(df, 'distance_to_center_km', 'mean_travel_time_work')" 523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": null, 528 | "metadata": {}, 529 | "outputs": [], 530 | "source": [ 531 | "# does the student population proportion co-vary with proportion renting?\n", 532 | "scatter_plot(df, 'prop_college_grad_student', 'prop_renting')" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": null, 538 | "metadata": {}, 539 | "outputs": [], 540 | "source": [ 541 | "# compare home values vs income in majority white and minority white tracts\n", 542 | "fig, ax = plt.subplots(figsize=(10, 10))\n", 543 | "\n", 544 | "# first scatter minority white tracts, then majority white tracts\n", 545 | "mask = df['prop_white'] > 0.5\n", 546 | "ax.scatter(x=df[~mask]['med_income_k'], y=df[~mask]['med_home_value_k'],\n", 547 | " s=10, alpha=0.5, marker='o', c='none', edgecolor='r', label='minority white')\n", 548 | "ax.scatter(x=df[mask]['med_income_k'], y=df[mask]['med_home_value_k'],\n", 549 | " s=10, alpha=0.5, marker='o', c='none', edgecolor='k', label='majority white')\n", 550 | "\n", 551 | "# set axis limits\n", 552 | "ax.set_ylim((0, 1000))\n", 553 | "ax.set_xlim((0, 175))\n", 554 | "\n", 555 | "# add labels\n", 556 | "ax.set_xlabel('Median Income (USD, thousands)', fontsize=16)\n", 557 | "ax.set_ylabel('Meidan Home Value (USD, thousands)', fontsize=16)\n", 558 | "\n", 559 | "# add legend, show plot\n", 560 | "ax.legend()\n", 561 | "plt.show()" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": null, 567 | "metadata": {}, 568 | "outputs": [], 569 | "source": [ 570 | "# now it's your turn\n", 571 | "# scatterplot majority-hispanic tracts median income vs median rent in blue and majority-white tracts median income vs median rent in orange\n" 572 | ] 573 | }, 574 | { 575 | "cell_type": "markdown", 576 | "metadata": {}, 577 | "source": [ 578 | "## In class exercise\n", 579 | "\n", 580 | "Many other Python visualization libraries build on the matplotlib functionality we have learned today. For example, seaborn abstracts much of the nitty-gritty matplotlib work to create simple plots of data sets. 
Once you've learned the underlying matplotlib code, it's easy to play around with other visualization libraries.\n", 581 | "\n", 582 | "1. Required: choose two topics from the Seaborn tutorial (https://seaborn.pydata.org/tutorial.html), and work through them adding your code to this notebook, below.\n", 583 | "1. Optional: install the bokeh package and work through its tutorial (https://nbviewer.jupyter.org/github/bokeh/bokeh-notebooks/blob/master/quickstart/quickstart.ipynb)\n", 584 | "1. Select a data set for the assignment due later this week\n", 585 | "1. Begin working on the assignment (instructions on GitHub)" 586 | ] 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": null, 591 | "metadata": {}, 592 | "outputs": [], 593 | "source": [] 594 | } 595 | ], 596 | "metadata": { 597 | "kernelspec": { 598 | "display_name": "Python 3", 599 | "language": "python", 600 | "name": "python3" 601 | }, 602 | "language_info": { 603 | "codemirror_mode": { 604 | "name": "ipython", 605 | "version": 3 606 | }, 607 | "file_extension": ".py", 608 | "mimetype": "text/x-python", 609 | "name": "python", 610 | "nbconvert_exporter": "python", 611 | "pygments_lexer": "ipython3", 612 | "version": "3.7.1" 613 | } 614 | }, 615 | "nbformat": 4, 616 | "nbformat_minor": 2 617 | } 618 | -------------------------------------------------------------------------------- /modules/module06 - intro to geopandas/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 06: Intro to geopandas 3 | 4 | ### Overview: 5 | 6 | Today we will introduce the Python geospatial data science stack focusing on geopandas, a library that spatializes pandas DataFrames. We will cover the basics of working with spatial data, projection, geometric operations, spatial joins, spatial indexing, and mapping. 7 | 8 | ### Readings: 9 | 10 | Read the geopandas User Guide at [http://geopandas.org/](http://geopandas.org/) 11 | 12 | ### Class Session: 13 | 14 | Run through the Jupyter notebook. 15 | 16 | ### After-Class Assignment: 17 | 18 | T.B.D. 
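For a taste of the workflow described above, here is a minimal sketch using the states shapefile bundled in this module's data folder. It is illustrative only: the choice of EPSG:5070 (a USA Albers equal-area projection) is an assumption made for the sake of the example, not something the module requires.

```python
import geopandas as gpd

# read the shapefile bundle into a GeoDataFrame
states = gpd.read_file('data/states_21basic/states.shp')
print(states.crs)  # the accompanying .prj declares an unprojected NAD83 geographic CRS

# reproject to a planar, equal-area CRS before geometric operations like area
states_albers = states.to_crs(epsg=5070)
states_albers['area_km2'] = states_albers.geometry.area / 1e6

# a quick choropleth: color each state by its computed area
ax = states_albers.plot(column='area_km2', figsize=(10, 6))
```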
19 | -------------------------------------------------------------------------------- /modules/module06 - intro to geopandas/data/states_21basic/states.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module06 - intro to geopandas/data/states_21basic/states.dbf -------------------------------------------------------------------------------- /modules/module06 - intro to geopandas/data/states_21basic/states.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] -------------------------------------------------------------------------------- /modules/module06 - intro to geopandas/data/states_21basic/states.sbn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module06 - intro to geopandas/data/states_21basic/states.sbn -------------------------------------------------------------------------------- /modules/module06 - intro to geopandas/data/states_21basic/states.sbx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module06 - intro to geopandas/data/states_21basic/states.sbx -------------------------------------------------------------------------------- /modules/module06 - intro to geopandas/data/states_21basic/states.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module06 - intro to geopandas/data/states_21basic/states.shp -------------------------------------------------------------------------------- /modules/module06 - intro to geopandas/data/states_21basic/states.shp.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | {6C2FDF67-E8D4-4EE4-BE98-859E452FFE24}2006110120281300TRUECreateFeatureclass C:\ESRI\AEJEE_XTRA\AEJEE23data\usa states # states_Layer SAME_AS_TEMPLATE SAME_AS_TEMPLATE "GEOGCS['GCS_North_American_1983',DATUM['D_North_American_1983',SPHEROID['GRS_1980',6378137.0,298.257222101]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]];-178.217598362366 18.921786345087 999999.999068677;0 100000;0 100000" # 0 0 0 C:\ESRI\AEJEE_XTRA\AEJEE23data\usa\states.shpAppend states_Layer C:\ESRI\AEJEE_XTRA\AEJEE23data\usa\states.shp TEST C:\ESRI\AEJEE_XTRA\AEJEE23data\usa\states.shpFeatureClassToFeatureClass D:\usa\census\states.sdc\states C:\ESRI\AEJEE_XTRA\AEJEE23data\usa states # "STATE_NAME STATE_NAME VISIBLE;STATE_FIPS STATE_FIPS VISIBLE;SUB_REGION SUB_REGION VISIBLE;STATE_ABBR STATE_ABBR VISIBLE;POP2000 POP2000 VISIBLE;POP2005 POP2005 VISIBLE;POP00_SQMI POP00_SQMI VISIBLE;POP05_SQMI POP05_SQMI VISIBLE;WHITE WHITE VISIBLE;BLACK BLACK VISIBLE;AMERI_ES AMERI_ES VISIBLE;ASIAN ASIAN VISIBLE;HAWN_PI HAWN_PI VISIBLE;OTHER OTHER VISIBLE;MULT_RACE MULT_RACE VISIBLE;HISPANIC HISPANIC VISIBLE;MALES MALES VISIBLE;FEMALES FEMALES VISIBLE;AGE_UNDER5 AGE_UNDER5 VISIBLE;AGE_5_17 AGE_5_17 VISIBLE;AGE_18_21 AGE_18_21 VISIBLE;AGE_22_29 AGE_22_29 VISIBLE;AGE_30_39 AGE_30_39 VISIBLE;AGE_40_49 AGE_40_49 VISIBLE;AGE_50_64 AGE_50_64 
VISIBLE;AGE_65_UP AGE_65_UP VISIBLE;MED_AGE MED_AGE VISIBLE;MED_AGE_M MED_AGE_M VISIBLE;MED_AGE_F MED_AGE_F VISIBLE;HOUSEHOLDS HOUSEHOLDS VISIBLE;AVE_HH_SZ AVE_HH_SZ VISIBLE;HSEHLD_1_M HSEHLD_1_M VISIBLE;HSEHLD_1_F HSEHLD_1_F VISIBLE;MARHH_CHD MARHH_CHD VISIBLE;MARHH_NO_C MARHH_NO_C VISIBLE;MHH_CHILD MHH_CHILD VISIBLE;FHH_CHILD FHH_CHILD VISIBLE;FAMILIES FAMILIES VISIBLE;AVE_FAM_SZ AVE_FAM_SZ VISIBLE;HSE_UNITS HSE_UNITS VISIBLE;VACANT VACANT VISIBLE;OWNER_OCC OWNER_OCC VISIBLE;RENTER_OCC RENTER_OCC VISIBLE;NO_FARMS97 NO_FARMS97 VISIBLE;AVG_SIZE97 AVG_SIZE97 VISIBLE;CROP_ACR97 CROP_ACR97 VISIBLE;AVG_SALE97 AVG_SALE97 VISIBLE;SQMI SQMI VISIBLE" SAME_AS_TEMPLATE SAME_AS_TEMPLATE # 0 C:\ESRI\AEJEE_XTRA\AEJEE23data\usa\states.shp 4 | -------------------------------------------------------------------------------- /modules/module06 - intro to geopandas/data/states_21basic/states.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module06 - intro to geopandas/data/states_21basic/states.shx -------------------------------------------------------------------------------- /modules/module07 - maps and web mapping/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 07: Maps and web mapping 3 | 4 | ### Overview: 5 | 6 | Today we will continue introducing the Python geospatial data science stack focusing on geopandas, and in particular diving into mapping and visualization applications. We will also introduce web mapping with Leaflet + Folium, along with Carto. 7 | 8 | ### Readings: 9 | 10 | - http://geopandas.org/mapping.html 11 | - https://geopandas.readthedocs.io/en/latest/gallery/cartopy_convert.html 12 | - http://geopandas.org/gallery/plotting_with_geoplot.html 13 | 14 | ### Class Session: 15 | 16 | Present final project proposals and discuss. Run through the Jupyter notebook. Work through Leaflet and Carto tutorials. 17 | 18 | ### After-Class Assignment: 19 | 20 | Complete assignment 3 (see the assignments folder on GitHub) by Sunday 23:59 and submit it via Blackboard. 21 | -------------------------------------------------------------------------------- /modules/module07 - maps and web mapping/module07.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Advanced Spatial Analysis\n", 8 | "# Module 07: Mapping and Web Mapping" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import geopandas as gpd\n", 18 | "import folium\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "from cartopy import crs as ccrs\n", 21 | "\n", 22 | "%matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "gdf_world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))\n", 32 | "gdf_world.head()" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "gdf_cities = gpd.read_file(gpd.datasets.get_path('naturalearth_cities'))\n", 42 | "gdf_cities.head()" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## 1. 
Choropleth mapping" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# map the world countries directly with geopandas\n", 59 | "ax = gdf_world.plot()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# remove antarctica from our geodataframe, then plot again\n", 69 | "mask = (gdf_world['name'] != 'Antarctica') & (gdf_world['pop_est'] > 0)\n", 70 | "gdf_world = gdf_world[mask]\n", 71 | "ax = gdf_world.plot()" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "fig, ax = plt.subplots(figsize=(9, 9))\n", 81 | "ax = gdf_world.plot(ax=ax, column='pop_est')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# create a column to contain a per-capita gdp indicator\n", 91 | "gdf_world['gdp_per_cap'] = gdf_world.gdp_md_est / gdf_world.pop_est\n", 92 | "fig, ax = plt.subplots(figsize=(20, 20))\n", 93 | "ax = gdf_world.plot(ax=ax, column='gdp_per_cap', cmap='inferno_r', edgecolor='k', lw=0.2)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "# inspect the highest GDP per capita countries\n", 103 | "gdf_world.sort_values(by='gdp_per_cap', ascending=False).head()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# drop a couple outliers\n", 113 | "labels = gdf_world.sort_values(by='gdp_per_cap', ascending=False).iloc[:2].index\n", 114 | "gdf_world = gdf_world.drop(labels)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "# map again\n", 124 | "fig, ax = plt.subplots(figsize=(20, 20))\n", 125 | "ax = gdf_world.plot(ax=ax, column='gdp_per_cap', cmap='inferno_r', edgecolor='k', lw=0.2)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "# now it's your turn\n", 135 | "# create a subset geodataframe of only african countries, then plot by gdp per capita\n" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "## 2. 
Projecting" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "# what CRS are we using?\n", 152 | "gdf_world.crs" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "More info: http://spatialreference.org/ref/epsg/4326/" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# project data to the robinson projection\n", 169 | "robinson = '+proj=robin +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'\n", 170 | "gdf_world = gdf_world.to_crs(robinson)\n", 171 | "gdf_world.crs" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "# map again\n", 181 | "fig, ax = plt.subplots(figsize=(15, 15))\n", 182 | "ax = gdf_world.plot(ax=ax, column='gdp_per_cap', cmap='inferno_r', edgecolor='w', lw=0.2)\n", 183 | "ax.axis('off')\n", 184 | "plt.show()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "# now it's your turn\n", 194 | "# calculate the population density of each country as a new column, then map the countries by density\n" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "## 3. Mapping multiple layers\n", 202 | "\n", 203 | "Plot capital cities on top of countries basemap" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "gdf_cities.head()" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "fig, ax = plt.subplots(figsize=(20, 20))\n", 222 | "\n", 223 | "# plot the basemap: the country boundaries\n", 224 | "ax = gdf_world.plot(ax=ax, color='w', edgecolor='#aaaaaa', lw=1)\n", 225 | "\n", 226 | "# plot the points: the city lat-lngs\n", 227 | "ax = gdf_cities.plot(ax=ax, color='r')\n", 228 | "\n", 229 | "ax.axis('off')\n", 230 | "plt.show()" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "Why do we have one single red dot at lat=0 and lng=0?" 
238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "gdf_cities.crs == gdf_world.crs" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "# gotta project the gdfs so they're in the same CRSs\n", 256 | "gdf_cities = gdf_cities.to_crs(gdf_world.crs)\n", 257 | "gdf_cities.crs == gdf_world.crs" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "fig, ax = plt.subplots(figsize=(20, 20))\n", 267 | "\n", 268 | "# plot the basemap: the country boundaries\n", 269 | "ax = gdf_world.plot(ax=ax, color='w', edgecolor='#aaaaaa', lw=1)\n", 270 | "\n", 271 | "# plot the points: the city lat-lngs\n", 272 | "ax = gdf_cities.plot(ax=ax, color='none', edgecolor='#003366', lw=2, alpha=0.7)\n", 273 | "\n", 274 | "ax.axis('off')\n", 275 | "plt.show()" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "# now it's your turn\n", 285 | "# map the entire world basemap, but only plot asian cities on top of it\n" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "# join cities to countries, so we know which country each city belongs to\n", 295 | "gdf_cities_countries = gpd.sjoin(gdf_cities, gdf_world, how='inner', op='within')" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "# our spatial join isn't perfect, because of the low-resolution country boundaries\n", 305 | "gdf_cities_countries.head()" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": {}, 312 | "outputs": [], 313 | "source": [ 314 | "fig, ax = plt.subplots(figsize=(20, 20))\n", 315 | "\n", 316 | "# plot the basemap: the country boundaries\n", 317 | "ax = gdf_world.plot(ax=ax, color='#eeeeee', edgecolor='#999999', lw=1)\n", 318 | "\n", 319 | "# plot the points: the city lat-lngs\n", 320 | "ax = gdf_cities_countries.plot(ax=ax, column='gdp_per_cap', cmap='inferno_r', edgecolor='k', lw=1, alpha=0.8)\n", 321 | "\n", 322 | "ax.axis('off')\n", 323 | "plt.show()" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "# now it's your turn\n", 333 | "# create a subset geodataframe of only african cities/countries, then plot the countries as a basemap and the cities colored by gdp per capita\n" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "## 4. 
Choosing colors\n", 341 | "\n", 342 | "https://matplotlib.org/users/colormaps.html\n", 343 | "\n", 344 | "Easy rules to (usually) pick a good color map: \n", 345 | "\n", 346 | " - if you have data values rising from some baseline to some maximum, use a perceptually uniform sequential color map.\n", 347 | " - if you have data values diverging in both directions from some meaningful center point (e.g., center is zero and values can range positive or negative) then use a diverging color map\n", 348 | " - avoid rainbow/jet color maps" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "cmaps = ['viridis',\n", 358 | " 'plasma',\n", 359 | " 'inferno',\n", 360 | " 'YlOrRd',\n", 361 | " 'YlGnBu',\n", 362 | " 'summer',\n", 363 | " 'autumn',\n", 364 | " 'bone',\n", 365 | " 'RdPu']\n", 366 | "\n", 367 | "fig, axes = plt.subplots(3, 3, figsize=(16, 9), facecolor='#333333')\n", 368 | "for cmap, ax in zip(cmaps, axes.flatten()):\n", 369 | " ax = gdf_world.plot(ax=ax, cmap=cmap)\n", 370 | " ax.set_title(cmap, color='w')\n", 371 | " ax.axis('off')\n", 372 | "\n", 373 | "plt.show()" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "metadata": {}, 379 | "source": [ 380 | "## 5. Cartopy\n", 381 | "\n", 382 | "https://scitools.org.uk/cartopy/docs/latest/crs/projections.html" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "# create a cartopy azimuthal equidistant crs object\n", 392 | "ae = ccrs.AzimuthalEquidistant()" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "# convert it to a proj4 string compatible with geopandas\n", 402 | "crs_ae = ae.proj4_init\n", 403 | "gdf_world_ae = gdf_world.to_crs(crs_ae)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={'projection':ae})\n", 413 | "ax.add_geometries(gdf_world_ae['geometry'], crs=ae)\n", 414 | "plt.show()" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "# now it's your turn\n", 424 | "# look up a new cartopy projection, then use it to map the countries\n" 425 | ] 426 | }, 427 | { 428 | "cell_type": "markdown", 429 | "metadata": {}, 430 | "source": [ 431 | "#### Cartopy directly with geopandas plotting" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "metadata": {}, 438 | "outputs": [], 439 | "source": [ 440 | "# create a cartopy orthographic crs object, then get a string\n", 441 | "ortho = ccrs.Orthographic()\n", 442 | "crs_ortho = ortho.proj4_init\n", 443 | "crs_ortho" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": {}, 450 | "outputs": [], 451 | "source": [ 452 | "# project then plot\n", 453 | "gdf_world_ortho = gdf_world.to_crs(crs_ortho)\n", 454 | "ax = gdf_world_ortho.plot()\n", 455 | "ax.axis('off')\n", 456 | "plt.show()" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": {}, 463 | "outputs": [], 464 | "source": [ 465 | "# choose your own lat/lon center\n", 466 | "crs_ortho_usa = '+ellps=WGS84 +proj=ortho +lon_0=-100 +lat_0=30 +no_defs'\n", 467 | 
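"# +lon_0 and +lat_0 set the projection's center point: lon -100, lat 30 is roughly over the continental US\n",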
"gdf_world_ortho_usa = gdf_world.to_crs(crs_ortho_usa)\n", 468 | "ax = gdf_world_ortho_usa.plot()\n", 469 | "ax.axis('off')\n", 470 | "plt.show()" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": null, 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "# now it's your turn\n", 480 | "# plot an orthographic map of world countries colored by gdp per capita, centered on bangkok\n" 481 | ] 482 | }, 483 | { 484 | "cell_type": "markdown", 485 | "metadata": {}, 486 | "source": [ 487 | "## 6. Folium\n", 488 | "\n", 489 | "Folium lets you map your geodataframe as a leaflet (javascript) web map" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "metadata": {}, 496 | "outputs": [], 497 | "source": [ 498 | "# bin the data into quintiles\n", 499 | "bins = list(gdf_world['gdp_per_cap'].quantile([0, 0.2, 0.4, 0.6, 0.8, 1]))" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": {}, 506 | "outputs": [], 507 | "source": [ 508 | "# create leaflet web map\n", 509 | "m = folium.Map(location=(40, 20), zoom_start=4, tiles='cartodbpositron')" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "# add data as choropleth\n", 519 | "c = folium.Choropleth(gdf_world, data=gdf_world, bins=bins,\n", 520 | " columns=['name', 'gdp_per_cap'],\n", 521 | " key_on='feature.properties.name', \n", 522 | " highlight=True, fill_color='YlOrRd', \n", 523 | " legend_name='GDP Per Capita').add_to(m)" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": null, 529 | "metadata": {}, 530 | "outputs": [], 531 | "source": [ 532 | "# add mouseover tooltip to the countries\n", 533 | "c.geojson.add_child(folium.features.GeoJsonTooltip(['name', 'gdp_per_cap']))\n", 534 | "\n", 535 | "# save web map to disk\n", 536 | "m.save('webmap.html')" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": null, 542 | "metadata": {}, 543 | "outputs": [], 544 | "source": [ 545 | "# display the web map inline\n", 546 | "m" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "# now it's your turn\n", 556 | "# create a new leaflet web map via folium, coloring the countries by population\n" 557 | ] 558 | }, 559 | { 560 | "cell_type": "markdown", 561 | "metadata": {}, 562 | "source": [ 563 | "## 7. 
Web mapping sans Python\n", 564 | "\n", 565 | "Leaflet: https://leafletjs.com/examples.html\n", 566 | "\n", 567 | "Carto: https://carto.com/developers/carto-js/v3/guides/getting-started/\n", 568 | "\n", 569 | "See also: https://go.carto.com/spatial-data-science-carto-python-webinar-recorded" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": null, 575 | "metadata": {}, 576 | "outputs": [], 577 | "source": [] 578 | } 579 | ], 580 | "metadata": { 581 | "kernelspec": { 582 | "display_name": "Python 3", 583 | "language": "python", 584 | "name": "python3" 585 | }, 586 | "language_info": { 587 | "codemirror_mode": { 588 | "name": "ipython", 589 | "version": 3 590 | }, 591 | "file_extension": ".py", 592 | "mimetype": "text/x-python", 593 | "name": "python", 594 | "nbconvert_exporter": "python", 595 | "pygments_lexer": "ipython3", 596 | "version": "3.7.1" 597 | } 598 | }, 599 | "nbformat": 4, 600 | "nbformat_minor": 2 601 | } 602 | -------------------------------------------------------------------------------- /modules/module08 - apis, geocoding, geolocation/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 08: APIs, geocoding, and geolocation 3 | 4 | ### Overview: 5 | 6 | Today we'll learn how to geocode addresses to latitude/longitude, explore locations with the Google Places API, reverse geocode latitude/longitude to an address, and reverse geocode latitude/longitude to a block FIPS code. Finally, we'll learn how to work with open data portal APIs. 7 | 8 | ### Before Class: 9 | 10 | You'll need a Google API key to use the Google Maps Geocoding API and the Google Places API Web Service. These APIs require you to set up billing info, but we won't use them beyond the free threshold. Complete the following steps before the class session. 11 | 12 | 1. Go to the Google API console: https://console.developers.google.com/ 13 | 1. Sign in, create a new project for class, then click enable APIs. 14 | 1. Enable the Google Maps Geocoding API and then the Google Places API. 15 | 1. Go to credentials, create an API key, then copy it. 16 | 1. On your computer, create a new file (in the same folder as this notebook) called `keys.py` with one line: `google_api_key = 'PASTE-YOUR-KEY-HERE'` 17 | 18 | ### Class Session: 19 | 20 | Run through the Jupyter notebook. 21 | 22 | ### After-Class Assignment: 23 | 24 | In-class exercise in the notebook.
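The module08 notebook itself is not included in this dump, but a minimal sketch (assuming the `keys.py` file created in the steps above, and a hypothetical example address) shows the kind of request the README is setting up against the Google Maps Geocoding API:

```python
# a minimal sketch: geocode one (hypothetical) address with the Google Maps Geocoding API,
# assuming keys.py exists as described in the setup steps above
import requests
from keys import google_api_key

url = 'https://maps.googleapis.com/maps/api/geocode/json'
params = {'address': '360 Huntington Ave, Boston, MA',  # hypothetical example address
          'key': google_api_key}
response = requests.get(url, params=params, timeout=30)
results = response.json()['results']
if results:
    # the first result's geometry holds the lat/lng the API resolved the address to
    location = results[0]['geometry']['location']
    print(location['lat'], location['lng'])
```

Reverse geocoding works the same way, passing a `latlng` parameter in place of `address`.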
25 | -------------------------------------------------------------------------------- /modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_place/tl_2018_25_place.cpg: -------------------------------------------------------------------------------- 1 | UTF-8 -------------------------------------------------------------------------------- /modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_place/tl_2018_25_place.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_place/tl_2018_25_place.dbf -------------------------------------------------------------------------------- /modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_place/tl_2018_25_place.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] -------------------------------------------------------------------------------- /modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_place/tl_2018_25_place.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_place/tl_2018_25_place.shp -------------------------------------------------------------------------------- /modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_place/tl_2018_25_place.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_place/tl_2018_25_place.shx -------------------------------------------------------------------------------- /modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_tract/tl_2018_25_tract.cpg: -------------------------------------------------------------------------------- 1 | UTF-8 -------------------------------------------------------------------------------- /modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_tract/tl_2018_25_tract.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_tract/tl_2018_25_tract.dbf -------------------------------------------------------------------------------- /modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_tract/tl_2018_25_tract.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] -------------------------------------------------------------------------------- /modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_tract/tl_2018_25_tract.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_tract/tl_2018_25_tract.shp 
-------------------------------------------------------------------------------- /modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_tract/tl_2018_25_tract.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module08 - apis, geocoding, geolocation/data/tl_2018_25_tract/tl_2018_25_tract.shx -------------------------------------------------------------------------------- /modules/module09 - spatial weights and esda/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 09: Spatial weights and ESDA 3 | 4 | ### Overview: 5 | 6 | This week we will learn about exploratory spatial data analysis (ESDA) with geopandas and pysal. We will focus on exploratory data analysis, spatial weights matrices, calculating spatial lag, Moran's I, Moran plots, and LISAs. 7 | 8 | ### Readings: 9 | 10 | https://pysal.org/ 11 | 12 | ### Class Session: 13 | 14 | Run through the Jupyter notebook. 15 | 16 | ### After-Class Assignment: 17 | 18 | Described in the notebook. 19 | -------------------------------------------------------------------------------- /modules/module09 - spatial weights and esda/census/download-census-data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Download census tract data from API\n", 8 | "\n", 9 | " - Available data: https://api.census.gov/data.html\n", 10 | " - Variables (a.k.a. fields) you can query for: https://api.census.gov/data/2017/acs/acs5/profile/variables.html\n", 11 | " - Browse variables for a place here: https://www.census.gov/acs/www/data/data-tables-and-tools/data-profiles/2014/\n", 12 | " - Sample query (see the sketch below this list): https://api.census.gov/data/2017/acs/acs5/profile?get=DP05_0001E&for=tract:400100&in=state:06+county:001\n", 13 | " - County FIPS codes: https://www.nrcs.usda.gov/wps/portal/nrcs/detail/?cid=nrcs143_013697\n", 14 | " \n", 15 | "#### Variable name format\n", 16 | "\n", 17 | "More info: https://www.census.gov/data/developers/data-sets/acs-5year/data-notes.html\n", 18 | "\n", 19 | "Variable name format: [TableID]_[RowNumber][VariableType]\n", 20 | "\n", 21 | "Example: Variable DP02_0002PE, \"Family households (families)\", represents the percent estimate for table DP02 row number 2.\n", 22 | "\n", 23 | "DP (Data Profile): Table type containing broad social, economic, housing, and demographic information in a total of four profiles.\n", 24 | "\n", 25 | " - DP02: Social Characteristics — includes Education, Marital Status, Relationships, Fertility, Grandparents... \n", 26 | " - DP03: Economic Characteristics — includes Income, Employment, Occupation, Commuting to Work... \n", 27 | " - DP04: Housing Characteristics — includes Occupancy and Structure, Housing Value and Costs, Utilities... \n", 28 | " - DP05: Demographic Characteristics — includes Sex and Age, Race, Hispanic Origin, Housing Units... 
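To make the query format concrete — a minimal sketch, not part of the original notebook, that requests one variable for one tract exactly as in the sample query URL above (append your API key as a `key` parameter for anything beyond light use):

```python
# a minimal sketch: request one variable (DP05_0001E, total population) for one tract,
# reusing the sample query URL listed above
import requests

url = ('https://api.census.gov/data/2017/acs/acs5/profile'
       '?get=DP05_0001E&for=tract:400100&in=state:06+county:001')
response = requests.get(url, timeout=30)
# the response is a JSON array: the first row is a header, the second row holds the
# value followed by the state/county/tract geography codes
print(response.json())
```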
\n", 29 | "\n", 30 | "Variable suffixes:\n", 31 | "\n", 32 | " - E = estimate\n", 33 | " - M = margin of error\n", 34 | " - PE = percent estimate (of total)\n", 35 | " - PM = margin of error for corresponding PE\n", 36 | " - A = annotation" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 1, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "import geopandas as gpd\n", 46 | "import getcensus as gc\n", 47 | "import os\n", 48 | "import pandas as pd\n", 49 | "from shapely import geometry\n", 50 | "from keys import census_api_key" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 2, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# which census dataset\n", 60 | "dataset = 'acs/acs5'\n", 61 | "\n", 62 | "# which vintage year\n", 63 | "year = 2017\n", 64 | "\n", 65 | "# which census variables to retrieve for each tract\n", 66 | "variables = {'DP05_0001E':'total_pop', #total pop\n", 67 | " 'DP05_0018E':'median_age', #median age\n", 68 | " 'DP05_0071PE':'pct_hispanic', #pct pop hispanic or latino\n", 69 | " 'DP05_0077PE':'pct_white', #pct pop non-hispanic white alone\n", 70 | " 'DP05_0078PE':'pct_black', #pct pop non-hispanic black\n", 71 | " 'DP05_0080E':'pct_asian', #pct pop non-hispanic asian\n", 72 | " 'DP05_0002PE':'pct_male', #pct pop male\n", 73 | " 'DP04_0007PE':'pct_single_family_home', #pct single family detached homes\n", 74 | " 'DP04_0089E':'med_home_value', #median value of owner occupied units (dollars)\n", 75 | " 'DP04_0037E':'med_rooms_per_home', #median number of rooms in house\n", 76 | " 'DP04_0026PE':'pct_built_before_1940', #pct structure built 1939 or earlier\n", 77 | " 'DP04_0047PE':'pct_renting', #pct renter-occupied housing units\n", 78 | " 'DP04_0005E':'rental_vacancy_rate', #rental vacancy rate\n", 79 | " 'DP04_0049E':'avg_renter_household_size', #average household size of renter-occupied housing units\n", 80 | " 'DP04_0134E':'med_gross_rent', #median gross rent (dollars)\n", 81 | " 'DP03_0062E':'med_household_income', #median household income\n", 82 | " 'DP03_0025E':'mean_commute_time', #mean travel time to work\n", 83 | " 'DP03_0019PE':'pct_commute_drive_alone', #pct commute drove alone\n", 84 | " 'DP03_0128PE':'pct_below_poverty', #pct people with income below povery level\n", 85 | " 'DP02_0057PE':'pct_college_grad_student', #pct who are students currently enrolled in college or grad school\n", 86 | " 'DP02_0079PE':'pct_same_residence_year_ago', #pct residence 1 year ago was same house\n", 87 | " 'DP02_0067PE':'pct_bachelors_degree', #pct bachelor's degree or higher\n", 88 | " 'DP02_0111PE':'pct_english_only', #pct with english only language spoken at home\n", 89 | " 'DP02_0092PE':'pct_foreign_born'} #pct of population foreign born\n", 90 | "\n", 91 | "# data directories\n", 92 | "tracts_path = 'tl_2018_25_tract'\n", 93 | "output_path = 'census_tracts_data.geojson'" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 3, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "total_pop\tEstimate!!SEX AND AGE!!Total population\n", 106 | "median_age\tEstimate!!SEX AND AGE!!Total population!!Median age (years)\n", 107 | "pct_hispanic\tPercent Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)\n", 108 | "pct_white\tPercent Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!White alone\n", 109 | "pct_black\tPercent Estimate!!HISPANIC OR LATINO 
AND RACE!!Total population!!Not Hispanic or Latino!!Black or African American alone\n", 110 | "pct_asian\tEstimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Asian alone\n", 111 | "pct_male\tPercent Estimate!!SEX AND AGE!!Total population!!Male\n", 112 | "pct_single_family_home\tPercent Estimate!!UNITS IN STRUCTURE!!Total housing units!!1-unit detached\n", 113 | "med_home_value\tEstimate!!VALUE!!Owner-occupied units!!Median (dollars)\n", 114 | "med_rooms_per_home\tEstimate!!ROOMS!!Total housing units!!Median rooms\n", 115 | "pct_built_before_1940\tPercent Estimate!!YEAR STRUCTURE BUILT!!Total housing units!!Built 1939 or earlier\n", 116 | "pct_renting\tPercent Estimate!!HOUSING TENURE!!Occupied housing units!!Renter-occupied\n", 117 | "rental_vacancy_rate\tEstimate!!HOUSING OCCUPANCY!!Total housing units!!Rental vacancy rate\n", 118 | "avg_renter_household_size\tEstimate!!HOUSING TENURE!!Occupied housing units!!Average household size of renter-occupied unit\n", 119 | "med_gross_rent\tEstimate!!GROSS RENT!!Occupied units paying rent!!Median (dollars)\n", 120 | "med_household_income\tEstimate!!INCOME AND BENEFITS (IN 2017 INFLATION-ADJUSTED DOLLARS)!!Total households!!Median household income (dollars)\n", 121 | "mean_commute_time\tEstimate!!COMMUTING TO WORK!!Workers 16 years and over!!Mean travel time to work (minutes)\n", 122 | "pct_commute_drive_alone\tPercent Estimate!!COMMUTING TO WORK!!Workers 16 years and over!!Car truck or van drove alone\n", 123 | "pct_below_poverty\tPercent Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PAST 12 MONTHS IS BELOW THE POVERTY LEVEL!!All people\n", 124 | "pct_college_grad_student\tPercent Estimate!!SCHOOL ENROLLMENT!!Population 3 years and over enrolled in school!!College or graduate school\n", 125 | "pct_same_residence_year_ago\tPercent Estimate!!RESIDENCE 1 YEAR AGO!!Population 1 year and over!!Same house\n", 126 | "pct_bachelors_degree\tPercent Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 years and over!!Percent bachelor's degree or higher\n", 127 | "pct_english_only\tPercent Estimate!!LANGUAGE SPOKEN AT HOME!!Population 5 years and over!!English only\n", 128 | "pct_foreign_born\tPercent Estimate!!PLACE OF BIRTH!!Total population!!Foreign born\n" 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "# download and display census descriptions of each variable\n", 134 | "variable_descriptions = gc.get_census_variable_descriptions(dataset=dataset, \n", 135 | " year=year, \n", 136 | " variables=variables)\n", 137 | "for v, d in variable_descriptions.items():\n", 138 | " print('{}\\t{}'.format(variables[v], d['label']))" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "## Get vars from ACS DP" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 4, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "1478" 157 | ] 158 | }, 159 | "execution_count": 4, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "# load the tracts in our study area\n", 166 | "gdf = gpd.read_file(tracts_path).sort_values(by='GEOID')\n", 167 | "len(gdf)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 5, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "Downloading 24 census vars in 25001 for 57 tracts.\n", 180 | "Downloading 24 census vars in 25003 for 39 tracts.\n", 181 
| "Downloading 24 census vars in 25005 for 126 tracts.\n", 182 | "Downloading 24 census vars in 25007 for 5 tracts.\n", 183 | "Downloading 24 census vars in 25009 for 163 tracts.\n", 184 | "Downloading 24 census vars in 25011 for 18 tracts.\n", 185 | "Downloading 24 census vars in 25013 for 103 tracts.\n", 186 | "Downloading 24 census vars in 25015 for 36 tracts.\n", 187 | "Downloading 24 census vars in 25017 for 318 tracts.\n", 188 | "Downloading 24 census vars in 25019 for 6 tracts.\n", 189 | "Downloading 24 census vars in 25021 for 130 tracts.\n", 190 | "Downloading 24 census vars in 25023 for 101 tracts.\n", 191 | "Downloading 24 census vars in 25025 for 204 tracts.\n", 192 | "Downloading 24 census vars in 25027 for 172 tracts.\n", 193 | "Wall time: 25.4 s\n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "%%time\n", 199 | "df = gc.get_census_tracts_data(tract_fips=gdf['GEOID'], api_key=census_api_key, dataset=dataset,\n", 200 | " year=year, variables=variables, clean=True)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 6, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "# merge the tracts with the acs variables, rename columns, then make sure everything we merged is the same length\n", 210 | "merged = pd.merge(left=gdf.set_index('GEOID'), right=df, how='inner', left_index=True, right_index=True)\n", 211 | "merged = merged.rename(columns=variables)\n", 212 | "assert len(gdf) == len(df) == len(merged)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 7, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "data": { 222 | "text/html": [ 223 | "
\n", 224 | "\n", 237 | "\n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | "
STATEFPCOUNTYFPTRACTCENAMENAMELSADMTFCCFUNCSTATALANDAWATERINTPTLAT...mean_commute_timepct_commute_drive_alonepct_below_povertypct_college_grad_studentpct_same_residence_year_agopct_bachelors_degreepct_english_onlypct_foreign_bornstatecounty
2500101010025001010100101Census Tract 101G5020S2504621812765873+42.0598291...13.939.710.747.491.848.888.59.225001
2500101020625001010206102.06Census Tract 102.06G5020S5124091718830100+41.9226356...22.668.011.327.585.452.695.37.825001
2500101020825001010208102.08Census Tract 102.08G5020S5426886111461462+42.0135566...16.869.511.29.599.645.993.69.625001
2500101030425001010304103.04Census Tract 103.04G5020S183476597830612+41.8251080...23.579.54.830.293.451.293.77.025001
2500101030625001010306103.06Census Tract 103.06G5020S178285561730602+41.8593758...17.872.88.210.288.145.196.95.025001
\n", 387 | "

5 rows × 38 columns

\n", 388 | "
" 389 | ], 390 | "text/plain": [ 391 | " STATEFP COUNTYFP TRACTCE NAME NAMELSAD MTFCC \\\n", 392 | "25001010100 25 001 010100 101 Census Tract 101 G5020 \n", 393 | "25001010206 25 001 010206 102.06 Census Tract 102.06 G5020 \n", 394 | "25001010208 25 001 010208 102.08 Census Tract 102.08 G5020 \n", 395 | "25001010304 25 001 010304 103.04 Census Tract 103.04 G5020 \n", 396 | "25001010306 25 001 010306 103.06 Census Tract 103.06 G5020 \n", 397 | "\n", 398 | " FUNCSTAT ALAND AWATER INTPTLAT ... mean_commute_time \\\n", 399 | "25001010100 S 25046218 12765873 +42.0598291 ... 13.9 \n", 400 | "25001010206 S 51240917 18830100 +41.9226356 ... 22.6 \n", 401 | "25001010208 S 54268861 11461462 +42.0135566 ... 16.8 \n", 402 | "25001010304 S 18347659 7830612 +41.8251080 ... 23.5 \n", 403 | "25001010306 S 17828556 1730602 +41.8593758 ... 17.8 \n", 404 | "\n", 405 | " pct_commute_drive_alone pct_below_poverty \\\n", 406 | "25001010100 39.7 10.7 \n", 407 | "25001010206 68.0 11.3 \n", 408 | "25001010208 69.5 11.2 \n", 409 | "25001010304 79.5 4.8 \n", 410 | "25001010306 72.8 8.2 \n", 411 | "\n", 412 | " pct_college_grad_student pct_same_residence_year_ago \\\n", 413 | "25001010100 47.4 91.8 \n", 414 | "25001010206 27.5 85.4 \n", 415 | "25001010208 9.5 99.6 \n", 416 | "25001010304 30.2 93.4 \n", 417 | "25001010306 10.2 88.1 \n", 418 | "\n", 419 | " pct_bachelors_degree pct_english_only pct_foreign_born state \\\n", 420 | "25001010100 48.8 88.5 9.2 25 \n", 421 | "25001010206 52.6 95.3 7.8 25 \n", 422 | "25001010208 45.9 93.6 9.6 25 \n", 423 | "25001010304 51.2 93.7 7.0 25 \n", 424 | "25001010306 45.1 96.9 5.0 25 \n", 425 | "\n", 426 | " county \n", 427 | "25001010100 001 \n", 428 | "25001010206 001 \n", 429 | "25001010208 001 \n", 430 | "25001010304 001 \n", 431 | "25001010306 001 \n", 432 | "\n", 433 | "[5 rows x 38 columns]" 434 | ] 435 | }, 436 | "execution_count": 7, 437 | "metadata": {}, 438 | "output_type": "execute_result" 439 | } 440 | ], 441 | "source": [ 442 | "merged.head()" 443 | ] 444 | }, 445 | { 446 | "cell_type": "markdown", 447 | "metadata": {}, 448 | "source": [ 449 | "## Save to disk" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": 8, 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "upcast_dispatch = {geometry.Point: geometry.MultiPoint, \n", 459 | " geometry.LineString: geometry.MultiLineString, \n", 460 | " geometry.Polygon: geometry.MultiPolygon}\n", 461 | "\n", 462 | "def maybe_cast_to_multigeometry(geom):\n", 463 | " caster = upcast_dispatch.get(type(geom), lambda x: x[0])\n", 464 | " return caster([geom])\n", 465 | "\n", 466 | "merged['geometry'] = merged['geometry'].apply(maybe_cast_to_multigeometry)" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 9, 472 | "metadata": {}, 473 | "outputs": [ 474 | { 475 | "name": "stdout", 476 | "output_type": "stream", 477 | "text": [ 478 | "census_tracts_data.geojson\n", 479 | "Wall time: 4.88 s\n" 480 | ] 481 | } 482 | ], 483 | "source": [ 484 | "%%time\n", 485 | "merged.reset_index().to_file(output_path, driver='GeoJSON')\n", 486 | "print(output_path)" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": {}, 493 | "outputs": [], 494 | "source": [] 495 | } 496 | ], 497 | "metadata": { 498 | "kernelspec": { 499 | "display_name": "Python 3", 500 | "language": "python", 501 | "name": "python3" 502 | }, 503 | "language_info": { 504 | "codemirror_mode": { 505 | "name": "ipython", 506 | "version": 3 507 | }, 508 | "file_extension": 
".py", 509 | "mimetype": "text/x-python", 510 | "name": "python", 511 | "nbconvert_exporter": "python", 512 | "pygments_lexer": "ipython3", 513 | "version": "3.7.1" 514 | } 515 | }, 516 | "nbformat": 4, 517 | "nbformat_minor": 2 518 | } 519 | -------------------------------------------------------------------------------- /modules/module09 - spatial weights and esda/census/getcensus.py: -------------------------------------------------------------------------------- 1 | # Module: getcensus 2 | # Author: Geoff Boeing 3 | # Description: Download tract-level census variables from the API 4 | import requests 5 | import numpy as np 6 | import pandas as pd 7 | 8 | 9 | 10 | 11 | def get_census_variable_descriptions(dataset, year, variables): 12 | """ 13 | Download descriptions of census variables from the API 14 | """ 15 | url_template = 'https://api.census.gov/data/{year}/{dataset}/profile/variables/{variable}.json' 16 | variable_descriptions = {} 17 | 18 | for variable in variables: 19 | url = url_template.format(year=year, dataset=dataset, variable=variable) 20 | response = requests.get(url) 21 | data = response.json() 22 | variable_descriptions[variable] = {'concept':data['concept'], 23 | 'label':data['label']} 24 | 25 | return variable_descriptions 26 | 27 | 28 | 29 | 30 | def get_census_tracts_data(tract_fips, api_key, dataset, year, variables, max_tracts=1000, clean=False): 31 | """ 32 | Download census variables (given some year and dataset) for a series of tracts 33 | 34 | limit the max number tracts to download data for in a single api request 35 | """ 36 | 37 | # convert vars to string to send to api 38 | variables_str = ','.join(variables) 39 | 40 | # census dataframe called cd 41 | cd = pd.DataFrame() 42 | 43 | states_counties_tracts = get_states_counties_tracts(tract_fips=tract_fips) 44 | for state in states_counties_tracts: 45 | for county in states_counties_tracts[state]: 46 | 47 | tracts = states_counties_tracts[state][county] 48 | 49 | # if we pass it too many tracts at once, the census api chokes, so 50 | # break up counties with > max_tracts number of tracts into chunks 51 | for tracts_chunk in chunks(tracts, max_tracts): 52 | 53 | # convert tracts to string to send to api 54 | tracts_str = ','.join(tracts_chunk) 55 | print('Downloading {} census vars in {}{} for {} tracts.'.format(len(variables), state, 56 | county, len(tracts_chunk))) 57 | 58 | # get census vars for these tracts and append them to df 59 | df_tmp = get_tracts_census_vars(api_key=api_key, dataset=dataset, 60 | variables=variables_str, state=state, county=county, 61 | tracts=tracts_str, year=year, clean=clean) 62 | 63 | df_tmp['state'] = state 64 | df_tmp['county'] = county 65 | cd = cd.append(df_tmp) 66 | return cd 67 | 68 | 69 | 70 | 71 | def get_states_counties_tracts(tract_fips): 72 | """ 73 | turn a list of tract fips codes into a nested dict keyed by state, 74 | then keyed by county, finally with tract as the value 75 | """ 76 | 77 | if not isinstance(tract_fips, pd.Series): 78 | raise TypeError('tract_fips must be a pandas series') 79 | 80 | df = pd.DataFrame() 81 | df['state'] = tract_fips.str.slice(0, 2) 82 | df['county'] = tract_fips.str.slice(2, 5) 83 | df['tract'] = tract_fips.str.slice(5) 84 | grouped = df[['state', 'county', 'tract']].groupby(['state', 'county']) 85 | 86 | states_counties_tracts = {} 87 | for (state, county), group in grouped: 88 | if state not in states_counties_tracts: 89 | states_counties_tracts[state] = {} 90 | states_counties_tracts[state][county] = group['tract'].tolist() 91 
| 92 | return states_counties_tracts 93 | 94 | 95 | 96 | 97 | def parse_tract_fips(tract_fips): 98 | """ 99 | turn a full tract fips code into a tuple of state, county, tract 100 | """ 101 | 102 | return tract_fips[:2], tract_fips[2:5], tract_fips[5:] 103 | 104 | 105 | 106 | 107 | def get_tract_ids(fips_codes): 108 | """ 109 | convert a list of full tract fips codes into just tract fips only 110 | """ 111 | 112 | tracts = [] 113 | for fips_code in fips_codes: 114 | _, _, tract_fips = parse_tract_fips(fips_code) 115 | tracts.append(tract_fips) 116 | return tracts 117 | 118 | 119 | 120 | 121 | def get_tracts_census_vars(api_key, dataset, variables, state, county, tracts, year, clean): 122 | """ 123 | download a set of census variables for a state + county + tracts 124 | """ 125 | 126 | url_template = 'https://api.census.gov/data/{year}/{dataset}/profile?' \ 127 | 'get={variables}&for=tract:{tracts}&key={api_key}&in=state:{state}+county:{county}' 128 | 129 | url = url_template.format(api_key=api_key, dataset=dataset, variables=variables, 130 | state=state, county=county, tracts=tracts, year=year) 131 | 132 | try: 133 | response = requests.get(url, timeout=30) 134 | json_data = response.json() 135 | except Exception as e: 136 | print(e, response.status_code, response.text, response.url) 137 | 138 | # load as dataframe and index by geoid (state+county+tract) 139 | df = pd.DataFrame(json_data) 140 | df = df.rename(columns=df.iloc[0]).drop(df.index[0]) 141 | df['GEOID10'] = df.apply(lambda row: '{}{}{}'.format(row['state'], row['county'], row['tract']), axis='columns') 142 | df = df.set_index('GEOID10').drop(['state', 'county', 'tract'], axis='columns') 143 | 144 | if clean: 145 | df = clean_census_data(df) 146 | 147 | return df 148 | 149 | 150 | 151 | 152 | def clean_census_data(df): 153 | """ 154 | Clean up the census data results from the API. By default, the census data often 155 | includes non-numeric characters as annotations or missing values. 156 | 157 | # see https://www.census.gov/data/developers/data-sets/acs-5year/data-notes.html 158 | # for estimate and annotation values 159 | 160 | # A '+' following a median estimate means the median falls in the upper interval 161 | # of an open-ended distribution. 162 | 163 | # A '-' entry in the estimate column indicates that either no sample observations 164 | # or too few sample observations were available to compute an estimate, or a ratio 165 | # of medians cannot be calculated because one or both of the median estimates falls 166 | # in the lowest interval or upper interval of an open-ended distribution. 167 | 168 | # An 'N' entry in the estimate and margin of error columns indicates that data for 169 | # this geographic area cannot be displayed because the number of sample cases is too 170 | # small. 171 | 172 | # An '(X)' means that the estimate is not applicable or not available. 
173 | """ 174 | 175 | # clean up any non-numeric strings, column by column 176 | df = df.astype(str) 177 | bad_strings = ['-', 'N', '(X)', '*'] 178 | for col in df.columns: 179 | 180 | # replace any cell with '-' or 'N' or '(X)' or '*' in this column with NaN 181 | df[col] = df[col].map(lambda value: np.nan if any(s in value for s in bad_strings) else value) 182 | 183 | # if every result in this col was replaced by nans, then col is now of type 184 | # float and we can skip the following cleaning step 185 | if not df[col].dtype==np.float64: 186 | # strip out any '+' or ',' or '*' 187 | df[col] = df[col].str.replace('+', '').str.replace(',', '') 188 | 189 | # convert data to floats, assert uniqueness, and return 190 | def convert_float(value): 191 | try: 192 | return float(value) 193 | except: 194 | print('error', value, '\n', df) 195 | return np.nan 196 | df = df.applymap(convert_float) 197 | 198 | assert df.index.is_unique 199 | 200 | return df 201 | 202 | 203 | 204 | 205 | def chunks(l, n): 206 | """ 207 | yield successive n-sized chunks from list l 208 | """ 209 | for i in range(0, len(l), n): 210 | yield l[i:i+n] 211 | -------------------------------------------------------------------------------- /modules/module09 - spatial weights and esda/census/tl_2018_25_tract/tl_2018_25_tract.cpg: -------------------------------------------------------------------------------- 1 | UTF-8 -------------------------------------------------------------------------------- /modules/module09 - spatial weights and esda/census/tl_2018_25_tract/tl_2018_25_tract.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module09 - spatial weights and esda/census/tl_2018_25_tract/tl_2018_25_tract.dbf -------------------------------------------------------------------------------- /modules/module09 - spatial weights and esda/census/tl_2018_25_tract/tl_2018_25_tract.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] -------------------------------------------------------------------------------- /modules/module09 - spatial weights and esda/census/tl_2018_25_tract/tl_2018_25_tract.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module09 - spatial weights and esda/census/tl_2018_25_tract/tl_2018_25_tract.shp -------------------------------------------------------------------------------- /modules/module09 - spatial weights and esda/census/tl_2018_25_tract/tl_2018_25_tract.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module09 - spatial weights and esda/census/tl_2018_25_tract/tl_2018_25_tract.shx -------------------------------------------------------------------------------- /modules/module10 - inferential spatial models/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 10: Inferential spatial modeling 3 | 4 | ### Overview: 5 | 6 | This week we will review inferential statistics, inferential models, and spatial models including spatial fixed 
effects, spatial regimes, spatial lag, and spatial error models. 7 | 8 | ### Readings: 9 | 10 | https://pysal.org/ 11 | 12 | https://doi.org/10.1111/j.1435-5957.2012.00480.x 13 | 14 | Anselin and Rey (2014). Modern Spatial Econometrics in Practice. 15 | 16 | ### Class Session: 17 | 18 | Run through the Jupyter notebook. 19 | 20 | ### After-Class Assignment: 21 | 22 | See assignments folder. 23 | -------------------------------------------------------------------------------- /modules/module10 - inferential spatial models/img/frequentists_vs_bayesians.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module10 - inferential spatial models/img/frequentists_vs_bayesians.png -------------------------------------------------------------------------------- /modules/module10 - inferential spatial models/img/regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gboeing/asa/b0fad18d5e7ec227cd66b4809d9d355fa8ff4b59/modules/module10 - inferential spatial models/img/regression.png -------------------------------------------------------------------------------- /modules/module11 - postgis/01-store-data-in-postgis-db.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Store tracts and points in PostGIS\n", 8 | "\n", 9 | "...for a fast spatial-join of points to tracts.\n", 10 | "\n", 11 | "First, install postgres, postgis, and psycopg2. Then create the database from command prompt if it doesn't already exist:\n", 12 | "\n", 13 | "```\n", 14 | "createdb -U postgres points_tracts\n", 15 | "psql -U postgres -d points_tracts -c \"CREATE EXTENSION postgis;\"\n", 16 | "```\n", 17 | "\n", 18 | "More info in the psycopg2 docs: http://initd.org/psycopg/docs/usage.html" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import geopandas as gpd\n", 28 | "import pandas as pd\n", 29 | "import psycopg2\n", 30 | "from shapely.geometry import Point\n", 31 | "from keys import pg_user, pg_pass, pg_host, pg_port, pg_db\n", 32 | "\n", 33 | "crs = {'init' : 'epsg:4326'}" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "%%time\n", 43 | "# load tracts and project to 4326\n", 44 | "tracts = gpd.read_file('data/us_census_tracts_2014')\n", 45 | "tracts = tracts.to_crs(crs)\n", 46 | "print(len(tracts))" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "%%time\n", 56 | "# load points and set initial crs to 4326\n", 57 | "points = pd.read_csv('data/points-dataset.csv')\n", 58 | "geometry = points.apply(lambda row: Point((row['lng'], row['lat'])), axis=1)\n", 59 | "points = gpd.GeoDataFrame(points, geometry=geometry, crs=crs)\n", 60 | "print(len(points))" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "assert tracts.crs == points.crs\n", 70 | "# srid is the numeric spatial reference ID PostGIS uses\n", 71 | "srid = tracts.crs['init'].strip('epsg:')" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | 
"metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "#points = points.sample(1000)\n", 81 | "#tracts = tracts[['GEOID', 'ALAND', 'geometry']].sample(1000)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Upload tracts and points to PostGIS" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "connection = psycopg2.connect(database=pg_db,\n", 98 | " user=pg_user,\n", 99 | " password=pg_pass,\n", 100 | " host=pg_host,\n", 101 | " port=pg_port)\n", 102 | "cursor = connection.cursor()" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# list all tables\n", 112 | "cursor.execute(\"select relname from pg_class where relkind='r' and relname !~ '^(pg_|sql_)'\")\n", 113 | "cursor.fetchall()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "#### add tracts table" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "# drop tracts table if it already exists, then create tracts table\n", 130 | "cursor.execute(\"DROP TABLE IF EXISTS tracts\")\n", 131 | "cursor.execute(\"CREATE TABLE tracts (id SERIAL PRIMARY KEY, geoid VARCHAR NOT NULL, aland BIGINT NOT NULL)\")\n", 132 | "cursor.execute(\"SELECT AddGeometryColumn ('tracts', 'geom', %s, 'MULTIPOLYGON', 2)\", [srid])\n", 133 | "cursor.execute(\"CREATE INDEX tract_index ON tracts USING GIST(geom)\")\n", 134 | "connection.commit()" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "%%time\n", 144 | "cursor.execute(\"DELETE FROM tracts\")\n", 145 | "\n", 146 | "# insert each tract into the tracts table one at a time\n", 147 | "for label, row in tracts.iterrows():\n", 148 | " geoid = row['GEOID']\n", 149 | " aland = row['ALAND']\n", 150 | " geometry_wkt = row['geometry'].wkt\n", 151 | " \n", 152 | " query = \"\"\"INSERT INTO tracts (geoid, aland, geom) \n", 153 | " VALUES (%s, %s, ST_Multi(ST_GeomFromText(%s, %s)))\"\"\"\n", 154 | " data = (geoid, aland, geometry_wkt, srid)\n", 155 | " cursor.execute(query, data)\n", 156 | "\n", 157 | "connection.commit()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "#### add points table" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "# drop points table if it already exists, then create points table\n", 174 | "cursor.execute(\"DROP TABLE IF EXISTS points\")\n", 175 | "cursor.execute(\"\"\"CREATE TABLE points (id SERIAL PRIMARY KEY,\n", 176 | " date VARCHAR NOT NULL,\n", 177 | " region VARCHAR NOT NULL,\n", 178 | " bedrooms INTEGER,\n", 179 | " rent REAL,\n", 180 | " sqft REAL)\"\"\")\n", 181 | "cursor.execute(\"SELECT AddGeometryColumn ('points', 'geom', %s, 'POINT', 2)\", [srid])\n", 182 | "cursor.execute(\"CREATE INDEX point_index ON points USING GIST(geom)\")\n", 183 | "connection.commit()" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "%%time\n", 193 | "cursor.execute(\"DELETE FROM points\")\n", 194 | "\n", 195 | "# insert each point into the points table one at a 
time\n", 196 | "for label, row in points.iterrows():\n", 197 | " date = row['date']\n", 198 | " region = row['region']\n", 199 | " bedrooms = row['bedrooms']\n", 200 | " rent = row['rent']\n", 201 | " sqft = row['sqft']\n", 202 | " geometry_wkt = row['geometry'].wkt\n", 203 | " \n", 204 | " # bedrooms can be null, but must be None for psycopg2 to insert it as a null value, not a 'NaN' string\n", 205 | " if pd.isnull(bedrooms):\n", 206 | " bedrooms = None\n", 207 | " \n", 208 | " query = \"\"\"\n", 209 | " INSERT INTO points (date, region, bedrooms, rent, sqft, geom)\n", 210 | " VALUES (%s, %s, %s, %s, %s, ST_GeomFromText(%s, %s))\n", 211 | " \"\"\"\n", 212 | " data = (date, region, bedrooms, rent, sqft, geometry_wkt, srid)\n", 213 | " cursor.execute(query, data)\n", 214 | "\n", 215 | "connection.commit()" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "#### optimize the database" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "%%time\n", 232 | "# vacuum and analyze the database to optimize it after building indices and inserting rows\n", 233 | "original_isolation_level = connection.isolation_level\n", 234 | "connection.set_isolation_level(0)\n", 235 | "cursor.execute(\"VACUUM ANALYZE\")\n", 236 | "connection.commit()\n", 237 | "connection.set_isolation_level(original_isolation_level)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "#### verify SRIDs, row counts, and data" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "# look up the SRIDs\n", 254 | "cursor.execute(\"\"\"SELECT\n", 255 | " Find_SRID('public', 'tracts', 'geom') as tracts_srid,\n", 256 | " Find_SRID('public', 'points', 'geom') as points_srid\"\"\")\n", 257 | "cursor.fetchall()" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "cursor.execute(\"SELECT count(*) AS exact_count FROM tracts\")\n", 267 | "rows = cursor.fetchall()\n", 268 | "rows[0][0]" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "cursor.execute(\"SELECT geoid, aland, ST_AsText(geom) FROM tracts LIMIT 3\")\n", 278 | "rows = cursor.fetchall()\n", 279 | "gpd.GeoDataFrame(rows, columns=['GEOID', 'ALAND', 'geometry'])" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "cursor.execute(\"SELECT count(*) AS exact_count FROM points\")\n", 289 | "rows = cursor.fetchall()\n", 290 | "rows[0][0]" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "cursor.execute(\"\"\"SELECT date, region, bedrooms, rent, sqft, ST_AsText(geom)\n", 300 | " FROM points LIMIT 3\"\"\")\n", 301 | "rows = cursor.fetchall()\n", 302 | "gpd.GeoDataFrame(rows, columns=['date', 'region', 'bedrooms', 'rent', 'sqft', 'geometry'])" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "## all done" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | 
"cursor.close()\n", 319 | "connection.close()" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [] 328 | } 329 | ], 330 | "metadata": { 331 | "kernelspec": { 332 | "display_name": "Python 3", 333 | "language": "python", 334 | "name": "python3" 335 | }, 336 | "language_info": { 337 | "codemirror_mode": { 338 | "name": "ipython", 339 | "version": 3 340 | }, 341 | "file_extension": ".py", 342 | "mimetype": "text/x-python", 343 | "name": "python", 344 | "nbconvert_exporter": "python", 345 | "pygments_lexer": "ipython3", 346 | "version": "3.7.1" 347 | } 348 | }, 349 | "nbformat": 4, 350 | "nbformat_minor": 2 351 | } 352 | -------------------------------------------------------------------------------- /modules/module11 - postgis/02-postgis-spatial-join.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import geopandas as gpd\n", 10 | "import psycopg2\n", 11 | "from shapely.wkt import loads as wkt_loads\n", 12 | "from keys import pg_user, pg_pass, pg_host, pg_port, pg_db\n", 13 | "\n", 14 | "output_path = 'data/points_joined_tracts.csv'" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "connection = psycopg2.connect(database=pg_db,\n", 24 | " user=pg_user,\n", 25 | " password=pg_pass,\n", 26 | " host=pg_host,\n", 27 | " port=pg_port)\n", 28 | "cursor = connection.cursor()" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# verify that the SRIDs match\n", 38 | "cursor.execute(\"\"\"SELECT\n", 39 | " Find_SRID('public', 'tracts', 'geom') as tracts_srid,\n", 40 | " Find_SRID('public', 'points', 'geom') as points_srid\"\"\")\n", 41 | "cursor.fetchall()" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# joining how many rows to how many rows?\n", 51 | "cursor.execute(\"SELECT count(*) AS row_count FROM tracts\")\n", 52 | "count_tracts_table = cursor.fetchall()[0][0]\n", 53 | "cursor.execute(\"SELECT count(*) AS row_count FROM points\")\n", 54 | "count_points_table = cursor.fetchall()[0][0]\n", 55 | "count_tracts_table, count_points_table" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## Do the spatial join" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "%%time\n", 72 | "# drop the table if it already exists and recreate it\n", 73 | "# select distinct to keep only one point/tract pair even if the point lies on the border of multiple tracts\n", 74 | "query = \"\"\"\n", 75 | " DROP TABLE IF EXISTS points_tracts;\n", 76 | " CREATE TABLE points_tracts AS\n", 77 | " SELECT DISTINCT ON (point_id)\n", 78 | " points.id AS point_id,\n", 79 | " points.date AS date,\n", 80 | " points.region AS region,\n", 81 | " points.bedrooms AS bedrooms,\n", 82 | " points.rent AS rent,\n", 83 | " points.sqft AS sqft,\n", 84 | " points.geom AS geometry,\n", 85 | " tracts.geoid AS tract_geoid\n", 86 | " FROM points LEFT JOIN tracts\n", 87 | " ON ST_Intersects(points.geom, tracts.geom)\n", 88 | " \"\"\"\n", 89 | "\n", 90 | 
"cursor.execute(query)\n", 91 | "connection.commit()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "%%time\n", 101 | "# vacuum and analyze the database to optimize it after building indices and inserting rows\n", 102 | "old_isolation_level = connection.isolation_level\n", 103 | "connection.set_isolation_level(0)\n", 104 | "cursor.execute(\"VACUUM ANALYZE\")\n", 105 | "connection.commit()\n", 106 | "connection.set_isolation_level(old_isolation_level)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "## Retrieve the data" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "# some points still lack tract geoid, as they are offshore or just across canada/mexico border\n", 123 | "cursor.execute(\"SELECT count(*) AS row_count, count(tract_geoid) AS geoid_count FROM points_tracts\")\n", 124 | "print(count_points_table)\n", 125 | "cursor.fetchall()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "%%time\n", 135 | "# select the results from the new table, ignoring any points with null tract_geoid\n", 136 | "# because they are offshore or just across canada/mexico border\n", 137 | "cursor.execute(\"\"\"SELECT date, region, bedrooms, rent, sqft, ST_AsText(geometry), tract_geoid\n", 138 | " FROM points_tracts\n", 139 | " WHERE tract_geoid IS NOT NULL\"\"\")\n", 140 | "rows = cursor.fetchall()" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "cursor.close()\n", 150 | "connection.close()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "%%time\n", 160 | "# turn the selected rows into a geodataframe\n", 161 | "gdf = gpd.GeoDataFrame(rows, columns=['date', 'region', 'bedrooms', 'rent', 'sqft', 'geometry', 'tract_geoid'])\n", 162 | "gdf['geometry'] = gdf['geometry'].map(lambda x: wkt_loads(x))\n", 163 | "gdf['lat'] = gdf['geometry'].map(lambda pt: pt.y)\n", 164 | "gdf['lng'] = gdf['geometry'].map(lambda pt: pt.x)\n", 165 | "gdf = gdf.drop('geometry', axis=1)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "print(len(gdf))\n", 175 | "gdf.head()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "%%time\n", 185 | "gdf.to_csv(output_path, encoding='utf-8', index=False)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [] 194 | } 195 | ], 196 | "metadata": { 197 | "kernelspec": { 198 | "display_name": "Python 3", 199 | "language": "python", 200 | "name": "python3" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 3 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython3", 212 | "version": "3.7.1" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 2 217 | } 218 | 
-------------------------------------------------------------------------------- /modules/module11 - postgis/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 11: PostGIS 3 | 4 | ### Overview: 5 | 6 | Today we will introduce PostGIS, an open-source spatial database built on top of Postgres. 7 | 8 | ### Readings: 9 | 10 | Read through the [PostGIS intro workshop](https://postgis.net/workshops/postgis-intro/) (modules 1-20). 11 | 12 | ### Class Session: 13 | 14 | Run through the Jupyter notebook. 15 | 16 | See also: https://github.com/gboeing/ms-bldg-footprints 17 | 18 | ### After-Class Assignment: 19 | 20 | T.B.D. 21 | -------------------------------------------------------------------------------- /modules/module12 - spatial networks/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 12: Spatial networks, part 1 3 | 4 | ### Overview: 5 | 6 | This week we will learn how to model, analyze, and visualize urban street networks using Python, OSMnx, and OpenStreetMap data. 7 | 8 | ### Readings: 9 | 10 | https://osmnx.readthedocs.io/ 11 | 12 | ### Class Session: 13 | 14 | Run through the Jupyter notebook, then work through the linked examples repo. 15 | 16 | ### After-Class Assignment: 17 | 18 | Complete assignment 5. 19 | -------------------------------------------------------------------------------- /modules/module12 - spatial networks/module12.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Advanced Spatial Analysis\n", 8 | "\n", 9 | "# Module 12: Spatial networks\n", 10 | "\n", 11 | "**Download, model, analyze, and visualize street networks (and other spatial data) anywhere in the world from OpenStreetMap.**\n", 12 | "\n", 13 | "What you can do with OSMnx:\n", 14 | "\n", 15 | " - Download street networks anywhere in the world with a single line of code\n", 16 | " - Download other infrastructure network types, place polygons, building footprints, and points of interest\n", 17 | " - Download by city name, polygon, bounding box, or point/address + network distance\n", 18 | " - Download drivable, walkable, bikeable, or all street networks (or pass in custom query filters)\n", 19 | " - Load street network from a local .osm file\n", 20 | " - Visualize street network as a static image or leaflet web map\n", 21 | " - Simplify and correct the network’s topology to clean and consolidate intersections\n", 22 | " - Save networks to disk as shapefiles, GraphML, or node/edge lists\n", 23 | " - Conduct topological and spatial analyses to automatically calculate dozens of indicators\n", 24 | " - Calculate and plot shortest-path routes as a static image or leaflet web map\n", 25 | " - Fast map-matching of points, routes, or trajectories to nearest graph edges or nodes\n", 26 | " - Plot figure-ground diagrams of street networks and/or building footprints\n", 27 | " - Download node elevations and calculate street grades\n", 28 | " - Visualize travel distance and travel time with isoline and isochrone maps\n", 29 | " - Calculate and visualize street bearings and orientations\n", 30 | "\n", 31 | "More info:\n", 32 | "\n", 33 | " - [OSMnx documentation](https://osmnx.readthedocs.io)\n", 34 | " - [Examples, demos, tutorials](https://github.com/gboeing/osmnx-examples)" 35 | ] 36 | }, 37 | { 38 | "cell_type": 
"code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "import networkx as nx\n", 44 | "import osmnx as ox\n", 45 | "import requests\n", 46 | "import matplotlib.cm as cm\n", 47 | "import matplotlib.colors as colors\n", 48 | "ox.config(use_cache=True, log_console=True)\n", 49 | "ox.__version__" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# get a graph for some city\n", 59 | "G = ox.graph_from_place('Piedmont, California, USA', network_type='drive')\n", 60 | "fig, ax = ox.plot_graph(G)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "# what sized area does our network cover in square meters?\n", 70 | "G_proj = ox.project_graph(G)\n", 71 | "nodes_proj = ox.graph_to_gdfs(G_proj, edges=False)\n", 72 | "graph_area_m = nodes_proj.unary_union.convex_hull.area\n", 73 | "graph_area_m" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# show some basic stats about the network\n", 83 | "ox.basic_stats(G_proj, area=graph_area_m, clean_intersects=True, circuity_dist='euclidean')" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "stats documentation: https://osmnx.readthedocs.io/en/stable/osmnx.html#module-osmnx.stats" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "# see more stats (mostly topological stuff) with extended_stats\n", 100 | "more_stats = ox.extended_stats(G, ecc=True, bc=True, cc=True) #use arguments to turn other toplogical analyses on/off\n", 101 | "for key in sorted(more_stats.keys()):\n", 102 | " print(key)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# pull up some stat's value\n", 112 | "more_stats['radius']" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "# save graph to disk as shapefile (for GIS) or graphml file (for gephi etc)\n", 122 | "ox.save_graph_shapefile(G, filename='mynetwork_shapefile')\n", 123 | "ox.save_graphml(G, filename='mynetwork.graphml')" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "## Visualize street centrality" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# edge closeness centrality: convert graph to line graph so edges become nodes and vice versa\n", 140 | "edge_centrality = nx.closeness_centrality(nx.line_graph(G))" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "# list of edge values for the orginal graph\n", 150 | "ev = [edge_centrality[edge + (0,)] for edge in G.edges()]\n", 151 | "\n", 152 | "# color scale converted to list of colors for graph edges\n", 153 | "norm = colors.Normalize(vmin=min(ev)*0.8, vmax=max(ev))\n", 154 | "cmap = cm.ScalarMappable(norm=norm, cmap=cm.inferno)\n", 155 | "ec = [cmap.to_rgba(cl) for cl in ev]\n", 156 | "\n", 157 | "# color the edges in the original graph with closeness centralities in the line 
graph\n", 158 | "fig, ax = ox.plot_graph(G, bgcolor='k', axis_off=True, node_size=0,\n", 159 | " edge_color=ec, edge_linewidth=1.5, edge_alpha=1)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "## Routing" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "# get the nearest network node to each point\n", 176 | "orig_node = ox.get_nearest_node(G, (37.828903, -122.245846))\n", 177 | "dest_node = ox.get_nearest_node(G, (37.812303, -122.215006))" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "# find the route between these nodes then plot it\n", 187 | "route = nx.shortest_path(G, orig_node, dest_node, weight='length')\n", 188 | "fig, ax = ox.plot_graph_route(G, route, node_size=0)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "# how long is our route in meters?\n", 198 | "nx.shortest_path_length(G, orig_node, dest_node, weight='length')" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "# how far is it between these two nodes as the crow flies?\n", 208 | "ox.great_circle_vec(G.node[orig_node]['y'], G.node[orig_node]['x'],\n", 209 | " G.node[dest_node]['y'], G.node[dest_node]['x'])" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "## Get networks other ways\n", 217 | "\n", 218 | "make queries less ambiguous to help the geocoder out if it's not finding what you're looking for" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "# make query an unambiguous dict to help the geocoder find specifically what you're looking for\n", 228 | "place = {'city' : 'San Francisco',\n", 229 | " 'state' : 'California',\n", 230 | " 'country' : 'USA'}\n", 231 | "G = ox.graph_from_place(place, network_type='drive')\n", 232 | "fig, ax = ox.plot_graph(G, fig_height=12, node_size=0, edge_linewidth=0.5)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "# you can get networks anywhere in the world\n", 242 | "G = ox.graph_from_place('2nd Arrondissement, Paris, France', buffer_dist=100, network_type='drive_service')\n", 243 | "fig, ax = ox.plot_graph(G, fig_height=8, node_size=0, edge_linewidth=0.5)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "# or get network by coordinates, bounding box, or any custom polygon shape\n", 253 | "# useful when OSM just doesn't have a polygon for the place you want\n", 254 | "wurster_hall = (37.870605, -122.254830)\n", 255 | "one_mile = 1609 #meters\n", 256 | "G = ox.graph_from_point(wurster_hall, distance=one_mile, network_type='drive')\n", 257 | "fig, ax = ox.plot_graph(G, fig_height=8, node_size=0)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "examples of getting networks by coordinates, bounding box, or any custom polygon shape: https://github.com/gboeing/osmnx-examples/blob/master/notebooks/01-overview-osmnx.ipynb" 
265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "## Get other infrastructure types\n", 272 | "\n", 273 | "like rail or electric grids" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "# get rail network\n", 283 | "# note this is rail *infrastructure* and thus includes crossovers, sidings, spurs, yards, etc\n", 284 | "# for a station-based rail network, you should probably download a station adjacency matrix elsewhere\n", 285 | "G = ox.graph_from_place('New York City, New York',\n", 286 | " retain_all=False, truncate_by_edge=True, simplify=True,\n", 287 | " network_type='none', infrastructure='way[\"railway\"~\"subway\"]')\n", 288 | "\n", 289 | "fig, ax = ox.plot_graph(G, fig_height=10, node_size=0)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "## In-class workshop\n", 297 | "\n", 298 | "Git clone this repo to your desktop: https://github.com/gboeing/osmnx-examples\n", 299 | "\n", 300 | "Work through its notebooks, one at a time (skip #00)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [] 309 | } 310 | ], 311 | "metadata": { 312 | "kernelspec": { 313 | "display_name": "Python (asa)", 314 | "language": "python", 315 | "name": "asa" 316 | }, 317 | "language_info": { 318 | "codemirror_mode": { 319 | "name": "ipython", 320 | "version": 3 321 | }, 322 | "file_extension": ".py", 323 | "mimetype": "text/x-python", 324 | "name": "python", 325 | "nbconvert_exporter": "python", 326 | "pygments_lexer": "ipython3", 327 | "version": "3.7.1" 328 | } 329 | }, 330 | "nbformat": 4, 331 | "nbformat_minor": 2 332 | } 333 | -------------------------------------------------------------------------------- /modules/module13 - course wrap up/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 13: Course wrap up 3 | 4 | ### Overview: 5 | 6 | This week we will review the lessons learned in the previous modules and hold a workshop to practice any tricky concepts. 7 | 8 | ### Readings: 9 | 10 | https://osmnx.readthedocs.io/ 11 | 12 | https://pysal.org/ 13 | 14 | ### Class Session: 15 | 16 | Run through the Jupyter notebook. 17 | 18 | ### After-Class Assignment: 19 | 20 | Final projects due next week. 21 | -------------------------------------------------------------------------------- /modules/module13 - course wrap up/module13.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Advanced Spatial Analysis\n", 8 | "# Module 13: Course Wrap Up\n", 9 | "\n", 10 | "This week we will review the lessons learned in the previous modules and hold a workshop to practice any tricky concepts."
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import geopandas as gpd\n", 20 | "import pysal as ps\n", 21 | "import statsmodels.api as sm\n", 22 | "from scipy import stats\n", 23 | "from statsmodels.tools.tools import add_constant\n", 24 | "\n", 25 | "%matplotlib inline" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "# load the data\n", 35 | "tracts = gpd.read_file('data/census_tracts_data.geojson')\n", 36 | "tracts.shape" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "# choose a response variable and drop any rows in which it is null\n", 46 | "response = 'med_home_value'\n", 47 | "tracts = tracts.dropna(subset=[response])" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "# map the data\n", 57 | "tracts.plot(column=response, scheme='quantiles')" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "tracts.columns" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## 1. Statistical models" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "### 1a. Simple (bivariate) linear regression\n", 81 | "\n", 82 | "OLS regression with a single predictor" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "# create design matrix containing predictors (drop nulls), and a response variable vector\n", 92 | "predictors = 'med_household_income'\n", 93 | "X = tracts[predictors].dropna()\n", 94 | "y = tracts.loc[X.index][response]" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# estimate a simple linear regression model with scipy\n", 104 | "m, b, r, p, se = stats.linregress(x=X, y=y)\n", 105 | "print('m={:.4f}, b={:.4f}, r^2={:.4f}, p={:.4f}'.format(m, b, r ** 2, p))" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "# estimate a simple linear regression model with statsmodels\n", 115 | "Xc = add_constant(X)\n", 116 | "model = sm.OLS(y, Xc)\n", 117 | "result = model.fit()\n", 118 | "print(result.summary())" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "This single predictor explains about half the variation of the response. To explain more, we need more predictors.\n", 126 | "\n", 127 | "### 1b. 
Multiple regression\n", 128 | "\n", 129 | "OLS regression with multiple predictors" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "# create design matrix containing predictors (drop nulls), and a response variable vector\n", 139 | "predictors = ['med_household_income', 'pct_white']\n", 140 | "X = tracts[predictors].dropna()\n", 141 | "y = tracts.loc[X.index][response]" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "# estimate a linear regression model\n", 151 | "Xc = add_constant(X)\n", 152 | "model = sm.OLS(y, Xc)\n", 153 | "result = model.fit()\n", 154 | "print(result.summary())" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "#### Now add in more variables..." 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "tracts.columns" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "# create design matrix containing predictors (drop nulls), and a response variable vector\n", 180 | "predictors = ['med_household_income', 'pct_white', 'pct_single_family_home', 'pct_built_before_1940',\n", 181 | " 'med_rooms_per_home', 'pct_bachelors_degree']\n", 182 | "X = tracts[predictors].dropna()\n", 183 | "y = tracts.loc[X.index][response]" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "# estimate a linear regression model\n", 193 | "Xc = add_constant(X)\n", 194 | "model = sm.OLS(y, Xc)\n", 195 | "result = model.fit()\n", 196 | "print(result.summary())" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "## 2. Spatial models\n", 204 | "\n", 205 | "Basic types:\n", 206 | "\n", 207 | " - **Spatial heterogeneity**: account for systematic differences across space without explicitly modeling interdependency (non-spatial estimation)\n", 208 | " - spatial fixed effects (intercept varies for each spatial group)\n", 209 | " - spatial regimes (intercept and coefficients vary for each spatial group)\n", 210 | " - **Spatial dependence**: model interdependencies between observations through space\n", 211 | " - spatial lag model (spatially-lagged endogenous variable added as predictor; because of endogeneity, cannot use OLS to estimate)\n", 212 | " - spatial error model (spatial effects in error term)\n", 213 | " - spatial lag+error combo model" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "### 2a. 
Spatial fixed effects\n", 221 | "\n", 222 | "Using dummy variables representing the counties into which our observations (tracts) are nested" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "# create a new dummy variable for each county, with 1 if the tract is in that county and 0 if not\n", 232 | "for county in tracts['COUNTYFP'].unique():\n", 233 | " new_col = f'dummy_county_{county}'\n", 234 | " tracts[new_col] = (tracts['COUNTYFP'] == county).astype(int)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "# remove one dummy from dummies to prevent perfect collinearity\n", 244 | "# i.e., a subset of predictors sums to 1 (which a full set of dummies will do)\n", 245 | "county_dummies = [f'dummy_county_{county}' for county in tracts['COUNTYFP'].unique()]\n", 246 | "county_dummies = county_dummies[1:]" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "# create design matrix containing predictors (drop nulls), and a response variable vector\n", 256 | "predictors = ['med_household_income', 'pct_white', 'pct_single_family_home', 'pct_built_before_1940',\n", 257 | " 'med_rooms_per_home', 'pct_bachelors_degree']\n", 258 | "X = tracts[predictors + county_dummies].dropna()\n", 259 | "y = tracts.loc[X.index][response]" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "# estimate a linear regression model\n", 269 | "Xc = add_constant(X)\n", 270 | "model = sm.OLS(y, Xc)\n", 271 | "result = model.fit()\n", 272 | "print(result.summary())" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "### 2b. Spatial regimes\n", 280 | "\n", 281 | "Each spatial regime can have different model coefficients. Here, the regimes are counties. We'll take a subset of our data (all the tracts appearing in 3 counties). This subsection just uses OLS for estimation, but you can also combine spatial regimes with spatial autoregression models (the latter is introduced later)."
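Editor's note: the county-dummy loop in section 2a can be written more idiomatically with pandas. A sketch assuming the same `tracts` GeoDataFrame, to be run instead of (not after) the loop above; `drop_first=True` removes one category to prevent perfect collinearity, just like slicing off the first dummy:

```python
import pandas as pd

# build all county dummies in one call; drop_first=True drops one category
# so the remaining dummies cannot sum to 1 (perfect collinearity)
dummies = pd.get_dummies(tracts['COUNTYFP'], prefix='dummy_county', drop_first=True)
tracts = tracts.join(dummies)
county_dummies = dummies.columns.tolist()
```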
282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "# pick 3 counties as the regimes, and only estimate a regimes model for this subset\n", 291 | "counties = tracts['COUNTYFP'].value_counts().index[:3]\n", 292 | "mask = tracts['COUNTYFP'].isin(counties)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "# create design matrix containing predictors (drop nulls), a response variable matrix, and a regimes vector\n", 302 | "X = tracts.loc[mask, predictors].dropna() #only take rows in the 3 counties\n", 303 | "Y = tracts.loc[X.index][[response]] #notice this is a matrix this time for pysal\n", 304 | "regimes = tracts.loc[X.index]['COUNTYFP'] #define the regimes" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "# estimate spatial regimes model with OLS\n", 314 | "olsr = ps.model.spreg.OLS_Regimes(y=Y.values, x=X.values, regimes=regimes.values, name_regimes='county',\n", 315 | " name_x=X.columns.tolist(), name_y=response, name_ds='tracts')\n", 316 | "print(olsr.summary)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "### 2c. Spatial diagnostics\n", 324 | "\n", 325 | "So far we've seen two spatial heterogeneity models. Now we'll explore spatial dependence, starting by using queen-contiguity spatial weights to model spatial relationships between observations and OLS to check diagnostics." 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "# create design matrix containing predictors (drop nulls), and a response variable matrix\n", 335 | "predictors = ['med_household_income', 'pct_white', 'pct_single_family_home', 'pct_built_before_1940',\n", 336 | " 'med_rooms_per_home', 'pct_bachelors_degree']\n", 337 | "X = tracts[predictors].dropna()\n", 338 | "Y = tracts.loc[X.index][[response]] #notice this is a matrix this time for pysal" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "# compute spatial weights from tract geometries (but only those tracts that appear in design matrix!)\n", 348 | "W = ps.lib.weights.Queen.from_dataframe(tracts.loc[X.index])\n", 349 | "W.transform = 'r'" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "# compute OLS spatial diagnostics to check the nature of spatial dependence\n", 359 | "ols = ps.model.spreg.OLS(y=Y.values, x=X.values, w=W, spat_diag=True, moran=True)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "metadata": {}, 366 | "outputs": [], 367 | "source": [ 368 | "# calculate moran's I (for the response) and its significance\n", 369 | "mi = ps.explore.esda.Moran(y=Y, w=W, two_tailed=True)\n", 370 | "print(mi.I)\n", 371 | "print(mi.p_sim)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "# moran's I (for the residuals): moran's i, standardized i, p-value\n", 381 | "ols.moran_res" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 
388 | "#### Interpreting the results\n", 389 | "\n", 390 | "A significant Moran's *I* suggests spatial autocorrelation, but doesn't tell us which alternative specification should be used. Lagrange Multiplier (LM) diagnostics can help with that. If one LM test is significant and the other isn't, then that tells us which model specification (spatial lag vs spatial error) to use:" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "# lagrange multiplier test for spatial lag model: stat, p\n", 400 | "ols.lm_lag" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [ 409 | "# lagrange multiplier test for spatial error model: stat, p\n", 410 | "ols.lm_error" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": {}, 416 | "source": [ 417 | "#### Interpreting the results\n", 418 | "\n", 419 | "If (and only if) both the LM tests produce significant statistics, try the robust versions (the nonrobust LM tests are sensitive to each other):" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": {}, 426 | "outputs": [], 427 | "source": [ 428 | "# robust lagrange multiplier test for spatial lag model: stat, p\n", 429 | "ols.rlm_lag" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": null, 435 | "metadata": {}, 436 | "outputs": [], 437 | "source": [ 438 | "# robust lagrange multiplier test for spatial error model: stat, p\n", 439 | "ols.rlm_error" 440 | ] 441 | }, 442 | { 443 | "cell_type": "markdown", 444 | "metadata": {}, 445 | "source": [ 446 | "#### So... which model specification to choose?\n", 447 | "\n", 448 | "If neither LM test is significant: use regular OLS.\n", 449 | "\n", 450 | "If only one LM test is significant: use that model spec.\n", 451 | "\n", 452 | "If both LM tests are significant: run robust versions.\n", 453 | "\n", 454 | "If only one robust LM test is significant: use that model spec.\n", 455 | "\n", 456 | "If both robust LM tests are significant (this can often happen with large sample sizes):\n", 457 | "\n", 458 | " - first consider if the initial model specification is actually a good fit\n", 459 | " - if so, use the spatial specification corresponding to the larger robust-LM statistic\n", 460 | " - or consider a combo model\n", 461 | "\n", 462 | "### 2d. Spatial lag model\n", 463 | "\n", 464 | "When the diagnostics indicate the presence of a spatial diffusion process.\n", 465 | "\n", 466 | "Model specification:\n", 467 | "\n", 468 | "$y = \\rho W y + X \\beta + u$\n", 469 | "\n", 470 | "where $y$ is a $n \\times 1$ vector of observations (response), $W$ is a $n \\times n$ spatial weights matrix (thus $Wy$ is the spatially-lagged response), $\\rho$ is the spatial autoregressive parameter to be estimated, $X$ is a $n \\times k$ matrix of observations (exogenous predictors), $\\beta$ is a $k \\times 1$ vector of parameters (coefficients) to be estimated, and $u$ is a $n \\times 1$ vector of errors." 
471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": null, 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "# maximum-likelihood estimation with full matrix expression\n", 480 | "mll = ps.model.spreg.ML_Lag(y=Y.values, x=X.values, w=W, method='full', name_w='queen',\n", 481 | " name_x=X.columns.tolist(), name_y=response, name_ds='tracts')\n", 482 | "print(mll.summary)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "metadata": {}, 489 | "outputs": [], 490 | "source": [ 491 | "# the spatial autoregressive parameter estimate, rho\n", 492 | "mll.rho" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "metadata": {}, 498 | "source": [ 499 | "### 2e. Spatial error model\n", 500 | "\n", 501 | "When the diagnostics indicate the presence of spatial error dependence.\n", 502 | "\n", 503 | "Model specification:\n", 504 | "\n", 505 | "$y = X \\beta + u$\n", 506 | "\n", 507 | "where $X$ is a $n \\times k$ matrix of observations (exogenous predictors), $\\beta$ is a $k \\times 1$ vector of parameters (coefficients) to be estimated, and $u$ is a $n \\times 1$ vector of errors. The errors $u$ follow a spatial autoregressive specification:\n", 508 | "\n", 509 | "$u = \\lambda Wu + \\epsilon$\n", 510 | "\n", 511 | "where $\\lambda$ is a spatial autoregressive parameter to be estimated and $\\epsilon$ is the vector of errors." 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": null, 517 | "metadata": {}, 518 | "outputs": [], 519 | "source": [ 520 | "# maximum-likelihood estimation with full matrix expression\n", 521 | "mle = ps.model.spreg.ML_Error(y=Y.values, x=X.values, w=W, method='full', name_w='queen',\n", 522 | " name_x=X.columns.tolist(), name_y=response, name_ds='tracts')\n", 523 | "print(mle.summary)" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": null, 529 | "metadata": {}, 530 | "outputs": [], 531 | "source": [ 532 | "# the spatial autoregressive parameter estimate, lambda\n", 533 | "mle.lam" 534 | ] 535 | }, 536 | { 537 | "cell_type": "markdown", 538 | "metadata": {}, 539 | "source": [ 540 | "### 2f. Spatial lag+error combo model\n", 541 | "\n", 542 | "Estimated with GMM (generalized method of moments). Essentially a spatial error model with endogenous explanatory variables.\n", 543 | "\n", 544 | "Model specification:\n", 545 | "\n", 546 | "$y = \\rho W y + X \\beta + u$\n", 547 | "\n", 548 | "where $y$ is a $n \\times 1$ vector of observations (response), $W$ is a $n \\times n$ spatial weights matrix (thus $Wy$ is the spatially-lagged response), $\\rho$ is the spatial autoregressive parameter to be estimated, $X$ is a $n \\times k$ matrix of observations (exogenous predictors), $\\beta$ is a $k \\times 1$ vector of parameters (coefficients) to be estimated, and $u$ is a $n \\times 1$ vector of errors.\n", 549 | "\n", 550 | "The errors $u$ follow a spatial autoregressive specification:\n", 551 | "\n", 552 | "$u = \\lambda Wu + \\epsilon$\n", 553 | "\n", 554 | "where $\\lambda$ is a spatial autoregressive parameter to be estimated and $\\epsilon$ is the vector of errors." 
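Editor's note: before fitting the combo model, it can be useful to compare the two single-process models estimated above. A sketch using the log-likelihood and AIC attributes that pysal's maximum-likelihood results expose, assuming the `mll` and `mle` objects from sections 2d and 2e:

```python
# lower AIC (and higher log-likelihood) suggests the better-fitting specification
for name, model in [('spatial lag', mll), ('spatial error', mle)]:
    print('{}: log-likelihood = {:.1f}, AIC = {:.1f}'.format(name, model.logll, model.aic))
```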
555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": null, 560 | "metadata": {}, 561 | "outputs": [], 562 | "source": [ 563 | "gmc = ps.model.spreg.GM_Combo_Het(y=Y.values, x=X.values, w=W, name_w='queen', name_ds='tracts',\n", 564 | " name_x=X.columns.tolist(), name_y=response)\n", 565 | "print(gmc.summary)" 566 | ] 567 | } 568 | ], 569 | "metadata": { 570 | "kernelspec": { 571 | "display_name": "Python (asa)", 572 | "language": "python", 573 | "name": "asa" 574 | }, 575 | "language_info": { 576 | "codemirror_mode": { 577 | "name": "ipython", 578 | "version": 3 579 | }, 580 | "file_extension": ".py", 581 | "mimetype": "text/x-python", 582 | "name": "python", 583 | "nbconvert_exporter": "python", 584 | "pygments_lexer": "ipython3", 585 | "version": "3.7.1" 586 | } 587 | }, 588 | "nbformat": 4, 589 | "nbformat_minor": 2 590 | } 591 | -------------------------------------------------------------------------------- /modules/module14 - final projects/README.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Module 14: Final projects 3 | 4 | ### Overview: 5 | 6 | Final projects are due Apr 24 via Blackboard. See [the instructions](../../assignments/final-project) for full details. 7 | -------------------------------------------------------------------------------- /readings/readme.md: -------------------------------------------------------------------------------- 1 | # Advanced Spatial Analysis 2 | # Reading Materials 3 | 4 | Due to copyrights, course reading materials will be shared on blackboard for enrolled students to download. 5 | -------------------------------------------------------------------------------- /syllabus/readme.md: -------------------------------------------------------------------------------- 1 | # PPUA 7237: Advanced Spatial Analysis 2 | 3 | Northeastern University / Spring 19 4 | 5 | ## Instructor info 6 | 7 | Dr. Geoff Boeing 8 | 9 | Email: g dot boeing at northeastern dot edu 10 | 11 | Web: [https://geoffboeing.com](https://geoffboeing.com) 12 | 13 | ## Course Overview 14 | 15 | Each week will focus on a single thematic learning module consisting of a lecture and a workshop. The former will introduce core concepts and methods for a specific spatial analysis skill and the latter allows the students to practice these skills together in class. This syllabus is a living document and may be updated by the instructor throughout the semester. 16 | 17 | This course takes a computational social science approach to spatial analysis. It uses Jupyter notebooks to demonstrate coding and geospatial methods that students can reproduce and experiment with in real-time in the classroom. We start the semester with the fundamentals of coding, then move on to data wrangling and analysis, then on to the theory and practice of analytical visualization, web mapping, spatial analysis, and applied spatial statistics. Building up core concepts in this way gives students the confidence of fundamental quick-wins before moving on to high-level applications. 18 | 19 | Each week, students will be expected to: 20 | 21 | 1. Complete all assigned readings 22 | 2. Attend the lecture 23 | 3. Participate in the workshop exercises following the lecture 24 | 4. Complete and submit any assignments 25 | 26 | The course has no specific prerequisites, but students are expected to have prior experience with GIS and basic statistics. Python will be taught from the ground-up. 
27 | 28 | ## Materials 29 | 30 | Reading materials will be provided on blackboard. The course lectures assume that you have read the assigned readings prior to class and are now reasonably fluent in their contents. 31 | 32 | Coursework will be based on free and/or open-source software including Anaconda Python, QGIS, Carto, Docker, and Git. 33 | 34 | ## Schedule 35 | 36 | - Jan 09 - Module 00 - getting started 37 | - Jan 16 - Module 01 - intro to python (basic syntax and data types) 38 | - Jan 23 - Module 02 - intro to python (functions and control flow) 39 | - Jan 30 - Module 03 - pandas data wrangling/analysis (assignment 1) 40 | - Feb 06 - Module 04 - pandas data wrangling/analysis 41 | - Feb 13 - Module 05 - data visualization (assignment 2) 42 | - Feb 20 - Module 06 - geopandas and spatial data 43 | - Feb 27 - Module 07 - mapping and web mapping (assignment 3) 44 | - Mar 06 - Spring break 45 | - Mar 13 - Module 08 - APIs, geocoding, geolocation 46 | - Mar 20 - Module 09 - spatial weights and ESDA 47 | - Mar 27 - Module 10 - inferential spatial models (assignment 4) 48 | - Apr 03 - Module 11 - PostGIS 49 | - Apr 10 - Module 12 - spatial networks (assignment 5) 50 | - Apr 17 - Module 13 - course wrap up 51 | - Apr 24 - Final projects due 52 | 53 | ## Assignments and Evaluation 54 | 55 | Final grades will be assigned according to the following: 56 | 57 | - 50%: 5 assignments (10% each) 58 | - 40%: final project 59 | - 10%: attendance and active participation in the classroom 60 | 61 | Assignments are due by 23:59 Eastern time on their due date, to be submitted on blackboard. Late assignment submissions will be deducted 20% per day. Late final project submissions will not be accepted or graded. 62 | 63 | Assignments will be graded on the following criteria: does your code fully run? Does it do what it's supposed to do the way it's supposed to do it? Is it well-commented and documented? Is your code clear, straightforward, and reasonably efficient? 64 | --------------------------------------------------------------------------------