├── .gitignore ├── README.md ├── check_env.py ├── data ├── katrina.csv ├── manhattan.png ├── njgeol │ ├── njgeol_poly_dd.dbf │ ├── njgeol_poly_dd.prj │ ├── njgeol_poly_dd.sbn │ ├── njgeol_poly_dd.sbx │ ├── njgeol_poly_dd.shp │ ├── njgeol_poly_dd.shp.xml │ └── njgeol_poly_dd.shx ├── nybb.geojson ├── nybb │ ├── nybb.dbf │ ├── nybb.prj │ ├── nybb.shp │ └── nybb.shx ├── nyc_dem_reduce.tif ├── state_plane_codes.csv ├── wifi.sqlite └── wifi │ ├── wifi.dbf │ ├── wifi.prj │ ├── wifi.qpj │ ├── wifi.shp │ └── wifi.shx ├── exercises ├── Plotting Great Circles in Python (Ans).ipynb ├── Plotting Great Circles in Python.ipynb ├── Working with Projections in Python (Ans).ipynb ├── Working with Projections in Python.ipynb ├── Working with Rasters in Python (Ans).ipynb ├── Working with Rasters in Python.ipynb ├── Working with Vectors in Python (Ans).ipynb └── Working with Vectors in Python.ipynb ├── install.md └── notebooks ├── Notebook0a.ipynb ├── Notebook0b.ipynb ├── Notebook1.ipynb ├── Notebook2.ipynb ├── Notebook3.ipynb └── Notebook4.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # IPython files 9 | .ipynb_checkpoints/ 10 | 11 | # Other stuff 12 | presentation/ 13 | data/manhattan.tif 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | bin/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .cache 41 | nosetests.xml 42 | coverage.xml 43 | 44 | # Translations 45 | *.mo 46 | 47 | # Mr Developer 48 | .mr.developer.cfg 49 | .project 50 | .pydevproject 51 | 52 | # Rope 53 | .ropeproject 54 | 55 | # Django stuff: 56 | *.log 57 | *.pot 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Geospatial Data in Python 2 | ========================= 3 | 4 | Geospatial Data in Python Tutorial Materials 5 | 6 | See [`install.md`](install.md) for installation instructions, and [`check_env.py`](check_env.py) for a Python script to check your Python environment for required packages. 7 | 8 | For the tutorial, all [data](data), [exercises](exercises), and [notebooks](notebooks) are available by cloning this repository (see link on right). There is one additional GeoTIFF file (not required), which is [available here](https://www.dropbox.com/s/mba7obrfh2b2ucb/manhattan.tif). 
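Once the repository is cloned, a quick way to confirm that the core packages and the bundled data are in place is to run `check_env.py` and then load one of the small sample files. The snippet below is a minimal sketch, assuming it is run from the repository root (the paths match the `data` directory listed above):

```python
# Minimal smoke test for the tutorial environment (run from the repository root).
import os
import pandas as pd
import geopandas as gpd

# Hurricane Katrina track used in the plotting examples (columns: lon, lat).
katrina = pd.read_csv(os.path.join("data", "katrina.csv"))
print(katrina.head())

# NYC borough boundaries; confirms that fiona/geopandas can read shapefiles.
boros = gpd.read_file(os.path.join("data", "nybb", "nybb.shp"))
print(boros.crs)
```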
9 | 10 | The presentation materials are available at the following links: 11 | 12 | * [Part 1](https://dl.dropboxusercontent.com/u/14940391/python_geospatial/Presentation1.slides.html) 13 | * [Part 2](https://dl.dropboxusercontent.com/u/14940391/python_geospatial/Presentation2.slides.html) 14 | * [Part 3](https://dl.dropboxusercontent.com/u/14940391/python_geospatial/Presentation3.slides.html) 15 | * [Part 4](https://dl.dropboxusercontent.com/u/14940391/python_geospatial/Presentation4.slides.html) 16 | * [Part 5](https://dl.dropboxusercontent.com/u/14940391/python_geospatial/Presentation5.slides.html) 17 | -------------------------------------------------------------------------------- /check_env.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | with warnings.catch_warnings(record=True) as w: 4 | 5 | # Main stuff 6 | import numpy 7 | import pandas 8 | import scipy 9 | import sqlite3 10 | import matplotlib 11 | import shapely 12 | import fiona 13 | import rasterio 14 | import pyproj 15 | import descartes 16 | import geopandas 17 | import shapefile 18 | import cartopy 19 | import mplexporter 20 | import mplleaflet 21 | import geojsonio 22 | 23 | # Optional stuff 24 | try: 25 | import psycopg2 26 | import mpl_toolkits.basemap 27 | except ImportError, err: 28 | print("Error: " + unicode(err)) 29 | print("This is not a required package. " 30 | "See Known Issues for additional information:\n" 31 | "https://github.com/carsonfarmer/python_geospatial/blob/master/" 32 | "install.md#known-issues") 33 | import sys 34 | sys.exit(0) 35 | 36 | print("Everything looks good!") 37 | -------------------------------------------------------------------------------- /data/katrina.csv: -------------------------------------------------------------------------------- 1 | lon,lat 2 | -75.1,23.1 3 | -75.7,23.4 4 | -76.2,23.8 5 | -76.5,24.5 6 | -76.9,25.4 7 | -77.7,26.0 8 | -78.4,26.1 9 | -79.0,26.2 10 | -79.6,26.2 11 | -80.1,26.0 12 | -80.3,25.9 13 | -81.3,25.4 14 | -82.0,25.1 15 | -82.6,24.9 16 | -83.3,24.6 17 | -84.0,24.4 18 | -84.7,24.4 19 | -85.3,24.5 20 | -85.9,24.8 21 | -86.7,25.2 22 | -87.7,25.7 23 | -88.6,26.3 24 | -89.2,27.2 25 | -89.6,28.2 26 | -89.6,29.3 27 | -89.6,29.5 28 | -89.6,30.2 29 | -89.6,31.1 30 | -89.1,32.6 31 | -88.6,34.1 32 | -88.0,35.6 33 | -87.0,37.0 34 | -85.3,38.6 35 | -82.9,40.1 -------------------------------------------------------------------------------- /data/manhattan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/manhattan.png -------------------------------------------------------------------------------- /data/njgeol/njgeol_poly_dd.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/njgeol/njgeol_poly_dd.dbf -------------------------------------------------------------------------------- /data/njgeol/njgeol_poly_dd.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_North_American_1927",DATUM["D_North_American_1927",SPHEROID["Clarke_1866",6378206.4,294.9786982]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] -------------------------------------------------------------------------------- /data/njgeol/njgeol_poly_dd.sbn: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/njgeol/njgeol_poly_dd.sbn -------------------------------------------------------------------------------- /data/njgeol/njgeol_poly_dd.sbx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/njgeol/njgeol_poly_dd.sbx -------------------------------------------------------------------------------- /data/njgeol/njgeol_poly_dd.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/njgeol/njgeol_poly_dd.shp -------------------------------------------------------------------------------- /data/njgeol/njgeol_poly_dd.shp.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 2007110214420500FALSE20030709181625002003070918162500{CFCD0F00-22DF-467A-BDA8-1B79D7FB92AE}Microsoft Windows 2000 Version 5.0 (Build 2195) Service Pack 3; ESRI ArcCatalog 8.3.0.800enREQUIRED: A brief narrative summary of the data set.REQUIRED: A summary of the intentions with which the data set was developed.REQUIRED: The name of an organization or individual that developed the data set.REQUIRED: The date when the data set is published or otherwise made available for release.st_bndryst_bndryvector digital dataREQUIRED: The basis on which the time period of content information is determined.REQUIRED: The year (and optionally month, or month and day) for which the data set corresponds to the ground.REQUIRED: The state of the data set.REQUIRED: The frequency with which changes and additions are made to the data set after the initial data set is completed.-126.488422-66.74333749.38452324.276474-126.488422-66.74333724.27647449.384523REQUIRED: Reference to a formally registered thesaurus or a similar authoritative source of theme keywords.REQUIRED: Common-use word or phrase used to describe the subject of the data set.REQUIRED: Restrictions and legal prerequisites for accessing the data set.REQUIRED: Restrictions and legal prerequisites for using the data set after access is granted.ArcInfo CoverageMicrosoft Windows 2000 Version 5.0 (Build 2195) Service Pack 3; ESRI ArcCatalog 8.3.0.800st_bndry-126.488422-66.74333749.38452324.2764741-126.488422-66.74333749.38452324.2764741enFGDC Content Standards for Digital Geospatial MetadataFGDC-STD-001-1998local timehttp://www.esri.com/metadata/esriprof80.htmlESRI Metadata ProfileREQUIRED: The person responsible for the metadata information.REQUIRED: The organization responsible for the metadata information.REQUIRED: The mailing and/or physical address for the organization or individual.REQUIRED: The city of the address.REQUIRED: The state or province of the address.REQUIRED: The ZIP or other postal code of the address.REQUIRED: The telephone number by which individuals can speak to the organization or individual.20030709ISO 19115 Geographic Information - MetadataDIS_ESRI1.0datasetDownloadable Data2.6622.662002file://Local Area Network2.662ArcInfo CoverageVectorSimpleArcFALSE512FALSEFALSEComplete chain512SimpleLabelFALSE49FALSEFALSELabel point49SimplePolygonTRUE49FALSEFALSEGT-polygon composed of chains49SimpleTicFALSE496FALSEFALSEPoint496GCS_North_American_1927Decimal 
degrees0.0000010.000001North American Datum of 1927Clarke 18666378206.400000294.978698GCS_North_American_19275124949496st_bndry.aatFeature Class512FIDFIDOID400Internal feature number.ESRISequential unique whole numbers that are automatically generated.ShapeShapeGeometry000Feature geometry.ESRICoordinates defining the features.FNODE#45BinaryInternal node number for the beginning of an arc (from-node).ESRIWhole numbers that are automatically generated.TNODE#45BinaryInternal node number for the end of an arc (to-node).ESRIWhole numbers that are automatically generated.LPOLY#45BinaryInternal node number for the left polygon.ESRIWhole numbers that are automatically generated.RPOLY#45BinaryInternal node number for the right polygon.ESRIWhole numbers that are automatically generated.LENGTH818Float5Length of feature in internal units.ESRIPositive real numbers that are automatically generated.ST_BNDRY#45BinaryInternal feature number.ESRISequential unique whole numbers that are automatically generated.ST_BNDRY-ID45BinaryUser-defined feature number.ESRIst_bndry.patFeature Class49FIDFIDOID400Internal feature number.ESRISequential unique whole numbers that are automatically generated.ShapeShapeGeometry000Feature geometry.ESRICoordinates defining the features.AREA818Float5Area of feature in internal units squared.ESRIPositive real numbers that are automatically generated.PERIMETER818Float5Perimeter of feature in internal units.ESRIPositive real numbers that are automatically generated.ST_BNDRY#45BinaryInternal feature number.ESRISequential unique whole numbers that are automatically generated.ST_BNDRY-ID45BinaryUser-defined feature number.ESRISTATE22Character20030709Dataset copied.20071102 4 | -------------------------------------------------------------------------------- /data/njgeol/njgeol_poly_dd.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/njgeol/njgeol_poly_dd.shx -------------------------------------------------------------------------------- /data/nybb/nybb.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/nybb/nybb.dbf -------------------------------------------------------------------------------- /data/nybb/nybb.prj: -------------------------------------------------------------------------------- 1 | PROJCS["NAD_1983_StatePlane_New_York_Long_Island_FIPS_3104_Feet",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",984250.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-74.0],PARAMETER["Standard_Parallel_1",40.66666666666666],PARAMETER["Standard_Parallel_2",41.03333333333333],PARAMETER["Latitude_Of_Origin",40.16666666666666],UNIT["Foot_US",0.3048006096012192]] -------------------------------------------------------------------------------- /data/nybb/nybb.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/nybb/nybb.shp -------------------------------------------------------------------------------- /data/nybb/nybb.shx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/nybb/nybb.shx -------------------------------------------------------------------------------- /data/nyc_dem_reduce.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/nyc_dem_reduce.tif -------------------------------------------------------------------------------- /data/state_plane_codes.csv: -------------------------------------------------------------------------------- 1 | spcs,name,epsg 101,NAD83 / Alabama East,26929 102,NAD83 / Alabama West,26930 201,NAD83 / Arizona East,26948 202,NAD83 / Arizona Central,26949 203,NAD83 / Arizona West,26950 301,NAD83 / Arkansas North,26951 302,NAD83 / Arkansas South,26952 401,NAD83 / California zone 1,26941 402,NAD83 / California zone 2,26942 403,NAD83 / California zone 3,26943 404,NAD83 / California zone 4,26944 405,NAD83 / California zone 5,26945 406,NAD83 / California zone 6,26946 501,NAD83 / Colorado North,26953 502,NAD83 / Colorado Central,26954 503,NAD83 / Colorado South,26955 600,NAD83 / Connecticut,26956 700,NAD83 / Delaware,26957 901,NAD83 / Florida East,26958 902,NAD83 / Florida West,26959 903,NAD83 / Florida North,26960 1001,NAD83 / Georgia East,26966 1002,NAD83 / Georgia West,26967 1101,NAD83 / Idaho East,26968 1102,NAD83 / Idaho Central,26969 1103,NAD83 / Idaho West,26970 1201,NAD83 / Illinois East,26971 1202,NAD83 / Illinois West,26972 1301,NAD83 / Indiana East,26973 1302,NAD83 / Indiana West,26974 1401,NAD83 / Iowa North,26975 1402,NAD83 / Iowa South,26976 1501,NAD83 / Kansas North,26977 1502,NAD83 / Kansas South,26978 1600,NAD83 / Kentucky Single Zone,3088 1601,NAD83 / Kentucky North,2205 1602,NAD83 / Kentucky South,26980 1701,NAD83 / Louisiana North,26981 1702,NAD83 / Louisiana South,26982 1703,NAD83 / Louisiana Offshore,32199 1801,NAD83 / Maine East,26983 1802,NAD83 / Maine West,26984 1900,NAD83 / Maryland,26985 2001,NAD83 / Massachusetts Mainland,26986 2002,NAD83 / Massachusetts Island,26987 2111,NAD83 / Michigan North,26988 2112,NAD83 / Michigan Central,26989 2113,NAD83 / Michigan South,26990 2201,NAD83 / Minnesota North,26991 2202,NAD83 / Minnesota Central,26992 2203,NAD83 / Minnesota South,26993 2301,NAD83 / Mississippi East,26994 2302,NAD83 / Mississippi West,26995 2401,NAD83 / Missouri East,26996 2402,NAD83 / Missouri Central,26997 2403,NAD83 / Missouri West,26998 2500,NAD83 / Montana,32100 2600,NAD83 / Nebraska,32104 2701,NAD83 / Nevada East,32107 2702,NAD83 / Nevada Central,32108 2703,NAD83 / Nevada West,32109 2800,NAD83 / New Hampshire,32110 2900,NAD83 / New Jersey,32111 3001,NAD83 / New Mexico East,32112 3002,NAD83 / New Mexico Central,32113 3003,NAD83 / New Mexico West,32114 3101,NAD83 / New York East,32115 3102,NAD83 / New York Central,32116 3103,NAD83 / New York West,32117 3104,NAD83 / New York Long Island,32118 3200,NAD83 / North Carolina,32119 3301,NAD83 / North Dakota North,32120 3302,NAD83 / North Dakota South,32121 3401,NAD83 / Ohio North,32122 3402,NAD83 / Ohio South,32123 3501,NAD83 / Oklahoma North,32124 3502,NAD83 / Oklahoma South,32125 3601,NAD83 / Oregon North,32126 3602,NAD83 / Oregon South,32127 3701,NAD83 / Pennsylvania North,32128 3702,NAD83 / Pennsylvania South,32129 3800,NAD83 / Rhode Island,32130 3900,NAD83 / South Carolina,32133 4001,NAD83 / South Dakota North,32134 
4002,NAD83 / South Dakota South,32135 4100,NAD83 / Tennessee,32136 4201,NAD83 / Texas North,32137 4202,NAD83 / Texas North Central,32138 4203,NAD83 / Texas Central,32139 4204,NAD83 / Texas South Central,32140 4205,NAD83 / Texas South,32141 4301,NAD83 / Utah North,32142 4302,NAD83 / Utah Central,32143 4303,NAD83 / Utah South,32144 4400,NAD83 / Vermont,32145 4501,NAD83 / Virginia North,32146 4502,NAD83 / Virginia South,32147 4601,NAD83 / Washington North,32148 4602,NAD83 / Washington South,32149 4701,NAD83 / West Virginia North,32150 4702,NAD83 / West Virginia South,32151 4801,NAD83 / Wisconsin North,32152 4802,NAD83 / Wisconsin Central,32153 4803,NAD83 / Wisconsin South,32154 4901,NAD83 / Wyoming East,32155 4902,NAD83 / Wyoming East Central,32156 4903,NAD83 / Wyoming West Central,32157 4904,NAD83 / Wyoming West,32158 5001,NAD83 / Alaska zone 1,26931 5002,NAD83 / Alaska zone 2,26932 5003,NAD83 / Alaska zone 3,26933 5004,NAD83 / Alaska zone 4,26934 5005,NAD83 / Alaska zone 5,26935 5006,NAD83 / Alaska zone 6,26936 5007,NAD83 / Alaska zone 7,26937 5008,NAD83 / Alaska zone 8,26938 5009,NAD83 / Alaska zone 9,26939 5010,NAD83 / Alaska zone 10,26940 5101,NAD83 / Hawaii zone 1,26961 5102,NAD83 / Hawaii zone 2,26962 5103,NAD83 / Hawaii zone 3,26963 5104,NAD83 / Hawaii zone 4,26964 5105,NAD83 / Hawaii zone 5,26965 5200,NAD83 / Puerto Rico & Virgin Is.,32161 10101,NAD27 / Alabama East,26729 10102,NAD27 / Alabama West,26730 10201,NAD27 / Arizona East,26748 10202,NAD27 / Arizona Central,26749 10203,NAD27 / Arizona West,26750 10301,NAD27 / Arkansas North,26751 10302,NAD27 / Arkansas South,26752 10401,NAD27 / California zone I,26741 10402,NAD27 / California zone II,26742 10403,NAD27 / California zone III,26743 10404,NAD27 / California zone IV,26744 10405,NAD27 / California zone V,26745 10406,NAD27 / California zone VI,26746 10407,NAD27 / California zone VII,26799 10501,NAD27 / Colorado North,26753 10502,NAD27 / Colorado Central,26754 10503,NAD27 / Colorado South,26755 10600,NAD27 / Connecticut,26756 10700,NAD27 / Delaware,26757 10901,NAD27 / Florida West,26759 10901,NAD27 / Florida East,26758 10903,NAD27 / Florida North,26760 11001,NAD27 / Georgia East,26766 11002,NAD27 / Georgia West,26767 11101,NAD27 / Idaho East,26768 11102,NAD27 / Idaho Central,26769 11103,NAD27 / Idaho West,26770 11201,NAD27 / Illinois East,26771 11202,NAD27 / Illinois West,26772 11301,NAD27 / Indiana East,26773 11302,NAD27 / Indiana West,26774 11401,NAD27 / Iowa North,26775 11402,NAD27 / Iowa South,26776 11501,NAD27 / Kansas North,26777 11502,NAD27 / Kansas South,26778 11601,NAD27 / Kentucky North,26779 11602,NAD27 / Kentucky South,26780 11701,NAD27 / Louisiana North,26781 11702,NAD27 / Louisiana South,26782 11801,NAD27 / Maine East,26783 11802,NAD27 / Maine West,26784 11900,NAD27 / Maryland,26785 12001,NAD27 / Massachusetts Mainland,26786 12002,NAD27 / Massachusetts Island,26787 12101,NAD27 / Michigan East,5623 12102,NAD27 / Michigan Old Central,5624 12103,NAD27 / Michigan West,5625 12111,NAD Michigan / Michigan North,26811 12112,NAD Michigan / Michigan Central,26812 12113,NAD Michigan / Michigan South,26813 12201,NAD27 / Minnesota North,26791 12202,NAD27 / Minnesota Central,26792 12203,NAD27 / Minnesota South,26793 12301,NAD27 / Mississippi East,26794 12302,NAD27 / Mississippi West,26795 12401,NAD27 / Missouri East,26796 12402,NAD27 / Missouri Central,26797 12403,NAD27 / Missouri West,26798 12501,NAD27 / Montana North,32001 12502,NAD27 / Montana Central,32002 12503,NAD27 / Montana South,32003 12601,NAD27 / Nebraska North,32005 
12602,NAD27 / Nebraska South,32006 12701,NAD27 / Nevada East,32007 12702,NAD27 / Nevada Central,32008 12703,NAD27 / Nevada West,32009 12800,NAD27 / New Hampshire,32010 12900,NAD27 / New Jersey,32011 13001,NAD27 / New Mexico East,32012 13002,NAD27 / New Mexico Central,32013 13003,NAD27 / New Mexico West,32014 13101,NAD27 / New York East,32015 13102,NAD27 / New York Central,32016 13103,NAD27 / New York West,32017 13104,NAD27 / New York Long Island,32018 13200,NAD27 / North Carolina,32019 13301,NAD27 / North Dakota North,32020 13302,NAD27 / North Dakota South,32021 13401,NAD27 / Ohio North,32022 13402,NAD27 / Ohio South,32023 13501,NAD27 / Oklahoma North,32024 13502,NAD27 / Oklahoma South,32025 13601,NAD27 / Oregon North,32026 13602,NAD27 / Oregon South,32027 13701,NAD27 / Pennsylvania North,32028 13702,NAD27 / Pennsylvania South,32029 13800,NAD27 / Rhode Island,32030 13901,NAD27 / South Carolina North,32031 13902,NAD27 / South Carolina South,32033 14001,NAD27 / South Dakota North,32034 14002,NAD27 / South Dakota South,32035 14100,NAD27 / Tennessee,2204 14201,NAD27 / Texas North,32037 14202,NAD27 / Texas North Central,32038 14203,NAD27 / Texas Central,32039 14204,NAD27 / Texas South Central,32040 14205,NAD27 / Texas South,32041 14301,NAD27 / Utah North,32042 14302,NAD27 / Utah Central,32043 14303,NAD27 / Utah South,32044 14400,NAD27 / Vermont,32045 14501,NAD27 / Virginia North,32046 14502,NAD27 / Virginia South,32047 14601,NAD27 / Washington North,32048 14602,NAD27 / Washington South,32049 14701,NAD27 / West Virginia North,32050 14702,NAD27 / West Virginia South,32051 14801,NAD27 / Wisconsin North,32052 14802,NAD27 / Wisconsin Central,32053 14803,NAD27 / Wisconsin South,32054 14901,NAD27 / Wyoming East,32055 14902,NAD27 / Wyoming East Central,32056 14903,NAD27 / Wyoming West Central,32057 14904,NAD27 / Wyoming West,32058 15001,NAD27 / Alaska zone 1,26731 15002,NAD27 / Alaska zone 2,26732 15003,NAD27 / Alaska zone 3,26733 15004,NAD27 / Alaska zone 4,26734 15005,NAD27 / Alaska zone 5,26735 15006,NAD27 / Alaska zone 6,26736 15007,NAD27 / Alaska zone 7,26737 15008,NAD27 / Alaska zone 8,26738 15009,NAD27 / Alaska zone 9,26739 15010,NAD27 / Alaska zone 10,26740 15101,Old Hawaiian / Hawaii zone 1,3561 15102,Old Hawaiian / Hawaii zone 2,3562 15103,Old Hawaiian / Hawaii zone 3,3563 15104,Old Hawaiian / Hawaii zone 4,3564 15105,Old Hawaiian / Hawaii zone 5,3565 15201,Puerto Rico State Plane CS of 1927,3991 15202,Puerto Rico / St. 
Croix,3992 15300,American Samoa 1962 / American Samoa Lambert,3102 15400,Guam 1963 / Guam SPCS,3993 -------------------------------------------------------------------------------- /data/wifi.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/wifi.sqlite -------------------------------------------------------------------------------- /data/wifi/wifi.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/wifi/wifi.dbf -------------------------------------------------------------------------------- /data/wifi/wifi.prj: -------------------------------------------------------------------------------- 1 | PROJCS["NAD83_New_York_Long_Island_ftUS",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["standard_parallel_1",41.03333333333333],PARAMETER["standard_parallel_2",40.66666666666666],PARAMETER["latitude_of_origin",40.16666666666666],PARAMETER["central_meridian",-74],PARAMETER["false_easting",984250.0000000002],PARAMETER["false_northing",0],UNIT["Foot_US",0.30480060960121924]] -------------------------------------------------------------------------------- /data/wifi/wifi.qpj: -------------------------------------------------------------------------------- 1 | PROJCS["NAD83 / New York Long Island (ftUS)",GEOGCS["NAD83",DATUM["North_American_Datum_1983",SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6269"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4269"]],PROJECTION["Lambert_Conformal_Conic_2SP"],PARAMETER["standard_parallel_1",41.03333333333333],PARAMETER["standard_parallel_2",40.66666666666666],PARAMETER["latitude_of_origin",40.16666666666666],PARAMETER["central_meridian",-74],PARAMETER["false_easting",984250.0000000002],PARAMETER["false_northing",0],UNIT["US survey foot",0.3048006096012192,AUTHORITY["EPSG","9003"]],AXIS["X",EAST],AXIS["Y",NORTH],AUTHORITY["EPSG","2263"]] 2 | -------------------------------------------------------------------------------- /data/wifi/wifi.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/wifi/wifi.shp -------------------------------------------------------------------------------- /data/wifi/wifi.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonfarmer/python_geospatial/5549bf22ba677edc8d2fac913399e2e8e65755f0/data/wifi/wifi.shx -------------------------------------------------------------------------------- /exercises/Plotting Great Circles in Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:c3c3c978eec3acd7b011db30172352ee735791df27ae599d0fc2e6b67492d6e5" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Great Circles 
in Python\n", 16 | "\n", 17 | "## Working with Projections and Geometries\n", 18 | "\n", 19 | "There are plenty of really bad maps of Edward Snowden's movements around the world (like this one from Sky News), and [some good ones](http://oobrien.com/2013/06/snowden-route-maps/). In this exercise, we're going to try to produce a good one using `cartopy`.\n", 20 | "\n", 21 | "![Snowden's Route](http://oobrien.com/wp-content/uploads/2013/06/230613-snowden-route-map-v2-final-1-522x293.jpg)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "collapsed": false, 27 | "input": [ 28 | "import matplotlib.pyplot as plt\n", 29 | "%matplotlib inline\n", 30 | "import cartopy.crs as ccrs" 31 | ], 32 | "language": "python", 33 | "metadata": {}, 34 | "outputs": [] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "Here are the coordinates of the various places he is said to have visited on his journey:" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "collapsed": false, 46 | "input": [ 47 | "hawaii = (-157.8, 21.3)\n", 48 | "hongkong = (114.16, 22.28)\n", 49 | "moscow = (37.62, 55.75)\n", 50 | "havana = (-82.38, 23.13)\n", 51 | "caracas = (-66.92, 10.50)\n", 52 | "quito = (-78.58, -0.25)\n", 53 | "\n", 54 | "# Here's a list to make things easier\n", 55 | "stops = [hawaii, hongkong, moscow, havana, caracas, quito]\n", 56 | "labels = [\"Hawaii\", \"Hong Kong\", \"Moscow\", \"Havana\", \"Caracas\", \"Quito\"]" 57 | ], 58 | "language": "python", 59 | "metadata": {}, 60 | "outputs": [] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "### 1) Create a simple features `LineString` using shapely and print the `WKT`" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "collapsed": false, 72 | "input": [], 73 | "language": "python", 74 | "metadata": {}, 75 | "outputs": [] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "### 2) Plot a simple map with Great Circle lines for the journey (using background image and coastlines)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "collapsed": false, 87 | "input": [ 88 | "plt.figure(figsize=(8, 8))\n", 89 | "ax = plt.axes(projection=ccrs.Robinson())\n", 90 | "ax.set_global()\n", 91 | "\n", 92 | "# Your plot commands go here\n", 93 | "plt.show()" 94 | ], 95 | "language": "python", 96 | "metadata": {}, 97 | "outputs": [] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Bonus) Try to create the same plot with labels\n", 104 | "\n", 105 | "See [this example](http://scitools.org.uk/cartopy/docs/latest/examples/eyja_volcano.html) for help with labels and transforms." 
106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "collapsed": false, 111 | "input": [ 112 | "from matplotlib.transforms import offset_copy\n", 113 | "\n", 114 | "plt.figure(figsize=(8, 8))\n", 115 | "ax = plt.axes(projection=ccrs.Robinson())\n", 116 | "ax.set_global()\n", 117 | "\n", 118 | "# Your plot commands go here\n", 119 | "plt.show()" 120 | ], 121 | "language": "python", 122 | "metadata": {}, 123 | "outputs": [] 124 | } 125 | ], 126 | "metadata": {} 127 | } 128 | ] 129 | } -------------------------------------------------------------------------------- /exercises/Working with Projections in Python (Ans).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:f88a118baefbe109ddf2b29ff65dbe6f6504e8bd8c6baf4643a6f4ed02d73e60" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Working with Projections in Python\n", 16 | "## Converting Coordinates\n", 17 | "\n", 18 | "Here are the x/y coordinates for the Empire State Building in the local State Plane coordinate reference system:\n", 19 | "\n", 20 | "`(301211.4076849834, 64602.88651922046)`\n", 21 | "\n", 22 | "### 1) Figure out the correct State Plane reference zone, and convert the above coordinates to long/lat (`EPSG: 4326`)" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "Here's a quick example to remind you how this works with `PyProj`:" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "collapsed": false, 35 | "input": [ 36 | "from pyproj import Proj\n", 37 | "p = Proj(init='epsg:3857') # This is Web (Google) Mercator" 38 | ], 39 | "language": "python", 40 | "metadata": {}, 41 | "outputs": [], 42 | "prompt_number": 1 43 | }, 44 | { 45 | "cell_type": "code", 46 | "collapsed": false, 47 | "input": [ 48 | "p(-73.985656, 40.748433) # These are the correct long/lat coordinates" 49 | ], 50 | "language": "python", 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "metadata": {}, 55 | "output_type": "pyout", 56 | "prompt_number": 2, 57 | "text": [ 58 | "(-8236045.5519263055, 4975306.102820314)" 59 | ] 60 | } 61 | ], 62 | "prompt_number": 2 63 | }, 64 | { 65 | "cell_type": "code", 66 | "collapsed": false, 67 | "input": [ 68 | "p(-8236045.551926, 4975306.102820, inverse=True) # These are the coordinates in Web Mercator" 69 | ], 70 | "language": "python", 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "metadata": {}, 75 | "output_type": "pyout", 76 | "prompt_number": 3, 77 | "text": [ 78 | "(-73.98565599999725, 40.74843299999786)" 79 | ] 80 | } 81 | ], 82 | "prompt_number": 3 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "There's a handy csv file that contains all state plane coordinate reference systems in the `data` folder. You can load it with `Pandas`." 
89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "collapsed": false, 94 | "input": [ 95 | "import pandas as pd\n", 96 | "import os\n", 97 | "\n", 98 | "# Source: http://www.eye4software.com/resources/stateplane/\n", 99 | "df = pd.read_csv(os.path.join(\"..\", \"data\", \"state_plane_codes.csv\"))" 100 | ], 101 | "language": "python", 102 | "metadata": {}, 103 | "outputs": [], 104 | "prompt_number": 4 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "What about EPSG codes for New York?\n", 111 | "\n", 112 | "Source: http://www.eye4software.com/resources/stateplane/New_York\n", 113 | "\n", 114 | "**Hint**: You can use the DataFrame to figure this out..." 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "collapsed": false, 120 | "input": [ 121 | "df[df.name.str.contains(\"New York\")]" 122 | ], 123 | "language": "python", 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "html": [ 128 | "
\n", 129 | "\n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | "
spcsnameepsg
66 3101 NAD83 / New York East 32115
67 3102 NAD83 / New York Central 32116
68 3103 NAD83 / New York West 32117
69 3104 NAD83 / New York Long Island 32118
195 13101 NAD27 / New York East 32015
196 13102 NAD27 / New York Central 32016
197 13103 NAD27 / New York West 32017
198 13104 NAD27 / New York Long Island 32018
\n", 189 | "
" 190 | ], 191 | "metadata": {}, 192 | "output_type": "pyout", 193 | "prompt_number": 5, 194 | "text": [ 195 | " spcs name epsg\n", 196 | "66 3101 NAD83 / New York East 32115\n", 197 | "67 3102 NAD83 / New York Central 32116\n", 198 | "68 3103 NAD83 / New York West 32117\n", 199 | "69 3104 NAD83 / New York Long Island 32118\n", 200 | "195 13101 NAD27 / New York East 32015\n", 201 | "196 13102 NAD27 / New York Central 32016\n", 202 | "197 13103 NAD27 / New York West 32017\n", 203 | "198 13104 NAD27 / New York Long Island 32018" 204 | ] 205 | } 206 | ], 207 | "prompt_number": 5 208 | }, 209 | { 210 | "cell_type": "code", 211 | "collapsed": false, 212 | "input": [ 213 | "# Manhattan is in \"New York Long Island\"\n", 214 | "p = Proj(init=\"epsg:32118\")\n", 215 | "p(301211.4076849834, 64602.88651922046, inverse=True)" 216 | ], 217 | "language": "python", 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "metadata": {}, 222 | "output_type": "pyout", 223 | "prompt_number": 6, 224 | "text": [ 225 | "(-73.985656, 40.74843299999984)" 226 | ] 227 | } 228 | ], 229 | "prompt_number": 6 230 | } 231 | ], 232 | "metadata": {} 233 | } 234 | ] 235 | } -------------------------------------------------------------------------------- /exercises/Working with Projections in Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:7f089307ae366278e9dcb5b6815fe93c1c8efdd58af1cfa58134d8ce62143b9b" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Working with Projections in Python\n", 16 | "## Converting Coordinates\n", 17 | "\n", 18 | "Here are the x/y coordinates for the Empire State Building in the local State Plane coordinate reference system:\n", 19 | "\n", 20 | "`(301211.4076849834, 64602.88651922046)`\n", 21 | "\n", 22 | "### 1) Figure out the correct State Plane reference zone, and convert the above coordinates to long/lat (`EPSG: 4326`)" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "Here's a quick example to remind you how this works with `PyProj`:" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "collapsed": false, 35 | "input": [ 36 | "from pyproj import Proj\n", 37 | "p = Proj(init='epsg:3857') # This is Web (Google) Mercator" 38 | ], 39 | "language": "python", 40 | "metadata": {}, 41 | "outputs": [] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "collapsed": false, 46 | "input": [ 47 | "p(-73.985656, 40.748433) # These are the correct long/lat coordinates" 48 | ], 49 | "language": "python", 50 | "metadata": {}, 51 | "outputs": [] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "collapsed": false, 56 | "input": [ 57 | "p(-8236045.551926, 4975306.102820, inverse=True) # These are the coordinates in Web Mercator" 58 | ], 59 | "language": "python", 60 | "metadata": {}, 61 | "outputs": [] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "There's a handy csv file that contains all state plane coordinate reference systems in the `data` folder. You can load it with `Pandas`." 
68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "collapsed": false, 73 | "input": [ 74 | "import pandas as pd\n", 75 | "import os\n", 76 | "\n", 77 | "# Source: http://www.eye4software.com/resources/stateplane/\n", 78 | "df = pd.read_csv(os.path.join(\"..\", \"data\", \"state_plane_codes.csv\"))" 79 | ], 80 | "language": "python", 81 | "metadata": {}, 82 | "outputs": [] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "What about EPSG codes for New York?\n", 89 | "\n", 90 | "Source: http://www.eye4software.com/resources/stateplane/New_York\n", 91 | "\n", 92 | "**Hint**: You can use the DataFrame to figure this out..." 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "collapsed": false, 98 | "input": [ 99 | "df" 100 | ], 101 | "language": "python", 102 | "metadata": {}, 103 | "outputs": [] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "collapsed": false, 108 | "input": [], 109 | "language": "python", 110 | "metadata": {}, 111 | "outputs": [] 112 | } 113 | ], 114 | "metadata": {} 115 | } 116 | ] 117 | } -------------------------------------------------------------------------------- /exercises/Working with Rasters in Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:3d36ccd8c5d69843ff6f0ad601e0eb2f223b0300f830c8ae235d95062564fdd6" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Workin with Rasters in Python\n", 16 | "\n", 17 | "## Reading raster data from file" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "collapsed": false, 23 | "input": [ 24 | "import os\n", 25 | "import rasterio\n", 26 | "import numpy as np" 27 | ], 28 | "language": "python", 29 | "metadata": {}, 30 | "outputs": [] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "### 1a) Read in the raster (DEM) using rasterio" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "collapsed": false, 42 | "input": [ 43 | "filename = os.path.join('..', 'data', 'nyc_dem_reduce.tif')" 44 | ], 45 | "language": "python", 46 | "metadata": {}, 47 | "outputs": [] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### 1b) Convert bounds to right configuration for plotting" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "collapsed": false, 59 | "input": [], 60 | "language": "python", 61 | "metadata": {}, 62 | "outputs": [] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### 1c) Get the metadata for this raster file" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "collapsed": false, 74 | "input": [], 75 | "language": "python", 76 | "metadata": {}, 77 | "outputs": [] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "### 1d) Get the data for this raster file" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "collapsed": false, 89 | "input": [], 90 | "language": "python", 91 | "metadata": {}, 92 | "outputs": [] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## Plotting Raster Data" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "collapsed": false, 104 | "input": [ 105 | "import matplotlib.pyplot as plt\n", 106 | "%matplotlib inline" 107 | ], 108 | "language": "python", 109 | "metadata": {}, 110 | "outputs": [] 111 | }, 112 | { 113 | 
"cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "### 2) Plot the raster" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "collapsed": false, 122 | "input": [], 123 | "language": "python", 124 | "metadata": {}, 125 | "outputs": [] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "## Working with raster data\n", 132 | "\n", 133 | "According to the Department of Environmental Conservation website, by 2050, we can expect about 19-29 inches of sea level rise in the Lower Hudson Valley & Long Island regions (under a rapid ice-melt scenario)\n", 134 | "\n", 135 | "Source: http://www.dec.ny.gov/energy/45202.html" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "### 3a) Create a new grid (matrix) and set all cells that are less than 29 inches (0.74 meters) to 0" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "collapsed": false, 148 | "input": [], 149 | "language": "python", 150 | "metadata": {}, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "### 3b) What is the percentage of cells *above* sea level that may be impacted?" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "collapsed": false, 163 | "input": [], 164 | "language": "python", 165 | "metadata": {}, 166 | "outputs": [] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "### 3c) Compute a histogram of elevation values" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "collapsed": false, 178 | "input": [], 179 | "language": "python", 180 | "metadata": {}, 181 | "outputs": [] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "### Bonus) Plotting contours\n", 188 | "\n", 189 | "We can potentially improve this figure by adding contours, and highlighting those areas *under* 1 metre above sea level" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "collapsed": false, 195 | "input": [], 196 | "language": "python", 197 | "metadata": {}, 198 | "outputs": [] 199 | } 200 | ], 201 | "metadata": {} 202 | } 203 | ] 204 | } -------------------------------------------------------------------------------- /exercises/Working with Vectors in Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:680f715a61852e6bfe3fa34908b6fc2e66481f8915621ab4e63bcd71474f04e9" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Working with Vectors in Python\n", 16 | "\n", 17 | "## Points in Polygon Analysis\n", 18 | "\n", 19 | "For this first exercise, we're going to go through the steps of performing a simple 'points in polygon' analysis together, using NYC boroughs and public WiFi hotspots to demonstrate the simplicity of spatial and aspatial queries with GeoPandas." 
20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "collapsed": false, 25 | "input": [ 26 | "import geopandas as gpd\n", 27 | "import sqlite3\n", 28 | "import os" 29 | ], 30 | "language": "python", 31 | "metadata": {}, 32 | "outputs": [] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "Since I didn't want to have to rely on getting PostGIS, Spatialite, or some other spatial database up and running on everyone's laptop, we're going to fake it using SQLite.\n", 39 | "\n", 40 | "We'll use plain ol' Pandas to query the database (which contains `WKB` geometries), and then 'upgrade' the `DataFrame` to a `GeoDataFrame` for further processing\\*." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "collapsed": false, 46 | "input": [ 47 | "# Create connection to sqlite database\n", 48 | "conn = sqlite3.connect(os.path.join('..', 'data', 'wifi.sqlite'))\n", 49 | "sql = \"select * from wifi\"\n", 50 | "\n", 51 | "# GeoPandas has its own reference to Pandas for convinience\n", 52 | "wifi = gpd.pd.read_sql(sql, conn)\n", 53 | "wifi.head()" 54 | ], 55 | "language": "python", 56 | "metadata": {}, 57 | "outputs": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "We could add `where` clauses and other queries to the above `SQL` (for example, to select only the 'Free' WiFi hotspots), but for the sake of this demonstration we'll do most of this stuff in Python instead.\n", 64 | "\n", 65 | "Now that we have a `DataFrame` with a `GEOMETRY` column, we need to convert the `WKB` to something that GeoPandas (and Shapely) understands. We'll create a helper function for converting from `WKB` to Shapely `geometry` objects:" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "collapsed": false, 71 | "input": [ 72 | "from shapely import wkb\n", 73 | "from binascii import unhexlify\n", 74 | "from codecs import encode\n", 75 | "\n", 76 | "# Helper function\n", 77 | "def as_geom(hex):\n", 78 | " return wkb.loads(unhexlify(encode(hex, \"hex\")))" 79 | ], 80 | "language": "python", 81 | "metadata": {}, 82 | "outputs": [] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "Now that we have our helper function, let's create a new column in the data frame called `geom`, and then use our awesome `set_geometry` method to upgrade the `DataFrame` to a `GeoDataFrame`:" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "collapsed": false, 94 | "input": [ 95 | "wifi[\"geom\"] = wifi[\"GEOMETRY\"].apply(as_geom)\n", 96 | "del wifi[\"GEOMETRY\"] # Don't need this anymore\n", 97 | "\n", 98 | "# Coerse to GeoDataFrame... cool!\n", 99 | "wifi = wifi.set_geometry(\"geom\", crs={\"init\":\"epsg:2263\"})\n", 100 | "wifi.head()" 101 | ], 102 | "language": "python", 103 | "metadata": {}, 104 | "outputs": [] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "We'll also need a polygon layer to work with. 
For this, we'll use the NYC boroughs shapefile in your data directory:" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "collapsed": false, 116 | "input": [ 117 | "boros_file = os.path.join('..', 'data', 'nybb', 'nybb.shp')\n", 118 | "boros = gpd.read_file(boros_file)\n", 119 | "boros" 120 | ], 121 | "language": "python", 122 | "metadata": {}, 123 | "outputs": [] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "For our points in polygon analysis, we'll focus on the borough of Manhattan:" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "collapsed": false, 135 | "input": [ 136 | "# Set the index to the borough names\n", 137 | "boros.set_index('BoroName', inplace=True)\n", 138 | "boros.sort()\n", 139 | "\n", 140 | "# We'll focus on Manhattan\n", 141 | "poly = boros.geometry[\"Manhattan\"]" 142 | ], 143 | "language": "python", 144 | "metadata": {}, 145 | "outputs": [] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "In order to facilitate our point in polygon analysis, we're going to 'prepare' our selected polygon for efficient queries. Shapely geometries can be processed into a state that supports more efficient batches of operations by creating 'prepared geometries'. For more information on this, see the [Shapely documentation](http://toblerity.org/shapely/manual.html#prepared-geometry-operations).\n", 152 | "\n", 153 | "Note that this ignores any CRS differences, so if your two 'layers' are in different CRSs, you may get unexpected results. Best practice is to ensure that they are in the same CRS (in this case, they are)." 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "collapsed": false, 159 | "input": [ 160 | "from shapely.prepared import prep\n", 161 | "prep_poly = prep(poly)" 162 | ], 163 | "language": "python", 164 | "metadata": {}, 165 | "outputs": [] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "How many WiFi hotspots in manhattan?" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "collapsed": false, 177 | "input": [ 178 | "mht_wifi = wifi.geometry.apply(poly.contains)\n", 179 | "print(sum(mht_wifi))" 180 | ], 181 | "language": "python", 182 | "metadata": {}, 183 | "outputs": [] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "What percentage of WiFi hotspots in Manhattan are **Free**?" 
190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "collapsed": false, 195 | "input": [ 196 | "free = wifi[wifi[\"type\"]==\"Free\"]\n", 197 | "import numpy as np\n", 198 | "\n", 199 | "mht_free = free.geometry.apply(poly.contains)\n", 200 | "print(float(sum(mht_free)) / float(sum(mht_wifi)) * 100.)" 201 | ], 202 | "language": "python", 203 | "metadata": {}, 204 | "outputs": [] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "Ok, let's plot *only* the **Free** WiFi hotspots in the borough of **Manhattan**:" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "collapsed": false, 216 | "input": [ 217 | "import matplotlib.pyplot as plt\n", 218 | "%matplotlib inline\n", 219 | "\n", 220 | "fig = plt.figure(figsize=(8, 8))\n", 221 | "ax = free[mht_free].plot()\n", 222 | "\n", 223 | "# Let's clean things up a bit...\n", 224 | "for i, l in enumerate(ax.lines):\n", 225 | " l.set_markersize(16)\n", 226 | " l.set_markeredgecolor(\"green\")\n", 227 | " l.set_color(\"green\")\n", 228 | "ax.axis('off') # Turn off axes\n", 229 | "plt.show()" 230 | ], 231 | "language": "python", 232 | "metadata": {}, 233 | "outputs": [] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "Now, just because we can, let's make a 'web-map' of this 'analysis'. This time we'll use a non-default basemap. I like [Stamen](http://stamen.com/)'s maptiles, but if you have the tile url, you can use any maptiles you want!" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "collapsed": false, 245 | "input": [ 246 | "import mplleaflet\n", 247 | "# Credit where credit is due...\n", 248 | "attr = 'Map tiles by Stamen Data by OpenStreetMap, under CC BY SA.'\n", 249 | "tile = \"http://tile.stamen.com/%s/{z}/{x}/{y}.jpg\" % \"toner\" # Could also use \"terrain\" or \"watercolor\"\n", 250 | "mplleaflet.display(fig=ax.figure, crs=free.crs, tiles=(tile, attr))" 251 | ], 252 | "language": "python", 253 | "metadata": {}, 254 | "outputs": [] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "## Geology Summaries\n", 261 | "\n", 262 | "This second exercise is self-directed. Try to perform the following tasks using a shapefile of the geology of New Jersey that was downloaded from the USGS." 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "collapsed": false, 268 | "input": [ 269 | "geol_file = os.path.join(\"..\", \"data\", \"njgeol\", \"njgeol_poly_dd.shp\")" 270 | ], 271 | "language": "python", 272 | "metadata": {}, 273 | "outputs": [] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "### 1a) Read in the `nygeol_poly_dd.shp` shapefile using GeoPandas" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "collapsed": false, 285 | "input": [], 286 | "language": "python", 287 | "metadata": {}, 288 | "outputs": [] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": {}, 293 | "source": [ 294 | "### 1b) What is the CRS for this 'layer'? Is it 'projected'?" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "collapsed": false, 300 | "input": [], 301 | "language": "python", 302 | "metadata": {}, 303 | "outputs": [] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "### 2a) Create a list of all the unique rock types in the data (in properties ROCKTYPE1 and ROCKTYPE2). How many are there?" 
310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "collapsed": false, 315 | "input": [], 316 | "language": "python", 317 | "metadata": {}, 318 | "outputs": [] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "### 2b) Calculate the total area of each primary rocktype (ROCKTYPE1) by summing the AREA column.\n", 325 | "\n", 326 | "Note: The areas are in square degrees, not true area." 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "collapsed": false, 332 | "input": [], 333 | "language": "python", 334 | "metadata": {}, 335 | "outputs": [] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "### 2c) What would be a better way to compute these areas?\n", 342 | "\n", 343 | "Hint: The New Jersey State Plane EPSG code is 32111." 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "collapsed": false, 349 | "input": [], 350 | "language": "python", 351 | "metadata": {}, 352 | "outputs": [] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "### 2d) Recompute the areas for each ROCKTYPE1 using this method." 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "collapsed": false, 364 | "input": [], 365 | "language": "python", 366 | "metadata": {}, 367 | "outputs": [] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "### 3a) Plot the polygons (using any packages you want), coloring by primary rock type.\n", 374 | "\n", 375 | "Keep the plotting simple, and you can use random colors for the rock types." 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "collapsed": false, 381 | "input": [], 382 | "language": "python", 383 | "metadata": {}, 384 | "outputs": [] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "### Bonus) How would you share this 'map' with colleagues? It would be useful if they could also edit the map themselves.\n", 391 | "\n", 392 | "Warning: This is relatively large vector file (for the web) and may be extremely slow, maybe you should subset the data first!" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "collapsed": false, 398 | "input": [ 399 | "import numpy as np\n", 400 | "\n", 401 | "# Take a random subset\n", 402 | "rand = np.random.randint(0, geol.shape[0], 100)\n", 403 | "subset = geol.iloc[rand]" 404 | ], 405 | "language": "python", 406 | "metadata": {}, 407 | "outputs": [] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "collapsed": false, 412 | "input": [ 413 | "import geojsonio\n", 414 | "\n", 415 | "# Other commands go here" 416 | ], 417 | "language": "python", 418 | "metadata": {}, 419 | "outputs": [] 420 | }, 421 | { 422 | "cell_type": "markdown", 423 | "metadata": {}, 424 | "source": [ 425 | "If geojson.io isn't working (it might not work nicely with Safari?), go to the following url (yours will differ) to find the anonymous gist:" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "collapsed": false, 431 | "input": [ 432 | "print(\"https://gist.github.com/anonymous/%s\" % res[res.find(\"gist:/\")+6:])" 433 | ], 434 | "language": "python", 435 | "metadata": {}, 436 | "outputs": [] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "\\*In future releases of GeoPandas, this will work much more smoothly, but for now we're 'hacking it'." 
443 | ] 444 | } 445 | ], 446 | "metadata": {} 447 | } 448 | ] 449 | } -------------------------------------------------------------------------------- /install.md: -------------------------------------------------------------------------------- 1 | # Installation Instructions 2 | 3 | If you are planning to participate in the "Geospatial data in Python: Database, desktop, and the web" tutorial associated with this Github repository, the following are the suggested installation instructions. We *will* take time during the session to go over installation issues, but if possible, at least *try* to have some of these installation steps completed before we start. If you are unsure of how/where to start, no problem, just show up to the session (or better yet, the 'install party' beforehand) and we'll get you going! 4 | 5 | ## Base Install 6 | 7 | * First, check the [Known Issues](#known-issues) below for OS-specific Initial Setups. 8 | * Install [`Anaconda`](http://continuum.io/downloads) (I'm using Anaconda-2.1.0 [64-Bit - Python 2.7]) 9 | * We're using `Anaconda` because it helps us to keep our `Python` environment clean and manageable. If you prefer, you can also use [`Canopy`](https://store.enthought.com/downloads/) or an [alternative `Python` distribution](http://www.scipy.org/install.html#scientific-python-distributions). 10 | 11 | * Create a new virtual environment (Skip this step on Windows, trust me, it'll be easier): 12 | 13 | ```bash 14 | conda create -n pygeo pandas ipython-notebook matplotlib 15 | source activate pygeo 16 | ``` 17 | * Install `pip` for later (`pip` allows us to install additional `Python` packages not available via `conda`: 18 | ```bash 19 | conda install pip 20 | ``` 21 | 22 | * If you don't already have it, you might need to install `cython` 23 | 24 | ```bash 25 | conda install cython 26 | ``` 27 | 28 | * Install required packages (on Windows, you might need to install [binaries from here](http://www.lfd.uci.edu/~gohlke/pythonlibs/) for `shapely`, `pyproj`, and `rasterio`). 29 | 30 | ```bash 31 | pip install shapely 32 | pip install fiona 33 | pip install PIL 34 | pip install pyproj 35 | pip install descartes 36 | pip install rasterio 37 | ``` 38 | * You can use `pillow` in place of `PIL` if you like. 39 | * If any of the above commands cause an error, you can try using `conda` instead (replace `PACKAGE` below with the package you are trying to install): 40 | 41 | ```bash 42 | conda install PACKAGE 43 | ``` 44 | * or check to see if a `conda` package exists using `binstar`: 45 | 46 | ```bash 47 | conda install binstar 48 | binstar search -t conda PACKAGE 49 | ``` 50 | * Make sure you find one for your OS. You can get more info about a package using the following command, which will also explain how to install the package: 51 | 52 | ```bash 53 | binstar show 54 | ``` 55 | * For example, you could used the following to install `pyproj` on OS X: 56 | 57 | ```bash 58 | conda install --channel https://conda.binstar.org/asmeurer pyproj 59 | ``` 60 | 61 | * Install `geopandas` (important!) 62 | 63 | ```bash 64 | pip install geopandas 65 | ``` 66 | 67 | ## (Web)mapping Packages 68 | 69 | * Install `cartopy` (on Windows, use [binaries from here](http://www.lfd.uci.edu/~gohlke/pythonlibs/) for `cartopy`.) 70 | 71 | ```bash 72 | pip install pyshp 73 | pip install cartopy 74 | ``` 75 | 76 | * Install `mplleaflet` (for making slippy maps). See the `Known Issues` below about install `git`. 
77 | 78 | ```bash 79 | pip install git+git://github.com/mpld3/mplexporter.git 80 | pip install git+git://github.com/jwass/mplleaflet.git 81 | ``` 82 | 83 | * Install `geojson.py` for shooting data to the web! 84 | 85 | ```bash 86 | pip install git+git://github.com/jwass/geojsonio.py.git 87 | ``` 88 | 89 | ## Optional Packages 90 | 91 | * Install `basemap`, a common package for making static maps (I didn't install this): 92 | 93 | ```bash 94 | conda install basemap 95 | ``` 96 | 97 | * Install `psycopg2` for interacting with PostGIS (We don't need this, but I will do a demo with this): 98 | 99 | ```bash 100 | pip install psycopg2 101 | ``` 102 | 103 | ## Install QGIS 104 | 105 | * Go to the [official QGIS page](http://qgis.org/en/site/forusers/download.html) for details, or install via `brew` on OSX, `apt-get` on Linux, or `OSGeo4W` on Windows (Either way, this install will likely take quite a while). 106 | * I installed this on OSX via `homebrew` with: 107 | 108 | ```bash 109 | brew tap osgeo/osgeo4mac 110 | brew install qgis-26 111 | ``` 112 | * On Linux, if you follow the `Linux Initial Setup` below first, you should be able to install QGIS with: 113 | 114 | ```bash 115 | sudo apt-get install qgis 116 | ``` 117 | 118 | * On Windows, follow the instructions on the official QGIS page. 119 | 120 | ## Alternative Install Guide 121 | 122 | * Here is the [install guide](https://github.com/kjordahl/SciPy2013#installation-instructions) from a [similar course](https://github.com/kjordahl/SciPy2013) last year. 123 | 124 | ## Known Issues 125 | 126 | * In *some* cases, it may be better to `pip install shapely` than to `conda install shapely`, particularly when using `cartopy`. 127 | * If `cartopy` and `shapely` don't place nice together, it may be because of version issues. Please take a look here for a possible fix: https://github.com/carsonfarmer/python_geospatial/issues/3 128 | 129 | ### Linux Issues 130 | 131 | * Initial Setup 132 | 133 | ```bash 134 | sudo apt-get install git 135 | sudo add-apt-repository -y ppa:ubuntugis/ppa 136 | sudo apt-get update 137 | sudo apt-get install -y gdal-bin libgdal-dev 138 | ``` 139 | 140 | * On newer versions of Ubuntu (14.04 +), you may want to use the *unstable* repo (which is actually quite stable): 141 | 142 | ```bash 143 | sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable 144 | ``` 145 | * On vanilla Ubuntu, you might need to install `g++` before installing `rasterio` and others: 146 | 147 | ```bash 148 | sudo apt-get install g++ 149 | ``` 150 | 151 | ### OS X Issues 152 | 153 | * Initial Setup 154 | * First install [`brew`](http://brew.sh/): `ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)"` 155 | * Then update and install `gdal` (You might be able to skip this step if you `conda install fiona` [see above]). 156 | 157 | ```bash 158 | brew doctor 159 | brew update 160 | brew install git 161 | brew tap osgeo/osgeo4mac 162 | brew install gdal 163 | brew install qgis 164 | ``` 165 | 166 | * On Mavericks+ if you don't already have developer tools installed, `pip install pyproj` will 167 | probably fail (due to missing `gcc`) and then ask you if you want to install them, so click 'yes' and 168 | then rerun `pip install pyproj`. 169 | 170 | * In *some* cases, importing `shapely` on OS X might fail while loading the GEOS library: 171 | ```bash 172 | OSError: Could not find library c or load any of its variants. 
173 | ``` 174 | * This can be fixed by using a newer version, or worked around by setting the following environment variable (add to your `.bash_profile`; see [this issue](https://github.com/cfarmer/python_geospatial/issues/3) for details): 175 | 176 | ```bash 177 | export DYLD_FALLBACK_LIBRARY_PATH=$(HOME)/lib:/usr/local/lib:/lib:/usr/lib 178 | ``` 179 | 180 | ### Windows Issues 181 | 182 | * Initial Setup 183 | * Download and install GDAL [from here](http://www.lfd.uci.edu/~gohlke/pythonlibs/#gdal) 184 | * If you use this one, then you should also use the `fiona` binary from that site. Otherwise, you'll have to play around with system PATHs etc. Others have had success with [OSGeo4W](http://trac.osgeo.org/osgeo4w/), which includes many important libraries and their Python bindings. 185 | 186 | * If you don't already have it, you'll need to install `git`: 187 | * Download and install git [from here](http://www.git-scm.com/downloads). 188 | * When installing, make sure you choose to "Use Git from the Windows Command Prompt" (You may also want to install optional Unix tools). 189 | * You can also download GitHub for Windows [here](https://windows.github.com/). 190 | 191 | * On Windows, `source` is not needed when activating a virtual environment if you are using the Anaconda Command Prompt: `activate scipygis`. 192 | -------------------------------------------------------------------------------- /notebooks/Notebook0a.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:d69e0a34de096f0bdc712f85439dc3c254a3aa615151c14a7031a924e038cad4" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Geospatial Data in Python: Database, Desktop, and the Web\n", 16 | "## Tutorial (Part 0a)" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "# Getting started with Python" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Once you have the required packages installed, you are ready to get started and try out some examples. However, to use the powerful range of tools, functions, commands, and spatial libraries available in Python, you first need to learn a little bit about the syntax and meaning of Python commands. Once you have learned this, operations become simple to perform." 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Invoking an Operation\n", 38 | "\n", 39 | "Complex computations are built up from simpler computations. This may seem obvious, but it is a powerful idea. An **algorithm** is just a description of a computation in terms of other computations that you already know how to perform. To help distinguish between the computation as a whole and the simpler parts, it is helpful to introduce a new word: an **operator** performs a computation.\n", 40 | "\n", 41 | "It's helpful to think of the computation carried out by an operator as involving four parts:\n", 42 | "\n", 43 | "1. The name of the operator\n", 44 | "2. The input arguments\n", 45 | "3. The output value\n", 46 | "4. Side effects" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "A typical operation takes one or more **input arguments** and uses the information in these to produce an **output value**. 
Along the way, the computer might take some action: display a graph, store a file, make a sound, etc. These actions are called **side effects**.\n", 54 | "\n", 55 | "Since Python is a general-purpose programming language, we usually need to `import` special packages for doing specific things (like working with spatial data). You can think of this as adding words to the language. For Scientific Python, the most important library that we need is `numpy` (Numerical Python), which can be loaded like this:" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "collapsed": false, 61 | "input": [ 62 | "import numpy as np" 63 | ], 64 | "language": "python", 65 | "metadata": {}, 66 | "outputs": [] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "To tell the computer to perform a computation - call this **invoking an operation** or giving a **command** - you need to provide the name and the input arguments in a specific format. The computer then returns the output value. For example, the command `np.sqrt(25)` invokes the square root operator (named `sqrt` from the `numpy` library) on the argument `25`. The output from the computation will, of course, be `5`.\n", 73 | "\n", 74 | "The syntax of invoking an operation consists of the operator's name, followed by round parentheses. The input arguments go inside the parentheses.\n", 75 | "\n", 76 | "The software program that you use to invoke operators is called an **interpreter** (the interpreter is the program you are running when you start Python). You enter your commands as a 'dialog' between you and the interpreter (just like when converting between any two languages!). Commands can be entered as part of a script (a text file with a list of commands to perform) or directly at a 'command prompt':" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "collapsed": false, 82 | "input": [ 83 | "np.sqrt(25)" 84 | ], 85 | "language": "python", 86 | "metadata": {}, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "In the above situation, the 'prompt' is `In [2]:`, and the 'command' is `np.sqrt(25)`. When you press 'Enter', the interpreter reads your command and performs the computation. For commands such as the one above, the interpreter will print the output value from the computation:" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "collapsed": false, 99 | "input": [ 100 | "np.sqrt(25)" 101 | ], 102 | "language": "python", 103 | "metadata": {}, 104 | "outputs": [] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "In the above example, the 'output marker' is `Out[3]:`, and the output value is `5.0`. If we were working at the command-line right now, the dialog would continue as the interpreter prints another prompt and waits for your further command, here however, we just move to the next code cell." 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "Often, operations involve more than one argument. The various arguments are separated by commas. 
For example, here is an operation named `arange` from the `numpy` library that produces a range of numbers (increasing values between 3 and 10):" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "collapsed": false, 123 | "input": [ 124 | "np.arange(3, 10)" 125 | ], 126 | "language": "python", 127 | "metadata": {}, 128 | "outputs": [] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "The first argument tells where to start the range and the second tells where to end it. The order of the arguments is important. For instance, *here* is the range produced when 10 is the first argument, 3 is the second, and the third is -1 (decreasing values between 10 and 3):" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "collapsed": false, 140 | "input": [ 141 | "np.arange(10, 3, -1)" 142 | ], 143 | "language": "python", 144 | "metadata": {}, 145 | "outputs": [] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "For some operators, particularly those that have many input arguments, some of the arguments can be referred to by name rather than position. This is particularly useful when the named argument has a sensible default value. For example, the `arange` operator from the `numpy` library can be instructed what type of output values to produce (integers, floats, etc). This is accomplished using an argument named `dtype`:" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "collapsed": false, 157 | "input": [ 158 | "np.arange(10, 3, -1, dtype='float')" 159 | ], 160 | "language": "python", 161 | "metadata": {}, 162 | "outputs": [] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "Note that all the values in the range now have decimal places. Depending on the circumstances, all four parts of an operation need not be present. For example, the `ctime` operation from the `time` library returns the current time and date; no input arguments are required:" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "collapsed": false, 174 | "input": [ 175 | "import time\n", 176 | "time.ctime()" 177 | ], 178 | "language": "python", 179 | "metadata": {}, 180 | "outputs": [] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "In the above example, we first imported the `time` library, which provides a series of commands that help us work with dates and times. Next, even though there are no arguments, the parentheses are still used when calling the `ctime` command. Think of the pair of parentheses as meaning, '*do this*'." 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "### Naming and Storing Values\n", 194 | "\n", 195 | "Often the value returned by an operation will be used later on. Values can be stored for later use with the **assignment operator**. This has a different syntax that reminds the user that a value is being stored. Here's an example of a simple assignment:" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "collapsed": false, 201 | "input": [ 202 | "x = 16" 203 | ], 204 | "language": "python", 205 | "metadata": {}, 206 | "outputs": [] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "The command has stored the value 16 under the name `x`. The syntax is always the same: an equal sign (=) with a name on the left side and a value on the right. \n", 213 | "Such stored values are called **objects**. 
Making an assignment to an object defines the object. Once an object has been defined, it can be referred to and used in later computations. Notice that an assignment operation does not return a value or display a value. Its sole purpose is to have the side effects of defining the object and thereby storing a value under the object's name.\n", 214 | "\n", 215 | "To refer to the value stored in the object, just use the object's name itself. For instance:" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "collapsed": false, 221 | "input": [ 222 | "x" 223 | ], 224 | "language": "python", 225 | "metadata": {}, 226 | "outputs": [] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "Doing a computation on the value store in an object is much the same (and provides and extremely rich syntax for performing complex calculations):" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "collapsed": false, 238 | "input": [ 239 | "np.sqrt(x)" 240 | ], 241 | "language": "python", 242 | "metadata": {}, 243 | "outputs": [] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "You can create as many objects as you like and give them names that remind you of their purpose. Some examples: `wilma`, `ages`, `temp`, `dog_houses`, `foo3`. There *are* some general rules for object names:\n", 250 | "\n", 251 | "* Use only letters and numbers and 'underscores' (_)\n", 252 | "* Do NOT use spaces anywhere in the name (Python won't let you)\n", 253 | "* A number cannot be the first character in the name\n", 254 | "* Capital letters are treated as distinct from lower-case letters (i.e., Python is *case-sensitive*)\n", 255 | " * the objects named `wilma`, `Wilma`, and `WILMA` are all different\n", 256 | "* If possible, use an 'underscore' between words (i.e., `my_object`)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "For the sake of readability, keep object names short. But if you really must have an object named something like `ages_of_children_from_the _clinical_trial`, feel free (it's just more typing for you later!).\n", 264 | "\n", 265 | "Objects can store all sorts of things, for example a range of numbers:" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "collapsed": false, 271 | "input": [ 272 | "x = np.arange(1, 7)" 273 | ], 274 | "language": "python", 275 | "metadata": {}, 276 | "outputs": [] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "When you assign a new value to an existing object, as just done to `x` above, the former values of that object is erased from the computer memory. The former value of `x` was 16, but after the new assignment above, it is:" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "collapsed": false, 288 | "input": [ 289 | "x" 290 | ], 291 | "language": "python", 292 | "metadata": {}, 293 | "outputs": [] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "The value of an object is changed only via the assignment operator. Using an object in a computation does not change the value. 
For example, suppose you invoke the square-root operator on `x`:" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "collapsed": false, 305 | "input": [ 306 | "np.sqrt(x)" 307 | ], 308 | "language": "python", 309 | "metadata": {}, 310 | "outputs": [] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "The square roots have been returned as a value, but this doesn't change the value of `x`:" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "collapsed": false, 322 | "input": [ 323 | "x" 324 | ], 325 | "language": "python", 326 | "metadata": {}, 327 | "outputs": [] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "\n", 334 | "An assignment command like x=np.sqrt(x) can be confusing to people who are used to algebraic notation. In algebra, the equal sign describes a relationship between the left and right sides. So, $x = \\sqrt{x}$ tells us about how the quantity $x$ and the quantity $\\sqrt{x}$ are related. Students are usually trained to 'solve' such a relationship, going through a series of algebraic steps to find values for $x$ that are consistent with the mathematical statement (for $x = \\sqrt{x}$, the solutions are $x = 0$ and $x = 1$). In contrast, the assignment command x = np.sqrt(x) is a way of replacing the previous values stored in x with new values that are the square-root of the old ones.\n", 335 | "" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "If you want to change the value of `x`, you need to use the assignment operator:" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "collapsed": false, 348 | "input": [ 349 | "x = np.sqrt(x)" 350 | ], 351 | "language": "python", 352 | "metadata": {}, 353 | "outputs": [] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": {}, 358 | "source": [ 359 | "### Connecting Computations\n", 360 | "\n", 361 | "The brilliant thing about organizing operators in terms of unput arguments and output values is that the output of one operator can be used as an input to another. This lets complicated computations be built out of simpler ones.\n", 362 | "\n", 363 | "For example, suppose you have a list of 10000 voters in a precinct and you want to select a random sample of 20 of them for a survey. The `np.arange` operator can be used to generate a set of 10000 choices. The `np.random.choice` operator can then be used to select a subset of these values at random.\n", 364 | "\n", 365 | "One way to connect the computations is by using objects to store the intermediate outputs:" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "collapsed": false, 371 | "input": [ 372 | "choices = np.arange(1, 10000)\n", 373 | "np.random.choice(choices, 20, replace=False) # sample _without_ replacement" 374 | ], 375 | "language": "python", 376 | "metadata": {}, 377 | "outputs": [] 378 | }, 379 | { 380 | "cell_type": "markdown", 381 | "metadata": {}, 382 | "source": [ 383 | "You can also pass the output of an operator *directly* as an argument to another operator. 
Here's another way to accomplish exactly the same thing as the above (note that the values will differ because we are performing a *random* sample):" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "collapsed": false, 389 | "input": [ 390 | "np.random.choice(np.arange(1, 10000), 20, replace=False)" 391 | ], 392 | "language": "python", 393 | "metadata": {}, 394 | "outputs": [] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "### Numbers and Arithmetic\n", 401 | "\n", 402 | "The `Python` language has a concise notation for arithmetic that looks very much like the traditional one:" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "collapsed": false, 408 | "input": [ 409 | "7. + 2." 410 | ], 411 | "language": "python", 412 | "metadata": {}, 413 | "outputs": [] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "collapsed": false, 418 | "input": [ 419 | "3. * 4." 420 | ], 421 | "language": "python", 422 | "metadata": {}, 423 | "outputs": [] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "collapsed": false, 428 | "input": [ 429 | "5. / 2." 430 | ], 431 | "language": "python", 432 | "metadata": {}, 433 | "outputs": [] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "collapsed": false, 438 | "input": [ 439 | "3. - 8." 440 | ], 441 | "language": "python", 442 | "metadata": {}, 443 | "outputs": [] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "collapsed": false, 448 | "input": [ 449 | "-3." 450 | ], 451 | "language": "python", 452 | "metadata": {}, 453 | "outputs": [] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "collapsed": false, 458 | "input": [ 459 | "5.**2. # same as 5^2 (or 5 to the power of 2)" 460 | ], 461 | "language": "python", 462 | "metadata": {}, 463 | "outputs": [] 464 | }, 465 | { 466 | "cell_type": "markdown", 467 | "metadata": {}, 468 | "source": [ 469 | "Arithmetic operators, like any other operators, can be connected to form more complicated computations. For instance:" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "collapsed": false, 475 | "input": [ 476 | "8. + 4. / 2." 477 | ], 478 | "language": "python", 479 | "metadata": {}, 480 | "outputs": [] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "metadata": {}, 485 | "source": [ 486 | "The a human reader, the command `8+4/2` might seem ambiguous. Is it intended to be `(8+4)/2` or `8+(4/2)`? The computer uses unambiguous rules to interpret the expression, but it's a good idea for you to use parentheses so that you can make sure that what you *intend* is what the computer carries out:" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "collapsed": false, 492 | "input": [ 493 | "(8. + 4.) / 2." 494 | ], 495 | "language": "python", 496 | "metadata": {}, 497 | "outputs": [] 498 | }, 499 | { 500 | "cell_type": "markdown", 501 | "metadata": {}, 502 | "source": [ 503 | "Traditional mathematical notations uses superscripts and radicals to indicate exponentials and roots, e.g. $3^2$ or $\\sqrt{3}$ or $\\sqrt[3]{8}$. This special typography doesn't work well with an ordinary keyboard, so `Python` and most other computer languages uses a different notation:" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "collapsed": false, 509 | "input": [ 510 | "3.**2." 511 | ], 512 | "language": "python", 513 | "metadata": {}, 514 | "outputs": [] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "collapsed": false, 519 | "input": [ 520 | "np.sqrt(3.) 
# or 3.**0.5" 521 | ], 522 | "language": "python", 523 | "metadata": {}, 524 | "outputs": [] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "collapsed": false, 529 | "input": [ 530 | "8.**(1./3.)" 531 | ], 532 | "language": "python", 533 | "metadata": {}, 534 | "outputs": [] 535 | }, 536 | { 537 | "cell_type": "markdown", 538 | "metadata": {}, 539 | "source": [ 540 | "There is a large set of mathematical functions: exponentials, logs, trigonometric and inverse trigonometric functions, etc. Some examples:\n", 541 | "\n", 542 | "\n", 543 | "\n", 544 | "\n", 545 | "\n", 546 | "\n", 547 | "\n", 548 | "\n", 549 | "\n", 550 | "\n", 551 | "\n", 552 | " \n", 553 | " \n", 554 | "\n", 555 | "\n", 556 | " \n", 557 | " \n", 558 | "\n", 559 | "\n", 560 | " \n", 561 | " \n", 562 | "\n", 563 | "\n", 564 | " \n", 565 | " \n", 566 | "\n", 567 | "\n", 568 | " \n", 569 | " \n", 570 | "\n", 571 | "\n", 572 | " \n", 573 | " \n", 574 | "\n", 575 | "\n", 576 | " \n", 577 | " \n", 578 | "\n", 579 | "
Traditional             | Python
$e^2$                   | np.exp(2)
$\log_{e}(100)$         | np.log(100)
$\log_{10}(100)$        | np.log10(100)
$\log_{2}(100)$         | np.log2(100)
$\cos(\frac{\pi}{2})$   | np.cos(np.pi/2)
$\sin(\frac{\pi}{2})$   | np.sin(np.pi/2)
$\tan(\frac{\pi}{2})$   | np.tan(np.pi/2)
$\cos^{-1}(-1)$         | np.arccos(-1)
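A quick check of a few of these (note that NumPy spells the inverse-trig functions `arccos`, `arcsin`, `arctan`) — a minimal sketch:

```python
import numpy as np

print(np.exp(2))          # e squared
print(np.log10(100))      # base-10 logarithm -> 2.0
print(np.cos(np.pi / 2))  # ~0, up to floating-point rounding (see below)
print(np.arccos(-1))      # pi
```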
\n", 580 | "\n", 581 | "Numbers can be written in **scientific notation**. For example, the 'universal gravitational constant' that describes the gravitational attraction between masses is $6.67428 \\times 10^{11}$ (with units meters-cubed per kilogram per second squared). In the computer notation, this would be written as `6.67428e-11`. The Avogadro constant, which gives the number of atoms in a mole, is $6.02214179 \\times 10^{23}$ per mole, or `6.02214179e+23`.\n", 582 | "\n", 583 | "The computer language does not directly support the recording of units. This is unfortunate, since in the real world numbers often have units and the units matter. For example, in 1999 the Mars Climate Orbiter crashed into Mars because the design engineers specified the engine's thrust in units of pounds, while the guidance engineers thought the units were newtons.\n", 584 | "\n", 585 | "Computer arithmetic is accurate and reliable, but it often involves very slight rounding of numbers. Ordinarily, this is not noticeable. However, it can become apparent in some calculations that produce results that are (near) zero. For example, mathematically, $sin(\\pi) = 0$, however, the computer does not duplicate the mathematical relationship exactly:\n", 586 | "\n", 587 | "\n", 590 | "\n", 591 | "[pint]: https://pint.readthedocs.org/en/latest/\n", 592 | "[quantities]: http://pythonhosted.org/quantities/\n", 593 | "[units]: https://pypi.python.org/pypi/units/\n", 594 | "[sympy.physics.units]: http://docs.sympy.org/latest/modules/physics/units.html\n", 595 | "[etc]: http://conference.scipy.org/scipy2013/presentation_detail.php?id=174" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "collapsed": false, 601 | "input": [ 602 | "np.sin(np.pi)" 603 | ], 604 | "language": "python", 605 | "metadata": {}, 606 | "outputs": [] 607 | }, 608 | { 609 | "cell_type": "markdown", 610 | "metadata": {}, 611 | "source": [ 612 | "Whether a number like this is properly interpreted as 'close to zero' depends on the context and, for quantities that have units, on the units themselves. For instance, the unit 'parsec' is used in astronomy in reporting distances between stars. The closest start to the Sun is Proxima, at a distance of 1.3 parsecs. A distance of $1.22 \\times 10^{-16}$ parsecs is tiny in astronomy but translates to about 2.5 meters - not so small on the human scale. In statistics, many calculations relate to probabilities which are always in the range 0 to 1. On this scale, `1.22e-16` is very close to zero.\n", 613 | "\n", 614 | "There are several 'special' numbers in the `Python` world; two of which are `inf`, which stands for $\\infty$ (infinity), and `nan`, which stands for 'not a number' (nan results when a numerical operation isn't define), for instance:" 615 | ] 616 | }, 617 | { 618 | "cell_type": "markdown", 619 | "metadata": {}, 620 | "source": [ 621 | "\n", 622 | "Mathematically oriented readers will wonder why Python should have any trouble with a computation like $\\sqrt{-9}$; the result is the imaginary number $3\\jmath$ (imaginary numbers may be represented by a $\\jmath$ or a $\\imath$, depending on the field). Python works with complex numbers, but you have to explicitly tell the system that this is what you want to do. To calculate $\\sqrt{-9}$ for example, simply use np.sqrt(-9+0j).\n", 623 | "" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "collapsed": false, 629 | "input": [ 630 | "np.float64(1.) / 0." 
631 | ], 632 | "language": "python", 633 | "metadata": {}, 634 | "outputs": [] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "collapsed": false, 639 | "input": [ 640 | "np.float64(0.) / 0." 641 | ], 642 | "language": "python", 643 | "metadata": {}, 644 | "outputs": [] 645 | }, 646 | { 647 | "cell_type": "markdown", 648 | "metadata": {}, 649 | "source": [ 650 | "### Types of Objects\n", 651 | "\n", 652 | "Most of the examples used so far have dealt with numbers. But computers work with other kinds of information as well: text, photographs, sounds, sets of data, and so on. The word **type** is used to refer to the *kind* of information. Modern computer languages support a great variety of types. It's important to know about the types of data because operators expect their input arguments to be of specific types. When you use the wrong type of input, the computer might not be able to process your command." 653 | ] 654 | }, 655 | { 656 | "cell_type": "markdown", 657 | "metadata": {}, 658 | "source": [ 659 | "\n", 660 | "In Python, data frames are not 'built in' as part of the basic language, but the excellent ['pandas'][pandas] library provides data frames and a whole slew of other functionality for researchers doing data analysis with Python. We will be learning more about 'pandas' comming up.\n", 661 | "\n", 662 | "\n", 663 | "[pandas]: http://pandas.pydata.org/" 664 | ] 665 | }, 666 | { 667 | "cell_type": "markdown", 668 | "metadata": {}, 669 | "source": [ 670 | "#### A Note on Strings\n", 671 | "\n", 672 | "Whenever you refer to an object name, make sure that you don't use quotes. For example, in the following, we are first assigning the string `\"python\"` to the `name` object, and then returning (and printing automatically) the `name` object." 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "collapsed": false, 678 | "input": [ 679 | "name = \"python\"\n", 680 | "name" 681 | ], 682 | "language": "python", 683 | "metadata": {}, 684 | "outputs": [] 685 | }, 686 | { 687 | "cell_type": "markdown", 688 | "metadata": {}, 689 | "source": [ 690 | "If you make a command with the object name in quotes, it won't be treated as referring to an object. Instead, it will merely mean the text itself:" 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "collapsed": false, 696 | "input": [ 697 | "\"name\"" 698 | ], 699 | "language": "python", 700 | "metadata": {}, 701 | "outputs": [] 702 | }, 703 | { 704 | "cell_type": "markdown", 705 | "metadata": {}, 706 | "source": [ 707 | "Similarly, if you omit the quotation marks from around the text, the computer will treat it as if it were an object name and will look for the object of that name. For instance, the following command directs the computer to look up the value contained in an object named `python` and insert that value into the object `name`:" 708 | ] 709 | }, 710 | { 711 | "cell_type": "code", 712 | "collapsed": false, 713 | "input": [ 714 | "name = python" 715 | ], 716 | "language": "python", 717 | "metadata": {}, 718 | "outputs": [] 719 | }, 720 | { 721 | "cell_type": "markdown", 722 | "metadata": {}, 723 | "source": [ 724 | "As it happens, there was no object named `python` because it had not been defined by any previous assignment command. So, the computer generated an error." 
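If you want to guard against this kind of mistake, you can catch the resulting `NameError` — a small illustrative sketch, not part of the original notebook:

```python
try:
    name = python  # no object called `python` has been defined yet
except NameError as err:
    print("Error: %s" % err)
```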
725 | ] 726 | } 727 | ], 728 | "metadata": {} 729 | } 730 | ] 731 | } -------------------------------------------------------------------------------- /notebooks/Notebook0b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:af2901089fe431c3c9794d6878e7eeff568c2bc204f2f019c92180eac7e09600" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Geospatial Data in Python: Database, Desktop, and the Web\n", 16 | "## Tutorial (Part 0b)" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "# Working with Data in Python\n", 24 | "\n", 25 | "Data is central to GIS, and the tabular arrangement of data is very common. Accordingly, Python provides a large number of ways to read in tabular data. These vary depending on how the data are stored, where they are located, etc. To help keep things as simple as possible, the 'pandas' Python library provides an operator, `read_csv()` that allows you to access data \ufb01les in tabular format on your computer as well as data stored in online repositories, or one that a course instructor might set up for his or her students.\n", 26 | "\n", 27 | "If you successfully ran the install script, then you already have 'pandas' installed. Now, you simply need to `import pandas` in order to to use `read_csv()`, as well as a variety of other 'pandas' operators that you will encounter later (it is also usually a good idea to `import numpy as np` at the same time that we `import pandas as pd`)." 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "collapsed": false, 33 | "input": [ 34 | "import pandas as pd\n", 35 | "import numpy as np" 36 | ], 37 | "language": "python", 38 | "metadata": {}, 39 | "outputs": [] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "You need do this only once in each session of Python, and on systems such as IPython, the library will sometimes be reloaded automatically (if you get an error message, it\u2019s likely that the 'pandas' library has not been installed on your system.)\n", 46 | "\n", 47 | "Reading in a data table with `read_csv()` is simply a matter of knowing the name (and location) of the data set. For instance, one data table I use in my statistics classes is `\"swim100m.csv\"`. To read in this data table and create an object in Python that contains the data, use a command like this:" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "collapsed": false, 53 | "input": [ 54 | "swim = pd.read_csv(\"http://www.mosaic-web.org/go/datasets/swim100m.csv\")" 55 | ], 56 | "language": "python", 57 | "metadata": {}, 58 | "outputs": [] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "The csv part of the name in `\"swim100m.csv\"` indicates that the \ufb01le has been stored in a particular data format, comma-separated values that is handled by spreadsheet software as well as many other kinds of software. The part of this command that requires creativity is choosing a name for the Python object that will hold the data. In the above command it is called `swim`, but you might prefer another name (e.g., `s` or `sdata` or even `ralph`). Of course, it's sensible to choose names that are short, easy to type and remember, and remind you what the contents of the object are about." 
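The same operator works for files stored locally; for example, the repository's own `data/katrina.csv` could be read like this — a small sketch, assuming the notebook is run from the `notebooks` folder:

```python
import os
import pandas as pd

# Read the hurricane-track coordinates (lon, lat) shipped with this repository
katrina = pd.read_csv(os.path.join("..", "data", "katrina.csv"))
katrina.head()
```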
65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Data Frames\n", 72 | "\n", 73 | "The type of Python object created by `read_csv()` is called a data frame and is essentially a tabular layout. To illustrate, here are the \ufb01rst several cases of the `swim` data frame created by the previous use of `read_csv()`:\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "collapsed": false, 79 | "input": [ 80 | "swim.head()" 81 | ], 82 | "language": "python", 83 | "metadata": {}, 84 | "outputs": [] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "Note that the `head()` function, one of several functions built-into 'pandas' data frames, is a function of the Python object (data frame) itself; not from the main 'pandas' library.\n", 91 | "\n", 92 | "Data frames, like tabular data generally, involve variables and cases. In 'pandas' data frames, each of the variables is given a name. You can refer to the variable by name in a couple of di\ufb00erent ways. To see the variable names in a data frame, something you might want to do to remind yourself of how names a spelled and capitalized, use the `columns` attribute of the data frame object:" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "collapsed": false, 98 | "input": [ 99 | "swim.columns" 100 | ], 101 | "language": "python", 102 | "metadata": {}, 103 | "outputs": [] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "Note that we have **not** used brackets `()` in the above command. This is because `columns` is not a function; it is an *attribute* of the data frame. Attributes add 'extra' information (or metadata) to objects in the form of additional Python objects. In this case, the attributes describe the names (and data types) of the columns. Another way to get quick information about the variables in a data frame is with `describe()`:" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "collapsed": false, 115 | "input": [ 116 | "swim.describe()" 117 | ], 118 | "language": "python", 119 | "metadata": {}, 120 | "outputs": [] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "This provides a numerical summary of each of the variables contained in the data frame. To keep things simple, the output from `describe()` is itself a data frame.\n", 127 | "\n", 128 | "There are lots of different functions and attributes available for data frames (and any other Python objects). For instance, to see how many cases and variables there are in a data frame, you can use the `shape` attribute:" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "collapsed": false, 134 | "input": [ 135 | "swim.shape" 136 | ], 137 | "language": "python", 138 | "metadata": {}, 139 | "outputs": [] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "### Variables in Data Frames\n", 146 | "\n", 147 | "Perhaps the most common operation on a data frame is to refer to the values in a single variable. The two ways you will most commonly use involve referring to a variable by string-quoted name (`swim[\"year\"]`) and as an attribute of a data frame without quotes (`swim.year`)." 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "\n", 155 | "Each column or variable in a 'pandas' data frame is called a 'series', and each series can contain one of many different data types. 
For more information on series', data frames, and other objects in 'pandas', [have a look here][intro].\n", 156 | "\n", 157 | "\n", 158 | "[intro]: http://pandas.pydata.org/pandas-docs/dev/dsintro.html" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "Most of the statistical/mathematical functions you will encounter in this tutorial are designed to work with data frames and allow you to refer directly to variables within a data frame. For instance:" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "collapsed": false, 171 | "input": [ 172 | "swim.year.mean()" 173 | ], 174 | "language": "python", 175 | "metadata": {}, 176 | "outputs": [] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "collapsed": false, 181 | "input": [ 182 | "swim[\"year\"].min()" 183 | ], 184 | "language": "python", 185 | "metadata": {}, 186 | "outputs": [] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "It is also possible to combine 'numpy' operators with 'pandas' variables:" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "collapsed": false, 198 | "input": [ 199 | "np.min(swim[\"year\"])" 200 | ], 201 | "language": "python", 202 | "metadata": {}, 203 | "outputs": [] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "collapsed": false, 208 | "input": [ 209 | "np.min(swim.year)" 210 | ], 211 | "language": "python", 212 | "metadata": {}, 213 | "outputs": [] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "The `swim` portion of the above commands tells Python which data frame we want to operate on. Leaving o\ufb00 that argument leads to an error:" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "collapsed": false, 225 | "input": [ 226 | "year.min()" 227 | ], 228 | "language": "python", 229 | "metadata": {}, 230 | "outputs": [] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "The advantage of referring to variables by name becomes evident when you construct statements that involve more than one variable within a data frame. For instance, here's a calculation of the mean year, separately for (grouping by) the different sexes:" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "collapsed": false, 242 | "input": [ 243 | "swim.groupby('sex')['year'].mean()" 244 | ], 245 | "language": "python", 246 | "metadata": {}, 247 | "outputs": [] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "Both the `mean()` and `min()` functions have been arranged by the 'pandas' library to look in the data frame when interpreting variables, but not all Python functions are designed this way. 
For instance:" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "collapsed": false, 259 | "input": [ 260 | "swim.year.sqrt()" 261 | ], 262 | "language": "python", 263 | "metadata": {}, 264 | "outputs": [] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "When you encounter a function that isn't supported by data frames, you can use 'numpy' functions and the special `apply` function built-into data frames (note that the `func` argument is optional):" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "collapsed": false, 276 | "input": [ 277 | "swim.year.apply(func=np.sqrt).head() # There are 62 cases in total" 278 | ], 279 | "language": "python", 280 | "metadata": {}, 281 | "outputs": [] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "Alternatively, since columns are basically just arrays, we can use built-in numpy functions directly on the columns:" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "collapsed": false, 293 | "input": [ 294 | "np.sqrt(swim.year).head() # Again, there are 62 cases in total" 295 | ], 296 | "language": "python", 297 | "metadata": {}, 298 | "outputs": [] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "### Adding a New Variable\n", 305 | "\n", 306 | "Sometimes you will compute a new quantity from the existing variables and want to treat this as a new variable. Adding a new variable to a data frame can be done similarly to *accessing* a variable. For instance, here is how to create a new variable in `swim` that holds the `time` converted from seconds to units of minutes:" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "collapsed": false, 312 | "input": [ 313 | "swim['minutes'] = swim.time/60. # or swim['time']/60." 314 | ], 315 | "language": "python", 316 | "metadata": {}, 317 | "outputs": [] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "By default, columns get inserted at the end. The `insert` function is available to insert at a particular location in the columns. " 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "collapsed": false, 329 | "input": [ 330 | "swim.insert(1, 'mins', swim.time/60.)" 331 | ], 332 | "language": "python", 333 | "metadata": {}, 334 | "outputs": [] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "You could also, if you want, rede\ufb01ne an existing variable, for instance:" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "collapsed": false, 346 | "input": [ 347 | "swim['time'] = swim.time/60." 348 | ], 349 | "language": "python", 350 | "metadata": {}, 351 | "outputs": [] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "As always, we can take a quick look at the results of our operations by using the `head()` fuction of our data frame:" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "collapsed": false, 363 | "input": [ 364 | "swim.head()" 365 | ], 366 | "language": "python", 367 | "metadata": {}, 368 | "outputs": [] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": {}, 373 | "source": [ 374 | "Such assignment operations do not change the original file from which the data were read, only the data frame in the current session of Python. 
This is an advantage, since it means that your data in the data file stay in their original state and therefore won\u2019t be corrupted by operations made during analysis." 375 | ] 376 | } 377 | ], 378 | "metadata": {} 379 | } 380 | ] 381 | } -------------------------------------------------------------------------------- /notebooks/Notebook1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:e997bcc65d4ed8d4e17e808f973e7759a9bfabc34f78eeedbdd31c81671f79ac" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": { 14 | "slideshow": { 15 | "slide_type": "slide" 16 | } 17 | }, 18 | "source": [ 19 | "# Geospatial Data in Python: Database, Desktop, and the Web\n", 20 | "## Tutorial (Part 1)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "slideshow": { 27 | "slide_type": "subslide" 28 | } 29 | }, 30 | "source": [ 31 | "# Converting coordinates with PyProj" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "collapsed": false, 37 | "input": [ 38 | "from pyproj import Proj\n", 39 | "\n", 40 | "# Create projection transformation object\n", 41 | "p = Proj(init='epsg:3857') # EPSG code for Web Mercator\n", 42 | "\n", 43 | "# Convert from long/lat to Mercator and back\n", 44 | "print(p(-97.740372, 30.282642))\n", 45 | "print(p(-10880408.440985134, 3539932.8204972977, inverse=True))" 46 | ], 47 | "language": "python", 48 | "metadata": { 49 | "slideshow": { 50 | "slide_type": "-" 51 | } 52 | }, 53 | "outputs": [] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "collapsed": false, 58 | "input": [ 59 | "# Fiona (which we will be using shortly) has several \n", 60 | "# helper functions for working with proj4 strings\n", 61 | "from fiona.crs import to_string, from_epsg, from_string\n", 62 | "\n", 63 | "# Create a crs dict from a proj4 string\n", 64 | "crs = from_string('+proj=lcc +lat_1=41.03333333333333 +lat_2=40.66666666666666 '\n", 65 | " '+lat_0=40.16666666666666 +lon_0=-74 +x_0=300000.0000000001 '\n", 66 | " '+y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs')\n", 67 | "\n", 68 | "# Using a proj4 string\n", 69 | "nyc_proj = Proj(crs, preserve_units=True)\n", 70 | "\n", 71 | "# Using an EPSG code\n", 72 | "nyc_epsg = Proj(init='epsg:2263', preserve_units=True)" 73 | ], 74 | "language": "python", 75 | "metadata": { 76 | "slideshow": { 77 | "slide_type": "subslide" 78 | } 79 | }, 80 | "outputs": [] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "collapsed": false, 85 | "input": [ 86 | "# Here's about where my office in NYC is located (in long/lat)\n", 87 | "office = (-73.9637, 40.7684)\n", 88 | "\n", 89 | "# Are they close?\n", 90 | "print(nyc_proj(*office))\n", 91 | "print(nyc_epsg(*office))" 92 | ], 93 | "language": "python", 94 | "metadata": { 95 | "slideshow": { 96 | "slide_type": "fragment" 97 | } 98 | }, 99 | "outputs": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": { 104 | "slideshow": { 105 | "slide_type": "subslide" 106 | } 107 | }, 108 | "source": [ 109 | "## Plotting Eyjafjallaj\u00f6kull volcano with Cartopy" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "collapsed": false, 115 | "input": [ 116 | "import matplotlib.pyplot as plt\n", 117 | "%matplotlib inline\n", 118 | "import cartopy.crs as ccrs\n", 119 | "import matplotlib.pyplot as plt\n", 120 | "import cartopy.io.img_tiles as cimgt\n", 121 | "\n", 122 | "# Create a MapQuest 
open aerial instance\n", 123 | "map_quest_aerial = cimgt.MapQuestOpenAerial()\n", 124 | "\n", 125 | "# What is the projection?\n", 126 | "print(map_quest_aerial.crs.proj4_init)" 127 | ], 128 | "language": "python", 129 | "metadata": { 130 | "slideshow": { 131 | "slide_type": "-" 132 | } 133 | }, 134 | "outputs": [] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "collapsed": false, 139 | "input": [ 140 | "# Specify the lon/lat for the volcano\n", 141 | "volcano = (-19.613333, 63.62)\n", 142 | "\n", 143 | "# Define the plotting extent of the map\n", 144 | "extent = [-22, -15, 63, 65]\n", 145 | "\n", 146 | "# Specify the transform to use when plotting\n", 147 | "transform=ccrs.Geodetic()" 148 | ], 149 | "language": "python", 150 | "metadata": { 151 | "slideshow": { 152 | "slide_type": "-" 153 | } 154 | }, 155 | "outputs": [] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "collapsed": false, 160 | "input": [ 161 | "fig = plt.figure(figsize=(10,8))\n", 162 | "# Create a GeoAxes in the tile's projection\n", 163 | "ax = plt.axes(projection=map_quest_aerial.crs)\n", 164 | "ax.set_extent(extent)\n", 165 | "# Add the MapQuest data at zoom level 8\n", 166 | "ax.add_image(map_quest_aerial, 8)\n", 167 | "ax.plot(*volcano, marker='o', color='yellow', markersize=12,\n", 168 | " alpha=0.7, transform=transform)\n", 169 | "ax.set_title(u'Eyjafjallaj\u00f6kull Volcano')\n", 170 | "plt.show()" 171 | ], 172 | "language": "python", 173 | "metadata": { 174 | "slideshow": { 175 | "slide_type": "subslide" 176 | } 177 | }, 178 | "outputs": [] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": { 183 | "slideshow": { 184 | "slide_type": "slide" 185 | } 186 | }, 187 | "source": [ 188 | "### Time to work on Notebook:\n", 189 | "\n", 190 | "[`Working with Projections in Python`](../exercises/Working with Projections in Python.ipynb)" 191 | ] 192 | } 193 | ], 194 | "metadata": {} 195 | } 196 | ] 197 | } -------------------------------------------------------------------------------- /notebooks/Notebook2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:4d754bcea3d58808b0848f3e45708522b0bf3b135b99e028b2f67fdc390578aa" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": { 14 | "slideshow": { 15 | "slide_type": "slide" 16 | } 17 | }, 18 | "source": [ 19 | "# Geospatial Data in Python: Database, Desktop, and the Web\n", 20 | "## Tutorial (Part 2)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "collapsed": false, 26 | "input": [ 27 | "from shapely.geometry import LineString\n", 28 | "\n", 29 | "# Dilating a line\n", 30 | "line = LineString([(0, 0), (1, 1), (0, 2), (2, 2), (3, 1), (1, 0)])\n", 31 | "dilated = line.buffer(0.5)\n", 32 | "eroded = dilated.buffer(-0.3)\n", 33 | "\n", 34 | "# Demonstate Python Geo Interface\n", 35 | "print(line.__geo_interface__)" 36 | ], 37 | "language": "python", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "subslide" 41 | } 42 | }, 43 | "outputs": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": { 48 | "slideshow": { 49 | "slide_type": "subslide" 50 | } 51 | }, 52 | "source": [ 53 | "# Exploring the path of Hurican Katrina\n", 54 | "\n", 55 | "The data was originally sourced from the HURDAT2 dataset from [AOML/NOAA](http://www.aoml.noaa.gov/hrd/hurdat/newhurdat-all.html), and the Python lists are from the [cartopy 
documentation](http://scitools.org.uk/cartopy/docs/latest/examples/hurricane_katrina.html)." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "collapsed": false, 61 | "input": [ 62 | "from shapely.geometry import LineString\n", 63 | "\n", 64 | "lons = [-75.1, -75.7, -76.2, -76.5, -76.9, -77.7, -78.4, -79.0,\n", 65 | " -79.6, -80.1, -80.3, -81.3, -82.0, -82.6, -83.3, -84.0,\n", 66 | " -84.7, -85.3, -85.9, -86.7, -87.7, -88.6, -89.2, -89.6,\n", 67 | " -89.6, -89.6, -89.6, -89.6, -89.1, -88.6, -88.0, -87.0,\n", 68 | " -85.3, -82.9]\n", 69 | "lats = [23.1, 23.4, 23.8, 24.5, 25.4, 26.0, 26.1, 26.2, 26.2, 26.0,\n", 70 | " 25.9, 25.4, 25.1, 24.9, 24.6, 24.4, 24.4, 24.5, 24.8, 25.2,\n", 71 | " 25.7, 26.3, 27.2, 28.2, 29.3, 29.5, 30.2, 31.1, 32.6, 34.1,\n", 72 | " 35.6, 37.0, 38.6, 40.1]\n", 73 | "\n", 74 | "# Turn the lons and lats into a shapely LineString\n", 75 | "katrina_track = LineString(zip(lons, lats))" 76 | ], 77 | "language": "python", 78 | "metadata": { 79 | "slideshow": { 80 | "slide_type": "-" 81 | } 82 | }, 83 | "outputs": [] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": { 88 | "slideshow": { 89 | "slide_type": "-" 90 | } 91 | }, 92 | "source": [ 93 | "Buffer the linestring by two degrees (doesn't really make sense!). This *should* be about 200kms, but as we'll see, it's not quite accurate... **Why not**?" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "collapsed": false, 99 | "input": [ 100 | "katrina_buffer = katrina_track.buffer(2)" 101 | ], 102 | "language": "python", 103 | "metadata": {}, 104 | "outputs": [] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": { 109 | "slideshow": { 110 | "slide_type": "subslide" 111 | } 112 | }, 113 | "source": [ 114 | "What if we reproject the lon/lats to a projection that preserves distances better?\n", 115 | "\n", 116 | "We *could* use `EPSG:32616` (UTM Zone 16), which covers where Katrina meets New Orleans, but we're probably better off using a custom `proj4` string based on a Lambert Conformal Conic projection. **Why**?" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "collapsed": false, 122 | "input": [ 123 | "from pyproj import Proj, transform\n", 124 | "from fiona.crs import from_string\n", 125 | "\n", 126 | "# Create custom proj4 string\n", 127 | "proj = from_string('+ellps=WGS84 +proj=lcc +lon_0=-96.0 +lat_0=39.0 '\n", 128 | " '+x_0=0.0 +y_0=0.0 +lat_1=33 +lat_2=45 +no_defs')\n", 129 | "\n", 130 | "# Create source and destination Proj objects (source is WGS84 lons/lats)\n", 131 | "src = Proj(init='epsg:4326')\n", 132 | "dst = Proj(proj)\n", 133 | "\n", 134 | "# Create a LineString from the transformed points\n", 135 | "proj_track = LineString(zip(*transform(src, dst, lons, lats)))\n", 136 | "# Buffer the LineString by 200 km (multiply by 1000 to work in meters)\n", 137 | "proj_buffer = proj_track.buffer(200*1000)" 138 | ], 139 | "language": "python", 140 | "metadata": { 141 | "slideshow": { 142 | "slide_type": "fragment" 143 | } 144 | }, 145 | "outputs": [] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": { 150 | "slideshow": { 151 | "slide_type": "subslide" 152 | } 153 | }, 154 | "source": [ 155 | "## Aside: Coordinate tuples and x, y sequences\n", 156 | "\n", 157 | "`zip` is your friend! Use it with tuple unpacking to change between sequences of `(x, y)` pairs and seperate `x` and `y` sequences." 
158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "collapsed": false, 163 | "input": [ 164 | "pts = [(0, 0), (1, 0), (1, 1), (2, 1), (2, 2)]\n", 165 | "x, y = zip(*pts)\n", 166 | "print x, y" 167 | ], 168 | "language": "python", 169 | "metadata": { 170 | "slideshow": { 171 | "slide_type": "-" 172 | } 173 | }, 174 | "outputs": [] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": { 179 | "slideshow": { 180 | "slide_type": "-" 181 | } 182 | }, 183 | "source": [ 184 | "Also, instead of calling `f(x, y)`, you can just use" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "collapsed": false, 190 | "input": [ 191 | "# Skip this slide, not really needed for demo, just need Python function\n", 192 | "# Simple function to add 0.5 to each coordinate\n", 193 | "def f(x, y):\n", 194 | " new_x = [i + 0.5 for i in x]\n", 195 | " new_y = [j + 0.5 for j in y]\n", 196 | " return new_x, new_y" 197 | ], 198 | "language": "python", 199 | "metadata": { 200 | "slideshow": { 201 | "slide_type": "skip" 202 | } 203 | }, 204 | "outputs": [] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "collapsed": false, 209 | "input": [ 210 | "f(*zip(*pts)) # Function f adds 0.5 to each coordinate" 211 | ], 212 | "language": "python", 213 | "metadata": {}, 214 | "outputs": [] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": { 219 | "slideshow": { 220 | "slide_type": "slide" 221 | } 222 | }, 223 | "source": [ 224 | "## Plotting the path of Hurican Katrina" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "collapsed": false, 230 | "input": [ 231 | "fig = plt.figure(figsize=(8, 8))\n", 232 | "ax = plt.axes(projection=ccrs.LambertConformal())\n", 233 | "ax.stock_img() # Add background image (slow)\n", 234 | "ax.coastlines(resolution='110m')\n", 235 | "ax.add_geometries([katrina_buffer], ccrs.PlateCarree(), facecolor='blue', alpha=0.5)\n", 236 | "ax.add_geometries([katrina_track], ccrs.PlateCarree(), facecolor='none')\n", 237 | "\n", 238 | "# Let's add the projected buffer for comparison\n", 239 | "ax.add_geometries([proj_buffer], ccrs.LambertConformal(), facecolor='green', alpha=0.5)\n", 240 | "\n", 241 | "ax.set_extent([-125, -66.5, 20, 50], ccrs.PlateCarree())\n", 242 | "ax.gridlines()\n", 243 | "plt.show()" 244 | ], 245 | "language": "python", 246 | "metadata": { 247 | "slideshow": { 248 | "slide_type": "subslide" 249 | } 250 | }, 251 | "outputs": [] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "Which `shapely` geometry method could we use to find where the tracks *differ*?" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": { 263 | "slideshow": { 264 | "slide_type": "subslide" 265 | } 266 | }, 267 | "source": [ 268 | "# Simple Transformation for Georeferencing a Raster\n", 269 | "\n", 270 | "What makes geospatial raster datasets different from other raster files is that their pixels map to regions of the Earth. In this case, we have a raster image which maps to Midtown Manhattan." 
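Before reading the code below, it may help to see what an affine georeference actually computes; a minimal sketch using the same transform parameters as the cells that follow:

```python
# x = a*col + b*row + c ;  y = d*col + e*row + f
a, b, c = 0.999948245999997, 0.0, 583057.357
d, e, f = 0.0, -0.999948245999997, 4516255.36

def pixel_to_coords(col, row):
    """Map a (col, row) pixel index to projected (x, y) coordinates."""
    return (a * col + b * row + c, d * col + e * row + f)

# Upper-left pixel in EPSG:26918 (NAD83 / UTM Zone 18N) coordinates
print(pixel_to_coords(0, 0))
```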
271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "collapsed": false, 276 | "input": [ 277 | "import matplotlib.image as mpimg\n", 278 | "import os\n", 279 | "\n", 280 | "# Read in a regular PNG image of Manhattan\n", 281 | "png_file = os.path.join('..', 'data', 'manhattan.png')\n", 282 | "img = mpimg.imread(png_file)\n", 283 | "\n", 284 | "# Take a quick look at the shape\n", 285 | "print(img.shape)" 286 | ], 287 | "language": "python", 288 | "metadata": { 289 | "slideshow": { 290 | "slide_type": "-" 291 | } 292 | }, 293 | "outputs": [] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "collapsed": false, 298 | "input": [ 299 | "# Specify the affine transformation\n", 300 | "from affine import Affine # You might not have this library installed\n", 301 | "A = Affine(0.999948245999997, 0.0, 583057.357, 0.0, -0.999948245999997, 4516255.36)\n", 302 | "\n", 303 | "# Compute the upper left and lower right corners\n", 304 | "ul = (0, 0)\n", 305 | "lr = img.shape[:2][::-1]\n", 306 | "\n", 307 | "print(\"Upper left: \" + str(A * ul))\n", 308 | "print(\"Lower right: \" + str(A * lr))" 309 | ], 310 | "language": "python", 311 | "metadata": {}, 312 | "outputs": [] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "Here, the coordinate reference system is `EPSG:26918` (NAD83 / UTM Zone 18N), and the affine transformation matrix is given (in later examples, we'll get this information directly from the input raster datasets)." 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "collapsed": false, 324 | "input": [ 325 | "# Upper left and bottom right corners in UTM coords\n", 326 | "left, top = A * ul\n", 327 | "right, bottom = A * lr\n", 328 | "\n", 329 | "# Plot showing original PNG image (axes correspond to rows and cols) on left\n", 330 | "# and 'transformed' PNG (axes correspond to UTM coords) on the right\n", 331 | "f, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))\n", 332 | "ax1.imshow(img)\n", 333 | "ax1.set_title(\"PNG with row, col bounds\")\n", 334 | "ax2.imshow(img, extent=(left, right, bottom, top), aspect=\"equal\")\n", 335 | "ax2.set_title(\"PNG with correct bounds\")\n", 336 | "plt.show()" 337 | ], 338 | "language": "python", 339 | "metadata": { 340 | "slideshow": { 341 | "slide_type": "subslide" 342 | } 343 | }, 344 | "outputs": [] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "### Time to work on Notebook:\n", 351 | "\n", 352 | "[`Plotting Great Circles in Python`](../exercises/Plotting Great Circles in Python.ipynb)" 353 | ] 354 | } 355 | ], 356 | "metadata": {} 357 | } 358 | ] 359 | } -------------------------------------------------------------------------------- /notebooks/Notebook3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:34721fb75d1a69f20f368d7384d1c7b3d5b2f5874790b4eb673aed55e0b47177" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": { 14 | "slideshow": { 15 | "slide_type": "slide" 16 | } 17 | }, 18 | "source": [ 19 | "# Geospatial Data in Python: Database, Desktop, and the Web\n", 20 | "## Tutorial (Part 3)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "slideshow": { 27 | "slide_type": "subslide" 28 | } 29 | }, 30 | "source": [ 31 | "## Convert vector data formats (Shapefile -> GeoJSON)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 
36 | "collapsed": false, 37 | "input": [ 38 | "import fiona\n", 39 | "from fiona.crs import to_string\n", 40 | "import os\n", 41 | "\n", 42 | "boro_file = os.path.join(\"..\", \"data\", \"nybb\", \"nybb.shp\")\n", 43 | "out_file = os.path.join(\"..\", \"data\", \"nybb\", \"nybb.geojson\")\n", 44 | "\n", 45 | "# Register format drivers with a context manager\n", 46 | "with fiona.drivers():\n", 47 | " # Open the shapefile (can also open directly from zip files with vfs!)\n", 48 | " with fiona.open(boro_file) as source:\n", 49 | " print(\"Feature Count: %s\" % len(source))\n", 50 | " print(\"Input Driver: %s\" % source.driver)\n", 51 | " \n", 52 | " meta = source.meta\n", 53 | " meta.update(driver=\"GeoJSON\")\n", 54 | " \n", 55 | " if os.path.exists(out_file):\n", 56 | " os.remove(out_file)\n", 57 | " with fiona.open(out_file, 'w', **meta) as sink:\n", 58 | " print(\"Output Driver: %s\" % sink.driver)\n", 59 | " for rec in source:\n", 60 | " sink.write(rec)\n", 61 | "# Did it work?\n", 62 | "print(\"File Exists: %s\" % os.path.exists(out_file))" 63 | ], 64 | "language": "python", 65 | "metadata": {}, 66 | "outputs": [] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": { 71 | "slideshow": { 72 | "slide_type": "subslide" 73 | } 74 | }, 75 | "source": [ 76 | "## Read geospatial raster data\n", 77 | "\n", 78 | "Note: You can download the GeoTIFF of Manhattan used in this example [from here](https://www.dropbox.com/s/mba7obrfh2b2ucb/manhattan.tif). Make sure you put it in your `data` folder." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "collapsed": false, 84 | "input": [ 85 | "import rasterio\n", 86 | "from fiona.crs import to_string\n", 87 | "import os\n", 88 | "import numpy as np\n", 89 | "\n", 90 | "image_file = os.path.join('..', 'data', 'manhattan.tif')\n", 91 | "\n", 92 | "# Register format drivers with a context manager\n", 93 | "with rasterio.drivers():\n", 94 | " with rasterio.open(image_file, 'r') as source:\n", 95 | " print(source.count, source.shape)\n", 96 | " print(source.driver)\n", 97 | " print(to_string(source.crs))\n", 98 | " \n", 99 | " # Get data from each band (newer versions of rasterio use source.read())\n", 100 | " r, g, b = map(source.read_band, (1, 2, 3))\n", 101 | " data = np.dstack((r, g, b)) # Each band is just an ndarray!\n", 102 | " print(type(data))\n", 103 | " \n", 104 | " # Get the bounds of the raster (for plotting later)\n", 105 | " bounds = source.bounds[::2] + source.bounds[1::2]" 106 | ], 107 | "language": "python", 108 | "metadata": {}, 109 | "outputs": [] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "An alternative way (outside of `with` context manager as above):" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "collapsed": false, 121 | "input": [ 122 | "source = rasterio.open(image_file, 'r')" 123 | ], 124 | "language": "python", 125 | "metadata": {}, 126 | "outputs": [] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "slideshow": { 132 | "slide_type": "subslide" 133 | } 134 | }, 135 | "source": [ 136 | "## Simple plot of geospatial raster" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "collapsed": false, 142 | "input": [ 143 | "import matplotlib.pyplot as plt\n", 144 | "%matplotlib inline\n", 145 | "fig = plt.figure(figsize=(8, 8))\n", 146 | "ax = plt.imshow(data, extent=bounds)\n", 147 | "plt.show()" 148 | ], 149 | "language": "python", 150 | "metadata": {}, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | 
"metadata": { 156 | "slideshow": { 157 | "slide_type": "slide" 158 | } 159 | }, 160 | "source": [ 161 | "### Time to work on Notebook:\n", 162 | "\n", 163 | "[`Working with Rasters in Python`](../exercises/Working with Rasters in Python.ipynb)" 164 | ] 165 | } 166 | ], 167 | "metadata": {} 168 | } 169 | ] 170 | } -------------------------------------------------------------------------------- /notebooks/Notebook4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:86728bd1f81b030b0152580821dbc6554431bd42b99cb9368907a6506fff70f1" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": { 14 | "slideshow": { 15 | "slide_type": "slide" 16 | } 17 | }, 18 | "source": [ 19 | "# Geospatial Data in Python: Database, Desktop, and the Web\n", 20 | "## Tutorial (Part 4)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "slideshow": { 27 | "slide_type": "subslide" 28 | } 29 | }, 30 | "source": [ 31 | "## Creating a simple GeoSeries" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "collapsed": false, 37 | "input": [ 38 | "from shapely.geometry import Polygon\n", 39 | "from geopandas import GeoSeries, GeoDataFrame\n", 40 | "\n", 41 | "# Create three simple polygons\n", 42 | "p1 = Polygon([(0, 0), (1, 0), (1, 1)])\n", 43 | "p2 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])\n", 44 | "p3 = Polygon([(2, 0), (3, 0), (3, 1), (2, 1)])\n", 45 | "\n", 46 | "s = GeoSeries([p1, p2, p3])\n", 47 | "s" 48 | ], 49 | "language": "python", 50 | "metadata": {}, 51 | "outputs": [] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "Some geographic operations return a normal pandas object. 
The `area` property of a `GeoSeries` will return a pandas `Series` containing the area of each item in the `GeoSeries`:" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "collapsed": false, 63 | "input": [ 64 | "print(s.area)" 65 | ], 66 | "language": "python", 67 | "metadata": {}, 68 | "outputs": [] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": { 73 | "slideshow": { 74 | "slide_type": "subslide" 75 | } 76 | }, 77 | "source": [ 78 | "## Simple file I/O" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "collapsed": false, 84 | "input": [ 85 | "# Specify file name\n", 86 | "import os\n", 87 | "boro_file = os.path.join(\"..\", \"data\", \"nybb\", \"nybb.shp\")\n", 88 | "\n", 89 | "# Create from file (one line)\n", 90 | "boros = GeoDataFrame.from_file(boro_file)\n", 91 | "\n", 92 | "# Do some pandas stuff\n", 93 | "boros.set_index('BoroCode', inplace=True)\n", 94 | "boros = boros.sort_index() # Sort by the new index (sorting returns a copy)\n", 95 | "boros" 96 | ], 97 | "language": "python", 98 | "metadata": {}, 99 | "outputs": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": { 104 | "slideshow": { 105 | "slide_type": "subslide" 106 | } 107 | }, 108 | "source": [ 109 | "## Plotting a simple GeoSeries" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "collapsed": false, 115 | "input": [ 116 | "import matplotlib.pyplot as plt\n", 117 | "%matplotlib inline\n", 118 | "\n", 119 | "f, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), sharey=True)\n", 120 | "s.plot(axes=ax1)\n", 121 | "ax1.set_title(\"Original Polygons\")\n", 122 | "ax1.set_xlim(-0.5, 3.5)\n", 123 | "s.buffer(0.4).plot(axes=ax2)\n", 124 | "ax2.set_title(\"Buffered Polygons\")\n", 125 | "ax2.set_ylim(-0.5, 1.5)\n", 126 | "plt.show()" 127 | ], 128 | "language": "python", 129 | "metadata": {}, 130 | "outputs": [] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": { 135 | "slideshow": { 136 | "slide_type": "subslide" 137 | } 138 | }, 139 | "source": [ 140 | "## Plotting a GeoDataFrame" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "collapsed": false, 146 | "input": [ 147 | "fig = plt.figure(figsize=(8, 8))\n", 148 | "ax = boros.plot() # Regular plot\n", 149 | "# We can access and plot the geometries directly as well\n", 150 | "ax = boros.geometry.convex_hull.plot(axes=ax)\n", 151 | "plt.show()" 152 | ], 153 | "language": "python", 154 | "metadata": {}, 155 | "outputs": [] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": { 160 | "slideshow": { 161 | "slide_type": "subslide" 162 | } 163 | }, 164 | "source": [ 165 | "## GeoPandas makes databases easy\n", 166 | "\n", 167 | "You can access `PostGIS` and other spatially aware databases with a similar `API` to Pandas.
The added bonus here is that GeoPandas *understands* geospatial data (such as `WKB`):\n", 168 | "\n", 169 | "It automatically converts `WKB` to the appropriate Shapely `geometry` type.\n", 170 | "\n", 171 | "*Note: If you're following along, you can use the included shapefile instead of a database*" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "collapsed": false, 177 | "input": [ 178 | "import geopandas as gpd\n", 179 | "\n", 180 | "wifi_file = os.path.join(\"..\", \"data\", \"wifi\", \"wifi.shp\")\n", 181 | "\n", 182 | "df_geo = gpd.read_file(wifi_file)\n", 183 | "df_geo.head()" 184 | ], 185 | "language": "python", 186 | "metadata": {}, 187 | "outputs": [] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": { 192 | "slideshow": { 193 | "slide_type": "subslide" 194 | } 195 | }, 196 | "source": [ 197 | "## Now that we have a GeoDataFrame...\n", 198 | "\n", 199 | "### We can do all sorts of fun things!\n", 200 | "\n", 201 | "Like compute **summaries** of the geometries:" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "collapsed": false, 207 | "input": [ 208 | "# What is the total area of all 5 NYC boroughs?\n", 209 | "print(boros.area.sum())" 210 | ], 211 | "language": "python", 212 | "metadata": {}, 213 | "outputs": [] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "...change the **CRS**:" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "collapsed": false, 225 | "input": [ 226 | "# Convert from NY State Plane to WGS84 (long/lat)\n", 227 | "df_wgs84 = df_geo.to_crs(epsg=4326) # 4326 is the EPSG code for WGS84\n", 228 | "print(df_geo.crs)\n", 229 | "print(df_wgs84.crs)" 230 | ], 231 | "language": "python", 232 | "metadata": {}, 233 | "outputs": [] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "...and **much** more... 
plus anything else that Pandas can do" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": { 245 | "slideshow": { 246 | "slide_type": "subslide" 247 | } 248 | }, 249 | "source": [ 250 | "## Super quick plotting for the web\n", 251 | "\n", 252 | "Here's a super simple map, which shows `Free` and `Fee-based` WiFi hotspots in NYC:" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "collapsed": false, 258 | "input": [ 259 | "ax = df_wgs84.plot() # This is all that's needed, but...\n", 260 | "# ...here's a hack to plot things a bit more nicely (GeoPandas is still new!)\n", 261 | "free = df_wgs84[\"type\"] == \"Free\" # Is it free?\n", 262 | "col = {True: \"green\", False: \"blue\"} # If free, green, otherwise blue\n", 263 | "for i, l in enumerate(ax.lines):\n", 264 | " l.set_markersize(6)\n", 265 | " l.set_color(col[free[i]])\n", 266 | "ax.axis('off') # Turn off axes\n", 267 | "plt.show()" 268 | ], 269 | "language": "python", 270 | "metadata": { 271 | "slideshow": { 272 | "slide_type": "-" 273 | } 274 | }, 275 | "outputs": [] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": { 280 | "slideshow": { 281 | "slide_type": "subslide" 282 | } 283 | }, 284 | "source": [ 285 | "### Let's turn this map into a 'slippy' web-map" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "collapsed": false, 291 | "input": [ 292 | "import mplleaflet\n", 293 | "mplleaflet.display(fig=ax.figure, crs=df_wgs84.crs)" 294 | ], 295 | "language": "python", 296 | "metadata": {}, 297 | "outputs": [] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": { 302 | "slideshow": { 303 | "slide_type": "subslide" 304 | } 305 | }, 306 | "source": [ 307 | "#### Want that as a standalone HTML page?" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "collapsed": false, 313 | "input": [ 314 | "mplleaflet.show(fig=ax.figure, crs=df_wgs84.crs)" 315 | ], 316 | "language": "python", 317 | "metadata": {}, 318 | "outputs": [] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": { 323 | "slideshow": { 324 | "slide_type": "-" 325 | } 326 | }, 327 | "source": [ 328 | "#### Or perhaps you want to share something with others online? Use `geojson.io`.\n", 329 | "\n", 330 | "Note: your map should really be reprojected to EPSG:4326 (WGS84 lon/lat) for this to align properly." 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "collapsed": false, 336 | "input": [ 337 | "import geojsonio\n", 338 | "res = geojsonio.display(boros.to_crs(epsg=4326).to_json(), force_gist=True)\n", 339 | "print(res)" 340 | ], 341 | "language": "python", 342 | "metadata": {}, 343 | "outputs": [] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": {}, 348 | "source": [ 349 | "Don't forget to pop on over to geojson.io and play around/edit your GeoJSON layer..." 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": { 355 | "slideshow": { 356 | "slide_type": "slide" 357 | } 358 | }, 359 | "source": [ 360 | "### Time to work on Notebook:\n", 361 | "\n", 362 | "[`Working with Vectors in Python`](../exercises/Working with Vectors in Python.ipynb)" 363 | ] 364 | } 365 | ], 366 | "metadata": {} 367 | } 368 | ] 369 | } --------------------------------------------------------------------------------
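
The Hurricane Katrina cells in the notebook above buffer the track by two degrees and ask why that is not simply a 200 km buffer. If you want to check the numbers yourself, here is a minimal sketch using `pyproj.Geod` (geodesic calculations from the same `pyproj` dependency the notebook already imports); the two sample latitudes are arbitrary picks from along the track:

```python
from pyproj import Geod

geod = Geod(ellps="WGS84")  # same ellipsoid as the custom proj4 string

# Measure what two degrees of longitude spans on the ground at two
# different latitudes crossed by Katrina's track.
for lat in (25.0, 40.0):
    _, _, dist = geod.inv(-89.6, lat, -87.6, lat)  # points 2 degrees of longitude apart
    print("2 degrees of longitude at %.0fN is roughly %.0f km" % (lat, dist / 1000.0))
```

Two degrees of latitude stays close to 222 km everywhere, but two degrees of longitude shrinks as you move away from the equator, so a buffer expressed in degrees does not correspond to a fixed distance on the ground; buffering in a projected CRS in metres, as the notebook does next, avoids the problem.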
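The "Simple Transformation for Georeferencing a Raster" cells above apply a hand-specified affine transform to the Manhattan PNG. As a short illustration of what that matrix actually does, here is a minimal sketch; it assumes only the optional `affine` package already used in that cell, and reuses the same matrix values:

```python
from affine import Affine

# The notebook's transform: roughly 1 m square pixels, with the world
# coordinates of the upper-left corner given in EPSG:26918 (NAD83 / UTM 18N).
A = Affine(0.999948245999997, 0.0, 583057.357,
           0.0, -0.999948245999997, 4516255.36)

# An affine transform maps (col, row) pixel coordinates to (x, y) world
# coordinates: x = a*col + b*row + c and y = d*col + e*row + f.
col, row = 100, 200            # an arbitrary pixel
x, y = A * (col, row)          # pixel -> world
print(x, y)

# The inverse transform goes back from world to pixel coordinates.
print(~A * (x, y))             # approximately (100.0, 200.0)
```

This is why `A * (0, 0)` in the notebook returns the upper-left corner, and why the `e` term is negative: row numbers increase downwards while northings increase upwards.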
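Notebook4.ipynb above notes that GeoPandas can talk to `PostGIS` and other spatially aware databases but, so that everyone can follow along, it only loads the bundled `wifi` shapefile. For reference, a minimal sketch of the database route with a reasonably recent GeoPandas is below; the connection details and the `wifi` table / `geom` column names are placeholders for illustration, not something provided with this repository:

```python
import geopandas as gpd
import psycopg2  # optional dependency; only needed for the database route

# Hypothetical connection details -- adjust these to your own PostGIS setup.
con = psycopg2.connect(host="localhost", dbname="nyc",
                       user="postgres", password="postgres")

# read_postgis converts the WKB geometry column into shapely geometries,
# returning a GeoDataFrame just like read_file does for the shapefile.
df_geo = gpd.read_postgis("SELECT * FROM wifi", con, geom_col="geom")
print(df_geo.head())
```

From there, everything shown in the notebook (`to_crs`, plotting, `to_json`, and so on) works the same whether the `GeoDataFrame` came from a file or from a database.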