├── shp └── .gitkeep ├── geojson └── .gitkeep ├── .gitignore ├── setup.sh ├── updateData.sh ├── requirements.txt ├── LICENSE.md ├── storm_names.txt ├── README.md └── makeCurrentGeoJSON.py /shp/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /geojson/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | virt-hurricanemap/ 2 | geojson/*.json 3 | shp/* 4 | -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | virtualenv -p python2.7 virt-hurricanemap 3 | source virt-hurricanemap/bin/activate 4 | pip install --upgrade pip 5 | pip install -r requirements.txt 6 | deactivate 7 | -------------------------------------------------------------------------------- /updateData.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm -r shp/* 3 | 4 | source virt-hurricanemap/bin/activate 5 | python makeCurrentGeoJSON.py 6 | deactivate 7 | 8 | #don't iterate when there are no files 9 | shopt -s nullglob 10 | for filename in ./geojson/*.json; do 11 | #upload/rsync your files using a command here 12 | echo "$filename" 13 | done 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2017.7.27.1 2 | chardet==3.0.4 3 | dateparser==0.6.0 4 | feedparser==5.2.1 5 | geojson==2.0.0 6 | idna==2.6 7 | pyshp==1.2.10 8 | python-dateutil==2.6.1 9 | pytz==2017.2 10 | regex==2017.7.28 11 | requests==2.18.4 12 | ruamel.ordereddict==0.4.13 13 | ruamel.yaml==0.15.32 14 | six==1.10.0 15 | tzlocal==1.4 16 | urllib3==1.22 17 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Internet Systems Consortium (ISC) license 2 | 3 | Copyright (c) 2017, Dow Jones 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 8 | -------------------------------------------------------------------------------- /storm_names.txt: -------------------------------------------------------------------------------- 1 | Arlene 2 | Bret 3 | Cindy 4 | Don 5 | Emily 6 | Franklin 7 | Gert 8 | Harvey 9 | Irma 10 | Jose 11 | Katia 12 | Lee 13 | Maria 14 | Nate 15 | Ophelia 16 | Philippe 17 | Rina 18 | Sean 19 | Tammy 20 | Vince 21 | Whitney 22 | Alberto 23 | Beryl 24 | Chris 25 | Debby 26 | Ernesto 27 | Florence 28 | Gordon 29 | Helene 30 | Isaac 31 | Joyce 32 | Kirk 33 | Leslie 34 | Michael 35 | Nadine 36 | Oscar 37 | Patty 38 | Rafael 39 | Sara 40 | Tony 41 | Valerie 42 | William 43 | Andrea 44 | Barry 45 | Chantal 46 | Dorian 47 | Erin 48 | Fernand 49 | Gabrielle 50 | Humberto 51 | Imelda 52 | Jerry 53 | Karen 54 | Lorenzo 55 | Melissa 56 | Nestor 57 | Olga 58 | Pablo 59 | Rebekah 60 | Sebastien 61 | Tanya 62 | Van 63 | Wendy 64 | Arthur 65 | Bertha 66 | Cristobal 67 | Dolly 68 | Edouard 69 | Fay 70 | Gonzalo 71 | Hanna 72 | Isaias 73 | Josephine 74 | Kyle 75 | Laura 76 | Marco 77 | Nana 78 | Omar 79 | Paulette 80 | Rene 81 | Sally 82 | Teddy 83 | Vicky 84 | Wilfred 85 | Ana 86 | Bill 87 | Claudette 88 | Danny 89 | Elsa 90 | Fred 91 | Grace 92 | Henri 93 | Ida 94 | Julian 95 | Kate 96 | Larry 97 | Mindy 98 | Nicholas 99 | Odette 100 | Peter 101 | Rose 102 | Sam 103 | Teresa 104 | Victor 105 | Wanda 106 | Alex 107 | Bonnie 108 | Colin 109 | Danielle 110 | Earl 111 | Fiona 112 | Gaston 113 | Hermine 114 | Ian 115 | Julia 116 | Karl 117 | Lisa 118 | Martin 119 | Nicole 120 | Owen 121 | Paula 122 | Richard 123 | Shary 124 | Tobias 125 | Virginie 126 | Walter 127 | Adrian 128 | Beatriz 129 | Calvin 130 | Dora 131 | Eugene 132 | Fernanda 133 | Greg 134 | Hilary 135 | Irwin 136 | Jova 137 | Kenneth 138 | Lidia 139 | Max 140 | Norma 141 | Otis 142 | Pilar 143 | Ramon 144 | Selma 145 | Todd 146 | Veronica 147 | Wiley 148 | Xina 149 | York 150 | Zelda 151 | Aletta 152 | Bud 153 | Carlotta 154 | Daniel 155 | Emilia 156 | Fabio 157 | Gilma 158 | Hector 159 | Ileana 160 | John 161 | Kristy 162 | Lane 163 | Miriam 164 | Norman 165 | Olivia 166 | Paul 167 | Rosa 168 | Sergio 169 | Tara 170 | Vicente 171 | Willa 172 | Xavier 173 | Yolanda 174 | Zeke 175 | Alvin 176 | Barbara 177 | Cosme 178 | Dalila 179 | Erick 180 | Flossie 181 | Gil 182 | Henriette 183 | Ivo 184 | Juliette 185 | Kiko 186 | Lorena 187 | Mario 188 | Narda 189 | Octave 190 | Priscilla 191 | Raymond 192 | Sonia 193 | Tico 194 | Velma 195 | Wallis 196 | Amanda 197 | Boris 198 | Cristina 199 | Douglas 200 | Elida 201 | Fausto 202 | Genevieve 203 | Hernan 204 | Iselle 205 | Julio 206 | Karina 207 | Lowell 208 | Marie 209 | Norbert 210 | Odalys 211 | Polo 212 | Rachel 213 | Simon 214 | Trudy 215 | Vance 216 | Winnie 217 | Andres 218 | Blanca 219 | Carlos 220 | Dolores 221 | Enrique 222 | Felicia 223 | Guillermo 224 | Hilda 225 | Ignacio 226 | Jimena 227 | Kevin 228 | Linda 229 | Marty 230 | Nora 231 | Olaf 232 | Pamela 233 | Rick 234 | Sandra 235 | Terry 236 | Vivian 237 | Waldo 238 | Agatha 239 | Blas 240 | Celia 241 | Darby 242 | Estelle 243 | Frank 244 | Georgette 245 | Howard 246 | Ivette 247 | Javier 248 | Kay 249 | Lester 250 | Madeline 251 | Newton 252 | Orlene 253 | Paine 254 | Roslyn 255 | Seymour 256 | Tina 257 | Virgil 258 | Winifred 259 | Akoni 260 | Ema 261 | Hone 262 | Iona 263 | Keli 264 | Lala 265 | Moke 266 | Nolo 267 | Olana 268 | Pena 269 | Ulana 270 | Wale 271 | Aka 272 | Ekeka 273 | Hene 274 | Iolana 275 | Keoni 276 | Lino 277 | Mele 278 | Nona 279 | Oliwa 280 | Pama 281 | Upana 282 | Wene 283 | Alika 284 | Ele 285 | Huko 286 | Iopa 287 | Kika 288 | Lana 289 | Maka 290 | Neki 291 | Omeka 292 | Pewa 293 | Unala 294 | Wali 295 | Ela 296 | Halola 297 | Iune 298 | Kilo 299 | Loke 300 | Malia 301 | Niala 302 | Oho 303 | Pali 304 | Ulika 305 | Walaka 306 | Alpha 307 | Beta 308 | Gamma 309 | Delta 310 | Epsilon 311 | Zeta 312 | Eta 313 | Theta 314 | Iota 315 | Kappa 316 | Lambda 317 | Mu 318 | Nu 319 | Xi 320 | Omicron 321 | Pi 322 | Rho 323 | Sigma 324 | Tau 325 | Upsilon 326 | Phi 327 | Chi 328 | Psi 329 | Omega 330 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hurricane Helper 2 | Hurricane Helper is the data processor for [The Wall Street Journal's Hurricane Tracker](http://www.wsj.com/graphics/hurricane-map/). This script is an opinionated parser of [National Hurricane Center](http://www.nhc.noaa.gov/) storm data, written in Python. For every named storm tracked by the center in the [Atlantic](http://www.nhc.noaa.gov/gis-at.xml) and [Eastern Pacific](http://www.nhc.noaa.gov/gis-ep.xml) oceans, Hurricane Helper creates a GeoJSON FeatureCollection and saves it as a .json file in `geojson/` with the name of the storm. The FeatureCollection contains: 3 | 4 | 1. A polygon feature of the forecast cone with the following properties: 5 | * **storm** The storm name in title case 6 | * **cat** The storm intensity 7 | * **fcstpd** The forecast period in hours (always "120", 5 days) 8 | 9 | 2. Linestring features representing the historical and forecast track segments, each with the following properties, attributable to the first point of the linestring: 10 | * **storm** The storm name in title case 11 | * **cat** The storm intensity 12 | * **datetime** ISO 8601 datetime in UTC/GMT, 13 | * **wind** wind speed in miles per hour, 14 | * **pressure** pressure in millibars (or `null` if `source` is `forecast`), 15 | * **source** "historical" or "forecast" 16 | * **current** boolean true for the first forecast point, listed as "current center location" by the NHC 17 | 18 | 3. Point features, in oldest-to-newest order, each with the same properties as the linestring segments. 19 | 20 | All storms are also added to one big FeatureCollection and saved to `geojson/currentGeoJSON.json`. 21 | 22 | Since the goal of this project is to show _all_ current hurricanes, if you wish to display a single storm, you can filter on the **storm** name property. 23 | 24 | ### Storm categories 25 | For polygons, we use the category provided by the NHC: 26 | * **HU** Hurricane 27 | * **MH** Major hurricane ([Saffir-Simpson scale](http://www.nhc.noaa.gov/aboutsshws.php) category 3 and above) 28 | 29 | For points and linestrings, we replace HU or MH with H1-H5 depending on the [Saffir-Simpson scale](http://www.nhc.noaa.gov/aboutsshws.php). 30 | 31 | For less intense wind speed measurements, we've recently observed the following categories: 32 | * **TS** [Tropical storm](http://www.nhc.noaa.gov/aboutgloss.shtml#TROPSTRM) 33 | * **STS** [Subtropical storm](http://www.nhc.noaa.gov/aboutgloss.shtml#SUBSTRM) 34 | * **STD** [Subtropical depression](http://www.nhc.noaa.gov/aboutgloss.shtml#SUBDEP) 35 | * **DB** [Tropical disturbance](http://www.nhc.noaa.gov/aboutgloss.shtml#TROPDIST) 36 | * **LO** [Remnant low](http://www.nhc.noaa.gov/aboutgloss.shtml#REM) 37 | 38 | Other categories may be reported. 39 | 40 | ### Problems solved 41 | * Forecasts are produced in "local" time, while historical positions are recorded in UTC/GMT. This standardizes all times to UTC/GMT. 42 | * The full datetime shown by the NHC for the first point in the forecast, labeled "current center location," is reported in a different field than following forecast points. This uses the `ADVDATE` field for the current storm location, and `FLDATELBL` for the rest of the forecast, pending clarification from NHC. 43 | * Small, unimportant tropical disturbances are included in the official RSS feed. This outputs only named storms. 44 | * Sometimes the forecast cones are not in the shapefiles. This suppresses the output until all features are present for each storm. 45 | * Wind speeds are reported in knots. This converts to miles per hour using the correct precision. 46 | * Hurricane numbers aren't specified. This puts each hurricane on the 1-5 scale. 47 | * Nonexistent values are given as `9999.0`. This changes those to `None` (Python)/`null` (JavaScript). 48 | * Storms disappear after they are no longer tracked. This saves the last data for each storm to a file for that storm. 49 | 50 | ## Development 51 | `sh setup.sh` to create a Python virtual environment and install requirements 52 | 53 | ## Production 54 | `sh updateData.sh` provides a shell script template for processing the files with a cron job. 55 | 56 | ## License 57 | [ISC](/LICENSE) 58 | -------------------------------------------------------------------------------- /makeCurrentGeoJSON.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | """ 5 | makeCurrentGeoJSON.py 6 | 7 | 1. Read RSS feed 8 | 2. Download each shapefile 9 | 3. Convert to GeoJSON 10 | 4. Save to file 11 | 12 | """ 13 | 14 | import requests 15 | import feedparser 16 | import os 17 | import glob 18 | import shutil 19 | import zipfile 20 | import shapefile 21 | import geojson 22 | import datetime 23 | import dateparser 24 | 25 | RSS_URLS = ["http://www.nhc.noaa.gov/gis-at.xml","http://www.nhc.noaa.gov/gis-ep.xml"] 26 | ITEMS_WANTED = {"Preliminary Best Track [shp]":"historical","Forecast [shp]":"forecast"} 27 | COMPONENTS = {"historical":["pts"],"forecast":["pgn","pts"]} 28 | STORM_NAMES = open("storm_names.txt").read().splitlines() 29 | 30 | def checkDataQuality(rss_feature_list): 31 | observed_source_types = set([x.properties["source"] for x in rss_feature_list if "source" in x.properties]) 32 | observed_shape_types = set([x.geometry.type for x in rss_feature_list]) 33 | observed_storm_names = set([x.properties["storm"] for x in rss_feature_list if not x.properties["remnant_flag"]]) 34 | polygon_count = len([True for x in rss_feature_list if x.geometry.type == "Polygon" and not x.properties["remnant_flag"]]) 35 | if len(rss_feature_list) == 0: 36 | print "no features to check" 37 | return True 38 | else: 39 | if not set(["historical", "forecast"]) == observed_source_types: 40 | print "only observed",observed_source_types 41 | return False 42 | if not set(["Point","LineString","Polygon"]) == observed_shape_types: 43 | print "only observed",observed_shape_types 44 | return False 45 | if not len(observed_storm_names) == polygon_count: 46 | print "observed",len(observed_storm_names),"storms but",len([True for x in rss_feature_list if x.geometry.type == "Polygon"]),"polygons" 47 | return False 48 | print "storms parsed ok" 49 | return True 50 | 51 | def download(url): 52 | print "downloading",url 53 | filename = url.split("/")[-1] 54 | filepath = "shp/"+filename 55 | #always re-download best track 56 | if not os.path.exists(filepath) or "best_track" in filename: 57 | response = requests.get(url, stream=True) 58 | with open(filepath, "wb") as out_file: 59 | shutil.copyfileobj(response.raw, out_file) 60 | del response 61 | return filepath 62 | 63 | def unzip(filepath): 64 | print "unzipping",filepath 65 | zh = zipfile.ZipFile(filepath, "r") 66 | unzippedpath = os.path.splitext(filepath)[0] 67 | zh.extractall(unzippedpath) 68 | zh.close() 69 | return unzippedpath 70 | 71 | def parseRSS(url): 72 | rss_response = requests.get(url) 73 | rss_object = feedparser.parse(rss_response.text) 74 | rss_feature_list = [] 75 | for rss_item in rss_object.entries: 76 | if any(item_wanted in rss_item.title for item_wanted in ITEMS_WANTED.keys()) and any(storm_wanted.upper() in rss_item.title.upper() for storm_wanted in STORM_NAMES): 77 | for storm in STORM_NAMES: 78 | if storm in rss_item.title: 79 | for item in ITEMS_WANTED.keys(): 80 | if item in rss_item.title: 81 | if "Remnants" in rss_item.title: 82 | remnant_flag = True 83 | else: 84 | remnant_flag = False 85 | shp_feature_list = parseSHP(rss_item.link,ITEMS_WANTED[item],storm,remnant_flag) 86 | rss_feature_list.extend(shp_feature_list) 87 | if checkDataQuality(rss_feature_list): 88 | return rss_feature_list 89 | else: 90 | print "failed to parse storms for",url 91 | raise SystemExit 92 | 93 | def strToInt(s): 94 | return int(s.split(".")[0]) 95 | 96 | def convertKnotsToMiles(knots): 97 | return round((float(knots)*1.15078)/5)*5 98 | 99 | def hurricaneNumber(wind): 100 | #wind in knots 101 | if wind <= 63: 102 | return None 103 | if 64 <= wind <= 82: 104 | return 1 105 | if 83 <= wind <= 95: 106 | return 2 107 | if 96 <= wind <= 112: 108 | return 3 109 | if 113 <= wind <= 136: 110 | return 4 111 | if wind >= 137: 112 | return 5 113 | 114 | def parseProperties(p,shp_type,component,storm,remnant_flag,shape_index): 115 | #set up an empty dictionary to return 116 | o = {} 117 | #add storm name 118 | o["storm"] = storm 119 | o["remnant_flag"] = remnant_flag 120 | #if polygon, add forecast period and category 121 | if component == "pgn": 122 | o["fcstpd"] = p["FCSTPRD"] 123 | o["cat"] = p["STORMTYPE"] 124 | #add properties to points 125 | if component == "pts": 126 | #say if it"s historical or forecast 127 | o["source"] = shp_type 128 | #for forecast 129 | if shp_type == "forecast": 130 | #%Y-%m-%d %I:%M %p %a 131 | #convert to GMT and then drop the timezone reference 132 | #first forecast time is the same as the advisory issue date 133 | if shape_index == 0: 134 | time_fragment = p["ADVDATE"].split(" ")[0] 135 | if time_fragment.isdigit() and (3 <= len(time_fragment) <= 4): 136 | time_fragment_formatted = time_fragment[:-2] + ":" + time_fragment[-2:] 137 | if time_fragment_formatted == p["DATELBL"].split(" ")[0]: 138 | o["datetime"] = dateparser.parse(time_fragment_formatted + " " + p["ADVDATE"].split(" ",1)[1],settings={"TO_TIMEZONE": "UTC"}).replace(tzinfo=None).isoformat() 139 | else: 140 | print "advisory hour/label hour mismatch" 141 | raise ValueError 142 | else: 143 | print "problem extracting hours from",p["ADVDATE"] 144 | raise ValueError 145 | else: 146 | o["datetime"] = dateparser.parse(p["FLDATELBL"],settings={"TO_TIMEZONE": "UTC"}).replace(tzinfo=None).isoformat() 147 | #pressure in millibars or none 148 | if p["MSLP"] == "9999.0": 149 | o["pressure"] = None 150 | else: 151 | o["pressure"] = strToInt(p["MSLP"]) 152 | #maximum sustained wind speed 153 | o["wind"] = strToInt(p["MAXWIND"]) 154 | if shape_index == 0: 155 | o["current"] = True 156 | else: 157 | o["current"] = False 158 | #for historical 159 | if shp_type == "historical": 160 | o["current"] = False 161 | #this is GMT already 162 | o["datetime"] = datetime.datetime(strToInt(p["YEAR"]),strToInt(p["MONTH"]),strToInt(p["DAY"]),int(p["HHMM"][0:2])).isoformat() 163 | #pressure in millibars 164 | o["pressure"] = strToInt(p["MSLP"]) 165 | #maximum sustained wind speed a.k.a. intensity 166 | #http://www.nhc.noaa.gov/outreach/presentations/NHC2017_IntensityChallenges.pdf 167 | o["wind"] = strToInt(p["INTENSITY"]) 168 | #storm category for hurricanes and major hurricanes 169 | if p["STORMTYPE"] in ["HU","MH"]: 170 | hurricane_number = hurricaneNumber(o["wind"]) 171 | if hurricaneNumber: 172 | o["cat"] = "H"+str(hurricane_number) 173 | else: 174 | o["cat"] = p["STORMTYPE"] 175 | #convert wind speed from knots to mph 176 | o["wind"] = convertKnotsToMiles(o["wind"]) 177 | return o 178 | 179 | def parseSHP(url,shp_type,storm,remnant_flag): 180 | print "parsing",url,"as",shp_type 181 | shppath = unzip(download(url)) 182 | print "opening",shppath 183 | shp_feature_list = [] 184 | for component in COMPONENTS[shp_type]: 185 | for filename in glob.glob(shppath+"/*.shp"): 186 | if "_"+component in filename: 187 | component_shp_feature_list = [] 188 | sf = shapefile.Reader(filename) 189 | shape_records = sf.shapeRecords() 190 | shape_fields = [f[0] for f in sf.fields][1:] 191 | for shape_index,shape_record in enumerate(shape_records): 192 | properties = dict(zip(shape_fields,shape_record.record)) 193 | properties = parseProperties(properties,shp_type,component,storm,remnant_flag,shape_index) 194 | feature = geojson.Feature(geometry=shape_record.shape.__geo_interface__,properties=properties) 195 | component_shp_feature_list.append(feature) 196 | if component == "pts": 197 | #make linestrings from points 198 | linestrings_list = [] 199 | for i in range(len(component_shp_feature_list)-1): 200 | ls = geojson.LineString( 201 | [component_shp_feature_list[i].geometry.coordinates, 202 | component_shp_feature_list[i+1].geometry.coordinates] 203 | ) 204 | lsf = geojson.Feature(geometry=ls,properties=component_shp_feature_list[i].properties) 205 | linestrings_list.append(lsf) 206 | component_shp_feature_list = linestrings_list + component_shp_feature_list 207 | shp_feature_list += component_shp_feature_list 208 | return shp_feature_list 209 | 210 | if __name__ == "__main__": 211 | url_feature_list = [] 212 | 213 | for url in RSS_URLS: 214 | print "checking",url 215 | rss_feature_list = parseRSS(url) 216 | url_feature_list.extend(rss_feature_list) 217 | 218 | feature_collection = geojson.FeatureCollection(url_feature_list) 219 | with open("geojson/currentGeoJSON.json", "wb") as geojson_file_handle: 220 | print "saving geojson/currentGeoJSON.json" 221 | geojson.dump(feature_collection, geojson_file_handle, sort_keys=True) 222 | 223 | storm_set = set([s.properties["storm"] for s in url_feature_list]) 224 | 225 | for storm_name in storm_set: 226 | with open("geojson/"+storm_name+".json","wb") as geojson_file_handle: 227 | s_feature_collection = geojson.FeatureCollection([s for s in url_feature_list if s.properties["storm"] == storm_name]) 228 | print "saving","geojson/"+storm_name+".json" 229 | geojson.dump(s_feature_collection,geojson_file_handle, sort_keys=True) 230 | --------------------------------------------------------------------------------