├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE.md ├── README.md ├── address.js ├── package-lock.json ├── package.json ├── parse-address.min.js └── test.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | npm-debug.log 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "node" 4 | - "iojs" 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. 3 | 4 | ## [1.1.2] - 2019-04-15 5 | 6 | ### Changed 7 | - Added support for some uncommon street suffixes that were missed. 8 | 9 | ## [1.1.1] - 2019-03-12 10 | 11 | ### Changed 12 | - Added support for grid type street numbers.
13 | 14 | ## [1.1.0] - 2018-09-27 15 | 16 | ### Changed 17 | - Added support for PO BOX (https://github.com/hassansin/parse-address/pull/20) 18 | 19 | ## [1.0.0] - 2018-07-15 20 | 21 | ### Changed 22 | - Implement Geo::StreetAddress::US's normalizer 23 | 24 | ## [0.0.9] - 2017-12-17 25 | 26 | ### Changed 27 | - added `plus4` field for ZIP+4 code 28 | 29 | ## [0.0.8] - 2017-08-15 30 | 31 | ### Changed 32 | - added `Row` street type 33 | 34 | ## [0.0.7] - 2017-07-29 35 | 36 | ### Changed 37 | - lazy init to avoid slow loading 38 | 39 | ## [0.0.6] - 2016-09-27 40 | 41 | ### Changed 42 | - XRegExp updated to 3.1.1 43 | - Removed XRegExp Addons 44 | 45 | ## [0.0.5] - 2015-08-18 46 | ### Changed 47 | - Strict Mode 48 | 49 | ## [0.0.4] - 2014-12-12 50 | ### Changed 51 | - Removed underscore dependency 52 | - Added browserify 53 | 54 | ## [0.0.3] - 2014-12-12 55 | ### Changed 56 | - Added tests 57 | - Update README 58 | 59 | ## [0.0.2] - 2014-12-12 60 | - Published to NPM 61 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | _The new code here was written by hassansin, and is licensed under the 2 | Internet Software Consortium (ISC) License, the text of which is given 3 | below._ 4 | 5 | Copyright (c) 2014-2015, hassansin 6 | 7 | Permission to use, copy, modify, and/or distribute this software for any 8 | purpose with or without fee is hereby granted, provided that the above 9 | copyright notice and this permission notice appear in all copies. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # US Street Address Parser [![Build Status](https://travis-ci.org/hassansin/parse-address.svg)](https://travis-ci.org/hassansin/parse-address) 2 | 3 | This is a Node.js port of the Perl [Geo::StreetAddress::US](http://search.cpan.org/~timb/Geo-StreetAddress-US-1.04/US.pm) package. 4 | 5 | *Description from Geo::StreetAddress::US*: 6 | 7 | >Geo::StreetAddress::US is a regex-based street address and street intersection parser for the United States. Its basic goal is to be as forgiving as possible when parsing user-provided address strings. Geo::StreetAddress::US knows about directional prefixes and suffixes, fractional building numbers, building units, grid-based addresses (such as those used in parts of Utah), 5 and 9 digit ZIP codes, and all of the official USPS abbreviations for street types and state names...
[more](http://search.cpan.org/~timb/Geo-StreetAddress-US-1.04/US.pm) 8 | 9 | ## Usage: 10 | 11 | ```javascript 12 | //from node: 13 | npm install parse-address 14 | var parser = require('parse-address'); 15 | var parsed = parser.parseLocation('1005 N Gravenstein Highway Sebastopol CA 95472'); 16 | 17 | //from browser: 18 | 19 | var parsed = parseAddress.parseLocation('1005 N Gravenstein Highway Sebastopol CA 95472'); 20 | 21 | //Parsed address: 22 | { 23 | number: '1005', 24 | prefix: 'N', 25 | street: 'Gravenstein', 26 | type: 'Hwy', 27 | city: 'Sebastopol', 28 | state: 'CA', 29 | zip: '95472' } 30 | ``` -------------------------------------------------------------------------------- /address.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014-2015, hassansin 2 | // 3 | //Perl Ref: http://cpansearch.perl.org/src/TIMB/Geo-StreetAddress-US-1.04/US.pm 4 | "use strict"; 5 | 6 | (function(){ 7 | var root; 8 | root = this; 9 | var XRegExp; 10 | 11 | if (typeof require !== "undefined"){ 12 | XRegExp = require('xregexp/src/xregexp.js'); 13 | } 14 | else 15 | XRegExp = root.XRegExp; 16 | 17 | var parser = {}; 18 | var Addr_Match = {}; 19 | 20 | var Directional = { 21 | north : "N", 22 | northeast : "NE", 23 | east : "E", 24 | southeast : "SE", 25 | south : "S", 26 | southwest : "SW", 27 | west : "W", 28 | northwest : "NW", 29 | }; 30 | 31 | var Street_Type = { 32 | allee : "aly", 33 | alley : "aly", 34 | ally : "aly", 35 | anex : "anx", 36 | annex : "anx", 37 | annx : "anx", 38 | arcade : "arc", 39 | av : "ave", 40 | aven : "ave", 41 | avenu : "ave", 42 | avenue : "ave", 43 | avn : "ave", 44 | avnue : "ave", 45 | bayoo : "byu", 46 | bayou : "byu", 47 | beach : "bch", 48 | bend : "bnd", 49 | bluf : "blf", 50 | bluff : "blf", 51 | bluffs : "blfs", 52 | bot : "btm", 53 | bottm : "btm", 54 | bottom : "btm", 55 | boul : "blvd", 56 | boulevard : "blvd", 57 | boulv : "blvd", 58 | branch : "br", 59 | brdge : "brg", 60 | 
bridge : "brg", 61 | brnch : "br", 62 | brook : "brk", 63 | brooks : "brks", 64 | burg : "bg", 65 | burgs : "bgs", 66 | bypa : "byp", 67 | bypas : "byp", 68 | bypass : "byp", 69 | byps : "byp", 70 | camp : "cp", 71 | canyn : "cyn", 72 | canyon : "cyn", 73 | cape : "cpe", 74 | causeway : "cswy", 75 | causway : "cswy", 76 | causwa : "cswy", 77 | cen : "ctr", 78 | cent : "ctr", 79 | center : "ctr", 80 | centers : "ctrs", 81 | centr : "ctr", 82 | centre : "ctr", 83 | circ : "cir", 84 | circl : "cir", 85 | circle : "cir", 86 | circles : "cirs", 87 | ck : "crk", 88 | cliff : "clf", 89 | cliffs : "clfs", 90 | club : "clb", 91 | cmp : "cp", 92 | cnter : "ctr", 93 | cntr : "ctr", 94 | cnyn : "cyn", 95 | common : "cmn", 96 | commons : "cmns", 97 | corner : "cor", 98 | corners : "cors", 99 | course : "crse", 100 | court : "ct", 101 | courts : "cts", 102 | cove : "cv", 103 | coves : "cvs", 104 | cr : "crk", 105 | crcl : "cir", 106 | crcle : "cir", 107 | crecent : "cres", 108 | creek : "crk", 109 | crescent : "cres", 110 | cresent : "cres", 111 | crest : "crst", 112 | crossing : "xing", 113 | crossroad : "xrd", 114 | crossroads : "xrds", 115 | crscnt : "cres", 116 | crsent : "cres", 117 | crsnt : "cres", 118 | crssing : "xing", 119 | crssng : "xing", 120 | crt : "ct", 121 | curve : "curv", 122 | dale : "dl", 123 | dam : "dm", 124 | div : "dv", 125 | divide : "dv", 126 | driv : "dr", 127 | drive : "dr", 128 | drives : "drs", 129 | drv : "dr", 130 | dvd : "dv", 131 | estate : "est", 132 | estates : "ests", 133 | exp : "expy", 134 | expr : "expy", 135 | express : "expy", 136 | expressway : "expy", 137 | expw : "expy", 138 | extension : "ext", 139 | extensions : "exts", 140 | extn : "ext", 141 | extnsn : "ext", 142 | fall : "fall", 143 | falls : "fls", 144 | ferry : "fry", 145 | field : "fld", 146 | fields : "flds", 147 | flat : "flt", 148 | flats : "flts", 149 | ford : "frd", 150 | fords : "frds", 151 | forest : "frst", 152 | forests : "frst", 153 | forg : "frg", 154 | forge : 
"frg", 155 | forges : "frgs", 156 | fork : "frk", 157 | forks : "frks", 158 | fort : "ft", 159 | freeway : "fwy", 160 | freewy : "fwy", 161 | frry : "fry", 162 | frt : "ft", 163 | frway : "fwy", 164 | frwy : "fwy", 165 | garden : "gdn", 166 | gardens : "gdns", 167 | gardn : "gdn", 168 | gateway : "gtwy", 169 | gatewy : "gtwy", 170 | gatway : "gtwy", 171 | glen : "gln", 172 | glens : "glns", 173 | grden : "gdn", 174 | grdn : "gdn", 175 | grdns : "gdns", 176 | green : "grn", 177 | greens : "grns", 178 | grov : "grv", 179 | grove : "grv", 180 | groves : "grvs", 181 | gtway : "gtwy", 182 | harb : "hbr", 183 | harbor : "hbr", 184 | harbors : "hbrs", 185 | harbr : "hbr", 186 | haven : "hvn", 187 | havn : "hvn", 188 | height : "hts", 189 | heights : "hts", 190 | hgts : "hts", 191 | highway : "hwy", 192 | highwy : "hwy", 193 | hill : "hl", 194 | hills : "hls", 195 | hiway : "hwy", 196 | hiwy : "hwy", 197 | hllw : "holw", 198 | hollow : "holw", 199 | hollows : "holw", 200 | holws : "holw", 201 | hrbor : "hbr", 202 | ht : "hts", 203 | hway : "hwy", 204 | inlet : "inlt", 205 | island : "is", 206 | islands : "iss", 207 | isles : "isle", 208 | islnd : "is", 209 | islnds : "iss", 210 | jction : "jct", 211 | jctn : "jct", 212 | jctns : "jcts", 213 | junction : "jct", 214 | junctions : "jcts", 215 | junctn : "jct", 216 | juncton : "jct", 217 | key : "ky", 218 | keys : "kys", 219 | knol : "knl", 220 | knoll : "knl", 221 | knolls : "knls", 222 | la : "ln", 223 | lake : "lk", 224 | lakes : "lks", 225 | land : "land", 226 | landing : "lndg", 227 | lane : "ln", 228 | lanes : "ln", 229 | ldge : "ldg", 230 | light : "lgt", 231 | lights : "lgts", 232 | lndng : "lndg", 233 | loaf : "lf", 234 | lock : "lck", 235 | locks : "lcks", 236 | lodg : "ldg", 237 | lodge : "ldg", 238 | loops : "loop", 239 | mall : "mall", 240 | manor : "mnr", 241 | manors : "mnrs", 242 | meadow : "mdw", 243 | meadows : "mdws", 244 | medows : "mdws", 245 | mews : "mews", 246 | mill : "ml", 247 | mills : "mls", 248 | 
mission : "msn", 249 | missn : "msn", 250 | mnt : "mt", 251 | mntain : "mtn", 252 | mntn : "mtn", 253 | mntns : "mtns", 254 | motorway : "mtwy", 255 | mount : "mt", 256 | mountain : "mtn", 257 | mountains : "mtns", 258 | mountin : "mtn", 259 | mssn : "msn", 260 | mtin : "mtn", 261 | neck : "nck", 262 | orchard : "orch", 263 | orchrd : "orch", 264 | overpass : "opas", 265 | ovl : "oval", 266 | parks : "park", 267 | parkway : "pkwy", 268 | parkways : "pkwy", 269 | parkwy : "pkwy", 270 | pass : "pass", 271 | passage : "psge", 272 | paths : "path", 273 | pikes : "pike", 274 | pine : "pne", 275 | pines : "pnes", 276 | pk : "park", 277 | pkway : "pkwy", 278 | pkwys : "pkwy", 279 | pky : "pkwy", 280 | place : "pl", 281 | plain : "pln", 282 | plaines : "plns", 283 | plains : "plns", 284 | plaza : "plz", 285 | plza : "plz", 286 | point : "pt", 287 | points : "pts", 288 | port : "prt", 289 | ports : "prts", 290 | prairie : "pr", 291 | prarie : "pr", 292 | prk : "park", 293 | prr : "pr", 294 | rad : "radl", 295 | radial : "radl", 296 | radiel : "radl", 297 | ranch : "rnch", 298 | ranches : "rnch", 299 | rapid : "rpd", 300 | rapids : "rpds", 301 | rdge : "rdg", 302 | rest : "rst", 303 | ridge : "rdg", 304 | ridges : "rdgs", 305 | river : "riv", 306 | rivr : "riv", 307 | rnchs : "rnch", 308 | road : "rd", 309 | roads : "rds", 310 | route : "rte", 311 | rvr : "riv", 312 | row : "row", 313 | rue : "rue", 314 | run : "run", 315 | shoal : "shl", 316 | shoals : "shls", 317 | shoar : "shr", 318 | shoars : "shrs", 319 | shore : "shr", 320 | shores : "shrs", 321 | skyway : "skwy", 322 | spng : "spg", 323 | spngs : "spgs", 324 | spring : "spg", 325 | springs : "spgs", 326 | sprng : "spg", 327 | sprngs : "spgs", 328 | spurs : "spur", 329 | sqr : "sq", 330 | sqre : "sq", 331 | sqrs : "sqs", 332 | squ : "sq", 333 | square : "sq", 334 | squares : "sqs", 335 | station : "sta", 336 | statn : "sta", 337 | stn : "sta", 338 | str : "st", 339 | strav : "stra", 340 | strave : "stra", 341 | straven 
: "stra", 342 | stravenue : "stra", 343 | stravn : "stra", 344 | stream : "strm", 345 | street : "st", 346 | streets : "sts", 347 | streme : "strm", 348 | strt : "st", 349 | strvn : "stra", 350 | strvnue : "stra", 351 | sumit : "smt", 352 | sumitt : "smt", 353 | summit : "smt", 354 | terr : "ter", 355 | terrace : "ter", 356 | throughway : "trwy", 357 | tpk : "tpke", 358 | tr : "trl", 359 | trace : "trce", 360 | traces : "trce", 361 | track : "trak", 362 | tracks : "trak", 363 | trafficway : "trfy", 364 | trail : "trl", 365 | trails : "trl", 366 | trk : "trak", 367 | trks : "trak", 368 | trls : "trl", 369 | trnpk : "tpke", 370 | trpk : "tpke", 371 | tunel : "tunl", 372 | tunls : "tunl", 373 | tunnel : "tunl", 374 | tunnels : "tunl", 375 | tunnl : "tunl", 376 | turnpike : "tpke", 377 | turnpk : "tpke", 378 | underpass : "upas", 379 | union : "un", 380 | unions : "uns", 381 | valley : "vly", 382 | valleys : "vlys", 383 | vally : "vly", 384 | vdct : "via", 385 | viadct : "via", 386 | viaduct : "via", 387 | view : "vw", 388 | views : "vws", 389 | vill : "vlg", 390 | villag : "vlg", 391 | village : "vlg", 392 | villages : "vlgs", 393 | ville : "vl", 394 | villg : "vlg", 395 | villiage : "vlg", 396 | vist : "vis", 397 | vista : "vis", 398 | vlly : "vly", 399 | vst : "vis", 400 | vsta : "vis", 401 | wall : "wall", 402 | walks : "walk", 403 | well : "wl", 404 | wells : "wls", 405 | wy : "way", 406 | }; 407 | 408 | var State_Code = { 409 | "alabama" : "AL", 410 | "alaska" : "AK", 411 | "american samoa" : "AS", 412 | "arizona" : "AZ", 413 | "arkansas" : "AR", 414 | "california" : "CA", 415 | "colorado" : "CO", 416 | "connecticut" : "CT", 417 | "delaware" : "DE", 418 | "district of columbia" : "DC", 419 | "federated states of micronesia" : "FM", 420 | "florida" : "FL", 421 | "georgia" : "GA", 422 | "guam" : "GU", 423 | "hawaii" : "HI", 424 | "idaho" : "ID", 425 | "illinois" : "IL", 426 | "indiana" : "IN", 427 | "iowa" : "IA", 428 | "kansas" : "KS", 429 | "kentucky" : "KY", 430 
| "louisiana" : "LA", 431 | "maine" : "ME", 432 | "marshall islands" : "MH", 433 | "maryland" : "MD", 434 | "massachusetts" : "MA", 435 | "michigan" : "MI", 436 | "minnesota" : "MN", 437 | "mississippi" : "MS", 438 | "missouri" : "MO", 439 | "montana" : "MT", 440 | "nebraska" : "NE", 441 | "nevada" : "NV", 442 | "new hampshire" : "NH", 443 | "new jersey" : "NJ", 444 | "new mexico" : "NM", 445 | "new york" : "NY", 446 | "north carolina" : "NC", 447 | "north dakota" : "ND", 448 | "northern mariana islands" : "MP", 449 | "ohio" : "OH", 450 | "oklahoma" : "OK", 451 | "oregon" : "OR", 452 | "palau" : "PW", 453 | "pennsylvania" : "PA", 454 | "puerto rico" : "PR", 455 | "rhode island" : "RI", 456 | "south carolina" : "SC", 457 | "south dakota" : "SD", 458 | "tennessee" : "TN", 459 | "texas" : "TX", 460 | "utah" : "UT", 461 | "vermont" : "VT", 462 | "virgin islands" : "VI", 463 | "virginia" : "VA", 464 | "washington" : "WA", 465 | "west virginia" : "WV", 466 | "wisconsin" : "WI", 467 | "wyoming" : "WY", 468 | }; 469 | 470 | var Direction_Code; 471 | var initialized = false; 472 | 473 | var Normalize_Map = { 474 | prefix: Directional, 475 | prefix1: Directional, 476 | prefix2: Directional, 477 | suffix: Directional, 478 | suffix1: Directional, 479 | suffix2: Directional, 480 | type: Street_Type, 481 | type1: Street_Type, 482 | type2: Street_Type, 483 | state: State_Code, 484 | } 485 | 486 | function capitalize(s){ 487 | return s && s[0].toUpperCase() + s.slice(1); 488 | } 489 | function keys(o){ 490 | return Object.keys(o); 491 | } 492 | function values(o){ 493 | var v = []; 494 | keys(o).forEach(function(k){ 495 | v.push(o[k]); 496 | }); 497 | return v; 498 | } 499 | function each(o,fn){ 500 | keys(o).forEach(function(k){ 501 | fn(o[k],k); 502 | }); 503 | } 504 | function invert(o){ 505 | var o1= {}; 506 | keys(o).forEach(function(k){ 507 | o1[o[k]] = k; 508 | }); 509 | return o1; 510 | } 511 | function flatten(o){ 512 | return keys(o).concat(values(o)); 513 | } 514 | 
/**
 * Builds Direction_Code and every pattern stored on Addr_Match.
 *
 * Runs its body only once: the `initialized` flag is set on the first call and
 * every later call returns immediately. Each parser entry point below calls it
 * before touching Addr_Match.
 *
 * The multi-line pattern strings are compiled with XRegExp's `x` flag, so the
 * whitespace and the `#fix…` markers inside them are not part of the match.
 *
 * NOTE(review): the capture-group names (`<number>`, `<street_0>`, `<zip>`, …)
 * were missing from this copy of the file, which left bare `(?` groups that do
 * not compile. They are restored here. `dircode` is confirmed by
 * `match.dircode` below; the `_<digit>` suffix scheme is confirmed by the
 * `/_\d/` renaming and by normalize_address stripping a trailing `_<digit>`.
 * The remaining names should be confirmed against the published package.
 */
function lazyInit() {
  if (initialized) {
    return;
  }
  initialized = true;

  Direction_Code = invert(Directional);

  Addr_Match = {
    // every street-type spelling and abbreviation, de-duplicated
    type    : flatten(Street_Type).sort().filter(function(v, i, arr) { return arr.indexOf(v) === i; }).join('|'),
    fraction : '\\d+\\/\\d+',
    state   : '\\b(?:' + keys(State_Code).concat(values(State_Code)).map(XRegExp.escape).join('|') + ')\\b',
    // Full words first, then each abbreviation in dotted ("N.E.") and plain ("NE")
    // form, longest abbreviations first. The comparator must return a number;
    // the previous boolean result never signalled "a sorts before b".
    direct  : values(Directional)
      .sort(function(a, b) { return b.length - a.length; })
      .reduce(function(prev, curr) {
        return prev.concat([XRegExp.escape(curr.replace(/\w/g, '$&.')), curr]);
      }, keys(Directional))
      .join('|'),
    dircode : keys(Direction_Code).join("|"),
    zip     : '(?<zip>\\d{5})[- ]?(?<plus4>\\d{4})?',
    corner  : '(?:\\band\\b|\\bat\\b|&|\\@)',
  };

  // Plain house number ("123", "123-45") or a grid-style number ("N6W23001").
  // `[NSEW]` replaces `[N|S|E|W]`, which also accepted a literal "|".
  Addr_Match.number = '(?<number>(\\d+-?\\d*)|([NSEW]\\d{1,3}[NSEW]\\d{1,6}))(?=\\D)';

  Addr_Match.street = ' \n\
    (?: \n\
      (?:(?<street_0>'+Addr_Match.direct+')\\W+ \n\
         (?<type_0>'+Addr_Match.type+')\\b \n\
      ) \n\
      | \n\
      (?:(?<prefix_0>'+Addr_Match.direct+')\\W+)? \n\
      (?: \n\
        (?<street_1>[^,]*\\d) \n\
        (?:[^\\w,]*(?<suffix_1>'+Addr_Match.direct+')\\b) \n\
        | \n\
        (?<street_2>[^,]+) \n\
        (?:[^\\w,]+(?<type_2>'+Addr_Match.type+')\\b) \n\
        (?:[^\\w,]+(?<suffix_2>'+Addr_Match.direct+')\\b)? \n\
        | \n\
        (?<street_3>[^,]+?) \n\
        (?:[^\\w,]+(?<type_3>'+Addr_Match.type+')\\b)? \n\
        (?:[^\\w,]+(?<suffix_3>'+Addr_Match.direct+')\\b)? \n\
      ) \n\
    )';

  Addr_Match.po_box = 'p\\W*(?:[om]|ost\\ ?office)\\W*b(?:ox)?';

  Addr_Match.sec_unit_type_numbered = ' \n\
    (?<sec_unit_type_1>su?i?te \n\
      |'+Addr_Match.po_box+' \n\
      |(?:ap|dep)(?:ar)?t(?:me?nt)? \n\
      |ro*m \n\
      |flo*r? \n\
      |uni?t \n\
      |bu?i?ldi?n?g \n\
      |ha?nga?r \n\
      |lo?t \n\
      |pier \n\
      |slip \n\
      |spa?ce? \n\
      |stop \n\
      |tra?i?le?r \n\
      |box)(?![a-z] \n\
    ) \n\
    ';

  Addr_Match.sec_unit_type_unnumbered = ' \n\
    (?<sec_unit_type_2>ba?se?me?n?t \n\
      |fro?nt \n\
      |lo?bby \n\
      |lowe?r \n\
      |off?i?ce? \n\
      |pe?n?t?ho?u?s?e? \n\
      |rear \n\
      |side \n\
      |uppe?r \n\
    )\\b';

  Addr_Match.sec_unit = ' \n\
    (?: #fix3 \n\
      (?: #fix1 \n\
        (?: \n\
          (?:'+Addr_Match.sec_unit_type_numbered+'\\W*) \n\
          |(?<sec_unit_type_3>\\#)\\W* \n\
        ) \n\
        (?<sec_unit_num_1>[\\w-]+) \n\
      ) \n\
      | \n\
      '+Addr_Match.sec_unit_type_unnumbered+' \n\
    )';

  Addr_Match.city_and_state = ' \n\
    (?: \n\
      (?<city>[^\\d,]+?)\\W+ \n\
      (?<state>'+Addr_Match.state+') \n\
    ) \n\
    ';

  Addr_Match.place = ' \n\
    (?:'+Addr_Match.city_and_state+'\\W*)? \n\
    (?:'+Addr_Match.zip+')? \n\
    ';

  Addr_Match.address = XRegExp(' \n\
    ^ \n\
    [^\\w\\#]* \n\
    ('+Addr_Match.number+')\\W* \n\
    (?:'+Addr_Match.fraction+'\\W*)? \n\
    '+Addr_Match.street+'\\W+ \n\
    (?:'+Addr_Match.sec_unit+')?\\W* #fix2 \n\
    '+Addr_Match.place+' \n\
    \\W*$','ix');

  var sep = '(?:\\W+|$)'; // no support for \Z

  // sec_unit appears twice here; the second copy has "1" appended after every
  // "_<digit>" so that its group names differ from the first copy's.
  Addr_Match.informal_address = XRegExp(' \n\
    ^ \n\
    \\s* \n\
    (?:'+Addr_Match.sec_unit+sep+')? \n\
    (?:'+Addr_Match.number+')?\\W* \n\
    (?:'+Addr_Match.fraction+'\\W*)? \n\
    '+Addr_Match.street+sep+' \n\
    (?:'+Addr_Match.sec_unit.replace(/_\d/g,'$&1')+sep+')? \n\
    (?:'+Addr_Match.place+')? \n\
    ','ix');

  Addr_Match.po_address = XRegExp(' \n\
    ^ \n\
    \\s* \n\
    (?:'+Addr_Match.sec_unit.replace(/_\d/g,'$&1')+sep+')? \n\
    (?:'+Addr_Match.place+')? \n\
    ','ix');

  // The two street copies get "1" / "2" inserted before every "_<digit>"
  // (street_0 -> street1_0 / street2_0), which normalize_address later reduces
  // to the street1/type1/... and street2/type2/... keys.
  Addr_Match.intersection = XRegExp(' \n\
    ^\\W* \n\
    '+Addr_Match.street.replace(/_\d/g,'1$&')+'\\W*? \n\
    \\s+'+Addr_Match.corner+'\\s+ \n\
    '+Addr_Match.street.replace(/_\d/g,'2$&') + '($|\\W+) \n\
    '+Addr_Match.place+'\\W*$','ix');
}

/**
 * Turns a regex match into a plain result object.
 *
 * @param {Object|null} parts - match returned by XRegExp.exec, or null.
 * @returns {Object|null} null when `parts` is falsy; otherwise an object holding
 *   every non-empty named capture, keyed by the group name with a trailing
 *   `_<digit>` removed, cleaned of punctuation and mapped through Normalize_Map.
 */
parser.normalize_address = function(parts) {
  lazyInit();
  if (!parts)
    return null;
  var parsed = {};

  Object.keys(parts).forEach(function(k) {
    // skip the match bookkeeping ('input', 'index') and the positional captures;
    // the coercing global isFinite is intentional: keys are strings like "0", "1"
    if (['input','index'].indexOf(k) !== -1 || isFinite(k))
      return;
    // "street_2" -> "street", "type1_0" -> "type1"; names without a numeric tail are kept
    var key = isFinite(k.split('_').pop()) ? k.split('_').slice(0,-1).join('_') : k;
    if (parts[k])
      parsed[key] = parts[k].trim().replace(/^\s+|\s+$|[^\w\s\-#&]/g, '');
  });

  // replace recognised spellings with their mapped form (directionals, street types, states)
  each(Normalize_Map, function(map, key) {
    if (parsed[key] && map[parsed[key].toLowerCase()]) {
      parsed[key] = map[parsed[key].toLowerCase()];
    }
  });

  // street types are reported capitalised: "hwy" -> "Hwy"
  ['type', 'type1', 'type2'].forEach(function(key) {
    if (key in parsed)
      parsed[key] = parsed[key].charAt(0).toUpperCase() + parsed[key].slice(1).toLowerCase();
  });

  if (parsed.city) {
    // expand a leading direction code in the city name to the capitalised word
    // looked up in Direction_Code
    parsed.city = XRegExp.replace(parsed.city,
      XRegExp('^(?<dircode>'+Addr_Match.dircode+')\\s+(?=\\S)','ix'),
      function(match) {
        return capitalize(Direction_Code[match.dircode.toUpperCase()]) + ' ';
      });
  }
  return parsed;
};

/**
 * Matches `address` against the strict Addr_Match.address pattern.
 * @param {string} address
 * @returns {Object|null} normalized parts, or null when the pattern does not match.
 */
parser.parseAddress = function(address) {
  lazyInit();
  var parts = XRegExp.exec(address, Addr_Match.address);
  return parser.normalize_address(parts);
};

/**
 * Matches `address` against the looser Addr_Match.informal_address pattern.
 * @param {string} address
 * @returns {Object|null} normalized parts, or null when the pattern does not match.
 */
parser.parseInformalAddress = function(address) {
  lazyInit();
  var parts = XRegExp.exec(address, Addr_Match.informal_address);
  return parser.normalize_address(parts);
};

/**
 * Matches `address` against Addr_Match.po_address (secondary unit plus place).
 * @param {string} address
 * @returns {Object|null} normalized parts, or null when the pattern does not match.
 */
parser.parsePoAddress = function(address) {
  lazyInit();
  var parts = XRegExp.exec(address, Addr_Match.po_address);
  return parser.normalize_address(parts);
};

/**
 * Picks a parser for `address`: intersection when it contains a corner word or
 * symbol, PO-box when it starts with a PO-box prefix, otherwise the strict
 * address parser with the informal parser as fallback.
 * @param {string} address
 * @returns {Object|null} normalized parts, or null when nothing matches.
 */
parser.parseLocation = function(address) {
  lazyInit();
  if (XRegExp(Addr_Match.corner,'xi').test(address)) {
    return parser.parseIntersection(address);
  }
  if (XRegExp('^'+Addr_Match.po_box,'xi').test(address)) {
    return parser.parsePoAddress(address);
  }
  return parser.parseAddress(address)
      || parser.parseInformalAddress(address);
};

/**
 * Matches `address` against Addr_Match.intersection.
 *
 * When only the second street carries a type, or both types are equal, a
 * trailing "s" is removed and, if the remainder is itself a street type, it is
 * assigned to both streets.
 * @param {string} address
 * @returns {Object|null} normalized parts (type1/type2 always strings), or null.
 */
parser.parseIntersection = function(address) {
  lazyInit();
  var parts = XRegExp.exec(address, Addr_Match.intersection);
  parts = parser.normalize_address(parts);
  if (parts) {
    parts.type2 = parts.type2 || '';
    parts.type1 = parts.type1 || '';
    if (parts.type2 && !parts.type1 || (parts.type1 === parts.type2)) {
      var type = parts.type2;
      type = XRegExp.replace(type, /s\W*$/, '');
      // Addr_Match.type is a bare "a|b|c" alternation; it has to be grouped so
      // that ^ and $ anchor every alternative rather than only the first/last.
      if (XRegExp('^(?:'+Addr_Match.type+')$','ix').test(type)) {
        parts.type1 = parts.type2 = type;
      }
    }
  }

  return parts;
};

// AMD / RequireJS
if (typeof define !== 'undefined' && define.amd) {
  define([], function () {
    return parser;
  });
}
// Node.js
else if (typeof exports !== "undefined") {
  exports.parseIntersection = parser.parseIntersection;
  exports.parseLocation = parser.parseLocation;
  exports.parseInformalAddress = parser.parseInformalAddress;
  exports.parseAddress = parser.parseAddress;
  // exposed for parity with the AMD branch, which returns the whole parser object
  exports.parsePoAddress = parser.parsePoAddress;
  exports.normalize_address = parser.normalize_address;
}
// included directly via