├── .gitignore ├── History.txt ├── Makefile ├── Manifest.txt ├── README.rdoc ├── REST.rdoc ├── TODO.txt ├── build ├── build_indexes ├── rebuild_cluster ├── sql │ ├── cluster.sql │ ├── convert.sql │ ├── create.sql │ ├── index.sql │ ├── place.sql │ └── setup.sql ├── tiger2009_import └── tiger_import ├── conf ├── geocoder-us │ ├── geocoder.ru │ └── unicorn.rb └── init │ └── geocoder-us.conf ├── debian ├── README.Debian ├── changelog ├── compat ├── control ├── copyright ├── default ├── docs ├── geocoder-us.postinst ├── geocoder-us.prerm ├── rules └── source │ └── format ├── demos ├── api │ ├── server.rb │ └── views │ │ └── index.erb ├── cli.rb ├── demo │ ├── app │ │ ├── ext │ │ │ └── geocodewrap.rb │ │ └── views │ │ │ ├── index.builder │ │ │ └── index.erb │ ├── config.ru │ ├── config │ │ ├── bootstraps.rb │ │ └── geoenvironment.rb │ ├── geocoder_helper.rb │ ├── geocom_geocode.rb │ ├── main.rb │ ├── rakefile.rb │ └── tmp │ │ └── restart.txt ├── parse.rb └── simpledemo │ ├── views │ ├── index.builder │ └── index.erb │ └── ws.rb ├── doc ├── Makefile ├── html4css1.css ├── lookup.rst ├── parsing.rst └── voidspace.css ├── gemspec ├── lib └── geocoder │ ├── us.rb │ └── us │ ├── address.rb │ ├── constants.rb │ ├── database.rb │ ├── metaphone.rb │ ├── numbers.rb │ └── rest.rb ├── navteq ├── README ├── convert.sql ├── navteq_import └── prepare.sql ├── setup.rb ├── src ├── Makefile ├── README ├── liblwgeom │ ├── Makefile │ ├── box2d.c │ ├── lex.yy.c │ ├── liblwgeom.h │ ├── lwalgorithm.c │ ├── lwalgorithm.h │ ├── lwcircstring.c │ ├── lwcollection.c │ ├── lwcompound.c │ ├── lwcurvepoly.c │ ├── lwgeom.c │ ├── lwgeom_api.c │ ├── lwgparse.c │ ├── lwgunparse.c │ ├── lwline.c │ ├── lwmcurve.c │ ├── lwmline.c │ ├── lwmpoint.c │ ├── lwmpoly.c │ ├── lwmsurface.c │ ├── lwpoint.c │ ├── lwpoly.c │ ├── lwsegmentize.c │ ├── lwutil.c │ ├── measures.c │ ├── postgis_config.h │ ├── ptarray.c │ ├── vsprintf.c │ ├── wktparse.h │ ├── wktparse.lex │ ├── wktparse.tab.c │ ├── wktparse.tab.h │ └── 
wktparse.y ├── libsqlite3_geocoder │ ├── Makefile │ ├── Makefile.nix │ ├── Makefile.redhat │ ├── extension.c │ ├── extension.h │ ├── levenshtein.c │ ├── metaphon.c │ ├── util.c │ └── wkb_compress.c ├── metaphone │ ├── Makefile │ ├── README │ ├── extension.c │ └── metaphon.c └── shp2sqlite │ ├── Makefile │ ├── Makefile.nix │ ├── Makefile.redhat │ ├── dbfopen.c │ ├── getopt.c │ ├── getopt.h │ ├── shapefil.h │ ├── shp2sqlite.c │ └── shpopen.c └── test ├── address.rb ├── benchmark.rb ├── constants.rb ├── data ├── address-sample.csv ├── db-test.csv └── locations.csv ├── database.rb ├── generate.rb ├── numbers.rb └── run.rb /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.gem 4 | pkg/ 5 | bin/shp2sqlite 6 | src/shp2sqlite/shp2sqlite 7 | src/liblwgeom/liblwgeom.a 8 | doc/*.html 9 | -------------------------------------------------------------------------------- /History.txt: -------------------------------------------------------------------------------- 1 | === 1.0.0 / 2009-06-02 2 | 3 | * 1 major enhancement 4 | 5 | * Birthday! 
6 | 7 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | make -C src install 3 | gem build gemspec 4 | 5 | test: all 6 | ruby -Ilib tests/run.rb 7 | 8 | install: all 9 | # gem install *.gem 10 | 11 | clean: 12 | make -C src clean 13 | rm -f lib/geocoder/us/sqlite3.so 14 | rm -f *.gem 15 | -------------------------------------------------------------------------------- /Manifest.txt: -------------------------------------------------------------------------------- 1 | History.txt 2 | Manifest.txt 3 | README.rdoc 4 | Rakefile 5 | lib/geocoder/us/database.rb 6 | lib/geocoder/us/numbers.rb 7 | lib/geocoder/us/address.rb 8 | lib/geocoder/us/constants.rb 9 | tests/database.rb 10 | tests/numbers.rb 11 | tests/generate.rb 12 | tests/run.rb 13 | tests/address.rb 14 | tests/benchmark.rb 15 | tests/constants.rb 16 | tests/data/address-sample.csv 17 | tests/data/locations.csv 18 | tests/data/db-test.csv 19 | -------------------------------------------------------------------------------- /README.rdoc: -------------------------------------------------------------------------------- 1 | = Geocoder::US 2 | 3 | Geocoder::US 2.0 is a software package designed to geocode US street 4 | addresses. Although it is primarily intended for use with the US Census 5 | Bureau's free TIGER/Line dataset, it uses an abstract US address data model 6 | that can be employed with other sources of US street address range data. 7 | 8 | Geocoder::US 2.0 implements a Ruby interface to parse US street addresses, and 9 | perform fuzzy lookup against an SQLite 3 database. Geocoder::US is designed to 10 | return the best matches found, with geographic coordinates interpolated from 11 | the street range dataset. 
Geocoder::US will fill in missing information, and 12 | it knows about standard and common non-standard postal abbreviations, ordinal 13 | versus cardinal numbers, and more. 14 | 15 | Geocoder::US 2.0 is shipped with a free US ZIP code data set, compiled from 16 | public domain sources. 17 | 18 | == Synopsis 19 | 20 | >> require 'geocoder/us' 21 | >> db = Geocoder::US::Database.new("/opt/tiger/geocoder.db") 22 | >> p db.geocode("1600 Pennsylvania Av, Washington DC") 23 | 24 | [{:pretyp=>"", :street=>"Pennsylvania", :sufdir=>"NW", :zip=>"20502", 25 | :lon=>-77.037528, :number=>"1600", :fips_county=>"11001", :predir=>"", 26 | :precision=>:range, :city=>"Washington", :lat=>38.898746, :suftyp=>"Ave", 27 | :state=>"DC", :prequal=>"", :sufqual=>"", :score=>0.906, :prenum=>""}] 28 | 29 | == Prerequisites 30 | 31 | To build Geocoder::US, you will need gcc/g++, make, bash or equivalent, the 32 | standard *NIX 'unzip' utility, and the SQLite 3 executable and development 33 | files installed on your system. 34 | 35 | To use the Ruby interface, you will need the 'Text' gem installed from 36 | rubyforge. To run the tests, you will also need the 'fastercsv' gem. 37 | 38 | Additionally, you will need a custom build of the 'sqlite3-ruby' gem that 39 | supports loading extension modules in SQLite. You can get a patched version of 40 | this gem from http://github.com/schuyler/sqlite3-ruby/. Until the sqlite3-ruby 41 | maintainers roll in the relevant patch, you will need *this* version. 42 | 43 | *NOTE*: If you do not have /usr/include/sqlite3ext.h installed, then your 44 | sqlite3 binaries are probably not configured to support dynamic extension 45 | loading. If not, you *must* compile and install SQLite from source, or rebuild 46 | your system packages. This is not believed to be a problem on Debian/Ubuntu, 47 | but is known to be a problem with Red Hat/CentOS. 
48 | 49 | *NOTE*: If you *do* have to install from source, make sure that the 50 | source-installed 'sqlite3' program is in your path before proceeding (and not 51 | the system-installed version), using `which sqlite3`. Also, be sure that you've 52 | added your source install prefix (usually /usr/local) to /etc/ld.so.conf (or 53 | its moral equivalent) and that you've run /sbin/ldconfig. 54 | 55 | == Thread safety 56 | 57 | SQLite 3 is not designed for concurrent use of a single database handle across 58 | multiple threads. Therefore, to prevent segfaults, Geocoder::US::Database 59 | implements a global mutex that wraps all database access. The use of this mutex 60 | will ensure stability in multi-threaded applications, but incurs a performance 61 | penalty. However, since the database is read-only from Ruby, there's no reason 62 | in principle why multi-threaded apps can't each have their own database handle. 63 | 64 | To disable the mutex for better performance, you can do the following: 65 | 66 | * Read the following and make sure you understand them: 67 | * http://www.sqlite.org/faq.html#q6 68 | * http://www.sqlite.org/cvstrac/wiki?p=MultiThreading 69 | * Make sure you have compiled SQLite 3 with thread safety enabled. 70 | * Instantiate a separate Geocoder::US::Database object for *each* thread 71 | in your Ruby script, and pass :threadsafe => true to new() to disable mutex 72 | synchronization. 73 | 74 | Per the SQLite 3 documentation, do *not* attempt to retain a 75 | Geocoder::US::Database object across a fork! "Problems will result if you do." 76 | 77 | == Building Geocoder::US 78 | 79 | Unpack the source and run 'make'. This will compile the SQLite 3 extension 80 | needed by Geocoder::US, the Shapefile import utility, and the Geocoder-US 81 | gem. 82 | 83 | You can run 'make install' as root to install the gem systemwide. 84 | 85 | == Generating a Geocoder::US Database 86 | 87 | Build the package from source as described above. 
Generating the database 88 | involves three basic steps: 89 | 90 | * Import the Shapefile data into an SQLite database. 91 | * Build the database indexes. 92 | * Optionally, rebuild the database to cluster indexed rows. 93 | 94 | We will presume that you are building a Geocoder::US database from TIGER/Line, 95 | and that you have obtained the complete set of TIGER/Line ZIP files, and put 96 | the entire tree in /opt/tiger. Please adjust these instructions as needed. 97 | 98 | A full TIGER/Line database import takes ten hours to run on a normal Amazon 99 | EC2 instance, and takes up a little over 5 gigabytes after all is said and 100 | done. You will need to have at least 12 gigabytes of free disk space *after* 101 | downloading the TIGER/Line dataset, if you are building the full database. 102 | 103 | === Import TIGER/Line 104 | 105 | From inside the Geocoder::US source tree, run the following: 106 | 107 | $ bin/tiger_import /opt/tiger/geocoder.db /opt/tiger 108 | 109 | This will unpack each TIGER/Line ZIP file to a temporary directory, and 110 | perform the extract/transform/load sequence to incrementally build the 111 | database. The process takes about 10-12 hours on a normal Amazon EC2 instance, 112 | or about 5 CPU hours flat out on a modern PC. Note that not all TIGER/Line 113 | source files contain address range information, so you will see error messages 114 | for some counties, but this is normal. 115 | 116 | If you only want to import specific counties, you can pipe a list of 117 | TIGER/Line county directories to tiger_import on stdin. For example, 118 | the following will install just the data for the state of Delaware: 119 | 120 | $ ls -d /opt/tiger/10_DELAWARE/1* | bin/tiger_import ~/delaware.db 121 | 122 | The tiger_import process uses a binary utility, shp2sqlite, which is derived 123 | from shp2pgsql, which ships with PostGIS. The shp2sqlite utility converts 124 | .shp and .dbf files into SQL suitable for import into SQLite. 
This SQL 125 | is then piped into the sqlite3 command line tool, where it is loaded into 126 | temporary tables, and then a set of static SQL statements (kept in the sql/ 127 | directory) are used to transform this data and import it into the database 128 | itself. 129 | 130 | === Build metaphones using Ruby metaphone 131 | 132 | run bin/rebuild_metaphones /opt/tiger/geocoder.db 133 | 134 | This creates the metaphones using Ruby's metaphone function and will produce better geocoding results. 135 | 136 | === Build the indexes 137 | 138 | After the database import is complete, you will want to construct the database 139 | indexes: 140 | 141 | $ bin/build_indexes /opt/tiger/geocoder.db 142 | 143 | This process takes 25 minutes on an EC2 instance (8 CPU minutes), but it's a 144 | *lot* faster than building the indexes incrementally during the import 145 | process. Basically, this process simply feeds SQL statements to the sqlite3 146 | utility to construct the indexes on the existing database. 147 | 148 | === Cluster the database tables (optional) 149 | 150 | As a final optional step, you can cluster the database tables according to 151 | their indexes, which will make the database smaller, and lookups faster. This 152 | process will take an hour or two, and may be a micro-optimization. 153 | 154 | $ bin/rebuild_cluster /opt/tiger/geocoder.db 155 | 156 | You will need as much free disk space to run rebuild_cluster as the database 157 | takes up, because the process essentially reconstructs the database in a new 158 | file, and then it renames the new database over top of the old. 159 | 160 | == Running the unit tests 161 | 162 | From within the source tree, you can run the following: 163 | 164 | $ ruby tests/run.rb 165 | 166 | This tests the libraries, except for the database routines. 
If you have a 167 | database built, you can run the test harness like so: 168 | 169 | $ ruby tests/run.rb /opt/tiger/geocoder.db 170 | 171 | The full test suite may take 30 or so seconds to run completely. 172 | 173 | == License 174 | 175 | Geocoder::US 2.0 was based on earlier work by Schuyler Erle on 176 | a Perl module of the same name. You can find it at 177 | http://search.cpan.org/~sderle/. 178 | 179 | Geocoder::US 2.0 was written by Schuyler Erle, of Entropy Free LLC, 180 | with the gracious support of FortiusOne, Inc. Please send bug reports, 181 | patches, kudos, etc. to patches at geocoder.us. 182 | 183 | Copyright (c) 2009 FortiusOne, Inc. 184 | 185 | This program is free software: you can redistribute it and/or modify 186 | it under the terms of the GNU General Public License as published by 187 | the Free Software Foundation, either version 3 of the License, or 188 | (at your option) any later version. 189 | 190 | This program is distributed in the hope that it will be useful, 191 | but WITHOUT ANY WARRANTY; without even the implied warranty of 192 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 193 | GNU General Public License for more details. 194 | 195 | You should have received a copy of the GNU General Public License 196 | along with this program. If not, see <http://www.gnu.org/licenses/>. 197 | 198 | -------------------------------------------------------------------------------- /REST.rdoc: -------------------------------------------------------------------------------- 1 | GET /1.0/geocode/address.json 2 | 3 | The geocode/address endpoint returns the interpolated latitude and longitude of 4 | a US street address or street intersection. When given a US city or ZIP code, 5 | the approximate center point of that place will be returned instead. 
6 | 7 | The geocoder attempts to return the most accurate possible result, including, 8 | where possible, correcting the given street type, city, or postal code, and 9 | identifying and correcting misspellings in the street or city name in the given 10 | address. 11 | 12 | Currently, address geocoding only works in the United States. 13 | 14 | Parameters: 15 | 16 | q = a string containing a US street address. 17 | 18 | Returns a GeoJSON feature collection: 19 | 20 | { 21 | "type": "FeatureCollection", 22 | "features": [ 23 | { 24 | "type": "Feature", 25 | "properties": { 26 | "number": "41", 27 | "street": "Decatur St", 28 | "city": "San Francisco", 29 | "state": "CA", 30 | "zip": "94103", 31 | "fips_county": "06075", 32 | "score": 1.0, 33 | "precision":"range" 34 | }, 35 | "geometry": { 36 | "type": "Point", 37 | "coordinates": [-122.406032, 37.772502] 38 | } 39 | } 40 | ], 41 | "address":"41 Decatur St, San Francisco CA 94103" 42 | } 43 | 44 | 45 | Each address match in the feature collection contains some combination of the 46 | following properties: 47 | 48 | number 49 | The building number of the address. When a building number is not 50 | included in a range stored in the address database, the nearest 51 | known building number will be returned in its place. 52 | 53 | street 54 | The name of the street found in the database that matches the address, 55 | given in a normalized form. 56 | 57 | street1 / street2 58 | When an address is parsed as an intersection, the intersecting streets 59 | are returned as `street1` and `street2` in place of the `number` and 60 | `street` fields. 61 | 62 | city 63 | The city matching the given address. In the US, this is typically 64 | determined from the matching ZIP code, so, for ZIP codes that cover 65 | more than one named place, the results may be different from what you 66 | expect, but will still be suitable for postal addressing. 
67 | 68 | state 69 | The two letter postal abbreviation of the state containing the matching 70 | address. 71 | 72 | zip 73 | In the US, the five digit ZIP code of the matching address. 74 | 75 | plus4 76 | In the US, the ZIP+4 extension parsed from the address, if any. This 77 | extension is not actually used in the geocoding process, but is 78 | returned for convenience. 79 | 80 | fips_county 81 | In the US, the FIPS 6-4 code of the county containing the address. 82 | 83 | prenum / sufnum 84 | If the building number has a non-numeric prefix, it will be returned in 85 | `prenum`. Ditto `sufnum` for non-numeric suffixes. 86 | 87 | precision 88 | The qualitative precision of the geocode. The value will be one of 89 | `intersection`, `range`, `street`, `zip`, or `city`. 90 | 91 | score 92 | The percentage of text match between the given address and the geocoded 93 | result, expressed as a float between 0 and 1. A higher score indicates 94 | a closer match. Results with a score below 0.5 should be regarded with 95 | care. 96 | -------------------------------------------------------------------------------- /TODO.txt: -------------------------------------------------------------------------------- 1 | 1. Check interpolate measure: scale longitude or not? 2 | 5. Intersections... 3 | - import ALL linestrings (even those without ranges) 4 | - throw away internal points on lines that don't have ranges 5 | 7. Documentation (*) 6 | 8. Make SQLite memory cache size an option to the Database constructor 7 | 9. Precision and accuracy measure 8 | 10. Street line set back 9 | -------------------------------------------------------------------------------- /build/build_indexes: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BASE=$(dirname $0) 4 | PATH=$PATH:$BASE/bin 5 | SQL="$BASE/../sql" 6 | 7 | # Just run the SQL that constructs the indexes. 
8 | sqlite3 $1 < ${SQL}/index.sql 9 | -------------------------------------------------------------------------------- /build/rebuild_cluster: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BASE=$(dirname $0) 4 | PATH=$PATH:$BASE/bin 5 | SQL="$BASE/../sql" 6 | 7 | OLD_DB=$1 8 | DATABASE=${OLD_DB}.$$ 9 | 10 | [ -r $DATABASE ] && echo "$DATABASE already exists." && exit -1 11 | [ ! -r $OLD_DB ] && echo "Can't read $OLD_DB." && exit -1 12 | 13 | # Create a shiny new database, attach the old one, 14 | # extract the data from it, and then index that. 15 | # Finally, overwrite the old database with the new one. 16 | ( cat ${SQL}/create.sql && \ 17 | echo "ATTACH DATABASE '${OLD_DB}' AS old;" && \ 18 | cat ${SQL}/cluster.sql && \ 19 | echo "DETACH DATABASE old;" && \ 20 | cat ${SQL}/index.sql && \ 21 | echo "ANALYZE;" ) | sqlite3 $DATABASE \ 22 | && mv $DATABASE $OLD_DB 23 | -------------------------------------------------------------------------------- /build/sql/cluster.sql: -------------------------------------------------------------------------------- 1 | .echo on 2 | -- turn off various pragmas to make SQLite faster 3 | PRAGMA temp_store=MEMORY; 4 | PRAGMA journal_mode=OFF; 5 | PRAGMA synchronous=OFF; 6 | PRAGMA cache_size=500000; 7 | PRAGMA count_changes=0; 8 | BEGIN TRANSACTION; 9 | -- order the contents of each table by their indexes to reduce 10 | -- the number of disk pages that need to be read on each query. 
11 | INSERT INTO place SELECT * FROM old.place ORDER BY zip, priority; 12 | INSERT INTO edge SELECT * FROM old.edge ORDER BY tlid; 13 | INSERT INTO feature SELECT * FROM old.feature ORDER BY street_phone, zip; 14 | INSERT INTO feature_edge SELECT * FROM old.feature_edge ORDER BY fid; 15 | INSERT INTO range SELECT * FROM old.range ORDER BY tlid; 16 | COMMIT; 17 | -------------------------------------------------------------------------------- /build/sql/convert.sql: -------------------------------------------------------------------------------- 1 | BEGIN; 2 | -- start by indexing the temporary tables created from the input data. 3 | CREATE INDEX featnames_tlid ON tiger_featnames (tlid); 4 | CREATE INDEX addr_tlid ON tiger_addr (tlid); 5 | CREATE INDEX edges_tlid ON tiger_edges (tlid); 6 | 7 | -- generate a summary table matching each edge to one or more ZIPs 8 | -- for those edges that are streets and have a name 9 | CREATE TEMPORARY TABLE linezip AS 10 | SELECT DISTINCT tlid, zip FROM ( 11 | SELECT tlid, zip FROM tiger_addr a 12 | UNION 13 | SELECT tlid, zipr AS zip FROM tiger_edges e 14 | WHERE e.mtfcc LIKE 'S%' AND zipr <> "" AND zipr IS NOT NULL 15 | UNION 16 | SELECT tlid, zipl AS zip FROM tiger_edges e 17 | WHERE e.mtfcc LIKE 'S%' AND zipl <> "" AND zipl IS NOT NULL 18 | ) AS whatever; 19 | 20 | CREATE INDEX linezip_tlid ON linezip (tlid); 21 | 22 | -- generate features from the featnames table for each desired edge 23 | -- computing the metaphone hash of the name in the process. 
24 | 25 | -- CREATE TEMPORARY TABLE sqlite_sequence ( 26 | -- name VARCHAR(255), 27 | -- seq INTEGER); 28 | 29 | CREATE TEMPORARY TABLE feature_bin ( 30 | fid INTEGER PRIMARY KEY AUTOINCREMENT, 31 | street VARCHAR(100), 32 | street_phone VARCHAR(5), 33 | paflag BOOLEAN, 34 | zip CHAR(5)); 35 | 36 | INSERT OR IGNORE INTO sqlite_sequence (name, seq) VALUES ('feature_bin',0); 37 | UPDATE sqlite_sequence 38 | SET seq=(SELECT max(fid) FROM feature) 39 | WHERE name="feature_bin"; 40 | 41 | INSERT INTO feature_bin 42 | SELECT DISTINCT NULL, fullname, metaphone(name,5), paflag, zip 43 | FROM linezip l, tiger_featnames f 44 | WHERE l.tlid=f.tlid AND name <> "" AND name IS NOT NULL; 45 | 46 | CREATE INDEX feature_bin_idx ON feature_bin (street, zip); 47 | 48 | INSERT INTO feature_edge 49 | SELECT DISTINCT fid, f.tlid 50 | FROM linezip l, tiger_featnames f, feature_bin b 51 | WHERE l.tlid=f.tlid AND l.zip=b.zip 52 | AND f.fullname=b.street AND f.paflag=b.paflag; 53 | 54 | -- SELECT min(fid),max(fid) FROM feature_bin; 55 | 56 | INSERT INTO feature 57 | SELECT * FROM feature_bin; 58 | 59 | -- generate edges from the edges table for each desired edge, running 60 | -- a simple compression on the WKB geometry (because they're all 61 | -- linestrings). 62 | INSERT OR IGNORE INTO edge 63 | SELECT l.tlid, compress_wkb_line(the_geom) FROM 64 | (SELECT DISTINCT tlid FROM linezip) AS l, tiger_edges e 65 | WHERE l.tlid=e.tlid AND fullname <> "" AND fullname IS NOT NULL; 66 | 67 | -- generate all ranges from the addr table, stripping off any non-digit 68 | -- prefixes and putting them in a separate column. 
69 | INSERT INTO range 70 | SELECT tlid, digit_suffix(fromhn), digit_suffix(tohn), 71 | nondigit_prefix(fromhn), zip, side 72 | FROM tiger_addr; 73 | END; 74 | 75 | DROP TABLE feature_bin; 76 | DROP TABLE linezip; 77 | DROP TABLE tiger_addr; 78 | DROP TABLE tiger_featnames; 79 | DROP TABLE tiger_edges; 80 | 81 | -------------------------------------------------------------------------------- /build/sql/create.sql: -------------------------------------------------------------------------------- 1 | -- initialize the database tables. 2 | -- 'place' contains the gazetteer of place names. 3 | CREATE TABLE place( 4 | zip CHAR(5), 5 | city VARCHAR(100), 6 | state CHAR(2), 7 | city_phone VARCHAR(5), 8 | lat NUMERIC(9,6), 9 | lon NUMERIC(9,6), 10 | status CHAR(1), 11 | fips_class CHAR(2), 12 | fips_place CHAR(7), 13 | fips_county CHAR(5), 14 | priority char(1)); 15 | -- 'edge' stores the line geometries and their IDs. 16 | CREATE TABLE edge ( 17 | tlid INTEGER(10) PRIMARY KEY, 18 | geometry BLOB); 19 | -- 'feature' stores the name(s) and ZIP(s) of each edge. 20 | CREATE TABLE feature ( 21 | fid INTEGER PRIMARY KEY, 22 | street VARCHAR(100), 23 | street_phone VARCHAR(5), 24 | paflag BOOLEAN, 25 | zip CHAR(5)); 26 | -- 'feature_edge' links each edge to a feature. 27 | CREATE TABLE feature_edge ( 28 | fid INTEGER, 29 | tlid INTEGER); 30 | -- 'range' stores the address range(s) for each edge. 31 | CREATE TABLE range ( 32 | tlid INTEGER(10), 33 | fromhn INTEGER(6), 34 | tohn INTEGER(6), 35 | prenum VARCHAR(12), 36 | zip CHAR(5), 37 | side CHAR(1)); 38 | -------------------------------------------------------------------------------- /build/sql/index.sql: -------------------------------------------------------------------------------- 1 | .echo on 2 | PRAGMA temp_store=MEMORY; 3 | PRAGMA journal_mode=MEMORY; 4 | PRAGMA synchronous=OFF; 5 | PRAGMA cache_size=500000; 6 | PRAGMA count_changes=0; 7 | -- create indexes for all the relevant ways each table is queried. 
8 | CREATE INDEX place_city_phone_state_idx ON place (city_phone, state); 9 | CREATE INDEX place_zip_priority_idx ON place (zip, priority); 10 | CREATE INDEX feature_street_phone_zip_idx ON feature (street_phone, zip); 11 | CREATE INDEX feature_edge_fid_idx ON feature_edge (fid); 12 | CREATE INDEX range_tlid_idx ON range (tlid); 13 | -------------------------------------------------------------------------------- /build/sql/setup.sql: -------------------------------------------------------------------------------- 1 | -- create temporary tables to hold the TIGER/Line data before it's 2 | -- transformed and loaded into the permanent tables. 3 | -- 4 | -- this file was made by running 'shp2pgsql -p' on each of the 5 | -- TIGER/Line shapefiles and then massaging the result by hand. 6 | -- 7 | PRAGMA temp_store=MEMORY; 8 | PRAGMA journal_mode=MEMORY; 9 | PRAGMA synchronous=OFF; 10 | PRAGMA cache_size=500000; 11 | PRAGMA count_changes=0; 12 | CREATE TEMPORARY TABLE "tiger_edges" ( 13 | "statefp" varchar(2), 14 | "countyfp" varchar(3), 15 | "tlid" int8, 16 | "tfidl" int8, 17 | "tfidr" int8, 18 | "mtfcc" varchar(5), 19 | "fullname" varchar(100), 20 | "smid" varchar(22), 21 | "lfromadd" varchar(12), 22 | "ltoadd" varchar(12), 23 | "rfromadd" varchar(12), 24 | "rtoadd" varchar(12), 25 | "zipl" varchar(5), 26 | "zipr" varchar(5), 27 | "featcat" varchar(1), 28 | "hydroflg" varchar(1), 29 | "railflg" varchar(1), 30 | "roadflg" varchar(1), 31 | "olfflg" varchar(1), 32 | "passflg" varchar(1), 33 | "divroad" varchar(1), 34 | "exttyp" varchar(1), 35 | "ttyp" varchar(1), 36 | "deckedroad" varchar(1), 37 | "artpath" varchar(1), 38 | "persist" varchar(1), 39 | "gcseflg" varchar(1), 40 | "offsetl" varchar(1), 41 | "offsetr" varchar(1), 42 | "tnidf" int8, 43 | "tnidt" int8, 44 | "the_geom" blob 45 | ); 46 | -- SELECT AddGeometryColumn('','edges','the_geom','-1','MULTILINESTRING',2); 47 | CREATE TEMPORARY TABLE "tiger_featnames" ( 48 | "tlid" int8, 49 | "fullname" varchar(100), 50 | 
"name" varchar(100), 51 | "predirabrv" varchar(15), 52 | "pretypabrv" varchar(50), 53 | "prequalabr" varchar(15), 54 | "sufdirabrv" varchar(15), 55 | "suftypabrv" varchar(50), 56 | "sufqualabr" varchar(15), 57 | "predir" varchar(2), 58 | "pretyp" varchar(3), 59 | "prequal" varchar(2), 60 | "sufdir" varchar(2), 61 | "suftyp" varchar(3), 62 | "sufqual" varchar(2), 63 | "linearid" varchar(22), 64 | "mtfcc" varchar(5), 65 | "paflag" varchar(1)); 66 | CREATE TEMPORARY TABLE "tiger_addr" ( 67 | "tlid" int8, 68 | "fromhn" varchar(12), 69 | "tohn" varchar(12), 70 | "side" varchar(1), 71 | "zip" varchar(5), 72 | "plus4" varchar(4), 73 | "fromtyp" varchar(1), 74 | "totyp" varchar(1), 75 | "fromarmid" int4, 76 | "toarmid" int4, 77 | "arid" varchar(22), 78 | "mtfcc" varchar(5)); 79 | -------------------------------------------------------------------------------- /build/tiger2009_import: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TMP="/tmp/tiger-import.$$" 4 | SHPS="edges" 5 | DBFS="featnames addr" 6 | BASE=$(dirname $0) 7 | PATH=$PATH:$BASE 8 | SQL="$BASE/../sql" 9 | HELPER_LIB="$BASE/../lib/geocoder/us/sqlite3.so" 10 | DATABASE=$1 11 | shift 12 | 13 | mkdir -p $TMP || exit 1 14 | 15 | # Initialize the database if it doesn't exist. 16 | [ ! -r $DATABASE ] && cat ${SQL}/{create,place}.sql | sqlite3 $DATABASE 17 | 18 | # Marshal the county directories to import. 19 | # 20 | # If no directory was given on the command-line, read a list from STDIN. 21 | if [ x"$1" = x"" ]; then 22 | cat 23 | else 24 | # Otherwise, find all of the contents of each state directory. 25 | ls -d $1/[0-9]* | while read state; do 26 | ls -d ${state}/[0-9]* 27 | done 28 | fi | while read county; do 29 | echo "--- $county" 30 | # Unpack the county files into the temp directory. 
31 | for file in $SHPS $DBFS; do 32 | ZIP=$(ls ${county}/*_${file}.zip 2>/dev/null) 33 | SHP=$(ls ${county}/*_${file}.* 2>/dev/null) 34 | if [ x"$ZIP" != x"" ]; then 35 | unzip -q $ZIP -d $TMP 36 | elif [ x"$SHP" != x"" ]; then 37 | ln -s $SHP $TMP 38 | fi 39 | done 40 | # Generate an SQL stream to feed into the sqlite3 binary. 41 | # Start by loading the helper libs and initializing the temporary tables 42 | # that will hold the TIGER data before ETL. 43 | (echo ".load $HELPER_LIB" && \ 44 | cat ${SQL}/setup.sql && \ 45 | for file in $SHPS; do 46 | # Convert each Shapefile into SQL statements. 47 | shp2sqlite -aS ${TMP}/*_${file}.shp tiger_${file} 48 | done && \ 49 | for file in $DBFS; do 50 | # Convert each DBF into SQL statements likewise. 51 | shp2sqlite -an ${TMP}/*_${file}.dbf tiger_${file} 52 | done && \ 53 | cat ${SQL}/convert.sql) | sqlite3 $DATABASE 54 | # Finally, do the transform/load phase (convert.sql) 55 | # and clean up the temporary files. 56 | rm -f $TMP/* 57 | done 2>&1 | tee import-$$.log 58 | rm -rf $TMP 59 | 60 | -------------------------------------------------------------------------------- /build/tiger_import: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TMP="/tmp/tiger-import.$$" 4 | SHPS="edges" 5 | DBFS="featnames addr" 6 | BASE=$(dirname $0) 7 | PATH=$PATH:$BASE 8 | SQL="$BASE/sql" 9 | HELPER_LIB="$BASE/../lib/geocoder/us/sqlite3.so" 10 | DATABASE=$1 11 | SOURCE=$2 12 | shift 13 | shift 14 | 15 | mkdir -p $TMP || exit 1 16 | 17 | # Initialize the database if it doesn't exist. 18 | #[ ! -r $DATABASE ] && cat ${SQL}/{create,place}.sql | sqlite3 $DATABASE 19 | [ ! -r $DATABASE ] && cat ${SQL}/create.sql | sqlite3 $DATABASE 20 | 21 | # Marshal the county directories to import. 22 | # 23 | # If no directory was given on the command-line, read a list of county IDs from STDIN. 
24 | if [ x"$1" != x"" ]; then 25 | cat 26 | else 27 | # Otherwise, find all of the IDs from the contents of the directory structure. 28 | ls $SOURCE/**/tl_*_edges.zip | while read file; do 29 | file=$(basename $file) 30 | code=${file##tl_????_} 31 | echo ${code%%_edges.zip} 32 | done 33 | fi | sort | while read code; do 34 | echo "--- $code" 35 | # Unpack the county files into the temp directory. 36 | for file in $SHPS $DBFS; do 37 | ZIP=$(ls $SOURCE/**/*_${code}_${file}.zip 2>/dev/null) 38 | SHP=$(ls $SOURCE/**/*_${code}_${file}.* 2>/dev/null) 39 | if [ x"$ZIP" != x"" ]; then 40 | unzip -q $ZIP -d $TMP 41 | elif [ x"$SHP" != x"" ]; then 42 | ln -s $SHP $TMP 43 | fi 44 | done 45 | # Generate an SQL stream to feed into the sqlite3 binary. 46 | # Start by loading the helper libs and initializing the temporary tables 47 | # that will hold the TIGER data before ETL. 48 | (echo ".load $HELPER_LIB" && \ 49 | cat ${SQL}/setup.sql && \ 50 | for file in $SHPS; do 51 | # Convert each Shapefile into SQL statements. 52 | shp2sqlite -aS ${TMP}/*_${file}.shp tiger_${file} 53 | done && \ 54 | for file in $DBFS; do 55 | # Convert each DBF into SQL statements likewise. 56 | shp2sqlite -an ${TMP}/*_${file}.dbf tiger_${file} 57 | done && \ 58 | cat ${SQL}/convert.sql) | sqlite3 $DATABASE 59 | # Finally, do the transform/load phase (convert.sql) 60 | # and clean up the temporary files. 
61 | rm -f $TMP/* 62 | done 2>&1 | tee import-$$.log 63 | rm -rf $TMP 64 | 65 | -------------------------------------------------------------------------------- /conf/geocoder-us/geocoder.ru: -------------------------------------------------------------------------------- 1 | require 'sinatra' 2 | disable :run, :reload 3 | require 'geocoder/us/rest' 4 | run Sinatra::Application 5 | -------------------------------------------------------------------------------- /conf/geocoder-us/unicorn.rb: -------------------------------------------------------------------------------- 1 | worker_processes 4 2 | user "www-data", "www-data" 3 | listen "/var/run/geocoder-us/unicorn.sock", :backlog => 64 4 | pid "/var/run/geocoder-us/unicorn.pid" 5 | stderr_path "/var/log/geocoder-us/geocoder-err.log" 6 | stdout_path "/var/log/geocoder-us/geocoder-out.log" 7 | 8 | # Have each process listen on a local port for debugging purposes. 9 | after_fork do |server, worker| 10 | addr = "127.0.0.1:#{40000 + worker.nr}" 11 | server.listen(addr, :tries => 1, :delay => 5, :tcp_nopush => true) 12 | end 13 | -------------------------------------------------------------------------------- /conf/init/geocoder-us.conf: -------------------------------------------------------------------------------- 1 | description "geocoder.us" 2 | 3 | start on runlevel [2345] 4 | stop on runlevel [!2345] 5 | 6 | respawn 7 | script 8 | . /etc/default/geocoder-us 9 | unicorn -c /etc/geocoder-us/unicorn.rb /etc/geocoder-us/geocoder.ru 10 | end script 11 | -------------------------------------------------------------------------------- /debian/README.Debian: -------------------------------------------------------------------------------- 1 | geocoder-us for Debian 2 | ---------------------- 3 | 4 | The Geocoder::US package is a Ruby library that uses a database built from the 5 | US Census Bureau's TIGER/Line data to interpolate a latitude/longitude 6 | coordinate for a given US street address. 
7 | 8 | Binary shared objects 9 | --------------------- 10 | 11 | The Geocoder::US module depends on being able to load a native extension module 12 | in its SQLite driver. For this reason, a version of libsqlite3-ruby >= 1.3.0 is 13 | needed. The module is built and included in the .deb as `sqlite3.so`, and it is 14 | installed in the same directory as the Ruby modules. This may not be ideal, but 15 | this makes it easy for the Geocoder::US library to find it there; otherwise, a 16 | configuration option would be necessary. 17 | 18 | REST API server 19 | --------------- 20 | 21 | The library's API centers on a single method, `geocode`, of the 22 | Geocoder::US::Database class that takes an address string and returns a list of 23 | hashes containing the most likely matches with coordinates. 24 | 25 | The `geocode` method is wrapped in a very simple Sinatra application with a single 26 | endpoint `/geocode` and a single argument `q`, which returns the result of the 27 | geocode method in JSON format. 28 | 29 | The Sinatra web framework does not support running as a daemon on its own, so 30 | the Unicorn web server is used as a container for the application. This package 31 | creates an `/etc/geocoder-us` directory containing two files: 32 | 33 | `/etc/geocoder-us/geocoder.ru` is the "rackup" adapter between Unicorn and Sinatra 34 | and should probably not be changed. This file doesn't have to live in /etc, but 35 | I couldn't figure out where else to put it. 36 | 37 | `/etc/geocoder-us/unicorn.rb` contains the configuration options to run the Unicorn 38 | server. This file as packaged runs the REST server as the www-data user, listening 39 | on the `/var/run/geocoder-us/unicorn.sock` UNIX socket. This file *probably* doesn't 40 | need to be changed, but if the server starts doing weird things, different options 41 | to control Unicorn's behavior can be set here.
42 | 43 | The package creates `/var/log/geocoder-us` and `/var/run/geocoder-us` 44 | directories for the Unicorn log files and PID file, respectively, and chowns them 45 | to www-data. 46 | 47 | An Upstart job is also included in `/etc/init/geocoder-us.conf`. It sources 48 | `/etc/default/geocoder-us`, starts Unicorn with the configuration above, and 49 | respawns the server if it exits. 50 | 51 | Where to put the database 52 | ------------------------- 53 | 54 | The location of the database file should be set in `/etc/default/geocoder-us`. 55 | The package creates a `/var/lib/geocoder-us` directory and configures the 56 | database location by default to be `/var/lib/geocoder-us/geocoder.db`. If you 57 | have an EBS volume containing a file called `geocoder.db`, for example, you can 58 | just mount the volume at `/var/lib/geocoder-us` and then start the server and 59 | all will be well. 60 | -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 7 2 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: geocoder-us 2 | Section: ruby 3 | Priority: extra 4 | Maintainer: SimpleGeo Nerds 5 | Uploaders: Schuyler Erle 6 | Build-Depends: debhelper (>= 7), libsqlite3-dev, ruby1.8, cdbs, ruby-pkg-tools 7 | Standards-Version: 3.9.1 8 | Homepage: http://github.com/simplegeo/geocoder/ 9 | 10 | Package: geocoder-us 11 | Architecture: any 12 | Depends: ${misc:Depends}, ${shlibs:Depends}, ruby1.8, libsqlite3-ruby (>= 1.3.0), libsinatra-ruby, libjson-ruby, unicorn 13 | Description: A US address geocoder. 14 | A US address geocoder. Requires a suitable database.
15 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | This work was packaged for Debian by: 2 | 3 | Schuyler Erle on Sat, 07 Aug 2010 00:51:40 +0000 4 | 5 | It was downloaded from http://github.com/simplegeo/geocoder/ 6 | 7 | Upstream Author(s): 8 | 9 | Schuyler Erle 10 | 11 | Copyright: 12 | 13 | (c) 2009 FortiusOne, Inc. 14 | 15 | License: 16 | 17 | This program is free software: you can redistribute it and/or modify 18 | it under the terms of the GNU General Public License as published by 19 | the Free Software Foundation, either version 3 of the License, or 20 | (at your option) any later version. 21 | 22 | This package is distributed in the hope that it will be useful, 23 | but WITHOUT ANY WARRANTY; without even the implied warranty of 24 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 25 | GNU General Public License for more details. 26 | 27 | You should have received a copy of the GNU General Public License 28 | along with this program. If not, see <http://www.gnu.org/licenses/>. 29 | 30 | 31 | The Debian packaging is: 32 | 33 | Copyright (C) 2010 SimpleGeo, Inc. 34 | 35 | and is licensed under the GPL version 3, see `/usr/share/common-licenses/GPL-3'. 36 | -------------------------------------------------------------------------------- /debian/default: -------------------------------------------------------------------------------- 1 | # Defaults for geocoder-us upstart job 2 | # sourced by /etc/init/geocoder-us.conf 3 | # installed at /etc/default/geocoder-us by maintainer scripts 4 | 5 | # Set the location of the geocoder database.
6 | export GEOCODER_DB="/var/lib/geocoder-us/geocoder.db" 7 | -------------------------------------------------------------------------------- /debian/docs: -------------------------------------------------------------------------------- 1 | History.txt 2 | Manifest.txt 3 | README.rdoc 4 | TODO.txt 5 | TODO.txt 6 | -------------------------------------------------------------------------------- /debian/geocoder-us.postinst: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # postinst script for #PACKAGE# 3 | # 4 | # see: dh_installdeb(1) 5 | 6 | set -e 7 | 8 | # summary of how this script can be called: 9 | # * `configure' 10 | # * `abort-upgrade' 11 | # * `abort-remove' `in-favour' 12 | # 13 | # * `abort-remove' 14 | # * `abort-deconfigure' `in-favour' 15 | # `removing' 16 | # 17 | # for details, see http://www.debian.org/doc/debian-policy/ or 18 | # the debian-policy package 19 | 20 | 21 | case "$1" in 22 | configure) 23 | # just make sure that unicorn (running as www-data) can write its PID file, socket and logs 24 | chown www-data /var/run/geocoder-us 25 | chown www-data /var/log/geocoder-us 26 | start geocoder-us || /bin/true 27 | ;; 28 | 29 | abort-upgrade|abort-remove|abort-deconfigure) 30 | ;; 31 | 32 | *) 33 | echo "postinst called with unknown argument \`$1'" >&2 34 | exit 1 35 | ;; 36 | esac 37 | 38 | # dh_installdeb will replace this with shell code automatically 39 | # generated by other debhelper scripts.
40 | 41 | #DEBHELPER# 42 | 43 | exit 0 44 | -------------------------------------------------------------------------------- /debian/geocoder-us.prerm: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # prerm script for geocoder-us: stops the upstart job before the package is removed 3 | set -e 4 | 5 | case "$1" in 6 | remove|deconfigure) 7 | stop geocoder-us || true 8 | ;; 9 | upgrade) 10 | ;; 11 | failed-upgrade) 12 | ;; 13 | *) 14 | echo "prerm called with unknown argument \`$1'" >&2 15 | exit 0 16 | ;; 17 | esac 18 | 19 | # dh_installdeb will replace this with shell code automatically 20 | # generated by other debhelper scripts. 21 | 22 | #DEBHELPER# 23 | 24 | exit 0 25 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | include /usr/share/cdbs/1/rules/debhelper.mk 4 | include /usr/share/ruby-pkg-tools/1/class/ruby-setup-rb.mk 5 | 6 | 7 | # Add here any variable or target overrides you need.
8 | # Build the SQLite helper extension and stage it next to the Ruby library; $(MAKE) passes make flags and the jobserver to the sub-make. 9 | build/geocoder-us:: 10 | $(MAKE) -C $(CURDIR)/src/libsqlite3_geocoder 11 | install -m 0644 $(CURDIR)/src/libsqlite3_geocoder/*.so \ 12 | $(CURDIR)/lib/geocoder/us/sqlite3.so 13 | 14 | install/geocoder-us:: 15 | install -d -m 0755 $(DEB_DESTDIR)var/lib/geocoder-us \ 16 | $(DEB_DESTDIR)var/run/geocoder-us \ 17 | $(DEB_DESTDIR)var/log/geocoder-us 18 | 19 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (quilt) 2 | -------------------------------------------------------------------------------- /demos/api/server.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'sinatra' 3 | require 'geocoder/us/database' 4 | require 'json' 5 | 6 | set :port, 8080 7 | @@db = Geocoder::US::Database.new("/home/sderle/geocoder/california.db") 8 | get '/geocode.json' do 9 | if params[:q] 10 | (@@db.geocode params[:q]).to_json 11 | else 12 | status 400 13 | "parameter 'q' is missing" 14 | end 15 | end 16 | get '/' do 17 | unless params[:q].nil? 18 | @records = @@db.geocode params[:q] 19 | end 20 | erb :index 21 | end 22 | -------------------------------------------------------------------------------- /demos/api/views/index.erb: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 11 | 12 | 13 |

Geocoder Demo

14 |

15 |

16 | 17 | 18 | 19 |
20 |

21 | <% unless @records.nil? %> 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | <% for record in @records %> 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 58 | 59 | <% end %> 60 |
MatchLatLon#QualDirTypeStreetTypeDirQualCityStZIP 
<%= format("%.2f", record[:score]*100) %>%<%= record[:lat].to_s %><%= record[:lon].to_s %><%= record[:prefix] if record[:prefix] %><%= record[:number] %><%= record[:pretyp] %><%= record[:predir] %><%= record[:prequal] %><%= record[:street] %><%= record[:suftyp] %><%= record[:sufdir] %><%= record[:sufqual] %><%= record[:city] %><%= record[:state] %><%= record[:zip] %>map
61 | <% end %> 62 | 63 | 64 | -------------------------------------------------------------------------------- /demos/cli.rb: -------------------------------------------------------------------------------- 1 | require 'geocoder/us/database' 2 | require 'pp' 3 | 4 | db = Geocoder::US::Database.new("/mnt/tiger2010/geocoder.db", :debug=>true) 5 | result = db.geocode(ARGV[0]) 6 | pp(result) 7 | print "#{result[0][:lat]} N, #{-result[0][:lon]} W\n" 8 | -------------------------------------------------------------------------------- /demos/demo/app/ext/geocodewrap.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'geocoder/us/database' 3 | require 'logger' 4 | 5 | module Sinatra 6 | module GeocodeWrap 7 | attr_accessor :db 8 | def self.registered(app) 9 | options = {:cache_size => 100000} 10 | @@db = Geocoder::US::Database.new("/Users/katechapman/usgeocode.db", options) 11 | stats = Logger.new("geocoderstats.log", 10, 1024000) 12 | app.get '/' do 13 | unless params[:address].nil? 14 | begin 15 | @records = @@db.geocode params[:address] 16 | stats.debug "Geocoded: 1, Failed: 0, Geocoded At: " << DateTime.now.to_s 17 | rescue Exception => e 18 | stats.debug "Geocoded: 1, Failed: 1, Geocoded At: " << DateTime.now.to_s 19 | puts e.message 20 | end 21 | end 22 | 23 | case params[:format] 24 | when /xml/ 25 | builder :index 26 | when /atom/ 27 | builder :atom 28 | when /json/ 29 | @records.to_json 30 | else 31 | erb :index 32 | end 33 | end 34 | 35 | app.post '/batch' do 36 | failed_codes = 0 37 | total_codes = 0 38 | puts Time.now 39 | if params[:uploaded_csv].nil? 
40 | csv_file = request.env["rack.input"].read 41 | csv = FasterCSV.parse(csv_file, :row_sep => "*", :col_sep => "|") 42 | else 43 | FileUtils.mkdir_p('uploads/') 44 | FileUtils.mv(params[:uploaded_csv][:tempfile].path, "uploads/#{params[:uploaded_csv][:filename]}") 45 | csv_file = open("uploads/#{params[:uploaded_csv][:filename]}") 46 | @filename = params[:uploaded_csv][:filename].gsub(/\.csv/,"") 47 | csv = FasterCSV.parse(csv_file) 48 | end 49 | headers = csv[0] 50 | 51 | @records = csv.collect do |record| 52 | total_codes += 1 53 | next if record == headers 54 | begin 55 | result = @@db.geocode record[1] 56 | if result.empty? 57 | result[0] = {:lon => nil, :lat => nil, :precision => 'unmatched', :score => 0} 58 | failed_codes += 1 59 | end 60 | result.first.merge(headers[0] => record[0]) 61 | rescue Exception => e 62 | failed_codes += 1 63 | puts e.message 64 | next 65 | end 66 | end.compact 67 | puts Time.now 68 | stats.debug "Geocoded: " << total_codes.to_s << ", Failed: " << failed_codes.to_s << ",Geocoded At: " << DateTime.now.to_s 69 | case params[:format] 70 | when /xml/ 71 | builder :index 72 | when /atom/ 73 | builder :atom 74 | when /json/ 75 | @records.to_json 76 | 77 | else 78 | erb :index 79 | end 80 | end 81 | end 82 | end 83 | register GeocodeWrap 84 | end 85 | -------------------------------------------------------------------------------- /demos/demo/app/views/index.builder: -------------------------------------------------------------------------------- 1 | xml.locations do 2 | unless @records.nil? 3 | @records.each do |record| 4 | xml.location do 5 | xml.score format("%.2f", record[:score]*100) 6 | %w{lat lon number prefix pretyp predir prequal street suftyp sufdir sufqual city state zip}.each do |field| 7 | xml.tag! 
field, record[field.to_sym] 8 | end 9 | end 10 | end 11 | end 12 | end 13 | 14 | -------------------------------------------------------------------------------- /demos/demo/app/views/index.erb: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 11 | 12 | 13 |

Geocoder Demo

14 |

15 |

16 | 17 | 18 |
19 |
20 | 21 | 22 |
23 |

24 | <% unless @records.nil? %> 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | <% for record in @records %> 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 63 | 64 | <% end %> 65 |
MatchPrecisionLatLon#QualDirTypeStreetTypeDirQualCityStZIP 
<%= format("%.2f", record[:score]*100) %>%<%= record[:precision].to_s %><%= record[:lat].to_s %><%= record[:lon].to_s %><%= record[:prefix] if record[:prefix] %><%= record[:number] %><%= record[:pretyp] %><%= record[:predir] %><%= record[:prequal] %><%= record[:street] %><%= record[:suftyp] %><%= record[:sufdir] %><%= record[:sufqual] %><%= record[:city] %><%= record[:state] %><%= record[:zip] %>map
66 | <% end %> 67 | <% unless @filename.nil? %> 68 | Atom Feed 69 | <% end %> 70 | 71 | 72 | -------------------------------------------------------------------------------- /demos/demo/config.ru: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'sinatra' 3 | 4 | 5 | Sinatra::Application.default_options.merge!( 6 | :run => false, 7 | :env => ENV['RACK_ENV'] 8 | ) 9 | require 'geocom_geocode' 10 | run GeocomGeocode::GeocodeServer 11 | 12 | 13 | -------------------------------------------------------------------------------- /demos/demo/config/bootstraps.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | 3 | module BootStraps 4 | 5 | class Framework 6 | 7 | def initialize 8 | @methods = {} 9 | end 10 | 11 | def apply_settings!(app) 12 | @methods.each_pair do |method, calls| 13 | calls.each do |arg_set| 14 | app.send(method, *arg_set) 15 | end 16 | end 17 | end 18 | 19 | def method_missing(method, *args) 20 | @methods[method] ||= [] 21 | @methods[method] << args 22 | end 23 | end 24 | 25 | 26 | class DataStore 27 | def connect_action(&block) 28 | @connect_action = block 29 | end 30 | 31 | #TODO raise UndefinedConnectAction 32 | def connect 33 | @connect_action.call if @connect_action 34 | end 35 | end 36 | 37 | class Configuration 38 | attr_accessor :db, :global, :default_env, :vendor_dir, :lib_paths, :framework, :vendored 39 | attr_reader :gems 40 | 41 | def initialize 42 | @framework = Framework.new 43 | @gems = {} 44 | @global = {} 45 | @default_env = 'production' 46 | @vendor_dir = File.join(root, 'vendor') 47 | @lib_paths = [] 48 | @vendored = false 49 | end 50 | 51 | def env 52 | ENV['RACK_ENV'] ||= default_env 53 | end 54 | 55 | def env=(val) 56 | ENV['RACK_ENV'] = val 57 | end 58 | 59 | def root 60 | File.join(File.expand_path(File.dirname(__FILE__)), "..") 61 | end 62 | 63 | def gem(*args) 64 | gem = args.first 65 | ver = args.last 
66 | 67 | @gems[gem] = ver 68 | 69 | #its concievable that vendored could be changed mid config 70 | use_vendor if vendored 71 | Kernel.send(:gem, *args) 72 | require gem 73 | end 74 | 75 | private 76 | def use_vendor 77 | Gem.clear_paths 78 | prepend_gem_path!(File.join(root, 'vendor')) 79 | end 80 | 81 | def prepend_gem_path!(path) 82 | ENV['GEM_PATH'] = path 83 | end 84 | end 85 | 86 | class Initializer 87 | @@config = Configuration.new 88 | class << self 89 | def configure 90 | unless @@config.frozen? 91 | yield @@config 92 | @@config.freeze 93 | end 94 | end 95 | 96 | def config 97 | @@config 98 | end 99 | 100 | def boot! 101 | require File.join(@@config.root, 'config', 'geoenvironment.rb') 102 | require_libs 103 | end 104 | 105 | 106 | private 107 | def require_libs 108 | [ 109 | subdir_expansion('lib'), 110 | subdir_expansion(File.join('app','ext')) 111 | ].each do |p| 112 | require_all(p) 113 | end 114 | end 115 | 116 | def require_all(path) 117 | Dir[path].each { |f| require f } 118 | end 119 | 120 | def subdir_expansion(subdir) 121 | File.join(@@config.root, subdir, '**', '*.rb') 122 | end 123 | end 124 | end 125 | end 126 | 127 | BootStraps::Initializer.boot! 
128 | Straps = BootStraps::Initializer.config 129 | 130 | 131 | -------------------------------------------------------------------------------- /demos/demo/config/geoenvironment.rb: -------------------------------------------------------------------------------- 1 | 2 | BootStraps::Initializer.configure do |config| 3 | 4 | #Use the vendor directory 5 | config.vendored = true 6 | config.default_env = 'production' 7 | 8 | config.gem 'sinatra' 9 | config.gem 'fastercsv' 10 | config.gem 'json' 11 | 12 | 13 | 14 | 15 | config.framework.set :root, config.root 16 | config.framework.set :environment, config.env 17 | config.framework.set :raise_errors, true 18 | config.framework.set :views, File.join('app','views') 19 | config.framework.set :server, 'mongrel' 20 | config.framework.set :static, true 21 | config.framework.set :logging, true 22 | config.framework.set :port, 4567 23 | config.framework.set :lock, false 24 | 25 | end 26 | -------------------------------------------------------------------------------- /demos/demo/geocoder_helper.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'geocoder/us/database' 3 | require 'fastercsv' 4 | require 'json' 5 | 6 | 7 | 8 | def initialize 9 | 10 | 11 | 12 | end 13 | -------------------------------------------------------------------------------- /demos/demo/geocom_geocode.rb: -------------------------------------------------------------------------------- 1 | require 'config/bootstraps' 2 | 3 | module GeocomGeocode 4 | class GeocodeServer < Sinatra::Base 5 | register Sinatra::GeocodeWrap 6 | configure do 7 | Straps.framework.apply_settings!(self) 8 | end 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /demos/demo/main.rb: -------------------------------------------------------------------------------- 1 | require 'geocom_geocode' 2 | 3 | GeocomGeocode::GeocodeServer.run! 
4 | -------------------------------------------------------------------------------- /demos/demo/rakefile.rb: -------------------------------------------------------------------------------- 1 | require 'rake' 2 | 3 | task :boot_env do 4 | require 'config/bootstraps'; 5 | end 6 | 7 | namespace :db do 8 | task :migrate => :connect do 9 | ActiveRecord::Base.logger = Logger.new(STDOUT) 10 | ActiveRecord::Migration.verbose = true 11 | ActiveRecord::Migrator.migrate('db/migrate/', nil) 12 | end 13 | 14 | task :connect => :boot_env do 15 | BootStraps::Initializer.config.db.connect 16 | end 17 | end -------------------------------------------------------------------------------- /demos/demo/tmp/restart.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simplegeo/geocoder/5c7f678a1abe79c77c36153fbcee32e4dec24e53/demos/demo/tmp/restart.txt -------------------------------------------------------------------------------- /demos/parse.rb: -------------------------------------------------------------------------------- 1 | require 'geocoder/us/address' 2 | require 'pp' 3 | 4 | pp(Geocoder::US::Address.new(ARGV[0])) 5 | -------------------------------------------------------------------------------- /demos/simpledemo/views/index.builder: -------------------------------------------------------------------------------- 1 | xml.locations do 2 | unless @records.nil? 3 | @records.each do |record| 4 | xml.location do 5 | xml.score format("%.2f", record[:score]*100) 6 | %w{lat lon number prefix pretyp predir prequal street suftyp sufdir sufqual city state zip}.each do |field| 7 | xml.tag! field, record[field.to_sym] 8 | end 9 | end 10 | end 11 | end 12 | end 13 | 14 | -------------------------------------------------------------------------------- /demos/simpledemo/views/index.erb: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 11 | 12 | 13 |

Geocoder Demo

14 |

15 |

16 | 17 | 18 |
19 |
20 | 21 | 22 |
23 |

24 | <% unless @records.nil? %> 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | <% for record in @records %> 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 61 | 62 | <% end %> 63 |
MatchLatLon#QualDirTypeStreetTypeDirQualCityStZIP 
<%= format("%.2f", record[:score]*100) %>%<%= record[:lat].to_s %><%= record[:lon].to_s %><%= record[:prefix] if record[:prefix] %><%= record[:number] %><%= record[:pretyp] %><%= record[:predir] %><%= record[:prequal] %><%= record[:street] %><%= record[:suftyp] %><%= record[:sufdir] %><%= record[:sufqual] %><%= record[:city] %><%= record[:state] %><%= record[:zip] %>map
64 | <% end %> 65 | <% unless @filename.nil? %> 66 | Atom Feed 67 | <% end %> 68 | 69 | 70 | -------------------------------------------------------------------------------- /demos/simpledemo/ws.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'sinatra' 3 | require 'geocoder/us/database' 4 | require 'fastercsv' 5 | require 'json' 6 | 7 | set :port, 8080 8 | @@db = Geocoder::US::Database.new("/fortiusone/geocoder/geocoder.db") 9 | get '/' do 10 | unless params[:address].nil? 11 | @records = @@db.geocode params[:address] 12 | end 13 | 14 | case params[:format] 15 | when /xml/ 16 | builder :index 17 | when /atom/ 18 | builder :atom 19 | else 20 | erb :index 21 | end 22 | end 23 | 24 | require 'open-uri' 25 | get '/link.:format' do 26 | if(params.include?(:url)) 27 | csv_file = params[:url] 28 | else 29 | csv_file = "uploads/#{params[:filename]}.csv" 30 | end 31 | csv = FasterCSV.parse(open(csv_file)) 32 | headers = csv[0] 33 | 34 | @records = csv.collect do |record| 35 | next if record == headers 36 | begin 37 | (@@db.geocode record[1]).first 38 | rescue Exception => e 39 | puts e.message 40 | next 41 | end 42 | end.compact 43 | case params[:format] 44 | when /atom/ 45 | builder :atom 46 | when /xml/ 47 | builder :index 48 | else 49 | erb :index 50 | end 51 | 52 | end 53 | 54 | 55 | post '/batch' do 56 | csv_file = request.env["rack.input"].read 57 | csv = FasterCSV.parse(csv_file, :row_sep => "*", :col_sep => "|") 58 | headers = csv[0] 59 | @records = csv.collect do |record| 60 | next if record == headers 61 | begin 62 | (@@db.geocode record[1]).first.merge(headers[0] => record[0]) 63 | rescue Exception => e 64 | puts e.message 65 | next 66 | end 67 | end.compact 68 | case params[:format] 69 | when /xml/ 70 | builder :index 71 | when /atom/ 72 | builder :atom 73 | when /json/ 74 | @records.to_json 75 | else 76 | erb :index 77 | end 78 | end 79 | 80 | 81 | 82 | 83 | 84 | 
-------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | all: lookup.html parsing.html 2 | 3 | %.html: %.rst voidspace.css 4 | rst2html --stylesheet-path=voidspace.css --no-compact-lists $< > $@ 5 | 6 | clean: 7 | rm -f *.html 8 | -------------------------------------------------------------------------------- /doc/html4css1.css: -------------------------------------------------------------------------------- 1 | /* 2 | :Author: David Goodger 3 | :Contact: goodger@users.sourceforge.net 4 | :Date: $Date: 2005-12-18 01:56:14 +0100 (Sun, 18 Dec 2005) $ 5 | :Revision: $Revision: 4224 $ 6 | :Copyright: This stylesheet has been placed in the public domain. 7 | 8 | Default cascading style sheet for the HTML output of Docutils. 9 | 10 | See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to 11 | customize this style sheet. 12 | */ 13 | 14 | /* used to remove borders from tables and images */ 15 | .borderless, table.borderless td, table.borderless th { 16 | border: 0 } 17 | 18 | table.borderless td, table.borderless th { 19 | /* Override padding for "table.docutils td" with "! important". 20 | The right padding separates the table cells. */ 21 | padding: 0 0.5em 0 0 ! important } 22 | 23 | .first { 24 | /* Override more specific margin styles with "! important". */ 25 | margin-top: 0 ! important } 26 | 27 | .last, .with-subtitle { 28 | margin-bottom: 0 ! important } 29 | 30 | .hidden { 31 | display: none } 32 | 33 | a.toc-backref { 34 | text-decoration: none ; 35 | color: black } 36 | 37 | blockquote.epigraph { 38 | margin: 2em 5em ; } 39 | 40 | dl.docutils dd { 41 | margin-bottom: 0.5em } 42 | 43 | /* Uncomment (and remove this text!) 
to get bold-faced definition list terms 44 | dl.docutils dt { 45 | font-weight: bold } 46 | */ 47 | 48 | div.abstract { 49 | margin: 2em 5em } 50 | 51 | div.abstract p.topic-title { 52 | font-weight: bold ; 53 | text-align: center } 54 | 55 | div.admonition, div.attention, div.caution, div.danger, div.error, 56 | div.hint, div.important, div.note, div.tip, div.warning { 57 | margin: 2em ; 58 | border: medium outset ; 59 | padding: 1em } 60 | 61 | div.admonition p.admonition-title, div.hint p.admonition-title, 62 | div.important p.admonition-title, div.note p.admonition-title, 63 | div.tip p.admonition-title { 64 | font-weight: bold ; 65 | font-family: sans-serif } 66 | 67 | div.attention p.admonition-title, div.caution p.admonition-title, 68 | div.danger p.admonition-title, div.error p.admonition-title, 69 | div.warning p.admonition-title { 70 | color: red ; 71 | font-weight: bold ; 72 | font-family: sans-serif } 73 | 74 | /* Uncomment (and remove this text!) to get reduced vertical space in 75 | compound paragraphs. 
76 | div.compound .compound-first, div.compound .compound-middle { 77 | margin-bottom: 0.5em } 78 | 79 | div.compound .compound-last, div.compound .compound-middle { 80 | margin-top: 0.5em } 81 | */ 82 | 83 | div.dedication { 84 | margin: 2em 5em ; 85 | text-align: center ; 86 | font-style: italic } 87 | 88 | div.dedication p.topic-title { 89 | font-weight: bold ; 90 | font-style: normal } 91 | 92 | div.figure { 93 | margin-left: 2em ; 94 | margin-right: 2em } 95 | 96 | div.footer, div.header { 97 | clear: both; 98 | font-size: smaller } 99 | 100 | div.line-block { 101 | display: block ; 102 | margin-top: 1em ; 103 | margin-bottom: 1em } 104 | 105 | div.line-block div.line-block { 106 | margin-top: 0 ; 107 | margin-bottom: 0 ; 108 | margin-left: 1.5em } 109 | 110 | div.sidebar { 111 | margin-left: 1em ; 112 | border: medium outset ; 113 | padding: 1em ; 114 | background-color: #ffffee ; 115 | width: 40% ; 116 | float: right ; 117 | clear: right } 118 | 119 | div.sidebar p.rubric { 120 | font-family: sans-serif ; 121 | font-size: medium } 122 | 123 | div.system-messages { 124 | margin: 5em } 125 | 126 | div.system-messages h1 { 127 | color: red } 128 | 129 | div.system-message { 130 | border: medium outset ; 131 | padding: 1em } 132 | 133 | div.system-message p.system-message-title { 134 | color: red ; 135 | font-weight: bold } 136 | 137 | div.topic { 138 | margin: 2em } 139 | 140 | h1.section-subtitle, h2.section-subtitle, h3.section-subtitle, 141 | h4.section-subtitle, h5.section-subtitle, h6.section-subtitle { 142 | margin-top: 0.4em } 143 | 144 | h1.title { 145 | text-align: center } 146 | 147 | h2.subtitle { 148 | text-align: center } 149 | 150 | hr.docutils { 151 | width: 75% } 152 | 153 | img.align-left { 154 | clear: left } 155 | 156 | img.align-right { 157 | clear: right } 158 | 159 | ol.simple, ul.simple { 160 | margin-bottom: 1em } 161 | 162 | ol.arabic { 163 | list-style: decimal } 164 | 165 | ol.loweralpha { 166 | list-style: lower-alpha } 167 | 168 | 
ol.upperalpha { 169 | list-style: upper-alpha } 170 | 171 | ol.lowerroman { 172 | list-style: lower-roman } 173 | 174 | ol.upperroman { 175 | list-style: upper-roman } 176 | 177 | p.attribution { 178 | text-align: right ; 179 | margin-left: 50% } 180 | 181 | p.caption { 182 | font-style: italic } 183 | 184 | p.credits { 185 | font-style: italic ; 186 | font-size: smaller } 187 | 188 | p.label { 189 | white-space: nowrap } 190 | 191 | p.rubric { 192 | font-weight: bold ; 193 | font-size: larger ; 194 | color: maroon ; 195 | text-align: center } 196 | 197 | p.sidebar-title { 198 | font-family: sans-serif ; 199 | font-weight: bold ; 200 | font-size: larger } 201 | 202 | p.sidebar-subtitle { 203 | font-family: sans-serif ; 204 | font-weight: bold } 205 | 206 | p.topic-title { 207 | font-weight: bold } 208 | 209 | pre.address { 210 | margin-bottom: 0 ; 211 | margin-top: 0 ; 212 | font-family: serif ; 213 | font-size: 100% } 214 | 215 | pre.literal-block, pre.doctest-block { 216 | margin-left: 2em ; 217 | margin-right: 2em ; 218 | background-color: #eeeeee } 219 | 220 | span.classifier { 221 | font-family: sans-serif ; 222 | font-style: oblique } 223 | 224 | span.classifier-delimiter { 225 | font-family: sans-serif ; 226 | font-weight: bold } 227 | 228 | span.interpreted { 229 | font-family: sans-serif } 230 | 231 | span.option { 232 | white-space: nowrap } 233 | 234 | span.pre { 235 | white-space: pre } 236 | 237 | span.problematic { 238 | color: red } 239 | 240 | span.section-subtitle { 241 | /* font-size relative to parent (h1..h6 element) */ 242 | font-size: 80% } 243 | 244 | table.citation { 245 | border-left: solid 1px gray; 246 | margin-left: 1px } 247 | 248 | table.docinfo { 249 | margin: 2em 4em } 250 | 251 | table.docutils { 252 | margin-top: 0.5em ; 253 | margin-bottom: 0.5em } 254 | 255 | table.footnote { 256 | border-left: solid 1px black; 257 | margin-left: 1px } 258 | 259 | table.docutils td, table.docutils th, 260 | table.docinfo td, table.docinfo th { 
261 | padding-left: 0.5em ; 262 | padding-right: 0.5em ; 263 | vertical-align: top } 264 | 265 | table.docutils th.field-name, table.docinfo th.docinfo-name { 266 | font-weight: bold ; 267 | text-align: left ; 268 | white-space: nowrap ; 269 | padding-left: 0 } 270 | 271 | h1 tt.docutils, h2 tt.docutils, h3 tt.docutils, 272 | h4 tt.docutils, h5 tt.docutils, h6 tt.docutils { 273 | font-size: 100% } 274 | 275 | tt.docutils { 276 | background-color: #eeeeee } 277 | 278 | ul.auto-toc { 279 | list-style-type: none } 280 | -------------------------------------------------------------------------------- /doc/lookup.rst: -------------------------------------------------------------------------------- 1 | .. _lookup: 2 | 3 | =================================== 4 | Geocoder.us Address Lookup Strategy 5 | =================================== 6 | 7 | :Author: Schuyler Erle 8 | :Contact: schuyler at geocoder dot us 9 | :Created: 2009/03/13 10 | :Edited: 2009/03/14 11 | 12 | Definitions 13 | ----------- 14 | 15 | Edge 16 | Database representation of a street segment, consisting of a linestring 17 | geometry and an edge ID. Edges relate to many ranges and many features 18 | through its ID. 19 | 20 | Feature 21 | Database representation of a named street, consisting of street name 22 | and modifier elements, a reference ZIP code, and a primary/alternate flag. 23 | 24 | Range 25 | Database representation of a range of address numbers on a given 26 | street, consisting of range start and end numbers, an optional prefix 27 | ending with a non-numeric character, and a delivery ZIP code for that 28 | range. 29 | 30 | Place 31 | Database representation of a ZIP code, consisting of a city name, 32 | state abbreviation, a ZIP code, and a primary/alternate flag. 33 | 34 | Address record 35 | A set consisting of exactly one edge, one feature, and one range, related 36 | through the edge ID. 
37 | 38 | Address query 39 | An ordered set of {Number Prefix, Number, Directional Prefix, Type Prefix, 40 | Qualifier Prefix, Street Name, Qualifier Suffix, Type Suffix, Directional 41 | Suffix, City, State, ZIP}. All of the elements are optional except Number and 42 | Street Name. Either ZIP or City must also be present. The State element 43 | and all of the prefix and suffix elements are assumed to be normalized to 44 | standard postal abbreviations. 45 | 46 | Address string 47 | A string including some or all of the elements of an address. 48 | 49 | Address Lookup Strategy 50 | ----------------------- 51 | 52 | 1. Given an address query, initialize an empty set of candidate places, 53 | and an empty set of candidate address records. 54 | 55 | #. If a ZIP was given, look up `the place from the ZIP`_, and add the 56 | place, if any, to the candidate place set. 57 | 58 | #. If a city was given, look up all `the places matching the metaphone hash 59 | of the city name`_, and add them, if any, to the candidate place set. 60 | 61 | #. Generate a unique set of ZIPs from the set of candidate places, since a ZIP 62 | may have one or more names associated with it. 63 | 64 | #. Generate `a list of candidate address records`_ by fetching all the street 65 | features matching the metaphone hash of the street name and one of the ZIPs 66 | in the query set, along with the ranges matching the edge ID of each 67 | feature, where the given number is in the range. The edge does not 68 | need to be fetched yet. 69 | 70 | #. If the lookup generates no results, optionally generate `more candidate 71 | records`_ by looking up all the street features matching the metaphone hash 72 | of the street name, along with the ranges matching the edge ID of each 73 | feature, where the given number is in the range. This may be a very time 74 | consuming database query, because some street names are quite common. 75 | 76 | #. Score each of the candidate records as follows: 77 | 78 | a.
Score one point for every provided element of the address query that it 79 | matches exactly. 80 | #. Optionally, compute the scaled Damerau-Levenshtein distance (or 81 | alternately the simple Levenshtein distance) between each provided 82 | element of the address query and the corresponding element in the 83 | candidate. Score one minus the scaled distance, which yields a fraction 84 | of a point. 85 | #. Score one point if the parity of starting range number matches the parity 86 | of the queried address number. 87 | #. Note that the maximum possible score is equal to the number of provided 88 | elements in the address query. Divide the score by the maximum possible. 89 | This is the confidence value of the candidate. 90 | 91 | #. Sort the candidate address records by confidence. Retain only the records 92 | that share the highest confidence as candidates. 93 | 94 | #. Fetch `the edges and primary feature names`_ matching the edge IDs of 95 | the remaining candidate address records. 96 | 97 | #. For each remaining candidate record: 98 | 99 | a. Replace the candidate record feature elements with those of the 100 | primary feature name for that edge. 101 | #. Fetch `all of the ranges for the edge ID`_ of the candidate, sorted by 102 | starting number. 103 | #. Compute the sum of the differences of the starting and ending house 104 | number for each range. This is the total number width of the edge. 105 | #. Take the difference between the candidate starting number and the lowest 106 | starting number, add the difference between the queried number and the 107 | candidate starting number, and divide by the total number width. This is 108 | the interpolation distance. 109 | #. Optionally, find the local UTM zone and project the edge into it. 110 | #. Find the point along the line at the interpolation distance. 111 | #. If the edge was projected, unproject the point. 112 | #. Assign the point as the geocoded location of the query to the candidate 113 | record. 
114 | 115 | #. Construct a set of result ZIPs from the remaining candidates, and look up 116 | `the primary name and state for each ZIP`_ in the set. Assign the matching 117 | primary city and state to each candidate. 118 | 119 | #. Return the set of candidate records as the result of the query. 120 | 121 | SQL Statements 122 | -------------- 123 | 124 | the place from the ZIP 125 | ~~~~~~~~~~~~~~~~~~~~~~~ 126 | 127 | :: 128 | 129 | SELECT * FROM place WHERE zip = '...'; 130 | 131 | the places matching the metaphone hash of the city name 132 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 133 | 134 | :: 135 | 136 | SELECT * FROM place WHERE city_phone = metaphone('...'); 137 | 138 | a list of candidate address records 139 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 140 | 141 | :: 142 | 143 | SELECT feature.*, range.* FROM feature, range 144 | WHERE name_phone = metaphone('...') AND feature.zip IN (...) 145 | AND range.tlid = feature.tlid 146 | AND fromhn <= ... AND tohn >= ...; 147 | 148 | more candidate records 149 | ~~~~~~~~~~~~~~~~~~~~~~ 150 | 151 | :: 152 | 153 | SELECT feature.*, range.* FROM feature, range 154 | WHERE name_phone = metaphone('...') 155 | AND range.tlid = feature.tlid 156 | AND fromhn <= ... AND tohn >= ...; 157 | 158 | the edges and primary feature names 159 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 160 | 161 | :: 162 | 163 | SELECT feature.*, edge.* FROM feature, edge 164 | WHERE feature.tlid = ... AND paflag = 'P' 165 | AND edge.tlid = feature.tlid; 166 | 167 | -- or 168 | 169 | SELECT feature.*, edge.* FROM feature, edge 170 | WHERE feature.tlid IN (...) 
171 | AND paflag = 'P' 172 | AND edge.tlid = feature.tlid; 173 | 174 | all of the ranges for the edge ID 175 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 176 | 177 | :: 178 | 179 | SELECT * FROM range WHERE range.tlid = ...; 180 | 181 | -- or 182 | 183 | SELECT * FROM range WHERE range.tlid IN (...); 184 | 185 | the primary name and state for each ZIP 186 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 187 | 188 | 189 | :: 190 | 191 | SELECT * FROM place WHERE zip IN (...) AND paflag = 'P'; 192 | 193 | = 30 = 194 | -------------------------------------------------------------------------------- /doc/parsing.rst: -------------------------------------------------------------------------------- 1 | .. _parsing: 2 | 3 | ==================================== 4 | Geocoder.us Address Parsing Strategy 5 | ==================================== 6 | 7 | :Author: Schuyler Erle 8 | :Contact: schuyler at geocoder dot us 9 | :Created: 2009/03/18 10 | :Edited: 2009/03/18 11 | 12 | Structured address components 13 | ----------------------------- 14 | 15 | Unless otherwise labeled as "required", all components of a structured address 16 | are optional. 17 | 18 | prenum 19 | The alphanumeric prefix portion of a house or building number. (e.g. "32-" 20 | in "32-20 Jackson St") 21 | 22 | number 23 | The house or building number component. Required. 24 | 25 | sufnum 26 | The alphanumeric suffix portion of a house or building number. (e.g. "23B 27 | Baker St") 28 | 29 | fraction 30 | The fractional portion of a house or building number. (e.g. "23 1/2 Baker 31 | St") 32 | 33 | predir 34 | The prefixed street directional component. (e.g. "N", "SW") 35 | 36 | prequal 37 | The prefixed street qualifier component. (e.g. "Old", "Business") 38 | 39 | pretyp 40 | The prefixed street type component. (e.g. "US Hwy") 41 | 42 | street 43 | The main portion of the street name. Required. 44 | 45 | suftyp 46 | The suffixed street type component. (e.g.
"Rd", "Ave") 47 | 48 | sufqual 49 | The suffixed street qualifier component. 50 | 51 | sufdir 52 | The suffixed street directional component. 53 | 54 | unittyp 55 | The unit type, if any. (e.g. "Fl", "Apt", "Ste") 56 | 57 | unit 58 | The unit identifier, if any. 59 | 60 | city 61 | The name of the city or locale. 62 | 63 | state 64 | The two letter postal state code. 65 | 66 | zip 67 | The zero padded, five digit ZIP postal code. 68 | 69 | plus4 70 | The zero padded, four digit ZIP+4 postal extension. 71 | 72 | Parsing Strategy 73 | ---------------- 74 | 75 | Each component will have a regular expression, and a maximum 76 | count. Components are ordered from first to last. 77 | 78 | Those components drawn from finite lists - directionals, qualifiers, 79 | types, and states - will have regular expressions composed of the union of 80 | the corresponding list. 81 | 82 | A *parse* will consist of a component state, a penalty count, a list of 83 | component strings and a counter for each component. 84 | 85 | 1. Initialize an input stack, consisting of a single blank parse. 86 | 87 | #. Split the address string on whitespace into tokens. 88 | 89 | #. For each token: 90 | 91 | A. For each component: 92 | 93 | i. Test the token against the regular expression. 94 | #. If the regexp matches, add the component name to a list of matching 95 | components. 96 | 97 | #. Initialize an empty output stack. 98 | 99 | #. For each parse in the input stack: 100 | 101 | i. Copy the current parse, increment the penalty count on the new parse, 102 | and add it to the output stack. 103 | #. For each matching component for the current token: 104 | 105 | a. If the component state for this parse is later than the 106 | matching component, continue to the next matching component. 107 | #. If the component count for this parse state is equal to the 108 | maximum count for the component, continue to the next matching 109 | component. 110 | #.
Otherwise, copy the parse state, and append the token to the 111 | component string, with a leading space, if necessary. 112 | #. Increment the matching component counter for the current parse. 113 | #. Set the component state of the current parse to the matching 114 | component. 115 | #. Push the new parse on to the output stack. 116 | 117 | #. Replace the input stack with the output stack. 118 | 119 | #. Post-process number prefix/suffixes and ZIP+4 extensions. 120 | 121 | #. Score each parse by the number of components with non-empty strings, 122 | minus the penalty count of the parse. 123 | 124 | #. Return the sorted list of parsed string lists. 125 | 126 | -------------------------------------------------------------------------------- /doc/voidspace.css: -------------------------------------------------------------------------------- 1 | /* 2 | :Authors: Ian Bicking, Michael Foord 3 | :Contact: fuzzyman@voidspace.org.uk 4 | :Date: 2005/08/26 5 | :Version: 0.1.0 6 | :Copyright: This stylesheet has been placed in the public domain. 7 | :Modified By: Schuyler Erle, for geocoder.us, 2008-03-14 8 | 9 | Stylesheet for Docutils. 10 | Based on ``blue_box.css`` by Ian Bicking 11 | and ``html4css1.css`` revision 1.46. 
12 | */ 13 | 14 | @import url(html4css1.css); 15 | 16 | /* changes made by SDE */ 17 | body { 18 | font-family: Arial, sans-serif; 19 | margin-left: 10%; 20 | margin-right: 10%; 21 | } 22 | 23 | p { text-align: justify; } 24 | dt { font-style: italic; } 25 | /* end changes */ 26 | 27 | em, i { 28 | /* Typically serif fonts have much nicer italics */ 29 | font-family: Times New Roman, Times, serif; 30 | } 31 | 32 | a.target { 33 | color: blue; 34 | } 35 | 36 | a.target { 37 | color: blue; 38 | } 39 | 40 | a.toc-backref { 41 | text-decoration: none; 42 | color: black; 43 | } 44 | 45 | a.toc-backref:hover { 46 | background-color: inherit; 47 | } 48 | 49 | a:hover { 50 | background-color: #cccccc; 51 | } 52 | 53 | div.attention, div.caution, div.danger, div.error, div.hint, 54 | div.important, div.note, div.tip, div.warning { 55 | background-color: #cccccc; 56 | padding: 3px; 57 | width: 80%; 58 | } 59 | 60 | div.admonition p.admonition-title, div.hint p.admonition-title, 61 | div.important p.admonition-title, div.note p.admonition-title, 62 | div.tip p.admonition-title { 63 | text-align: center; 64 | background-color: #999999; 65 | display: block; 66 | margin: 0; 67 | } 68 | 69 | div.attention p.admonition-title, div.caution p.admonition-title, 70 | div.danger p.admonition-title, div.error p.admonition-title, 71 | div.warning p.admonition-title { 72 | color: #cc0000; 73 | font-family: sans-serif; 74 | text-align: center; 75 | background-color: #999999; 76 | display: block; 77 | margin: 0; 78 | } 79 | 80 | h1, h2, h3, h4, h5, h6 { 81 | font-family: Helvetica, Arial, sans-serif; 82 | border: thin solid black; 83 | /* This makes the borders rounded on Mozilla, which pleases me */ 84 | -moz-border-radius: 8px; 85 | padding: 4px; 86 | } 87 | 88 | h1 { 89 | background-color: #444499; 90 | color: #ffffff; 91 | border: medium solid black; 92 | } 93 | 94 | h1 a.toc-backref, h2 a.toc-backref { 95 | color: #ffffff; 96 | } 97 | 98 | h2 { 99 | background-color: #666666; 100 | 
color: #ffffff; 101 | border: medium solid black; 102 | } 103 | 104 | h3, h4, h5, h6 { 105 | background-color: #cccccc; 106 | color: #000000; 107 | } 108 | 109 | h3 a.toc-backref, h4 a.toc-backref, h5 a.toc-backref, 110 | h6 a.toc-backref { 111 | color: #000000; 112 | } 113 | 114 | h1.title { 115 | text-align: center; 116 | background-color: #444499; 117 | color: #eeeeee; 118 | border: thick solid black; 119 | -moz-border-radius: 20px; 120 | } 121 | 122 | table.footnote { 123 | padding-left: 0.5ex; 124 | } 125 | 126 | table.citation { 127 | padding-left: 0.5ex 128 | } 129 | 130 | pre.literal-block, pre.doctest-block { 131 | border: thin black solid; 132 | padding: 5px; 133 | } 134 | 135 | .image img { border-style : solid; 136 | border-width : 2px; 137 | } 138 | 139 | h1 tt, h2 tt, h3 tt, h4 tt, h5 tt, h6 tt { 140 | font-size: 100%; 141 | } 142 | 143 | code, tt { 144 | color: #000066; 145 | } 146 | 147 | 148 | -------------------------------------------------------------------------------- /gemspec: -------------------------------------------------------------------------------- 1 | Gem::Specification.new do |s| 2 | s.name = 'Geocoder-US' 3 | s.version = "2.0.1pre" 4 | s.author = "Schuyler Erle" 5 | s.email = 'geocoder@entropyfree.com' 6 | s.description = "US address geocoding based on TIGER/Line." 7 | s.summary = "US address geocoding based on TIGER/Line." 8 | s.homepage = "http://geocoder.us/" 9 | s.files = ["lib/geocoder/us.rb"] + Dir["lib/geocoder/us/*"] + Dir["tests/*"] 10 | s.require_path = "lib" 11 | s.test_files = "tests/run.rb" 12 | s.has_rdoc = true 13 | s.extra_rdoc_files = ["README.rdoc"] 14 | end 15 | -------------------------------------------------------------------------------- /lib/geocoder/us.rb: -------------------------------------------------------------------------------- 1 | require "geocoder/us/database" 2 | require "geocoder/us/address" 3 | 4 | # Imports the Geocoder::US::Database and Geocoder::US::Address 5 | # modules. 
6 | # 7 | # General usage is as follows: 8 | # 9 | # >> require 'geocoder/us' 10 | # >> db = Geocoder::US::Database.new("/opt/tiger/geocoder.db") 11 | # >> p db.geocode("1600 Pennsylvania Av, Washington DC") 12 | # 13 | # [{:pretyp=>"", :street=>"Pennsylvania", :sufdir=>"NW", :zip=>"20502", 14 | # :lon=>-77.037528, :number=>"1600", :fips_county=>"11001", :predir=>"", 15 | # :precision=>:range, :city=>"Washington", :lat=>38.898746, :suftyp=>"Ave", 16 | # :state=>"DC", :prequal=>"", :sufqual=>"", :score=>0.906, :prenum=>""}] 17 | # 18 | # See Geocoder::US::Database and README.txt for more details. 19 | module Geocoder::US 20 | VERSION = "2.0.0" 21 | end 22 | -------------------------------------------------------------------------------- /lib/geocoder/us/metaphone.rb: -------------------------------------------------------------------------------- 1 | module Text # :nodoc: 2 | module Metaphone 3 | 4 | module Rules # :nodoc:all 5 | 6 | # Metaphone rules. These are simply applied in order. 7 | # 8 | STANDARD = [ 9 | # Regexp, replacement 10 | [ /([bcdfhjklmnpqrstvwxyz])\1+/, 11 | '\1' ], # Remove doubled consonants except g. 12 | # [PHP] remove c from regexp. 13 | [ /^ae/, 'E' ], 14 | [ /^[gkp]n/, 'N' ], 15 | [ /^wr/, 'R' ], 16 | [ /^x/, 'S' ], 17 | [ /^wh/, 'W' ], 18 | [ /mb$/, 'M' ], # [PHP] remove $ from regexp. 19 | [ /(?!^)sch/, 'SK' ], 20 | [ /th/, '0' ], 21 | [ /t?ch|sh/, 'X' ], 22 | [ /c(?=ia)/, 'X' ], 23 | [ /[st](?=i[ao])/, 'X' ], 24 | [ /s?c(?=[iey])/, 'S' ], 25 | [ /[cq]/, 'K' ], 26 | [ /dg(?=[iey])/, 'J' ], 27 | [ /d/, 'T' ], 28 | [ /g(?=h[^aeiou])/, '' ], 29 | [ /gn(ed)?/, 'N' ], 30 | [ /([^g]|^)g(?=[iey])/, 31 | '\1J' ], 32 | [ /g+/, 'K' ], 33 | [ /ph/, 'F' ], 34 | [ /([aeiou])h(?=\b|[^aeiou])/, 35 | '\1' ], 36 | [ /[wy](?![aeiou])/, '' ], 37 | [ /z/, 'S' ], 38 | [ /v/, 'F' ], 39 | [ /(?!^)[aeiou]+/, '' ], 40 | ] 41 | 42 | # The rules for the 'buggy' alternate implementation used by PHP etc. 
43 | # 44 | BUGGY = STANDARD.dup 45 | BUGGY[0] = [ /([bdfhjklmnpqrstvwxyz])\1+/, '\1' ] 46 | BUGGY[6] = [ /mb/, 'M' ] 47 | end 48 | 49 | # Returns the Metaphone representation of a string. If the string contains 50 | # multiple words, each word in turn is converted into its Metaphone 51 | # representation. Note that only the letters A-Z are supported, so any 52 | # language-specific processing should be done beforehand. 53 | # 54 | # If the :buggy option is set, alternate 'buggy' rules are used. 55 | # 56 | def metaphone(str, options={}) 57 | return str.strip.split(/\s+/).map { |w| metaphone_word(w, options) }.join(' ') 58 | end 59 | 60 | private 61 | 62 | def metaphone_word(w, options={}) 63 | # Normalise case and remove non-ASCII 64 | s = w.downcase.gsub(/[^a-z]/, '') 65 | # Apply the Metaphone rules 66 | rules = options[:buggy] ? Rules::BUGGY : Rules::STANDARD 67 | rules.each { |rx, rep| s.gsub!(rx, rep) } 68 | return s.upcase 69 | end 70 | 71 | extend self 72 | 73 | end 74 | end 75 | -------------------------------------------------------------------------------- /lib/geocoder/us/numbers.rb: -------------------------------------------------------------------------------- 1 | module Geocoder 2 | end 3 | 4 | module Geocoder::US 5 | # The NumberMap class provides a means for mapping ordinal 6 | # and cardinal number words to digits and back. 7 | class NumberMap < Hash 8 | attr_accessor :regexp 9 | def self.[] (array) 10 | nmap = self.new({}) 11 | array.each {|item| nmap << item } 12 | nmap.build_match 13 | nmap 14 | end 15 | def initialize (array) 16 | @count = 0 17 | end 18 | def build_match 19 | @regexp = Regexp.new( 20 | '\b(' + keys.flatten.join("|") + ')\b', 21 | Regexp::IGNORECASE) 22 | end 23 | def clean (key) 24 | key.is_a?(String) ? 
key.downcase.gsub(/\W/o, "") : key 25 | end 26 | def <<(item) 27 | store clean(item), @count 28 | store @count, item 29 | @count += 1 30 | end 31 | def [] (key) 32 | super(clean(key)) 33 | end 34 | end 35 | 36 | # The Cardinals constant maps digits to cardinal number words and back. 37 | Cardinals = NumberMap[%w[ 38 | zero one two three four five six seven eight nine ten 39 | eleven twelve thirteen fourteen fifteen sixteen seventeen 40 | eighteen nineteen 41 | ]] 42 | Cardinal_Tens = %w[ twenty thirty forty fifty sixty seventy eighty ninety ] 43 | Cardinal_Tens.each {|tens| 44 | Cardinals << tens 45 | (1..9).each {|n| Cardinals << tens + "-" + Cardinals[n]} 46 | } 47 | 48 | # The Ordinals constant maps digits to ordinal number words and back. 49 | Ordinals = NumberMap[%w[ 50 | zeroth first second third fourth fifth sixth seventh eighth ninth 51 | tenth eleventh twelfth thirteenth fourteenth fifteenth sixteenth 52 | seventeenth eighteenth nineteenth 53 | ]] 54 | Cardinal_Tens.each {|tens| 55 | Ordinals << tens.gsub("y","ieth") 56 | (1..9).each {|n| Ordinals << tens + "-" + Ordinals[n]} 57 | } 58 | end 59 | -------------------------------------------------------------------------------- /lib/geocoder/us/rest.rb: -------------------------------------------------------------------------------- 1 | require 'sinatra' 2 | require 'geocoder/us/database' 3 | require 'json' 4 | 5 | @@db = Geocoder::US::Database.new(ENV["GEOCODER_DB"] || ARGV[0]) 6 | 7 | set :port, 8081 8 | get '/geocode' do 9 | if params[:q] 10 | results = @@db.geocode params[:q] 11 | features = [] 12 | results.each do |result| 13 | coords = [result.delete(:lon), result.delete(:lat)] 14 | result.keys.each do |key| 15 | if result[key].is_a? 
String 16 | result[key] = result[key].unpack("C*").pack("U*") # utf8 17 | end 18 | end 19 | features << { 20 | :type => "Feature", 21 | :properties => result, 22 | :geometry => { 23 | :type => "Point", 24 | :coordinates => coords 25 | } 26 | } 27 | end 28 | begin 29 | { 30 | :type => "FeatureCollection", 31 | :address => params[:q], 32 | :features => features 33 | }.to_json 34 | rescue JSON::GeneratorError 35 | { 36 | :type => "FeatureCollection", 37 | :error => "JSON::GeneratorError", 38 | :features => [] 39 | }.to_json 40 | end 41 | else 42 | status 400 43 | "parameter 'q' is missing" 44 | end 45 | end 46 | 47 | get '/health' do 48 | "All is well." 49 | end 50 | -------------------------------------------------------------------------------- /navteq/README: -------------------------------------------------------------------------------- 1 | The navteq_import script in this directory is designed to be used with Navteq's 2 | local_streets layer. It works basically like tiger_import, except that you 3 | provide either a list of .zip files containing the local_streets.* files on the 4 | command line, or via standard input. 
5 | -------------------------------------------------------------------------------- /navteq/convert.sql: -------------------------------------------------------------------------------- 1 | BEGIN; 2 | CREATE INDEX navteq_link_id on local_streets (link_id); 3 | 4 | CREATE TEMPORARY TABLE linezip AS 5 | SELECT DISTINCT tlid, zip FROM ( 6 | SELECT link_id AS tlid, r_postcode AS zip FROM local_streets 7 | WHERE addr_type IS NOT NULL AND st_name IS NOT NULL 8 | AND r_postcode IS NOT NULL 9 | UNION 10 | SELECT link_id AS tlid, l_postcode AS zip FROM local_streets 11 | WHERE addr_type IS NOT NULL AND st_name IS NOT NULL 12 | AND l_postcode IS NOT NULL 13 | ) AS whatever; 14 | 15 | INSERT INTO feature 16 | SELECT l.tlid, st_nm_base, metaphone(st_nm_base,5), st_nm_pref, st_typ_bef, 17 | NULL, st_nm_suff, st_typ_aft, NULL, 'P', zip 18 | FROM linezip l, local_streets f 19 | WHERE l.tlid=f.link_id AND st_name IS NOT NULL; 20 | 21 | INSERT OR IGNORE INTO edge 22 | SELECT l.tlid, compress_wkb_line(the_geom) FROM 23 | (SELECT DISTINCT tlid FROM linezip) AS l, local_streets f 24 | WHERE l.tlid=f.link_id AND st_name IS NOT NULL; 25 | 26 | INSERT INTO range 27 | SELECT link_id, digit_suffix(l_refaddr), digit_suffix(l_nrefaddr), 28 | nondigit_prefix(l_refaddr), l_postcode, 'L' 29 | FROM linezip l, local_streets f 30 | WHERE l.tlid=f.link_id AND l_refaddr IS NOT NULL 31 | UNION 32 | SELECT link_id, digit_suffix(r_refaddr), digit_suffix(r_nrefaddr), 33 | nondigit_prefix(r_refaddr), r_postcode, 'R' 34 | FROM linezip l, local_streets f 35 | WHERE l.tlid=f.link_id AND r_refaddr IS NOT NULL; 36 | 37 | END; 38 | -------------------------------------------------------------------------------- /navteq/navteq_import: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TMP="/tmp/navteq-import.$$" 4 | SHPS="local_streets" 5 | DBFS="" 6 | BASE=$(dirname $0) 7 | PATH=$PATH:$BASE/../bin 8 | SQL="$BASE/../sql" 9 | 
HELPER_LIB="$BASE/../lib/geocoder/us/sqlite3.so" 10 | DATABASE=$1 11 | shift 12 | 13 | mkdir -p $TMP || exit 1 14 | 15 | [ ! -r $DATABASE ] && cat ${SQL}/create.sql ${SQL}/place.sql | sqlite3 $DATABASE 16 | 17 | if [ x"$1" = x"" ]; then 18 | cat 19 | else 20 | ls $@ 21 | fi | while read county; do 22 | echo "--- $county" 23 | if [ -r ${county%.zip}.zip ]; then 24 | unzip -q $(ls ${county}.zip) -d $TMP 25 | else 26 | cp ${county%.*}.* $TMP 27 | fi 28 | (echo ".load $HELPER_LIB" && \ 29 | cat ${BASE}/prepare.sql && \ 30 | for file in $SHPS; do 31 | shp2sqlite -aS $(ls ${TMP}/${file}.shp) ${file} 32 | done && \ 33 | for file in $DBFS; do 34 | shp2sqlite -an $(ls ${TMP}/${file}.dbf) ${file} 35 | done && \ 36 | cat ${BASE}/convert.sql) | sqlite3 $DATABASE 37 | rm -f $TMP/* 38 | done 2>&1 | tee import-$$.log 39 | rm -rf $TMP 40 | -------------------------------------------------------------------------------- /navteq/prepare.sql: -------------------------------------------------------------------------------- 1 | PRAGMA temp_store=MEMORY; 2 | PRAGMA journal_mode=MEMORY; 3 | PRAGMA synchronous=OFF; 4 | PRAGMA cache_size=250000; 5 | PRAGMA count_changes=0; 6 | BEGIN; 7 | CREATE TABLE "local_streets" (gid integer PRIMARY KEY, 8 | "the_geom" blob, 9 | "link_id" integer, 10 | "st_name" varchar(80), 11 | "feat_id" integer, 12 | "st_langcd" varchar(3), 13 | "num_stnmes" integer, 14 | "st_nm_pref" varchar(2), 15 | "st_typ_bef" varchar(30), 16 | "st_nm_base" varchar(35), 17 | "st_nm_suff" varchar(2), 18 | "st_typ_aft" varchar(30), 19 | "st_typ_att" varchar(1), 20 | "addr_type" varchar(1), 21 | "l_refaddr" varchar(10), 22 | "l_nrefaddr" varchar(10), 23 | "l_addrsch" varchar(1), 24 | "l_addrform" varchar(1), 25 | "r_refaddr" varchar(10), 26 | "r_nrefaddr" varchar(10), 27 | "r_addrsch" varchar(1), 28 | "r_addrform" varchar(1), 29 | "ref_in_id" integer, 30 | "nref_in_id" integer, 31 | "n_shapepnt" integer, 32 | "func_class" varchar(1), 33 | "speed_cat" varchar(1), 34 | "fr_spd_lim" 
integer, 35 | "to_spd_lim" integer, 36 | "to_lanes" integer, 37 | "from_lanes" integer, 38 | "enh_geom" varchar(1), 39 | "lane_cat" varchar(1), 40 | "divider" varchar(1), 41 | "dir_travel" varchar(1), 42 | "l_area_id" integer, 43 | "r_area_id" integer, 44 | "l_postcode" varchar(11), 45 | "r_postcode" varchar(11), 46 | "l_numzones" integer, 47 | "r_numzones" integer, 48 | "num_ad_rng" integer, 49 | "ar_auto" varchar(1), 50 | "ar_bus" varchar(1), 51 | "ar_taxis" varchar(1), 52 | "ar_carpool" varchar(1), 53 | "ar_pedest" varchar(1), 54 | "ar_trucks" varchar(1), 55 | "ar_traff" varchar(1), 56 | "ar_deliv" varchar(1), 57 | "ar_emerveh" varchar(1), 58 | "paved" varchar(1), 59 | "private" varchar(1), 60 | "frontage" varchar(1), 61 | "bridge" varchar(1), 62 | "tunnel" varchar(1), 63 | "ramp" varchar(1), 64 | "tollway" varchar(1), 65 | "poiaccess" varchar(1), 66 | "contracc" varchar(1), 67 | "roundabout" varchar(1), 68 | "interinter" varchar(1), 69 | "undeftraff" varchar(1), 70 | "ferry_type" varchar(1), 71 | "multidigit" varchar(1), 72 | "maxattr" varchar(1), 73 | "spectrfig" varchar(1), 74 | "indescrib" varchar(1), 75 | "manoeuvre" varchar(1), 76 | "dividerleg" varchar(1), 77 | "inprocdata" varchar(1), 78 | "full_geom" varchar(1), 79 | "urban" varchar(1), 80 | "route_type" varchar(1), 81 | "dironsign" varchar(1), 82 | "explicatbl" varchar(1), 83 | "nameonrdsn" varchar(1), 84 | "postalname" varchar(1), 85 | "stalename" varchar(1), 86 | "vanityname" varchar(1), 87 | "junctionnm" varchar(1), 88 | "exitname" varchar(1), 89 | "scenic_rt" varchar(1), 90 | "scenic_nm" varchar(1)); 91 | --SELECT AddGeometryColumn('','local_streets','the_geom','-1','MULTILINESTRING',2); 92 | END; 93 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | $(MAKE) -C libsqlite3_geocoder 3 | $(MAKE) -C liblwgeom 4 | $(MAKE) -C shp2sqlite 5 | 6 | clean: 7 | $(MAKE) -C 
libsqlite3_geocoder clean 8 | $(MAKE) -C liblwgeom clean 9 | $(MAKE) -C shp2sqlite clean 10 | 11 | install: all 12 | cp libsqlite3_geocoder/*.so ../lib/geocoder/us/sqlite3.so 13 | $(MAKE) -C shp2sqlite install 14 | -------------------------------------------------------------------------------- /src/README: -------------------------------------------------------------------------------- 1 | What's in this directory 2 | ------------------------ 3 | 4 | shp2sqlite/ 5 | A fork of shp2pgsql that generates SQLite 3 compatible 6 | output. Used for import. 7 | liblwgeom/ 8 | Required by shp2sqlite for converting Shapefiles to WKB. 9 | libsqlite3_geocoder/ 10 | Not actually the geocoder itself, but a library of 11 | extensions to SQLite 3 to facilitate geocoding. 12 | metaphone/ 13 | Unused in this project. The metaphone functions have 14 | been rolled into libsqlite3_geocoder. 15 | -------------------------------------------------------------------------------- /src/liblwgeom/Makefile: -------------------------------------------------------------------------------- 1 | # ********************************************************************** 2 | # * $Id: Makefile.in 3 | # * 4 | # * PostGIS - Spatial Types for PostgreSQL 5 | # * http://postgis.refractions.net 6 | # * Copyright 2008 Mark Cave-Ayland 7 | # * 8 | # * This is free software; you can redistribute and/or modify it under 9 | # * the terms of the GNU General Public Licence. See the COPYING file. 
10 | # * 11 | # ********************************************************************** 12 | 13 | CC=gcc 14 | CFLAGS=-g -O2 -fPIC -DPIC -Wall -Wmissing-prototypes 15 | 16 | YACC=yacc 17 | LEX=flex 18 | 19 | # Standalone LWGEOM objects 20 | SA_OBJS = \ 21 | measures.o \ 22 | box2d.o \ 23 | ptarray.o \ 24 | lwgeom_api.o \ 25 | lwgeom.o \ 26 | lwpoint.o \ 27 | lwline.o \ 28 | lwpoly.o \ 29 | lwmpoint.o \ 30 | lwmline.o \ 31 | lwmpoly.o \ 32 | lwcollection.o \ 33 | lwcircstring.o \ 34 | lwcompound.o \ 35 | lwcurvepoly.o \ 36 | lwmcurve.o \ 37 | lwmsurface.o \ 38 | lwutil.o \ 39 | lwalgorithm.o \ 40 | lwgunparse.o \ 41 | lwgparse.o \ 42 | lwsegmentize.o \ 43 | wktparse.tab.o \ 44 | lex.yy.o \ 45 | vsprintf.o 46 | 47 | SA_HEADERS = \ 48 | liblwgeom.h \ 49 | lwalgorithm.h 50 | 51 | all: liblwgeom.a 52 | 53 | liblwgeom.a: $(SA_OBJS) $(SA_HEADERS) 54 | ar rs liblwgeom.a $(SA_OBJS) 55 | 56 | clean: 57 | rm -f $(SA_OBJS) 58 | rm -f liblwgeom.a 59 | 60 | check: liblwgeom.a 61 | make -C cunit check 62 | 63 | # Command to build each of the .o files 64 | $(SA_OBJS): %.o: %.c 65 | $(CC) $(CFLAGS) -c -o $@ $< 66 | 67 | # Commands to generate the lexer and parser from input files 68 | wktparse.tab.c: wktparse.y 69 | $(YACC) -vd -p lwg_parse_yy wktparse.y 70 | mv -f y.tab.c wktparse.tab.c 71 | mv -f y.tab.h wktparse.tab.h 72 | 73 | lex.yy.c: wktparse.lex wktparse.tab.c 74 | $(LEX) -Plwg_parse_yy -i -f -o'lex.yy.c' wktparse.lex 75 | 76 | -------------------------------------------------------------------------------- /src/liblwgeom/box2d.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "liblwgeom.h" 7 | 8 | #ifndef EPSILON 9 | #define EPSILON 1.0E-06 10 | #endif 11 | #ifndef FPeq 12 | #define FPeq(A,B) (fabs((A) - (B)) <= EPSILON) 13 | #endif 14 | 15 | 16 | /* Expand given box of 'd' units in all directions */ 17 | void 18 | expand_box2d(BOX2DFLOAT4 *box, double d) 19 | { 20 | 
box->xmin -= d; 21 | box->ymin -= d; 22 | 23 | box->xmax += d; 24 | box->ymax += d; 25 | } 26 | 27 | 28 | /* 29 | * This has been changed in PostGIS 1.1.2 to 30 | * check exact equality of values (rather then using 31 | * the FPeq macro taking into account coordinate drifts). 32 | */ 33 | char 34 | box2d_same(BOX2DFLOAT4 *box1, BOX2DFLOAT4 *box2) 35 | { 36 | return( (box1->xmax==box2->xmax) && 37 | (box1->xmin==box2->xmin) && 38 | (box1->ymax==box2->ymax) && 39 | (box1->ymin==box2->ymin)); 40 | #if 0 41 | return(FPeq(box1->xmax, box2->xmax) && 42 | FPeq(box1->xmin, box2->xmin) && 43 | FPeq(box1->ymax, box2->ymax) && 44 | FPeq(box1->ymin, box2->ymin)); 45 | #endif 46 | } 47 | 48 | BOX2DFLOAT4 * 49 | box2d_clone(const BOX2DFLOAT4 *in) 50 | { 51 | BOX2DFLOAT4 *ret = lwalloc(sizeof(BOX2DFLOAT4)); 52 | memcpy(ret, in, sizeof(BOX2DFLOAT4)); 53 | return ret; 54 | } 55 | -------------------------------------------------------------------------------- /src/liblwgeom/lwalgorithm.h: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | * $Id: lwalgorithm.h 3688 2009-02-11 21:48:13Z pramsey $ 3 | * 4 | * PostGIS - Spatial Types for PostgreSQL 5 | * http://postgis.refractions.net 6 | * Copyright 2008 Paul Ramsey 7 | * 8 | * This is free software; you can redistribute and/or modify it under 9 | * the terms of the GNU General Public Licence. See the COPYING file. 
10 | * 11 | **********************************************************************/ 12 | 13 | #include 14 | #include "liblwgeom.h" 15 | 16 | enum CG_SEGMENT_INTERSECTION_TYPE { 17 | SEG_ERROR = -1, 18 | SEG_NO_INTERSECTION = 0, 19 | SEG_COLINEAR = 1, 20 | SEG_CROSS_LEFT = 2, 21 | SEG_CROSS_RIGHT = 3, 22 | SEG_TOUCH_LEFT = 4, 23 | SEG_TOUCH_RIGHT = 5 24 | }; 25 | 26 | double lw_segment_side(POINT2D *p1, POINT2D *p2, POINT2D *q); 27 | int lw_segment_intersects(POINT2D *p1, POINT2D *p2, POINT2D *q1, POINT2D *q2); 28 | int lw_segment_envelope_intersects(POINT2D p1, POINT2D p2, POINT2D q1, POINT2D q2); 29 | 30 | 31 | enum CG_LINE_CROSS_TYPE { 32 | LINE_NO_CROSS = 0, 33 | LINE_CROSS_LEFT = -1, 34 | LINE_CROSS_RIGHT = 1, 35 | LINE_MULTICROSS_END_LEFT = -2, 36 | LINE_MULTICROSS_END_RIGHT = 2, 37 | LINE_MULTICROSS_END_SAME_FIRST_LEFT = -3, 38 | LINE_MULTICROSS_END_SAME_FIRST_RIGHT = 3 39 | }; 40 | 41 | int lwline_crossing_direction(LWLINE *l1, LWLINE *l2); 42 | 43 | double lwpoint_get_ordinate(const POINT4D *p, int ordinate); 44 | void lwpoint_set_ordinate(POINT4D *p, int ordinate, double value); 45 | int lwpoint_interpolate(const POINT4D *p1, const POINT4D *p2, POINT4D *p, int ndims, int ordinate, double interpolation_value); 46 | LWCOLLECTION *lwline_clip_to_ordinate_range(LWLINE *line, int ordinate, double from, double to); 47 | LWCOLLECTION *lwmline_clip_to_ordinate_range(LWMLINE *mline, int ordinate, double from, double to); 48 | 49 | int lwgeom_geohash_precision(BOX3D bbox, BOX3D *bounds); 50 | char *lwgeom_geohash(const LWGEOM *lwgeom, int precision); 51 | char *geohash_point(double longitude, double latitude, int precision); 52 | 53 | -------------------------------------------------------------------------------- /src/liblwgeom/lwcompound.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | * $Id: lwcompound.c 3639 2009-02-04 00:28:37Z pramsey $ 3 | * 4 | 
* PostGIS - Spatial Types for PostgreSQL 5 | * http://postgis.refractions.net 6 | * Copyright 2001-2006 Refractions Research Inc. 7 | * 8 | * This is free software; you can redistribute and/or modify it under 9 | * the terms of the GNU General Public Licence. See the COPYING file. 10 | * 11 | **********************************************************************/ 12 | 13 | #include 14 | #include 15 | #include 16 | #include "liblwgeom.h" 17 | 18 | LWCOMPOUND * 19 | lwcompound_deserialize(uchar *serialized) 20 | { 21 | LWCOMPOUND *result; 22 | LWGEOM_INSPECTED *insp; 23 | int type = lwgeom_getType(serialized[0]); 24 | int i; 25 | 26 | if(type != COMPOUNDTYPE) 27 | { 28 | lwerror("lwcompound_deserialize called on non compound: %d", type); 29 | return NULL; 30 | } 31 | 32 | insp = lwgeom_inspect(serialized); 33 | 34 | result = lwalloc(sizeof(LWCOMPOUND)); 35 | result->type = insp->type; 36 | result->SRID = insp->SRID; 37 | result->ngeoms = insp->ngeometries; 38 | result->geoms = lwalloc(sizeof(LWGEOM *)*insp->ngeometries); 39 | 40 | if(lwgeom_hasBBOX(serialized[0])) 41 | { 42 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4)); 43 | memcpy(result->bbox, serialized + 1, sizeof(BOX2DFLOAT4)); 44 | } 45 | else result->bbox = NULL; 46 | 47 | for(i = 0; i < insp->ngeometries; i++) 48 | { 49 | if(lwgeom_getType(insp->sub_geoms[i][0]) == LINETYPE) 50 | result->geoms[i] = (LWGEOM *)lwline_deserialize(insp->sub_geoms[i]); 51 | else 52 | result->geoms[i] = (LWGEOM *)lwcircstring_deserialize(insp->sub_geoms[i]); 53 | if(TYPE_NDIMS(result->geoms[i]->type) != TYPE_NDIMS(result->type)) 54 | { 55 | lwerror("Mixed dimensions (compound: %d, line/circularstring %d:%d)", 56 | TYPE_NDIMS(result->type), i, 57 | TYPE_NDIMS(result->geoms[i]->type) 58 | ); 59 | lwfree(result); 60 | return NULL; 61 | } 62 | } 63 | return result; 64 | } 65 | 66 | /* 67 | * Add 'what' to this string at position 'where' 68 | * where=0 == prepend 69 | * where=-1 == append 70 | * Returns a COMPOUND or a 
GEOMETRYCOLLECTION 71 | */ 72 | LWGEOM * 73 | lwcompound_add(const LWCOMPOUND *to, uint32 where, const LWGEOM *what) 74 | { 75 | LWCOLLECTION *col; 76 | LWGEOM **geoms; 77 | int newtype; 78 | 79 | LWDEBUG(2, "lwcompound_add called."); 80 | 81 | if(where != -1 && where != 0) 82 | { 83 | lwerror("lwcompound_add only supports 0 or -1 as a second argument, not %d", where); 84 | return NULL; 85 | } 86 | 87 | /* dimensions compatibility are checked by caller */ 88 | 89 | /* Construct geoms array */ 90 | geoms = lwalloc(sizeof(LWGEOM *)*2); 91 | if(where == -1) /* append */ 92 | { 93 | geoms[0] = lwgeom_clone((LWGEOM *)to); 94 | geoms[1] = lwgeom_clone(what); 95 | } 96 | else /* prepend */ 97 | { 98 | geoms[0] = lwgeom_clone(what); 99 | geoms[1] = lwgeom_clone((LWGEOM *)to); 100 | } 101 | 102 | /* reset SRID and wantbbox flag from component types */ 103 | geoms[0]->SRID = geoms[1]->SRID = -1; 104 | TYPE_SETHASSRID(geoms[0]->type, 0); 105 | TYPE_SETHASSRID(geoms[1]->type, 0); 106 | TYPE_SETHASBBOX(geoms[0]->type, 0); 107 | TYPE_SETHASBBOX(geoms[1]->type, 0); 108 | 109 | /* Find appropriate geom type */ 110 | if(TYPE_GETTYPE(what->type) == LINETYPE || TYPE_GETTYPE(what->type) == CIRCSTRINGTYPE) newtype = COMPOUNDTYPE; 111 | else newtype = COLLECTIONTYPE; 112 | 113 | col = lwcollection_construct(newtype, 114 | to->SRID, NULL, 2, geoms); 115 | 116 | return (LWGEOM *)col; 117 | } 118 | 119 | -------------------------------------------------------------------------------- /src/liblwgeom/lwcurvepoly.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | * $Id: lwcurvepoly.c 3639 2009-02-04 00:28:37Z pramsey $ 3 | * 4 | * PostGIS - Spatial Types for PostgreSQL 5 | * http://postgis.refractions.net 6 | * Copyright 2001-2006 Refractions Research Inc. 
7 | * 8 | * This is free software; you can redistribute and/or modify it under 9 | * the terms of the GNU General Public Licence. See the COPYING file. 10 | * 11 | **********************************************************************/ 12 | 13 | /* basic LWCURVEPOLY manipulation */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include "liblwgeom.h" 19 | 20 | 21 | LWCURVEPOLY * 22 | lwcurvepoly_deserialize(uchar *srl) 23 | { 24 | LWCURVEPOLY *result; 25 | LWGEOM_INSPECTED *insp; 26 | int type = lwgeom_getType(srl[0]); 27 | int i; 28 | 29 | LWDEBUG(3, "lwcurvepoly_deserialize called."); 30 | 31 | if(type != CURVEPOLYTYPE) 32 | { 33 | lwerror("lwcurvepoly_deserialize called on NON curvepoly: %d", 34 | type); 35 | return NULL; 36 | } 37 | 38 | insp = lwgeom_inspect(srl); 39 | 40 | result = lwalloc(sizeof(LWCURVEPOLY)); 41 | result->type = insp->type; 42 | result->SRID = insp->SRID; 43 | result->nrings = insp->ngeometries; 44 | result->rings = lwalloc(sizeof(LWGEOM *)*insp->ngeometries); 45 | 46 | if(lwgeom_hasBBOX(srl[0])) 47 | { 48 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4)); 49 | memcpy(result->bbox, srl + 1, sizeof(BOX2DFLOAT4)); 50 | } 51 | else result->bbox = NULL; 52 | 53 | for(i = 0; i < insp->ngeometries; i++) 54 | { 55 | result->rings[i] = lwgeom_deserialize(insp->sub_geoms[i]); 56 | if(lwgeom_getType(result->rings[i]->type) != CIRCSTRINGTYPE 57 | && lwgeom_getType(result->rings[i]->type) != LINETYPE) 58 | { 59 | lwerror("Only Circularstrings and Linestrings are currently supported as rings, not %s (%d)", lwgeom_typename(result->rings[i]->type), result->rings[i]->type); 60 | lwfree(result); 61 | lwfree(insp); 62 | return NULL; 63 | } 64 | if(TYPE_NDIMS(result->rings[i]->type) != TYPE_NDIMS(result->type)) 65 | { 66 | lwerror("Mixed dimensions (curvepoly %d, ring %d)", 67 | TYPE_NDIMS(result->type), i, 68 | TYPE_NDIMS(result->rings[i]->type)); 69 | lwfree(result); 70 | lwfree(insp); 71 | return NULL; 72 | } 73 | } 74 | return result; 75 | } 76 | 77 | 
LWGEOM * 78 | lwcurvepoly_add(const LWCURVEPOLY *to, uint32 where, const LWGEOM *what) 79 | { 80 | /* TODO */ 81 | lwerror("lwcurvepoly_add not yet implemented."); 82 | return NULL; 83 | } 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /src/liblwgeom/lwgunparse.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simplegeo/geocoder/5c7f678a1abe79c77c36153fbcee32e4dec24e53/src/liblwgeom/lwgunparse.c -------------------------------------------------------------------------------- /src/liblwgeom/lwmcurve.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | * $Id: lwmcurve.c 3639 2009-02-04 00:28:37Z pramsey $ 3 | * 4 | * PostGIS - Spatial Types for PostgreSQL 5 | * http://postgis.refractions.net 6 | * Copyright 2001-2006 Refractions Research Inc. 7 | * 8 | * This is free software; you can redistribute and/or modify it under 9 | * the terms of the GNU General Public Licence. See the COPYING file. 
10 | * 11 | **********************************************************************/ 12 | 13 | #include 14 | #include 15 | #include 16 | #include "liblwgeom.h" 17 | 18 | LWMCURVE * 19 | lwmcurve_deserialize(uchar *srl) 20 | { 21 | LWMCURVE *result; 22 | LWGEOM_INSPECTED *insp; 23 | int stype; 24 | int type = lwgeom_getType(srl[0]); 25 | int i; 26 | 27 | if(type != MULTICURVETYPE) 28 | { 29 | lwerror("lwmcurve_deserialize called on NON multicurve: %d", type); 30 | return NULL; 31 | } 32 | 33 | insp = lwgeom_inspect(srl); 34 | 35 | result = lwalloc(sizeof(LWMCURVE)); 36 | result->type = insp->type; 37 | result->SRID = insp->SRID; 38 | result->ngeoms = insp->ngeometries; 39 | result->geoms = lwalloc(sizeof(LWGEOM *)*insp->ngeometries); 40 | 41 | if(lwgeom_hasBBOX(srl[0])) 42 | { 43 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4)); 44 | memcpy(result->bbox, srl+1, sizeof(BOX2DFLOAT4)); 45 | } 46 | else result->bbox = NULL; 47 | 48 | for(i = 0; i < insp->ngeometries; i++) 49 | { 50 | stype = lwgeom_getType(insp->sub_geoms[i][0]); 51 | if(stype == CIRCSTRINGTYPE) 52 | { 53 | result->geoms[i] = (LWGEOM *)lwcircstring_deserialize(insp->sub_geoms[i]); 54 | } 55 | else if(stype == LINETYPE) 56 | { 57 | result->geoms[i] = (LWGEOM *)lwline_deserialize(insp->sub_geoms[i]); 58 | } 59 | else 60 | { 61 | lwerror("Only Circular and Line strings are currenly permitted in a MultiCurve."); 62 | lwfree(result); 63 | lwfree(insp); 64 | return NULL; 65 | } 66 | 67 | if(TYPE_NDIMS(result->geoms[i]->type) != TYPE_NDIMS(result->type)) 68 | { 69 | lwerror("Mixed dimensions (multicurve: %d, curve %d:%d)", 70 | TYPE_NDIMS(result->type), i, 71 | TYPE_NDIMS(result->geoms[i]->type)); 72 | lwfree(result); 73 | lwfree(insp); 74 | return NULL; 75 | } 76 | } 77 | return result; 78 | } 79 | 80 | /* 81 | * Add 'what' to this multicurve at position 'where'. 
82 | * where=0 == prepend 83 | * where=-1 == append 84 | * Returns a MULTICURVE or a COLLECTION 85 | */ 86 | LWGEOM * 87 | lwmcurve_add(const LWMCURVE *to, uint32 where, const LWGEOM *what) 88 | { 89 | LWCOLLECTION *col; 90 | LWGEOM **geoms; 91 | int newtype; 92 | uint32 i; 93 | 94 | if(where == -1) where = to->ngeoms; 95 | else if(where < -1 || where > to->ngeoms) 96 | { 97 | lwerror("lwmcurve_add: add position out of range %d..%d", 98 | -1, to->ngeoms); 99 | return NULL; 100 | } 101 | 102 | /* dimensions compatibility are checked by caller */ 103 | 104 | /* Construct geoms array */ 105 | geoms = lwalloc(sizeof(LWGEOM *)*(to->ngeoms+1)); 106 | for(i = 0; i < where; i++) 107 | { 108 | geoms[i] = lwgeom_clone((LWGEOM *)to->geoms[i]); 109 | } 110 | geoms[where] = lwgeom_clone(what); 111 | for(i = where; i < to->ngeoms; i++) 112 | { 113 | geoms[i+1] = lwgeom_clone((LWGEOM *)to->geoms[i]); 114 | } 115 | 116 | if(TYPE_GETTYPE(what->type) == CIRCSTRINGTYPE) newtype = MULTICURVETYPE; 117 | else newtype = COLLECTIONTYPE; 118 | 119 | col = lwcollection_construct(newtype, 120 | to->SRID, NULL, 121 | to->ngeoms + 1, geoms); 122 | 123 | return (LWGEOM *)col; 124 | } 125 | 126 | -------------------------------------------------------------------------------- /src/liblwgeom/lwmline.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | * $Id: lwmline.c 3639 2009-02-04 00:28:37Z pramsey $ 3 | * 4 | * PostGIS - Spatial Types for PostgreSQL 5 | * http://postgis.refractions.net 6 | * Copyright 2001-2006 Refractions Research Inc. 7 | * 8 | * This is free software; you can redistribute and/or modify it under 9 | * the terms of the GNU General Public Licence. See the COPYING file. 
10 | * 11 | **********************************************************************/ 12 | 13 | #include 14 | #include 15 | #include 16 | #include "liblwgeom.h" 17 | 18 | void 19 | lwmline_release(LWMLINE *lwmline) 20 | { 21 | lwgeom_release(lwmline_as_lwgeom(lwmline)); 22 | } 23 | 24 | LWMLINE * 25 | lwmline_deserialize(uchar *srl) 26 | { 27 | LWMLINE *result; 28 | LWGEOM_INSPECTED *insp; 29 | int type = lwgeom_getType(srl[0]); 30 | int i; 31 | 32 | if ( type != MULTILINETYPE ) 33 | { 34 | lwerror("lwmline_deserialize called on NON multiline: %d", 35 | type); 36 | return NULL; 37 | } 38 | 39 | insp = lwgeom_inspect(srl); 40 | 41 | result = lwalloc(sizeof(LWMLINE)); 42 | result->type = insp->type; 43 | result->SRID = insp->SRID; 44 | result->ngeoms = insp->ngeometries; 45 | result->geoms = lwalloc(sizeof(LWLINE *)*insp->ngeometries); 46 | 47 | if (lwgeom_hasBBOX(srl[0])) 48 | { 49 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4)); 50 | memcpy(result->bbox, srl+1, sizeof(BOX2DFLOAT4)); 51 | } 52 | else result->bbox = NULL; 53 | 54 | 55 | for (i=0; ingeometries; i++) 56 | { 57 | result->geoms[i] = lwline_deserialize(insp->sub_geoms[i]); 58 | if ( TYPE_NDIMS(result->geoms[i]->type) != TYPE_NDIMS(result->type) ) 59 | { 60 | lwerror("Mixed dimensions (multiline:%d, line%d:%d)", 61 | TYPE_NDIMS(result->type), i, 62 | TYPE_NDIMS(result->geoms[i]->type) 63 | ); 64 | return NULL; 65 | } 66 | } 67 | 68 | return result; 69 | } 70 | 71 | /* 72 | * Add 'what' to this multiline at position 'where'. 
73 | * where=0 == prepend 74 | * where=-1 == append 75 | * Returns a MULTILINE or a COLLECTION 76 | */ 77 | LWGEOM * 78 | lwmline_add(const LWMLINE *to, uint32 where, const LWGEOM *what) 79 | { 80 | LWCOLLECTION *col; 81 | LWGEOM **geoms; 82 | int newtype; 83 | uint32 i; 84 | 85 | if ( where == -1 ) where = to->ngeoms; 86 | else if ( where < -1 || where > to->ngeoms ) 87 | { 88 | lwerror("lwmline_add: add position out of range %d..%d", 89 | -1, to->ngeoms); 90 | return NULL; 91 | } 92 | 93 | /* dimensions compatibility are checked by caller */ 94 | 95 | /* Construct geoms array */ 96 | geoms = lwalloc(sizeof(LWGEOM *)*(to->ngeoms+1)); 97 | for (i=0; igeoms[i]); 100 | } 101 | geoms[where] = lwgeom_clone(what); 102 | for (i=where; ingeoms; i++) 103 | { 104 | geoms[i+1] = lwgeom_clone((LWGEOM *)to->geoms[i]); 105 | } 106 | 107 | if ( TYPE_GETTYPE(what->type) == LINETYPE ) newtype = MULTILINETYPE; 108 | else newtype = COLLECTIONTYPE; 109 | 110 | col = lwcollection_construct(newtype, 111 | to->SRID, NULL, 112 | to->ngeoms+1, geoms); 113 | 114 | return (LWGEOM *)col; 115 | 116 | } 117 | 118 | void lwmline_free(LWMLINE *mline) 119 | { 120 | int i; 121 | if( mline->bbox ) 122 | { 123 | lwfree(mline->bbox); 124 | } 125 | for ( i = 0; i < mline->ngeoms; i++ ) 126 | { 127 | if( mline->geoms[i] ) { 128 | lwline_free(mline->geoms[i]); 129 | } 130 | } 131 | if( mline->geoms ) 132 | { 133 | lwfree(mline->geoms); 134 | } 135 | lwfree(mline); 136 | 137 | }; 138 | -------------------------------------------------------------------------------- /src/liblwgeom/lwmpoint.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | * $Id: lwmpoint.c 3639 2009-02-04 00:28:37Z pramsey $ 3 | * 4 | * PostGIS - Spatial Types for PostgreSQL 5 | * http://postgis.refractions.net 6 | * Copyright 2001-2006 Refractions Research Inc. 
7 | * 8 | * This is free software; you can redistribute and/or modify it under 9 | * the terms of the GNU General Public Licence. See the COPYING file. 10 | * 11 | **********************************************************************/ 12 | 13 | #include 14 | #include 15 | #include 16 | #include "liblwgeom.h" 17 | 18 | void 19 | lwmpoint_release(LWMPOINT *lwmpoint) 20 | { 21 | lwgeom_release(lwmpoint_as_lwgeom(lwmpoint)); 22 | } 23 | 24 | 25 | LWMPOINT * 26 | lwmpoint_deserialize(uchar *srl) 27 | { 28 | LWMPOINT *result; 29 | LWGEOM_INSPECTED *insp; 30 | int type = lwgeom_getType(srl[0]); 31 | int i; 32 | 33 | if ( type != MULTIPOINTTYPE ) 34 | { 35 | lwerror("lwmpoint_deserialize called on NON multipoint: %d", 36 | type); 37 | return NULL; 38 | } 39 | 40 | insp = lwgeom_inspect(srl); 41 | 42 | result = lwalloc(sizeof(LWMPOINT)); 43 | result->type = insp->type; 44 | result->SRID = insp->SRID; 45 | result->ngeoms = insp->ngeometries; 46 | result->geoms = lwalloc(sizeof(LWPOINT *)*result->ngeoms); 47 | 48 | if (lwgeom_hasBBOX(srl[0])) 49 | { 50 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4)); 51 | memcpy(result->bbox, srl+1, sizeof(BOX2DFLOAT4)); 52 | } 53 | else result->bbox = NULL; 54 | 55 | for (i=0; ingeometries; i++) 56 | { 57 | result->geoms[i] = lwpoint_deserialize(insp->sub_geoms[i]); 58 | if ( TYPE_NDIMS(result->geoms[i]->type) != TYPE_NDIMS(result->type) ) 59 | { 60 | lwerror("Mixed dimensions (multipoint:%d, point%d:%d)", 61 | TYPE_NDIMS(result->type), i, 62 | TYPE_NDIMS(result->geoms[i]->type) 63 | ); 64 | return NULL; 65 | } 66 | } 67 | 68 | return result; 69 | } 70 | 71 | /* 72 | * Add 'what' to this multipoint at position 'where'. 
73 | * where=0 == prepend 74 | * where=-1 == append 75 | * Returns a MULTIPOINT or a COLLECTION 76 | */ 77 | LWGEOM * 78 | lwmpoint_add(const LWMPOINT *to, uint32 where, const LWGEOM *what) 79 | { 80 | LWCOLLECTION *col; 81 | LWGEOM **geoms; 82 | int newtype; 83 | uint32 i; 84 | 85 | if ( where == -1 ) where = to->ngeoms; 86 | else if ( where < -1 || where > to->ngeoms ) 87 | { 88 | lwerror("lwmpoint_add: add position out of range %d..%d", 89 | -1, to->ngeoms); 90 | return NULL; 91 | } 92 | 93 | /* dimensions compatibility are checked by caller */ 94 | 95 | /* Construct geoms array */ 96 | geoms = lwalloc(sizeof(LWGEOM *)*(to->ngeoms+1)); 97 | for (i=0; igeoms[i]); 100 | } 101 | geoms[where] = lwgeom_clone(what); 102 | for (i=where; ingeoms; i++) 103 | { 104 | geoms[i+1] = lwgeom_clone((LWGEOM *)to->geoms[i]); 105 | } 106 | 107 | if ( TYPE_GETTYPE(what->type) == POINTTYPE ) newtype = MULTIPOINTTYPE; 108 | else newtype = COLLECTIONTYPE; 109 | 110 | col = lwcollection_construct(newtype, 111 | to->SRID, NULL, 112 | to->ngeoms+1, geoms); 113 | 114 | return (LWGEOM *)col; 115 | 116 | } 117 | 118 | void lwmpoint_free(LWMPOINT *mpt) 119 | { 120 | int i; 121 | if( mpt->bbox ) 122 | { 123 | lwfree(mpt->bbox); 124 | } 125 | for ( i = 0; i < mpt->ngeoms; i++ ) 126 | { 127 | if( mpt->geoms[i] ) { 128 | lwpoint_free(mpt->geoms[i]); 129 | } 130 | } 131 | if( mpt->geoms ) 132 | { 133 | lwfree(mpt->geoms); 134 | } 135 | lwfree(mpt); 136 | 137 | }; 138 | 139 | -------------------------------------------------------------------------------- /src/liblwgeom/lwmpoly.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | * $Id: lwmpoly.c 3639 2009-02-04 00:28:37Z pramsey $ 3 | * 4 | * PostGIS - Spatial Types for PostgreSQL 5 | * http://postgis.refractions.net 6 | * Copyright 2001-2006 Refractions Research Inc. 
7 | * 8 | * This is free software; you can redistribute and/or modify it under 9 | * the terms of the GNU General Public Licence. See the COPYING file. 10 | * 11 | **********************************************************************/ 12 | 13 | #include 14 | #include 15 | #include 16 | #include "liblwgeom.h" 17 | 18 | 19 | void 20 | lwmpoly_release(LWMPOLY *lwmpoly) 21 | { 22 | lwgeom_release(lwmpoly_as_lwgeom(lwmpoly)); 23 | } 24 | 25 | 26 | LWMPOLY * 27 | lwmpoly_deserialize(uchar *srl) 28 | { 29 | LWMPOLY *result; 30 | LWGEOM_INSPECTED *insp; 31 | int type = lwgeom_getType(srl[0]); 32 | int i; 33 | 34 | LWDEBUG(2, "lwmpoly_deserialize called"); 35 | 36 | if ( type != MULTIPOLYGONTYPE ) 37 | { 38 | lwerror("lwmpoly_deserialize called on NON multipoly: %d", 39 | type); 40 | return NULL; 41 | } 42 | 43 | insp = lwgeom_inspect(srl); 44 | 45 | result = lwalloc(sizeof(LWMPOLY)); 46 | result->type = insp->type; 47 | result->SRID = insp->SRID; 48 | result->ngeoms = insp->ngeometries; 49 | result->geoms = lwalloc(sizeof(LWPOLY *)*insp->ngeometries); 50 | 51 | if (lwgeom_hasBBOX(srl[0])) 52 | { 53 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4)); 54 | memcpy(result->bbox, srl+1, sizeof(BOX2DFLOAT4)); 55 | } 56 | else result->bbox = NULL; 57 | 58 | for (i=0; ingeometries; i++) 59 | { 60 | result->geoms[i] = lwpoly_deserialize(insp->sub_geoms[i]); 61 | if ( TYPE_NDIMS(result->geoms[i]->type) != TYPE_NDIMS(result->type) ) 62 | { 63 | lwerror("Mixed dimensions (multipoly:%d, poly%d:%d)", 64 | TYPE_NDIMS(result->type), i, 65 | TYPE_NDIMS(result->geoms[i]->type) 66 | ); 67 | return NULL; 68 | } 69 | } 70 | 71 | return result; 72 | } 73 | 74 | /* 75 | * Add 'what' to this multiline at position 'where'. 
76 | * where=0 == prepend 77 | * where=-1 == append 78 | * Returns a MULTIPOLY or a COLLECTION 79 | */ 80 | LWGEOM * 81 | lwmpoly_add(const LWMPOLY *to, uint32 where, const LWGEOM *what) 82 | { 83 | LWCOLLECTION *col; 84 | LWGEOM **geoms; 85 | int newtype; 86 | uint32 i; 87 | 88 | if ( where == -1 ) where = to->ngeoms; 89 | else if ( where < -1 || where > to->ngeoms ) 90 | { 91 | lwerror("lwmline_add: add position out of range %d..%d", 92 | -1, to->ngeoms); 93 | return NULL; 94 | } 95 | 96 | /* dimensions compatibility are checked by caller */ 97 | 98 | /* Construct geoms array */ 99 | geoms = lwalloc(sizeof(LWGEOM *)*(to->ngeoms+1)); 100 | for (i=0; igeoms[i]); 103 | } 104 | geoms[where] = lwgeom_clone(what); 105 | for (i=where; ingeoms; i++) 106 | { 107 | geoms[i+1] = lwgeom_clone((LWGEOM *)to->geoms[i]); 108 | } 109 | 110 | if ( TYPE_GETTYPE(what->type) == POLYGONTYPE ) newtype = MULTIPOLYGONTYPE; 111 | else newtype = COLLECTIONTYPE; 112 | 113 | col = lwcollection_construct(newtype, 114 | to->SRID, NULL, 115 | to->ngeoms+1, geoms); 116 | 117 | return (LWGEOM *)col; 118 | 119 | } 120 | 121 | void lwmpoly_free(LWMPOLY *mpoly) 122 | { 123 | int i; 124 | if( mpoly->bbox ) 125 | { 126 | lwfree(mpoly->bbox); 127 | } 128 | for ( i = 0; i < mpoly->ngeoms; i++ ) 129 | { 130 | if( mpoly->geoms[i] ) { 131 | lwpoly_free(mpoly->geoms[i]); 132 | } 133 | } 134 | if( mpoly->geoms ) 135 | { 136 | lwfree(mpoly->geoms); 137 | } 138 | lwfree(mpoly); 139 | 140 | }; 141 | 142 | -------------------------------------------------------------------------------- /src/liblwgeom/lwmsurface.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | * $Id: lwmsurface.c 3639 2009-02-04 00:28:37Z pramsey $ 3 | * 4 | * PostGIS - Spatial Types for PostgreSQL 5 | * http://postgis.refractions.net 6 | * Copyright 2001-2006 Refractions Research Inc. 
7 | * 8 | * This is free software; you can redistribute and/or modify it under 9 | * the terms of the GNU General Public Licence. See the COPYING file. 10 | * 11 | **********************************************************************/ 12 | 13 | #include 14 | #include 15 | #include 16 | #include "liblwgeom.h" 17 | 18 | 19 | LWMSURFACE * 20 | lwmsurface_deserialize(uchar *srl) 21 | { 22 | LWMSURFACE *result; 23 | LWGEOM_INSPECTED *insp; 24 | int stype; 25 | int type = lwgeom_getType(srl[0]); 26 | int i; 27 | 28 | LWDEBUG(2, "lwmsurface_deserialize called"); 29 | 30 | if(type != MULTISURFACETYPE) 31 | { 32 | lwerror("lwmsurface_deserialize called on a non-multisurface: %d", type); 33 | return NULL; 34 | } 35 | 36 | insp = lwgeom_inspect(srl); 37 | 38 | result = lwalloc(sizeof(LWMSURFACE)); 39 | result->type = insp->type; 40 | result->SRID = insp->SRID; 41 | result->ngeoms = insp->ngeometries; 42 | result->geoms = lwalloc(sizeof(LWPOLY *)*insp->ngeometries); 43 | 44 | if(lwgeom_hasBBOX(srl[0])) 45 | { 46 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4)); 47 | memcpy(result->bbox, srl + 1, sizeof(BOX2DFLOAT4)); 48 | } 49 | else result->bbox = NULL; 50 | 51 | for(i = 0; i < insp->ngeometries; i++) 52 | { 53 | stype = lwgeom_getType(insp->sub_geoms[i][0]); 54 | if(stype == POLYGONTYPE) 55 | { 56 | result->geoms[i] = (LWGEOM *)lwpoly_deserialize(insp->sub_geoms[i]); 57 | } 58 | else if(stype == CURVEPOLYTYPE) 59 | { 60 | result->geoms[i] = (LWGEOM *)lwcurvepoly_deserialize(insp->sub_geoms[i]); 61 | } 62 | else 63 | { 64 | lwerror("Only Polygons and Curved Polygons are supported in a MultiSurface."); 65 | lwfree(result); 66 | lwfree(insp); 67 | return NULL; 68 | } 69 | 70 | if(TYPE_NDIMS(result->geoms[i]->type) != TYPE_NDIMS(result->type)) 71 | { 72 | lwerror("Mixed dimensions (multisurface: %d, surface %d:%d", 73 | TYPE_NDIMS(result->type), i, 74 | TYPE_NDIMS(result->geoms[i]->type)); 75 | lwfree(result); 76 | lwfree(insp); 77 | return NULL; 78 | } 79 | } 80 | return result; 
81 | } 82 | 83 | /* 84 | * Add 'what' to this multisurface at position 'where' 85 | * where=0 == prepend 86 | * where=-1 == append 87 | * Returns a MULTISURFACE or a COLLECTION 88 | */ 89 | LWGEOM * 90 | lwmsurface_add(const LWMSURFACE *to, uint32 where, const LWGEOM *what) 91 | { 92 | LWCOLLECTION *col; 93 | LWGEOM **geoms; 94 | int newtype; 95 | uint32 i; 96 | 97 | if(where == -1) where = to->ngeoms; 98 | else if(where < -1 || where > to->ngeoms) 99 | { 100 | lwerror("lwmsurface_add: add position out of range %d..%d", 101 | -1, to->ngeoms); 102 | return NULL; 103 | } 104 | 105 | /* dimensions compatibility are checked by caller */ 106 | 107 | /* Construct geoms array */ 108 | geoms = lwalloc(sizeof(LWGEOM *)*(to->ngeoms+1)); 109 | for(i = 0; i < where; i++) 110 | { 111 | geoms[i] = lwgeom_clone((LWGEOM *)to->geoms[i]); 112 | } 113 | geoms[where] = lwgeom_clone(what); 114 | for(i = where; i < to->ngeoms; i++) 115 | { 116 | geoms[i+1] = lwgeom_clone((LWGEOM *)to->geoms[i]); 117 | } 118 | 119 | if(TYPE_GETTYPE(what->type) == POLYGONTYPE 120 | || TYPE_GETTYPE(what->type) == CURVEPOLYTYPE) 121 | newtype = MULTISURFACETYPE; 122 | else newtype = COLLECTIONTYPE; 123 | 124 | col = lwcollection_construct(newtype, 125 | to->SRID, NULL, to->ngeoms + 1, geoms); 126 | 127 | return (LWGEOM *)col; 128 | } 129 | 130 | -------------------------------------------------------------------------------- /src/liblwgeom/lwutil.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | /* Global variables */ 8 | #include "liblwgeom.h" 9 | 10 | void *init_allocator(size_t size); 11 | void init_freeor(void *mem); 12 | void *init_reallocator(void *mem, size_t size); 13 | void init_noticereporter(const char *fmt, va_list ap); 14 | void init_errorreporter(const char *fmt, va_list ap); 15 | 16 | lwallocator lwalloc_var = init_allocator; 17 | lwreallocator lwrealloc_var = init_reallocator; 18 | lwfreeor 
lwfree_var = init_freeor; 19 | lwreporter lwnotice_var = init_noticereporter; 20 | lwreporter lwerror_var = init_errorreporter; 21 | 22 | static char *lwgeomTypeName[] = { 23 | "Unknown", 24 | "Point", 25 | "Line", 26 | "Polygon", 27 | "MultiPoint", 28 | "MultiLine", 29 | "MultiPolygon", 30 | "GeometryCollection", 31 | "CircularString", 32 | "CompoundString", 33 | "Invalid Type", /* POINTTYPEI */ 34 | "Invalid Type", /* LINETYPEI */ 35 | "Invalid Type", /* POLYTYPEI */ 36 | "CurvePolygon", 37 | "MultiCurve", 38 | "MultiSurface" 39 | }; 40 | 41 | 42 | /* 43 | * lwnotice/lwerror handlers 44 | * 45 | * Since variadic functions cannot pass their parameters directly, we need 46 | * wrappers for these functions to convert the arguments into a va_list 47 | * structure. 48 | */ 49 | 50 | void 51 | lwnotice(const char *fmt, ...) 52 | { 53 | va_list ap; 54 | 55 | va_start(ap, fmt); 56 | 57 | /* Call the supplied function */ 58 | (*lwnotice_var)(fmt, ap); 59 | 60 | va_end(ap); 61 | } 62 | 63 | void 64 | lwerror(const char *fmt, ...) 65 | { 66 | va_list ap; 67 | 68 | va_start(ap, fmt); 69 | 70 | /* Call the supplied function */ 71 | (*lwerror_var)(fmt, ap); 72 | 73 | va_end(ap); 74 | } 75 | 76 | /* 77 | * Initialisation allocators 78 | * 79 | * These are used the first time any of the allocators are called 80 | * to enable executables/libraries that link into liblwgeom to 81 | * be able to set up their own allocators. This is mainly useful 82 | * for older PostgreSQL versions that don't have functions that 83 | * are called upon startup. 
84 | */ 85 | 86 | void * 87 | init_allocator(size_t size) 88 | { 89 | lwgeom_init_allocators(); 90 | 91 | return lwalloc_var(size); 92 | } 93 | 94 | void 95 | init_freeor(void *mem) 96 | { 97 | lwgeom_init_allocators(); 98 | 99 | lwfree_var(mem); 100 | } 101 | 102 | void * 103 | init_reallocator(void *mem, size_t size) 104 | { 105 | lwgeom_init_allocators(); 106 | 107 | return lwrealloc_var(mem, size); 108 | } 109 | 110 | void 111 | init_noticereporter(const char *fmt, va_list ap) 112 | { 113 | lwgeom_init_allocators(); 114 | 115 | (*lwnotice_var)(fmt, ap); 116 | } 117 | 118 | void 119 | init_errorreporter(const char *fmt, va_list ap) 120 | { 121 | lwgeom_init_allocators(); 122 | 123 | (*lwerror_var)(fmt, ap); 124 | } 125 | 126 | 127 | /* 128 | * Default allocators 129 | * 130 | * We include some default allocators that use malloc/free/realloc 131 | * along with stdout/stderr since this is the most common use case 132 | * 133 | */ 134 | 135 | void * 136 | default_allocator(size_t size) 137 | { 138 | void *mem = malloc(size); 139 | return mem; 140 | } 141 | 142 | void 143 | default_freeor(void *mem) 144 | { 145 | free(mem); 146 | } 147 | 148 | void * 149 | default_reallocator(void *mem, size_t size) 150 | { 151 | void *ret = realloc(mem, size); 152 | return ret; 153 | } 154 | 155 | void 156 | default_noticereporter(const char *fmt, va_list ap) 157 | { 158 | char *msg; 159 | 160 | /* 161 | * This is a GNU extension. 162 | * Dunno how to handle errors here. 163 | */ 164 | if (!lw_vasprintf (&msg, fmt, ap)) 165 | { 166 | va_end (ap); 167 | return; 168 | } 169 | printf("%s\n", msg); 170 | free(msg); 171 | } 172 | 173 | void 174 | default_errorreporter(const char *fmt, va_list ap) 175 | { 176 | char *msg; 177 | 178 | /* 179 | * This is a GNU extension. 180 | * Dunno how to handle errors here. 
181 | */ 182 | if (!lw_vasprintf (&msg, fmt, ap)) 183 | { 184 | va_end (ap); 185 | return; 186 | } 187 | fprintf(stderr, "%s\n", msg); 188 | free(msg); 189 | exit(1); 190 | } 191 | 192 | 193 | /* 194 | * This function should be called from lwgeom_init_allocators() by programs 195 | * which wish to use the default allocators above 196 | */ 197 | 198 | void lwgeom_install_default_allocators(void) 199 | { 200 | lwalloc_var = default_allocator; 201 | lwrealloc_var = default_reallocator; 202 | lwfree_var = default_freeor; 203 | lwerror_var = default_errorreporter; 204 | lwnotice_var = default_noticereporter; 205 | } 206 | 207 | 208 | const char * 209 | lwgeom_typename(int type) 210 | { 211 | /* something went wrong somewhere */ 212 | if ( type < 0 || type > 15 ) { 213 | /* assert(0); */ 214 | return "Invalid type"; 215 | } 216 | return lwgeomTypeName[type]; 217 | } 218 | 219 | void * 220 | lwalloc(size_t size) 221 | { 222 | void *mem = lwalloc_var(size); 223 | LWDEBUGF(5, "lwalloc: %d@%p", size, mem); 224 | return mem; 225 | } 226 | 227 | void * 228 | lwrealloc(void *mem, size_t size) 229 | { 230 | LWDEBUGF(5, "lwrealloc: %d@%p", size, mem); 231 | return lwrealloc_var(mem, size); 232 | } 233 | 234 | void 235 | lwfree(void *mem) 236 | { 237 | lwfree_var(mem); 238 | } 239 | 240 | /* 241 | * Removes trailing zeros and dot for a %f formatted number. 242 | * Modifies input. 243 | */ 244 | void 245 | trim_trailing_zeros(char *str) 246 | { 247 | char *ptr, *totrim=NULL; 248 | int len; 249 | int i; 250 | 251 | LWDEBUGF(3, "input: %s", str); 252 | 253 | ptr = strchr(str, '.'); 254 | if ( ! 
ptr ) return; /* no dot, no decimal digits */ 255 | 256 | LWDEBUGF(3, "ptr: %s", ptr); 257 | 258 | len = strlen(ptr); 259 | for (i=len-1; i; i--) 260 | { 261 | if ( ptr[i] != '0' ) break; 262 | totrim=&ptr[i]; 263 | } 264 | if ( totrim ) 265 | { 266 | if ( ptr == totrim-1 ) *ptr = '\0'; 267 | else *totrim = '\0'; 268 | } 269 | 270 | LWDEBUGF(3, "output: %s", str); 271 | } 272 | 273 | 274 | /* 275 | * Returns a new string which contains a maximum of maxlength characters starting 276 | * from startpos and finishing at endpos (0-based indexing). If the string is 277 | * truncated then the first or last characters are replaced by "..." as 278 | * appropriate. 279 | * 280 | * The caller should specify start or end truncation by setting the truncdirection 281 | * parameter as follows: 282 | * 0 - start truncation (i.e. characters are removed from the beginning) 283 | * 1 - end trunctation (i.e. characters are removed from the end) 284 | */ 285 | 286 | char *lwmessage_truncate(char *str, int startpos, int endpos, int maxlength, int truncdirection) 287 | { 288 | char *output; 289 | char *outstart; 290 | 291 | /* Allocate space for new string */ 292 | output = lwalloc(maxlength + 4); 293 | output[0] = '\0'; 294 | 295 | /* Start truncation */ 296 | if (truncdirection == 0) 297 | { 298 | /* Calculate the start position */ 299 | if (endpos - startpos < maxlength) 300 | { 301 | outstart = str + startpos; 302 | strncat(output, outstart, endpos - startpos + 1); 303 | } 304 | else 305 | { 306 | if (maxlength >= 3) 307 | { 308 | /* Add "..." prefix */ 309 | outstart = str + endpos + 1 - maxlength + 3; 310 | strncat(output, "...", 3); 311 | strncat(output, outstart, maxlength - 3); 312 | } 313 | else 314 | { 315 | /* maxlength is too small; just output "..." 
*/ 316 | strncat(output, "...", 3); 317 | } 318 | } 319 | } 320 | 321 | /* End truncation */ 322 | if (truncdirection == 1) 323 | { 324 | /* Calculate the end position */ 325 | if (endpos - startpos < maxlength) 326 | { 327 | outstart = str + startpos; 328 | strncat(output, outstart, endpos - startpos + 1); 329 | } 330 | else 331 | { 332 | if (maxlength >= 3) 333 | { 334 | /* Add "..." suffix */ 335 | outstart = str + startpos; 336 | strncat(output, outstart, maxlength - 3); 337 | strncat(output, "...", 3); 338 | } 339 | else 340 | { 341 | /* maxlength is too small; just output "..." */ 342 | strncat(output, "...", 3); 343 | } 344 | } 345 | } 346 | 347 | return output; 348 | } 349 | 350 | 351 | char 352 | getMachineEndian(void) 353 | { 354 | static int endian_check_int = 1; /* dont modify this!!! */ 355 | 356 | return *((char *) &endian_check_int); /* 0 = big endian | xdr, 357 | * 1 = little endian | ndr 358 | */ 359 | } 360 | 361 | 362 | void 363 | errorIfSRIDMismatch(int srid1, int srid2) 364 | { 365 | if ( srid1 != srid2 ) 366 | { 367 | lwerror("Operation on mixed SRID geometries"); 368 | } 369 | } 370 | -------------------------------------------------------------------------------- /src/liblwgeom/postgis_config.h: -------------------------------------------------------------------------------- 1 | /* postgis_config.h. Generated from postgis_config.h.in by configure. */ 2 | /* postgis_config.h.in. Generated from configure.ac by autoheader. */ 3 | 4 | /* Define to 1 if you have the header file. */ 5 | #define HAVE_DLFCN_H 1 6 | 7 | /* Defined if libiconv headers and library are present */ 8 | #define HAVE_ICONV 0 9 | 10 | /* Define to 1 if you have the header file. */ 11 | #define HAVE_INTTYPES_H 1 12 | 13 | /* Define to 1 if you have the `geos_c' library (-lgeos_c). */ 14 | #define HAVE_LIBGEOS_C 1 15 | 16 | /* Define to 1 if you have the `pq' library (-lpq). */ 17 | #define HAVE_LIBPQ 0 18 | 19 | /* Define to 1 if you have the `proj' library (-lproj). 
*/ 20 | #define HAVE_LIBPROJ 0 21 | 22 | /* Define to 1 if you have the header file. */ 23 | #define HAVE_MEMORY_H 1 24 | 25 | /* Define to 1 if you have the header file. */ 26 | #define HAVE_STDINT_H 1 27 | 28 | /* Define to 1 if you have the header file. */ 29 | #define HAVE_STDLIB_H 1 30 | 31 | /* Define to 1 if you have the header file. */ 32 | #define HAVE_STRINGS_H 1 33 | 34 | /* Define to 1 if you have the header file. */ 35 | #define HAVE_STRING_H 1 36 | 37 | /* Define to 1 if you have the header file. */ 38 | #define HAVE_SYS_STAT_H 1 39 | 40 | /* Define to 1 if you have the header file. */ 41 | #define HAVE_SYS_TYPES_H 1 42 | 43 | /* Define to 1 if you have the header file. */ 44 | #define HAVE_UNISTD_H 1 45 | 46 | /* Enable caching of bounding box within geometries */ 47 | #define POSTGIS_AUTOCACHE_BBOX 0 48 | 49 | /* PostGIS build date */ 50 | #define POSTGIS_BUILD_DATE "2009-03-09 15:11:36" 51 | 52 | /* PostGIS library debug level (0=disabled) */ 53 | #define POSTGIS_DEBUG_LEVEL 0 54 | 55 | /* GEOS library version */ 56 | #define POSTGIS_GEOS_VERSION 30 57 | 58 | /* PostGIS library version */ 59 | #define POSTGIS_LIB_VERSION "1.4.0SVN" 60 | 61 | /* PostGIS major version */ 62 | #define POSTGIS_MAJOR_VERSION "1" 63 | 64 | /* PostGIS micro version */ 65 | #define POSTGIS_MICRO_VERSION "0SVN" 66 | 67 | /* PostGIS minor version */ 68 | #define POSTGIS_MINOR_VERSION "4" 69 | 70 | /* PostgreSQL server version */ 71 | #define POSTGIS_PGSQL_VERSION 83 72 | 73 | /* Enable GEOS profiling (0=disabled) */ 74 | #define POSTGIS_PROFILE 0 75 | 76 | /* PROJ library version */ 77 | #define POSTGIS_PROJ_VERSION 46 78 | 79 | /* PostGIS scripts version */ 80 | #define POSTGIS_SCRIPTS_VERSION "1.4.0SVN" 81 | 82 | /* Enable use of ANALYZE statistics */ 83 | #define POSTGIS_USE_STATS 1 84 | 85 | /* PostGIS version */ 86 | #define POSTGIS_VERSION "1.4 USE_GEOS=1 USE_PROJ=1 USE_STATS=1" 87 | 88 | /* Define to 1 if you have the ANSI C header files. 
*/ 89 | #define STDC_HEADERS 1 90 | 91 | /* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a 92 | `char[]'. */ 93 | #define YYTEXT_POINTER 1 94 | -------------------------------------------------------------------------------- /src/liblwgeom/vsprintf.c: -------------------------------------------------------------------------------- 1 | /* Like vsprintf but provides a pointer to malloc'd storage, which must 2 | be freed by the caller. 3 | Copyright (C) 1994, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 2, or (at your option) 8 | any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program; if not, write to the Free Software 17 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ 18 | 19 | #ifdef HAVE_CONFIG_H 20 | # include 21 | #endif 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | #if __STDC__ 28 | # include 29 | #else 30 | # include 31 | #endif 32 | 33 | #include 34 | 35 | #ifdef TEST 36 | int global_total_width; 37 | #endif 38 | 39 | 40 | int lw_vasprintf (char **result, const char *format, va_list args); 41 | int lw_asprintf 42 | #if __STDC__ 43 | (char **result, const char *format, ...); 44 | #else 45 | (result, va_alist); 46 | char **result; 47 | va_dcl 48 | #endif 49 | 50 | 51 | static int 52 | int_vasprintf (result, format, args) 53 | char **result; 54 | const char *format; 55 | va_list *args; 56 | { 57 | const char *p = format; 58 | /* Add one to make sure that it is never zero, which might cause malloc 59 | to return NULL. */ 60 | int total_width = strlen (format) + 1; 61 | va_list ap; 62 | 63 | memcpy (&ap, args, sizeof (va_list)); 64 | 65 | while (*p != '\0') 66 | { 67 | if (*p++ == '%') 68 | { 69 | while (strchr ("-+ #0", *p)) 70 | ++p; 71 | if (*p == '*') 72 | { 73 | ++p; 74 | total_width += abs (va_arg (ap, int)); 75 | } 76 | else 77 | total_width += strtoul (p, (char **) &p, 10); 78 | if (*p == '.') 79 | { 80 | ++p; 81 | if (*p == '*') 82 | { 83 | ++p; 84 | total_width += abs (va_arg (ap, int)); 85 | } 86 | else 87 | total_width += strtoul (p, (char **) &p, 10); 88 | } 89 | while (strchr ("hlLjtz", *p)) 90 | ++p; 91 | /* Should be big enough for any format specifier except %s 92 | and floats. */ 93 | total_width += 30; 94 | switch (*p) 95 | { 96 | case 'd': 97 | case 'i': 98 | case 'o': 99 | case 'u': 100 | case 'x': 101 | case 'X': 102 | case 'c': 103 | (void) va_arg (ap, int); 104 | break; 105 | case 'f': 106 | { 107 | double arg = va_arg (ap, double); 108 | if (arg >= 1.0 || arg <= -1.0) 109 | /* Since an ieee double can have an exponent of 307, we'll 110 | make the buffer wide enough to cover the gross case. 
*/ 111 | total_width += 307; 112 | } 113 | break; 114 | case 'e': 115 | case 'E': 116 | case 'g': 117 | case 'G': 118 | (void) va_arg (ap, double); 119 | break; 120 | case 's': 121 | total_width += strlen (va_arg (ap, char *)); 122 | break; 123 | case 'p': 124 | case 'n': 125 | (void) va_arg (ap, char *); 126 | break; 127 | } 128 | p++; 129 | } 130 | } 131 | #ifdef TEST 132 | global_total_width = total_width; 133 | #endif 134 | *result = malloc (total_width); 135 | if (*result != NULL) 136 | return vsprintf (*result, format, *args); 137 | else 138 | return 0; 139 | } 140 | 141 | int 142 | lw_vasprintf (result, format, args) 143 | char **result; 144 | const char *format; 145 | va_list args; 146 | { 147 | va_list temp; 148 | 149 | /* Use va_copy for compatibility with both 32 and 64 bit args */ 150 | __va_copy(temp, args); 151 | 152 | return int_vasprintf (result, format, &temp); 153 | } 154 | 155 | int 156 | lw_asprintf 157 | #if __STDC__ 158 | (char **result, const char *format, ...) 159 | #else 160 | (result, va_alist) 161 | char **result; 162 | va_dcl 163 | #endif 164 | { 165 | va_list args; 166 | int done; 167 | 168 | #if __STDC__ 169 | va_start (args, format); 170 | #else 171 | char *format; 172 | va_start (args); 173 | format = va_arg (args, char *); 174 | #endif 175 | done = lw_vasprintf (result, format, args); 176 | va_end (args); 177 | 178 | return done; 179 | } 180 | -------------------------------------------------------------------------------- /src/liblwgeom/wktparse.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Written by Ralph Mason ralph.masontelogis.com 3 | * 4 | * Copyright Telogis 2004 5 | * www.telogis.com 6 | * 7 | */ 8 | 9 | #ifndef _WKTPARSE_H 10 | #define _WKTPARSE_H 11 | 12 | #include 13 | 14 | 15 | #ifndef _LIBLWGEOM_H 16 | typedef unsigned char uchar; 17 | 18 | typedef struct serialized_lwgeom { 19 | uchar *lwgeom; 20 | int size; 21 | } SERIALIZED_LWGEOM; 22 | 23 | typedef struct 
struct_lwgeom_parser_result 24 | { 25 | const char *wkinput; 26 | SERIALIZED_LWGEOM *serialized_lwgeom; 27 | int size; 28 | const char *message; 29 | int errlocation; 30 | } LWGEOM_PARSER_RESULT; 31 | 32 | typedef struct struct_lwgeom_unparser_result 33 | { 34 | uchar *serialized_lwgeom; 35 | char *wkoutput; 36 | int size; 37 | const char *message; 38 | int errlocation; 39 | } LWGEOM_UNPARSER_RESULT; 40 | #endif 41 | typedef void* (*allocator)(size_t size); 42 | typedef void (*freeor)(void* mem); 43 | typedef void (*report_error)(const char* string, ...); 44 | 45 | /*typedef unsigned long int4;*/ 46 | 47 | /* How much memory is allocated at a time(bytes) for tuples */ 48 | #define ALLOC_CHUNKS 8192 49 | 50 | /* to shrink ints less than 0x7f to 1 byte */ 51 | /* #define SHRINK_INTS */ 52 | 53 | #define POINTTYPE 1 54 | #define LINETYPE 2 55 | #define POLYGONTYPE 3 56 | #define MULTIPOINTTYPE 4 57 | #define MULTILINETYPE 5 58 | #define MULTIPOLYGONTYPE 6 59 | #define COLLECTIONTYPE 7 60 | 61 | /* Extended lwgeom integer types */ 62 | #define POINTTYPEI 10 63 | #define LINETYPEI 11 64 | #define POLYGONTYPEI 12 65 | 66 | #define CIRCSTRINGTYPE 8 67 | #define COMPOUNDTYPE 9 68 | #define CURVEPOLYTYPE 13 69 | #define MULTICURVETYPE 14 70 | #define MULTISURFACETYPE 15 71 | 72 | extern int srid; 73 | 74 | /* 75 | 76 | These functions are used by the 77 | generated parser and are not meant 78 | for public use 79 | 80 | */ 81 | 82 | void set_srid(double srid); 83 | void alloc_lwgeom(int srid); 84 | 85 | void alloc_point_2d(double x,double y); 86 | void alloc_point_3d(double x,double y,double z); 87 | void alloc_point_4d(double x,double y,double z,double m); 88 | 89 | void alloc_point(void); 90 | void alloc_linestring(void); 91 | void alloc_linestring_closed(void); 92 | void alloc_circularstring(void); 93 | void alloc_circularstring_closed(void); 94 | void alloc_polygon(void); 95 | void alloc_compoundcurve(void); 96 | void alloc_curvepolygon(void); 97 | void 
alloc_multipoint(void);
void alloc_multilinestring(void);
void alloc_multicurve(void);
void alloc_multipolygon(void);
void alloc_multisurface(void);
void alloc_geomertycollection(void);
void alloc_empty(void);
void alloc_counter(void);


void pop(void);
void popc(void);

void alloc_wkb(const char* parser);

/*
	Use these functions to parse and unparse lwgeoms
	You are responsible for freeing the returned memory.
*/

int parse_lwg(LWGEOM_PARSER_RESULT *lwg_parser_result, const char* wkt, int flags, allocator allocfunc,report_error errfunc);
int parse_lwgi(LWGEOM_PARSER_RESULT *lwg_parser_result, const char* wkt, int flags, allocator allocfunc,report_error errfunc);
int unparse_WKT(LWGEOM_UNPARSER_RESULT *lwg_unparser_result, uchar* serialized, allocator alloc, freeor free, int flags);
int unparse_WKB(LWGEOM_UNPARSER_RESULT *lwg_unparser_result, uchar* serialized, allocator alloc, freeor free, int flags, char endian, uchar hexform);
int lwg_parse_yyparse(void);
int lwg_parse_yyerror(char* s);
void lwg_parse_yynotice(char* s);


#endif /* _WKTPARSE_H */
--------------------------------------------------------------------------------
/src/liblwgeom/wktparse.lex:
--------------------------------------------------------------------------------
/*
 * Written by Ralph Mason ralph.masontelogis.com
 *
 * Copyright Telogis 2004
 * www.telogis.com
 *
 */

%x vals_ok
%{
#include "wktparse.tab.h"
#include <unistd.h>	/* NOTE(review): header name was lost in extraction; upstream PostGIS includes <unistd.h> here - confirm */
#include <stdlib.h>	/* need stdlib for atof() definition */

void init_parser(const char *src);
void close_parser(void);
int lwg_parse_yywrap(void);
int lwg_parse_yylex(void);

static YY_BUFFER_STATE buf_state;
void init_parser(const char *src) { BEGIN(0);buf_state = lwg_parse_yy_scan_string(src); }
void close_parser() { lwg_parse_yy_delete_buffer(buf_state); }
int lwg_parse_yywrap(void){ return 1; }

/* Macro to keep track of the current parse position */
#define UPDATE_YYLLOC() (lwg_parse_yylloc.last_column += yyleng)

/*
 * Start conditions used by the rules below:
 *
 *   vals_ok  is entered after SRID or "(" and left again at ";".  It is an
 *            exclusive condition (%x), so the numeric VALUE rules must carry
 *            the <vals_ok> prefix to be active while it is selected.
 *   INITIAL  is the only state in which a hex WKB string (leading 00 or 01)
 *            is recognised.
 *
 * The optional sign of a number is the character class [-+].  A '|' inside
 * a class is a literal character, not alternation, so it must not be listed.
 */

%}

%%

<vals_ok>[-+]?[0-9]+(\.[0-9]+)?([Ee](\+|-)?[0-9]+)? { lwg_parse_yylval.value=atof(lwg_parse_yytext); UPDATE_YYLLOC(); return VALUE; }
<vals_ok>[-+]?(\.[0-9]+)([Ee](\+|-)?[0-9]+)? { lwg_parse_yylval.value=atof(lwg_parse_yytext); UPDATE_YYLLOC(); return VALUE; }

<INITIAL>00[0-9A-F]* { lwg_parse_yylval.wkb=lwg_parse_yytext; return WKB;}
<INITIAL>01[0-9A-F]* { lwg_parse_yylval.wkb=lwg_parse_yytext; return WKB;}

<*>POINT { UPDATE_YYLLOC(); return POINT; }
<*>POINTM { UPDATE_YYLLOC(); return POINTM; }
<*>LINESTRING { UPDATE_YYLLOC(); return LINESTRING; }
<*>LINESTRINGM { UPDATE_YYLLOC(); return LINESTRINGM; }
<*>CIRCULARSTRING { UPDATE_YYLLOC(); return CIRCULARSTRING; }
<*>CIRCULARSTRINGM { UPDATE_YYLLOC(); return CIRCULARSTRINGM; }
<*>POLYGON { UPDATE_YYLLOC(); return POLYGON; }
<*>POLYGONM { UPDATE_YYLLOC(); return POLYGONM; }
<*>COMPOUNDCURVE { UPDATE_YYLLOC(); return COMPOUNDCURVE; }
<*>COMPOUNDCURVEM { UPDATE_YYLLOC(); return COMPOUNDCURVEM; }
<*>CURVEPOLYGON { UPDATE_YYLLOC(); return CURVEPOLYGON; }
<*>CURVEPOLYGONM { UPDATE_YYLLOC(); return CURVEPOLYGONM; }
<*>MULTIPOINT { UPDATE_YYLLOC(); return MULTIPOINT; }
<*>MULTIPOINTM { UPDATE_YYLLOC(); return MULTIPOINTM; }
<*>MULTILINESTRING { UPDATE_YYLLOC(); return MULTILINESTRING; }
<*>MULTILINESTRINGM { UPDATE_YYLLOC(); return MULTILINESTRINGM; }
<*>MULTICURVE { UPDATE_YYLLOC(); return MULTICURVE; }
<*>MULTICURVEM { UPDATE_YYLLOC(); return MULTICURVEM; }
<*>MULTIPOLYGON { UPDATE_YYLLOC(); return MULTIPOLYGON; }
<*>MULTIPOLYGONM { UPDATE_YYLLOC(); return MULTIPOLYGONM; }
<*>MULTISURFACE { UPDATE_YYLLOC(); return MULTISURFACE; }
<*>MULTISURFACEM { UPDATE_YYLLOC(); return MULTISURFACEM; }
<*>GEOMETRYCOLLECTION { UPDATE_YYLLOC(); return GEOMETRYCOLLECTION; }
<*>GEOMETRYCOLLECTIONM { UPDATE_YYLLOC(); return GEOMETRYCOLLECTIONM; }
<*>SRID { BEGIN(vals_ok); UPDATE_YYLLOC(); return SRID; }
<*>EMPTY { UPDATE_YYLLOC(); return EMPTY; }

<*>\( { BEGIN(vals_ok); UPDATE_YYLLOC(); return LPAREN; }
<*>\) { UPDATE_YYLLOC(); return RPAREN; }
<*>, { UPDATE_YYLLOC(); return COMMA ; }
<*>= { UPDATE_YYLLOC(); return EQUALS ; }
<*>; { BEGIN(0); UPDATE_YYLLOC(); return SEMICOLON; }
<*>[ \t\n\r]+ /*eat whitespace*/ { UPDATE_YYLLOC(); }
<*>. { return lwg_parse_yytext[0]; }

%%

--------------------------------------------------------------------------------
/src/liblwgeom/wktparse.tab.h:
--------------------------------------------------------------------------------
/* A Bison parser, made by GNU Bison 2.3. */

/* Skeleton interface for Bison's Yacc-like parsers in C

   Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
   Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.
*/ 22 | 23 | /* As a special exception, you may create a larger work that contains 24 | part or all of the Bison parser skeleton and distribute that work 25 | under terms of your choice, so long as that work isn't itself a 26 | parser generator using the skeleton or a modified version thereof 27 | as a parser skeleton. Alternatively, if you modify or redistribute 28 | the parser skeleton itself, you may (at your option) remove this 29 | special exception, which will cause the skeleton and the resulting 30 | Bison output files to be licensed under the GNU General Public 31 | License without this special exception. 32 | 33 | This special exception was added by the Free Software Foundation in 34 | version 2.2 of Bison. */ 35 | 36 | /* Tokens. */ 37 | #ifndef YYTOKENTYPE 38 | # define YYTOKENTYPE 39 | /* Put the tokens into the symbol table, so that GDB and other debuggers 40 | know about them. */ 41 | enum yytokentype { 42 | POINT = 258, 43 | LINESTRING = 259, 44 | POLYGON = 260, 45 | MULTIPOINT = 261, 46 | MULTILINESTRING = 262, 47 | MULTIPOLYGON = 263, 48 | GEOMETRYCOLLECTION = 264, 49 | CIRCULARSTRING = 265, 50 | COMPOUNDCURVE = 266, 51 | CURVEPOLYGON = 267, 52 | MULTICURVE = 268, 53 | MULTISURFACE = 269, 54 | POINTM = 270, 55 | LINESTRINGM = 271, 56 | POLYGONM = 272, 57 | MULTIPOINTM = 273, 58 | MULTILINESTRINGM = 274, 59 | MULTIPOLYGONM = 275, 60 | GEOMETRYCOLLECTIONM = 276, 61 | CIRCULARSTRINGM = 277, 62 | COMPOUNDCURVEM = 278, 63 | CURVEPOLYGONM = 279, 64 | MULTICURVEM = 280, 65 | MULTISURFACEM = 281, 66 | SRID = 282, 67 | EMPTY = 283, 68 | VALUE = 284, 69 | LPAREN = 285, 70 | RPAREN = 286, 71 | COMMA = 287, 72 | EQUALS = 288, 73 | SEMICOLON = 289, 74 | WKB = 290 75 | }; 76 | #endif 77 | /* Tokens. 
*/ 78 | #define POINT 258 79 | #define LINESTRING 259 80 | #define POLYGON 260 81 | #define MULTIPOINT 261 82 | #define MULTILINESTRING 262 83 | #define MULTIPOLYGON 263 84 | #define GEOMETRYCOLLECTION 264 85 | #define CIRCULARSTRING 265 86 | #define COMPOUNDCURVE 266 87 | #define CURVEPOLYGON 267 88 | #define MULTICURVE 268 89 | #define MULTISURFACE 269 90 | #define POINTM 270 91 | #define LINESTRINGM 271 92 | #define POLYGONM 272 93 | #define MULTIPOINTM 273 94 | #define MULTILINESTRINGM 274 95 | #define MULTIPOLYGONM 275 96 | #define GEOMETRYCOLLECTIONM 276 97 | #define CIRCULARSTRINGM 277 98 | #define COMPOUNDCURVEM 278 99 | #define CURVEPOLYGONM 279 100 | #define MULTICURVEM 280 101 | #define MULTISURFACEM 281 102 | #define SRID 282 103 | #define EMPTY 283 104 | #define VALUE 284 105 | #define LPAREN 285 106 | #define RPAREN 286 107 | #define COMMA 287 108 | #define EQUALS 288 109 | #define SEMICOLON 289 110 | #define WKB 290 111 | 112 | 113 | 114 | 115 | #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED 116 | typedef union YYSTYPE 117 | #line 22 "wktparse.y" 118 | { 119 | double value; 120 | const char* wkb; 121 | } 122 | /* Line 1489 of yacc.c. */ 123 | #line 124 "y.tab.h" 124 | YYSTYPE; 125 | # define yystype YYSTYPE /* obsolescent; will be withdrawn */ 126 | # define YYSTYPE_IS_DECLARED 1 127 | # define YYSTYPE_IS_TRIVIAL 1 128 | #endif 129 | 130 | extern YYSTYPE lwg_parse_yylval; 131 | 132 | #if ! defined YYLTYPE && ! 
defined YYLTYPE_IS_DECLARED
typedef struct YYLTYPE
{
  int first_line;
  int first_column;
  int last_line;
  int last_column;
} YYLTYPE;
# define yyltype YYLTYPE /* obsolescent; will be withdrawn */
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
#endif

extern YYLTYPE lwg_parse_yylloc;
--------------------------------------------------------------------------------
/src/libsqlite3_geocoder/Makefile:
--------------------------------------------------------------------------------
# Builds the SQLite loadable extension libsqlite3_geocoder.so.
.PHONY: all test test_wkb_compress test_levenshtein clean

all: libsqlite3_geocoder.so
CC=gcc
CFLAGS=-fPIC

# Libraries go after the objects that reference them: a linker that resolves
# symbols left to right discards libraries named before any object needs them.
libsqlite3_geocoder.so: extension.o wkb_compress.o util.o metaphon.o levenshtein.o
	$(CC) $(CFLAGS) -shared $^ -o $@ -lsqlite3 -lm

test: test_wkb_compress test_levenshtein

# Stand-alone test programs built from the TEST section of each source file.
test_wkb_compress: wkb_compress.c
	$(CC) -DTEST -o wkb_compress $^

test_levenshtein: levenshtein.c
	$(CC) -DTEST -o levenshtein $^

clean:
	rm -f *.o *.so wkb_compress levenshtein
--------------------------------------------------------------------------------
/src/libsqlite3_geocoder/Makefile.nix:
--------------------------------------------------------------------------------
.PHONY: all test test_wkb_compress test_levenshtein clean

all: libsqlite3_geocoder.so

# extension.c calls ceil(), so the math library is linked explicitly.
libsqlite3_geocoder.so: extension.o wkb_compress.o util.o metaphon.o levenshtein.o
	$(CC) -shared $^ -o $@ -lm

test: test_wkb_compress test_levenshtein

test_wkb_compress: wkb_compress.c
	$(CC) -DTEST -o wkb_compress $^

test_levenshtein: levenshtein.c
	$(CC) -DTEST -o levenshtein $^

clean:
	rm -f *.o *.so wkb_compress levenshtein
--------------------------------------------------------------------------------
/src/libsqlite3_geocoder/Makefile.redhat:
--------------------------------------------------------------------------------
all: libsqlite3_geocoder.so
CFLAGS=-fPIC
libsqlite3_geocoder.so: extension.o wkb_compress.o util.o \
metaphon.o levenshtein.o 4 | $(CC) $(CFLAGS) -shared $^ -o $@ 5 | 6 | test: test_wkb_compress test_levenshtein 7 | 8 | test_wkb_compress: wkb_compress.c 9 | $(CC) -DTEST -o wkb_compress $^ 10 | 11 | test_levenshtein: levenshtein.c 12 | $(CC) -DTEST -o levenshtein $^ 13 | 14 | clean: 15 | rm -f *.o *.so wkb_compress levenshtein 16 | -------------------------------------------------------------------------------- /src/libsqlite3_geocoder/extension.c: -------------------------------------------------------------------------------- 1 | # include 2 | # include 3 | # include 4 | # include 5 | # include 6 | 7 | # include "extension.h" 8 | 9 | static SQLITE_EXTENSION_INIT1; 10 | 11 | static void 12 | sqlite3_metaphone (sqlite3_context *context, int argc, sqlite3_value **argv) { 13 | const unsigned char *input = sqlite3_value_text(argv[0]); 14 | int max_phones = 0; 15 | char *output; 16 | int len; 17 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { 18 | sqlite3_result_null(context); 19 | return; 20 | } 21 | if (argc > 1) 22 | max_phones = sqlite3_value_int(argv[1]); 23 | if (max_phones <= 0) 24 | max_phones = strlen(input); 25 | output = sqlite3_malloc((max_phones+1)*sizeof(char)); 26 | len = metaphone(input, output, max_phones); 27 | sqlite3_result_text(context, output, len, sqlite3_free); 28 | } 29 | 30 | static void 31 | sqlite3_levenshtein (sqlite3_context *context, int argc, sqlite3_value **argv) { 32 | const unsigned char *s1 = sqlite3_value_text(argv[0]), 33 | *s2 = sqlite3_value_text(argv[1]); 34 | double dist; 35 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL || 36 | sqlite3_value_type(argv[1]) == SQLITE_NULL) { 37 | sqlite3_result_null(context); 38 | return; 39 | } 40 | dist = levenshtein_distance(s1, s2); 41 | sqlite3_result_double(context, dist); 42 | } 43 | 44 | static void 45 | sqlite3_digit_suffix (sqlite3_context *context, 46 | int argc, sqlite3_value **argv) { 47 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { 48 | sqlite3_result_null(context); 49 
| return; 50 | } 51 | const unsigned char *input = sqlite3_value_text(argv[0]); 52 | char *output = sqlite3_malloc((strlen(input)+1) * sizeof(char)); 53 | size_t len = digit_suffix(input, output); 54 | sqlite3_result_text(context, output, len, sqlite3_free); 55 | } 56 | 57 | static void 58 | sqlite3_nondigit_prefix (sqlite3_context *context, 59 | int argc, sqlite3_value **argv) { 60 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { 61 | sqlite3_result_null(context); 62 | return; 63 | } 64 | const unsigned char *input = sqlite3_value_text(argv[0]); 65 | char *output = sqlite3_malloc((strlen(input)+1) * sizeof(char)); 66 | size_t len = nondigit_prefix(input, output); 67 | sqlite3_result_text(context, output, len, sqlite3_free); 68 | } 69 | 70 | 71 | static void 72 | sqlite3_compress_wkb_line (sqlite3_context *context, 73 | int argc, sqlite3_value **argv) { 74 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { 75 | sqlite3_result_null(context); 76 | return; 77 | } 78 | unsigned long input_len = sqlite3_value_bytes(argv[0]); 79 | const void *input = sqlite3_value_blob(argv[0]); 80 | unsigned long output_len = ceil((input_len-9)/8.0) * 4; 81 | unsigned long len = 0; 82 | void *output = sqlite3_malloc(output_len); 83 | len = compress_wkb_line(output, input, input_len); 84 | assert(len == output_len); 85 | sqlite3_result_blob(context, output, len, sqlite3_free); 86 | } 87 | 88 | static void 89 | sqlite3_uncompress_wkb_line (sqlite3_context *context, 90 | int argc, sqlite3_value **argv) { 91 | unsigned long input_len = sqlite3_value_bytes(argv[0]); 92 | const void *input = sqlite3_value_blob(argv[0]); 93 | unsigned long output_len = input_len*2+9; 94 | unsigned long len = 0; 95 | void *output = sqlite3_malloc(output_len); 96 | len = uncompress_wkb_line(output, input, input_len); 97 | assert(len == output_len); 98 | sqlite3_result_blob(context, output, len, sqlite3_free); 99 | } 100 | 101 | int sqlite3_extension_init (sqlite3 * db, char **pzErrMsg, 102 | const 
sqlite3_api_routines *pApi) { 103 | SQLITE_EXTENSION_INIT2(pApi); 104 | 105 | sqlite3_create_function(db, "metaphone", 1, SQLITE_ANY, 106 | NULL, sqlite3_metaphone, NULL, NULL); 107 | sqlite3_create_function(db, "metaphone", 2, SQLITE_ANY, 108 | NULL, sqlite3_metaphone, NULL, NULL); 109 | 110 | sqlite3_create_function(db, "levenshtein", 2, SQLITE_ANY, 111 | NULL, sqlite3_levenshtein, NULL, NULL); 112 | sqlite3_create_function(db, "compress_wkb_line", 1, SQLITE_ANY, 113 | NULL, sqlite3_compress_wkb_line, NULL, NULL); 114 | sqlite3_create_function(db, "uncompress_wkb_line", 1, SQLITE_ANY, 115 | NULL, sqlite3_uncompress_wkb_line, NULL, NULL); 116 | sqlite3_create_function(db, "digit_suffix", 1, SQLITE_ANY, 117 | NULL, sqlite3_digit_suffix, NULL, NULL); 118 | sqlite3_create_function(db, "nondigit_prefix", 1, SQLITE_ANY, 119 | NULL, sqlite3_nondigit_prefix, NULL, NULL); 120 | return 0; 121 | } 122 | -------------------------------------------------------------------------------- /src/libsqlite3_geocoder/extension.h: -------------------------------------------------------------------------------- 1 | #ifndef SQLITE3_GEOCODER 2 | #define SQLITE3_GEOCODER 3 | 4 | #include 5 | 6 | int metaphone(const char *Word, char *Metaph, int max_phones); 7 | double levenshtein_distance (const unsigned char *s1, const unsigned char *s2); 8 | signed int rindex_nondigit (const char *string); 9 | signed int nondigit_prefix (const char *input, char *output); 10 | uint32_t compress_wkb_line (void *dest, const void *src, uint32_t len); 11 | uint32_t uncompress_wkb_line (void *dest, const void *src, uint32_t len); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /src/libsqlite3_geocoder/levenshtein.c: -------------------------------------------------------------------------------- 1 | # include 2 | # define STRLEN_MAX 256 3 | # define min(x, y) ((x) < (y) ? (x) : (y)) 4 | # define max(x, y) ((x) > (y) ? 
(x) : (y)) 5 | # define NO_CASE (~(unsigned char)32) 6 | # define eql(x, y) (((x) & NO_CASE) == ((y) & NO_CASE)) 7 | 8 | static int d[STRLEN_MAX][STRLEN_MAX]; // this isn't thread safe 9 | 10 | double levenshtein_distance (const unsigned char *s1, const unsigned char *s2) { 11 | const size_t len1 = min(strlen(s1), STRLEN_MAX-1), 12 | len2 = min(strlen(s2), STRLEN_MAX-1); 13 | int cost, i, j; 14 | 15 | for (i = 1; i <= len1; ++i) d[i][0] = i; 16 | for (i = 1; i <= len2; ++i) d[0][i] = i; 17 | for (i = 1; i <= len1; ++i) { 18 | for (j = 1; j <= len2; ++j) { 19 | cost = (eql(s1[i-1], s2[j-1]) ? 0 : 1); 20 | d[i][j] = min(min( 21 | d[i-1][j ] + 1, /* deletion */ 22 | d[i ][j-1] + 1), /* insertion */ 23 | d[i-1][j-1] + cost); /* substitution */ 24 | if (i > 1 && j > 1 && eql(s1[i-1], s2[j-2]) && eql(s1[i-2], s2[j-1])) { 25 | d[i][j] = min( d[i][j], 26 | d[i-2][j-2] + cost ); /* transposition */ 27 | } 28 | } 29 | } 30 | return (d[len1][len2] / (double) max(len1, len2)); 31 | } 32 | 33 | #ifdef TEST 34 | #include 35 | 36 | int main (int argc, char **argv) { 37 | if (argc < 3) return -1; 38 | printf("%.1f%%\n", levenshtein_distance(argv[1],argv[2]) * 100); 39 | return 0; 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /src/libsqlite3_geocoder/util.c: -------------------------------------------------------------------------------- 1 | # include 2 | # include 3 | 4 | int address_metaphone(const char *input, char *output, int max_phones) { 5 | const char *n = input; 6 | int i = 0; 7 | if (isdigit(*n)) { 8 | while (i < max_phones && isdigit(n[i]) && n[i] != '\0') 9 | *output++ = n[i++]; 10 | *output = '\0'; 11 | return 1; 12 | } else { 13 | return metaphone(input, output, max_phones); 14 | } 15 | } 16 | 17 | signed int rindex_nondigit (const char *string) { 18 | signed int i = strlen(string); 19 | if (!i) return -1; 20 | for (i--; i >= 0 && isdigit(string[i]); i--); 21 | return i; 22 | } 23 | 24 | signed int 
digit_suffix (const char *input, char *output) { 25 | signed int i = rindex_nondigit(input); 26 | strcpy(output, input+i+1); 27 | return strlen(output); 28 | } 29 | 30 | signed int nondigit_prefix (const char *input, char *output) { 31 | signed int i = rindex_nondigit(input); 32 | if (i++ >= 0) { 33 | strncpy(output, input, i); 34 | output[i] = '\0'; 35 | } 36 | return i; 37 | } 38 | -------------------------------------------------------------------------------- /src/libsqlite3_geocoder/wkb_compress.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | uint32_t compress_wkb_line (void *dest, const void *src, uint32_t len) { 5 | uint32_t d, s; 6 | double value; 7 | if (!len) return 0; 8 | for (s = 9, d = 0; s < len; d += 4, s += 8) { 9 | value = *(double *)(src + s); 10 | value *= 1000000; 11 | *(int32_t *)(dest + d) = (int32_t) value; 12 | } 13 | return d; 14 | } 15 | 16 | uint32_t uncompress_wkb_line (void *dest, const void *src, uint32_t len) { 17 | uint32_t d, s; 18 | double value; 19 | if (!len) return 0; 20 | memcpy(dest, "\01\02\00\00\00\06\00\00\00", 10); 21 | for (s = 0, d = 9; s < len; s += 4, d += 8) { 22 | value = (double) *(int32_t *)(src + s); 23 | value /= 1000000; 24 | *(double *)(dest + d) = value; 25 | } 26 | return d; 27 | } 28 | 29 | 30 | #ifdef TEST 31 | 32 | #include 33 | int main (int argc, char *argv) { 34 | char hex[1024], *scan; 35 | char wkb[512]; 36 | unsigned long len, clen; 37 | 38 | while (!feof(stdin)) { 39 | fgets(hex, sizeof(hex), stdin); 40 | for (scan = hex, len = 0; *scan && sizeof(wkb)>len; scan += 2, len++) { 41 | if (sscanf(scan, "%2x", (uint32_t *)(wkb+len)) != 1) break; 42 | } 43 | clen = compress_wkb_line(hex, wkb, len); 44 | printf("before: %lu, after: %lu\n", len, clen); 45 | len = uncompress_wkb_line(wkb, hex, clen); 46 | printf("before: %lu, after: %lu\n", clen, len); 47 | for (scan = wkb + 9; scan < wkb + len; scan += 8) { 48 | printf("%.6f ", *(double 
*)scan); 49 | } 50 | printf("\n"); 51 | } 52 | } 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /src/metaphone/Makefile: -------------------------------------------------------------------------------- 1 | all: metaphone.so 2 | 3 | metaphone.so: extension.o metaphon.o 4 | $(CC) -fPIC -shared $^ -o $@ 5 | 6 | clean: 7 | rm -f *.o *.so 8 | -------------------------------------------------------------------------------- /src/metaphone/README: -------------------------------------------------------------------------------- 1 | = SQLite 3 Metaphone extension = 2 | 3 | * This library implements the Metaphone algorithm, originally developed by 4 | Lawrence Phillips, as an SQLite 3 extension function: 5 | 6 | http://en.wikipedia.org/wiki/Metaphone 7 | 8 | * This code is based around the original public domain implementation in 9 | C by Gary A. Parker, as provided by Sadi Evren Seker: 10 | 11 | http://www.shedai.net/c/new/METAPHON.C 12 | 13 | * Like SQLite and the Parker implementation of Metaphone, this code 14 | is provided in the public domain, in the hope that it will be useful. 15 | 16 | * To compile the code, simply run `make`. You must have GNU Make and GCC 17 | installed. 18 | 19 | * The module implements one function, metaphone(), which takes a string 20 | to convert to a metaphone representation as its first argument, and an 21 | optional second argument to specify the maximum length of the output. 22 | 23 | * To use the code, run `sqlite3` and enter the following: 24 | 25 | {{{ 26 | sqlite> .load /path/to/metaphone.so 27 | -- you can use ./metaphone.so if the .so is in your 28 | -- current directory, or just metaphone.so if it's 29 | -- somewhere in your library path. 
30 | sqlite> select metaphone("Schuyler"); 31 | SKLR 32 | sqlite> select metaphone("Schuyler", 3); 33 | SKL 34 | sqlite> select metaphone("Skyler"); 35 | SKLR 36 | sqlite> select metaphone("Skylar"); 37 | SKLR 38 | sqlite> select metaphone("SQLite rules!"); 39 | SKLTRLS 40 | sqlite> select metaphone("SQLite roolz!!!1!"); 41 | SKLTRLS 42 | }}} 43 | 44 | Questions? Comments? Complaints? Approbation? Email schuyler@nocat.net. 45 | 46 | Schuyler Erle 47 | 9 March 2008 48 | 49 | =30= 50 | -------------------------------------------------------------------------------- /src/metaphone/extension.c: -------------------------------------------------------------------------------- 1 | # include 2 | # include 3 | # include 4 | # include 5 | 6 | static SQLITE_EXTENSION_INIT1; 7 | 8 | static void 9 | sqlite3_metaphone (sqlite3_context *context, int argc, sqlite3_value **argv) { 10 | const unsigned char *input = sqlite3_value_text(argv[0]); 11 | int max_phones = 0; 12 | char *output; 13 | int len; 14 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { 15 | sqlite3_result_null(context); 16 | return; 17 | } 18 | if (argc > 1) 19 | max_phones = sqlite3_value_int(argv[1]); 20 | if (max_phones <= 0) 21 | max_phones = strlen(input); 22 | output = sqlite3_malloc((max_phones+1)*sizeof(char)); 23 | len = metaphone(input, output, max_phones); 24 | sqlite3_result_text(context, output, len, SQLITE_TRANSIENT); 25 | } 26 | 27 | int sqlite3_extension_init (sqlite3 * db, char **pzErrMsg, 28 | const sqlite3_api_routines *pApi) { 29 | SQLITE_EXTENSION_INIT2(pApi); 30 | sqlite3_create_function(db, "metaphone", 1, SQLITE_ANY, 31 | NULL, sqlite3_metaphone, NULL, NULL); 32 | sqlite3_create_function(db, "metaphone", 2, SQLITE_ANY, 33 | NULL, sqlite3_metaphone, NULL, NULL); 34 | return 0; 35 | } 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/metaphone/metaphon.c: -------------------------------------------------------------------------------- 1 | /* 
/*
**  Character coding array
*/

static char vsvfn[26] = {
      1,16,4,16,9,2,4,16,9,2,0,2,2,2,1,4,0,2,4,4,1,0,0,0,8,0};
/*    A  B C  D E F G  H I J K L M N O P Q R S T U V W X Y Z      */

/*
**  Macros to access the character coding array.
**
**  The lookups are routinely applied to the '\0' padding around the word
**  (look-ahead past the last letter, look-behind before the first).  Any
**  character outside 'A'..'Z' therefore reports "no attributes" instead of
**  indexing vsvfn[] out of bounds.
*/

#define vsvfn_bits(x) (((x) >= 'A' && (x) <= 'Z') ? vsvfn[(x) - 'A'] : 0)

#define vowel(x)  (vsvfn_bits(x) & 1)   /* AEIOU    */
#define same(x)   (vsvfn_bits(x) & 2)   /* FJLMNR   */
#define varson(x) (vsvfn_bits(x) & 4)   /* CGPST    */
#define frontv(x) (vsvfn_bits(x) & 8)   /* EIY      */
#define noghf(x)  (vsvfn_bits(x) & 16)  /* BDH      */

/* The 'G' rule looks back as far as n[-4] and ahead as far as n[+3]. */
#define METAPH_LEAD_PAD 4
#define METAPH_TAIL_PAD 3

/*
**  metaphone - compute the Metaphone code of Word.
**
**  Word        NUL-terminated input; characters that are not letters are
**              dropped and the rest is upper-cased before coding.
**  Metaph      output buffer; must hold max_phones + 1 bytes.
**  max_phones  maximum number of code characters to produce.
**
**  Returns the length of the code written to Metaph, with two exceptions:
**  1 is returned (and Metaph is the empty string) when Word contains no
**  letters, and 0 when the working buffer cannot be allocated.
*/

int metaphone(const char *Word, char *Metaph, int max_phones)
{
      char *n, *n_start, *n_end;    /* Pointers to string               */
      char *metaph_start = Metaph, *metaph_end;
                                    /* Pointers to metaph               */
      int ntrans_len = (int)strlen(Word) + METAPH_LEAD_PAD + METAPH_TAIL_PAD;
      char *ntrans = (char *)malloc(sizeof(char) * ntrans_len);
                                    /* Word with uppercase letters      */
      int KSflag;                   /* State flag for X translation     */
      int c;

      if (ntrans == NULL) {
            Metaph[0] = '\0';
            return 0;
      }
      if (max_phones < 0)
            max_phones = 0;         /* keep metaph_end inside Metaph    */

      /*
      ** Zero the whole buffer so that every look-behind and look-ahead
      ** byte around the word is a defined '\0'.
      */

      memset(ntrans, 0, ntrans_len);

      /*
      ** Copy word to internal buffer, dropping non-alphabetic characters
      ** and converting to upper case.  Only 'A'..'Z' is kept because the
      ** coding table has no entry for anything else.
      */

      n_start = ntrans + METAPH_LEAD_PAD;
      for (n = n_start, n_end = ntrans + ntrans_len - METAPH_TAIL_PAD;
           *Word && n < n_end; ++Word)
      {
            c = toupper((unsigned char)*Word);
            if (c >= 'A' && c <= 'Z')
                  *n++ = (char)c;
      }

      if (n == n_start) {
            free(ntrans);
            Metaph[0]='\0';
            return 1;               /* Return if zero characters        */
      }
      else  n_end = n;              /* Set end of string pointer        */

      n = n_start;

      /*
      ** Check for PN, KN, GN, AE, WR, WH, and X at start
      */

      switch (*n)
      {
      case 'P':
      case 'K':
      case 'G':
            if ('N' == *(n + 1))
                  *n++ = '\0';
            break;

      case 'A':
            if ('E' == *(n + 1))
                  *n++ = '\0';
            break;

      case 'W':
            if ('R' == *(n + 1))
                  *n++ = '\0';
            else if ('H' == *(n + 1))
            {
                  *(n + 1) = *n;
                  *n++ = '\0';
            }
            break;

      case 'X':
            *n = 'S';
            break;
      }

      /*
      ** Now loop through the string, stopping at the end of the string
      ** or when the computed Metaphone code is max_phones characters long.
      */

      KSflag = 0;             /* State flag for KS translation          */
      for (metaph_end = Metaph + max_phones, n_start = n;
           n <= n_end && Metaph < metaph_end; ++n)
      {
            if (KSflag)
            {
                  KSflag = 0;
                  *Metaph++ = *n;
            }
            else
            {
                  /* Drop duplicates except for CC    */

                  if (*(n - 1) == *n && *n != 'C')
                        continue;

                  /* Check for F J L M N R  or first letter vowel */

                  if (same(*n) || (n == n_start && vowel(*n)))
                        *Metaph++ = *n;
                  else switch (*n)
                  {
                  case 'B':
                        if (n < n_end || *(n - 1) != 'M')
                              *Metaph++ = *n;
                        break;

                  case 'C':
                        if (*(n - 1) != 'S' || !frontv(*(n + 1)))
                        {
                              if ('I' == *(n + 1) && 'A' == *(n + 2))
                                    *Metaph++ = 'X';
                              else if (frontv(*(n + 1)))
                                    *Metaph++ = 'S';
                              else if ('H' == *(n + 1))
                                    *Metaph++ = ((n == n_start &&
                                          !vowel(*(n + 2))) ||
                                          'S' == *(n - 1)) ? 'K' : 'X';
                              else  *Metaph++ = 'K';
                        }
                        break;

                  case 'D':
                        *Metaph++ = ('G' == *(n + 1) && frontv(*(n + 2))) ?
                              'J' : 'T';
                        break;

                  case 'G':
                        if ((*(n + 1) != 'H' || vowel(*(n + 2))) &&
                              (*(n + 1) != 'N' || ((n + 1) < n_end &&
                              (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
                              (*(n - 1) != 'D' || !frontv(*(n + 1))))
                        {
                              *Metaph++ = (frontv(*(n + 1)) &&
                                    *(n + 2) != 'G') ? 'J' : 'K';
                        }
                        else if ('H' == *(n + 1) && !noghf(*(n - 3)) &&
                              *(n - 4) != 'H')
                        {
                              *Metaph++ = 'F';
                        }
                        break;

                  case 'H':
                        if (!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
                              vowel(*(n + 1))))
                        {
                              *Metaph++ = 'H';
                        }
                        break;

                  case 'K':
                        if (*(n - 1) != 'C')
                              *Metaph++ = 'K';
                        break;

                  case 'P':
                        *Metaph++ = ('H' == *(n + 1)) ? 'F' : 'P';
                        break;

                  case 'Q':
                        *Metaph++ = 'K';
                        break;

                  case 'S':
                        *Metaph++ = ('H' == *(n + 1) || ('I' == *(n + 1) &&
                              ('O' == *(n + 2) || 'A' == *(n + 2)))) ?
                              'X' : 'S';
                        break;

                  case 'T':
                        if ('I' == *(n + 1) && ('O' == *(n + 2) ||
                              'A' == *(n + 2)))
                        {
                              *Metaph++ = 'X';
                        }
                        else if ('H' == *(n + 1))
                              /*
                              ** NOTE(review): TH is coded as the letter
                              ** 'O'.  Left unchanged so that codes stay
                              ** comparable with ones already stored --
                              ** confirm before altering.
                              */
                              *Metaph++ = 'O';
                        else if (*(n + 1) != 'C' || *(n + 2) != 'H')
                              *Metaph++ = 'T';
                        break;

                  case 'V':
                        *Metaph++ = 'F';
                        break;

                  case 'W':
                  case 'Y':
                        if (vowel(*(n + 1)))
                              *Metaph++ = *n;
                        break;

                  case 'X':
                        if (n == n_start)
                              *Metaph++ = 'S';
                        else
                        {
                              *Metaph++ = 'K';
                              KSflag = 1;
                        }
                        break;

                  case 'Z':
                        *Metaph++ = 'S';
                        break;
                  }
            }
      }

      *Metaph = '\0';
      free(ntrans);
      return strlen(metaph_start);
}
CFLAGS=-g -O2 -fPIC -DPIC -Wall -Wmissing-prototypes

# Filenames with extension as determined by the OS
SHP2SQLITE=shp2sqlite
LIBLWGEOM=../liblwgeom/liblwgeom.a

# iconv flags
ICONV_LDFLAGS=-lc

# These targets name actions, not files; declaring them phony keeps a stray
# file called "all", "install" or "clean" from suppressing the rule.
.PHONY: all install clean

all: $(SHP2SQLITE)

# Build the liblwgeom archive in its own directory.  $(MAKE) rather than a
# literal "make" so the sub-make inherits command-line flags and the
# jobserver, and runs the same make program as the parent.
$(LIBLWGEOM):
	$(MAKE) -C ../liblwgeom

$(SHP2SQLITE): shpopen.o dbfopen.o getopt.o shp2sqlite.o $(LIBLWGEOM)
	$(CC) $(CFLAGS) $^ $(ICONV_LDFLAGS) -lm -o $@

# Copy the built binary into the top-level build/ directory.
install: all
	@cp $(SHP2SQLITE) ../../build/

clean:
	@rm -f *.o $(SHP2SQLITE)
CFLAGS=-g -O2 -fPIC -DPIC -Wall -Wmissing-prototypes

# Filenames with extension as determined by the OS
SHP2SQLITE=shp2sqlite
LIBLWGEOM=../liblwgeom/liblwgeom.a

# iconv flags
ICONV_LDFLAGS=-lc

# These targets name actions, not files; declaring them phony keeps a stray
# file called "all", "install" or "clean" from suppressing the rule.
.PHONY: all install clean

all: $(SHP2SQLITE)

# Build the liblwgeom archive in its own directory.  $(MAKE) rather than a
# literal "make" so the sub-make inherits command-line flags and the
# jobserver, and runs the same make program as the parent.
$(LIBLWGEOM):
	$(MAKE) -C ../liblwgeom

$(SHP2SQLITE): shpopen.o dbfopen.o getopt.o shp2sqlite.o $(LIBLWGEOM)
	$(CC) $(CFLAGS) $^ $(ICONV_LDFLAGS) -lm -o $@

# Copy the built binary into the top-level bin directory.
install: all
	@cp $(SHP2SQLITE) ../../bin

clean:
	@rm -f *.o $(SHP2SQLITE)
CC=gcc
CFLAGS=-g -O2 -fPIC -DPIC -Wall -Wmissing-prototypes

# Filenames with extension as determined by the OS
SHP2SQLITE=shp2sqlite
LIBLWGEOM=../liblwgeom/liblwgeom.a

# iconv flags
ICONV_LDFLAGS=-lc

# These targets name actions, not files; declaring them phony keeps a stray
# file called "all", "install" or "clean" from suppressing the rule.
.PHONY: all install clean

all: $(SHP2SQLITE)

# Build the liblwgeom archive in its own directory.  $(MAKE) rather than a
# literal "make" so the sub-make inherits command-line flags and the
# jobserver, and runs the same make program as the parent.
$(LIBLWGEOM):
	$(MAKE) -C ../liblwgeom

$(SHP2SQLITE): shpopen.o dbfopen.o getopt.o shp2sqlite.o $(LIBLWGEOM)
	$(CC) $(CFLAGS) $^ $(ICONV_LDFLAGS) -lm -o $@

# Copy the built binary into the top-level bin directory.
install: all
	@cp $(SHP2SQLITE) ../../bin

clean:
	@rm -f *.o $(SHP2SQLITE)
28 | Also, when `ordering' is RETURN_IN_ORDER, 29 | each non-option ARGV-element is returned here. */ 30 | 31 | extern char *optarg; 32 | 33 | /* Index in ARGV of the next element to be scanned. 34 | This is used for communication to and from the caller 35 | and for communication between successive calls to `getopt'. 36 | 37 | On entry to `getopt', zero means this is the first call; initialize. 38 | 39 | When `getopt' returns EOF, this is the index of the first of the 40 | non-option elements that the caller should itself scan. 41 | 42 | Otherwise, `optind' communicates from one call to the next 43 | how much of ARGV has been scanned so far. */ 44 | 45 | extern int optind; 46 | 47 | /* Callers store zero here to inhibit the error message `getopt' prints 48 | for unrecognized options. */ 49 | 50 | extern int opterr; 51 | 52 | /* Set to an option character which was unrecognized. */ 53 | 54 | extern int optopt; 55 | 56 | /* Describe the long-named options requested by the application. 57 | The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector 58 | of `struct option' terminated by an element containing a name which is 59 | zero. 60 | 61 | The field `has_arg' is: 62 | no_argument (or 0) if the option does not take an argument, 63 | required_argument (or 1) if the option requires an argument, 64 | optional_argument (or 2) if the option takes an optional argument. 65 | 66 | If the field `flag' is not NULL, it points to a variable that is set 67 | to the value given in the field `val' when the option is found, but 68 | left unchanged if the option is not found. 69 | 70 | To have a long-named option do something other than set an `int' to 71 | a compiled-in constant, such as set a value from `optarg', set the 72 | option's `flag' field to zero and its `val' field to a nonzero 73 | value (the equivalent single-letter option character, if there is 74 | one). 
/* One entry of the long-option table passed to pgis_getopt_long and
   pgis_getopt_long_only.  The table is terminated by an entry whose
   `name' is zero.  */
struct option
{
  /* The option's long name.  */
#if __STDC__
  const char *name;
#else
  char *name;
#endif
  /* has_arg can't be an enum because some compilers complain about
     type mismatches in all the code that assumes it is an int.  */
  /* One of no_argument, required_argument or optional_argument.  */
  int has_arg;
  /* If not NULL, points to a variable that is set to `val' when the
     option is found and left unchanged otherwise.  */
  int *flag;
  /* Value stored through `flag', or returned by getopt when `flag' is
     zero.  */
  int val;
};
*/ 111 | extern int _pgis_getopt_internal (int argc, char *const *argv, 112 | const char *shortopts, 113 | const struct option *longopts, int *longind, 114 | int long_only); 115 | #else /* not __STDC__ */ 116 | extern int pgis_getopt (); 117 | extern int pgis_getopt_long (); 118 | extern int pgis_getopt_long_only (); 119 | 120 | extern int _pgis_getopt_internal (); 121 | #endif /* not __STDC__ */ 122 | 123 | #ifdef __cplusplus 124 | } 125 | #endif 126 | 127 | #endif /* _GETOPT_H */ 128 | -------------------------------------------------------------------------------- /test/address.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift '../lib' 2 | 3 | require 'test/unit' 4 | require 'set' 5 | require 'geocoder/us/address' 6 | 7 | include Geocoder::US 8 | 9 | class TestAddress < Test::Unit::TestCase 10 | def test_new 11 | addr = Address.new("1600 Pennsylvania Av., Washington DC") 12 | assert_equal "1600 Pennsylvania Av, Washington DC", addr.text 13 | end 14 | def test_clean 15 | fixtures = [ 16 | [ "cleaned text", "cleaned: text!" ], 17 | [ "cleaned-text 2", "cleaned-text: #2?" ], 18 | [ "it's working 1/2", "~it's working 1/2~" ], 19 | [ "it's working, yes", "it's working, yes...?" ], 20 | [ "it's working & well", "it's working & well?" 
] 21 | ] 22 | fixtures.each {|output, given| 23 | assert_equal output, Address.new(given).text 24 | } 25 | end 26 | def test_expand_numbers 27 | num_list = ["5", "fifth", "five"] 28 | num_list.each {|n| 29 | addr = Address.new(n) 30 | assert_equal num_list, addr.expand_numbers(n).to_a.sort 31 | } 32 | end 33 | def test_city_parse 34 | places = [ 35 | [ "New York, NY", "New York", "NY", "" ], 36 | [ "NY", "", "NY", "" ], 37 | [ "New York", "New York", "NY", "" ], 38 | [ "Philadelphia", "Philadelphia", "", "" ], 39 | [ "Philadelphia PA", "Philadelphia", "PA", "" ], 40 | [ "Philadelphia, PA", "Philadelphia", "PA", "" ], 41 | [ "Philadelphia, Pennsylvania", "Philadelphia", "PA", "" ], 42 | [ "Philadelphia, Pennsylvania 19131", "Philadelphia", "PA", "19131" ], 43 | [ "Philadelphia 19131", "Philadelphia", "", "19131" ], 44 | [ "Pennsylvania 19131", "Pennsylvania", "PA", "19131" ], # kind of a misfeature 45 | [ "19131", "", "", "19131" ], 46 | [ "19131-9999", "", "", "19131" ], 47 | ] 48 | for fixture in places 49 | addr = Address.new fixture[0] 50 | [:city, :state, :zip].zip(fixture[1..3]).each {|key,val| 51 | result = addr.send key 52 | result = [result.downcase] unless result.kind_of? Array 53 | if result.empty? 
54 | assert_equal val, "", key.to_s + " test no result " + fixture.join("/") 55 | else 56 | assert result.member?(val.downcase), key.to_s + " test " + result.inspect + fixture.join("/") 57 | end 58 | } 59 | end 60 | end 61 | 62 | def test_po_box 63 | addr_po = Address.new "PO Box 1111 Herndon VA 20171" 64 | assert addr_po.po_box?, true 65 | end 66 | 67 | 68 | 69 | def test_parse 70 | addrs = [ 71 | {:text => "1600 Pennsylvania Av., Washington DC 20050", 72 | :number => "1600", 73 | :street => "Pennsylvania Ave", 74 | :city => "Washington", 75 | :state => "DC", 76 | :zip => "20050"}, 77 | 78 | {:text => "1600 Pennsylvania, Washington DC", 79 | :number => "1600", 80 | :street => "Pennsylvania", 81 | :city => "Washington", 82 | :state => "DC"}, 83 | 84 | {:text => "1600 Pennsylvania Washington DC", 85 | :number => "1600", 86 | :street => "Pennsylvania Washington", 87 | :city => "Pennsylvania Washington", # FIXME 88 | :state => "DC"}, 89 | 90 | {:text => "1600 Pennsylvania Washington", 91 | :number => "1600", 92 | :street => "Pennsylvania", 93 | :city => "Washington", 94 | :state => "WA"}, # FIXME 95 | 96 | {:text => "1600 Pennsylvania 20050", 97 | :number => "1600", 98 | :street => "Pennsylvania", # FIXME 99 | :zip => "20050"}, 100 | 101 | {:text => "1600 Pennsylvania Av, 20050-9999", 102 | :number => "1600", 103 | :street => "Pennsylvania Ave", 104 | :zip => "20050"}, 105 | 106 | {:text => "1005 Gravenstein Highway North, Sebastopol CA", 107 | :number => "1005", 108 | :street => "Gravenstein Hwy N", 109 | :city => "Sebastopol", 110 | :state => "CA"}, 111 | 112 | {:text => "100 N 7th St, Brooklyn", 113 | :number => "100", 114 | :street => "N 7 St", 115 | :city => "Brooklyn"}, 116 | 117 | {:text => "100 N Seventh St, Brooklyn", 118 | :number => "100", 119 | :street => "N 7 St", 120 | :city => "Brooklyn"}, 121 | 122 | {:text => "100 Central Park West, New York, NY", 123 | :number => "100", 124 | :street => "Central Park W", 125 | :city => "New York", 126 | :state => 
"NY"}, 127 | 128 | {:text => "100 Central Park West, 10010", 129 | :number => "100", 130 | :street => "Central Park W", 131 | :zip => "10010"}, 132 | 133 | {:text => "1400 Avenue of the Americas, New York, NY 10019", 134 | :number => "1400", 135 | :street => "Ave of the Americas", 136 | :city => "New York", 137 | :state => "NY"}, 138 | 139 | {:text => "1400 Avenue of the Americas, New York", 140 | :number => "1400", 141 | :street => "Ave of the Americas", 142 | :city => "New York"}, 143 | 144 | {:text => "1400 Ave of the Americas, New York", 145 | :number => "1400", 146 | :street => "Ave of the Americas", 147 | :city => "New York"}, 148 | 149 | {:text => "1400 Av of the Americas, New York", 150 | :number => "1400", 151 | :street => "Ave of the Americas", 152 | :city => "New York"}, 153 | 154 | {:text => "1400 Av of the Americas New York", 155 | :number => "1400", 156 | :street => "Ave of the Americas", 157 | :city => "New York"}, 158 | 159 | ] 160 | for fixture in addrs 161 | text = fixture.delete(:text) 162 | addr = Address.new(text) 163 | for key, val in fixture 164 | result = addr.send key 165 | if result.kind_of? Array 166 | result.map! 
{|str| str.downcase} 167 | assert result.member?(val.downcase), "#{text} (#{key}) = #{result.inspect}" 168 | else 169 | assert_equal val, result, "#{text} (#{key}) = #{result.inspect}" 170 | end 171 | end 172 | end 173 | end 174 | 175 | def test_skip_parse 176 | addresses = [ 177 | {:street => "1233 Main St", :city => "Springfield", :region => "VA", :postal_code => "12345", :final_number => "1233", :parsed_street => "main st"}, 178 | {:street => "somewhere Ln", :city => "Somewhere", :region => "WI", :postal_code => "22222", :number => "402", :parsed_street => "somewhere ln", :final_number => "402"}, 179 | ] 180 | for preparsed_address in addresses 181 | address_for_geocode = Address.new preparsed_address 182 | assert_equal preparsed_address[:parsed_street],address_for_geocode.street[0] 183 | assert_equal preparsed_address[:final_number],address_for_geocode.number 184 | assert_equal preparsed_address[:city],address_for_geocode.city[0] 185 | assert_equal preparsed_address[:region],address_for_geocode.state 186 | assert_equal preparsed_address[:postal_code],address_for_geocode.zip 187 | end 188 | end 189 | 190 | def test_states_abbreviated_in_skip_parse 191 | addresses = [ 192 | {:street => "123 Main St", :city => "Springfield", :region => "Virginia", :postal_code => "12345",:state_abbrev => "VA"}, 193 | {:street => "402 Somewhere Ln", :city => "Somewhere", :region => "WI", :postal_code => "22222", :state_abbrev => "WI"}, 194 | ] 195 | for preparsed_address in addresses 196 | address_for_geocode = Address.new preparsed_address 197 | assert_equal preparsed_address[:state_abbrev],address_for_geocode.state 198 | end 199 | 200 | end 201 | 202 | def test_address_hash 203 | addresses = [ 204 | {:address => "Herndon, VA", :place_check => ["herndon"]}, 205 | {:address => "Arlington, VA", :place_check => ["arlington"]} 206 | ] 207 | for preparsed_address in addresses 208 | address_for_geocode = Address.new preparsed_address 209 | assert_equal 
# Tests for Map lookups and for the presence of the lookup tables that
# geocoder/us/constants defines.
class TestConstants < Test::Unit::TestCase
  # Build the fixture map before every test.  This used to be done by
  # overriding TestCase#initialize, which ties the test to the framework's
  # constructor signature; setup is the hook meant for fixtures.
  def setup
    @map = Map[
      "Abbreviation" => "abbr",
      "Two words" => "2words",
      "Some three words" => "3words"
    ]
  end

  # Map[] must produce a Map, which is also a Hash.
  def test_class_constructor
    assert_kind_of Map, @map
    assert_kind_of Hash, @map
  end

  # key? must accept the original key in any letter case, and the mapped
  # value itself, but not an unrelated string.
  def test_key
    assert @map.key?( "Abbreviation" )
    assert @map.key?( "abbreviation" )
    assert !(@map.key? "abbreviation?")
    assert @map.key?( "abbr" )
    assert @map.key?( "Two words" )
    assert @map.key?( "2words" )
  end

  # [] must resolve the same spellings that key? accepts, and return nil
  # for anything else.
  def test_fetch
    assert_equal "abbr", @map["Abbreviation"]
    assert_equal "abbr", @map["abbreviation"]
    assert_nil @map["abbreviation?"]
    assert_equal "abbr", @map["abbr"]
    assert_equal "2words", @map["Two words"]
    assert_equal "2words", @map["2words"]
  end

#  def test_partial
#    assert @map.partial?( "Abbreviation" )
#    assert @map.partial?( "Two" )
#    assert @map.partial?( "two" )
#    assert !(@map.partial? "words")
#    assert @map.partial?( "Some" )
#    assert !(@map.partial? "words")
#    assert @map.partial?( "Some three" )
#    assert @map.partial?( "SOME THREE WORDS" )
#  end

  # Every lookup table must be a Map.
  def test_constants
    assert_kind_of Map, Directional
    assert_kind_of Map, Prefix_Qualifier
    assert_kind_of Map, Suffix_Qualifier
    assert_kind_of Map, Prefix_Type
    assert_kind_of Map, Suffix_Type
    assert_kind_of Map, Unit_Type
    assert_kind_of Map, Name_Abbr
    assert_kind_of Map, State
  end
end
HARLOW ST, BANGOR ME 04401",202,,,,Harlow,St,,,,,Bangor,ME,04401,,,, 6 | "46 FRONT STREET, WATERVILLE, ME 04901",46,,,,Front,St,,,,,Waterville,ME,04901,,,, 7 | "22 SUSSEX ST, HACKENSACK NJ 07601",22,,,,Sussex,St,,,,,Hackensack,NJ,07601,,,, 8 | "75 OAK STREET, PATCHOGUE NY 11772",75,,,,Oak,St,,,,,Patchogue,NY,11772,,,, 9 | "1 CLINTON AVE, ALBANY NY 12207",1,,,,Clinton,Ave,,,,,Albany,NY,12207,,,, 10 | "7242 ROUTE 9, PLATTSBURGH NY 12901",7242,,,US Hwy,9,,,,,,Plattsburgh,NY,12901,,,, 11 | "520 5TH AVE, MCKEESPORT PA 15132",520,,,,5th,Ave,,,,,McKeesport,PA,15132,,,, 12 | "122 W 3RD STREET, GREENSBURG PA 15601",122,W,,,3rd,St,,,,,Greensburg,PA,15601,,,, 13 | "901 UNIVERSITY DR, STATE COLLEGE PA 16801",901,,,,University,Dr,,,,,"State College",PA,16801,,,, 14 | "240 W 3RD ST, WILLIAMSPORT PA 17701",240,W,,,3rd,St,,,,,Williamsport,PA,17701,,,, 15 | "41 N 4TH ST, ALLENTOWN PA 18102",41,N,,,4th,St,,,,,Allentown,PA,18102,,,, 16 | "2221 W. MARKET STREET, POTTSVILLE PA 17901",2221,W,,,Market,St,,,,,Pottsville,PA,17901,,,, 17 | "337 BRIGHTSEAT ROAD, LANDOVER MD 20785",337,,,,Brightseat,Rd,,,,,Hyattsville,MD,20785,,,,"canonical place" 18 | "101 CHESAPEAKE BLVD, ELKTON MD 21921",103,,,,Chesapeake,Blvd,,,,,Elkton,MD,21921,,,,"find nearest corner" 19 | "2875 SABRE ST, VIRGINIA BEACH VA 23452",2809,,,,Sabre,St,,,,,"Virginia Beach",VA,23452,,,,"find nearest corner" 20 | "324 COMMERCE ROAD, FARMVILLE VA 23901",324,,,,Commerce,St,,,,,Clarksville,VA,23927,,,,"nearby address; might be TIGER omission" 21 | "1480 EAST MAIN STREET, WYTHEVILLE VA 24382",1480,W,,,Main,St,,,,,Wytheville,VA,24382,,,,"nearby address; TIGER omission" 22 | "116 N JEFFERSON STREET, ROANOKE VA 24016",116,N,,,Jefferson,St,,,,,Roanoke,VA,24016,,,, 23 | "50 MCDOWELL STREET, WELCH WV 24801",50,,,,"Mc Dowell",St,,,,,Welch,WV,24801,,,, 24 | "146 EAST FIRST AVE, WILLIAMSON WV 25661",200,E,,,1st,Ave,,,,,Williamson,WV,25661,,,,"find nearest corner" 25 | "1925 E MAIN ST, ALBEMARLE NC 
28001",1925,E,,,Main,St,,,,,Albemarle,NC,28001,,,, 26 | "1013 SPRING LANE, SANFORD NC 27330",1013,,,,Spring,Ln,,,,,Sanford,NC,27330,,,, 27 | "145 ROWAN STREET, FAYETTEVILLE NC 28301",145,,,,Rowan,St,,,,,Fayetteville,NC,28301,,,, 28 | "1420 MCCARTHY BLVD, NEW BERN NC 28562",1420,,,,McCarthy,Blvd,,,,,"New Bern",NC,28562,,,, 29 | "115 ENTERPRISE COURT, GREENWOOD SC 29649",115,,,,Enterprise,Ct,,,,,Greenwood,SC,29649,,,, 30 | "732 W 2ND ST, TIFTON GA 31794",732,,,,2nd,St,,W,,,Tifton,GA,31793,,,,"TIGER artifact" 31 | "97 WEST OAK AVE, PANAMA CITY FL 32401",97,,,,Oak,Ave,,,,,"Panama City",FL,32401,,,,"predir is TIGER artifact" 32 | "2276 WILTON DR, WILTON MANORS FL 33305",2276,,,,Wilton,Dr,,,,,"Fort Lauderdale",FL,33305,,,,"canonical place" 33 | "203 SOUTH WALNUT ST, FLORENCE AL 35630",203,S,,,Walnut,St,,,,,Florence,AL,35630,,,, 34 | "108 CENTER POINTE DR, CLARKSVILLE TN 37040",108,,,,"Center Pointe",Dr,,,,,Clarksville,TN,37040,,,, 35 | "1800 OLD TROY RD, UNION CITY TN 38261",1800,,Old,,Troy,Rd,,,,,"Union City",TN,38261,,,, 36 | "931 OLD SMITHVILLE HWY, MCMINNVILLE TN 37110",931,,Old,,Smithville,Rd,,,,,McMinnville,TN,37110,,,, 37 | "1301 GREENE STREET, MARIETTA OH 45750",1301,,,,Greene,St,,,,,Marietta,OH,45750,,,, 38 | "602 SOUTH MICHIGAN ST, SOUTH BEND IN 46601",602,S,,,Michigan,St,,,,,"South Bend",IN,46601,,,, 39 | "500 NORTH A STREET, RICHMOND IN 47374",500,N,,,A,St,,,,,Richmond,IN,47374,,,, 40 | "317 SOUTH DRAKE ROAD, KALAMAZOO MI 49009",317,S,,,Drake,Rd,,,,,Kalamazoo,MI,49009,,,, 41 | "105 Amity Way, Wayne PA 19087",105,,,,Amity,Dr,,,,,Wayne,PA,19087,,,, 42 | "305 W 45th St, New York NY 10036",305,W,,,45,St,,,,,"New York",NY,10036,,,, 43 | "11839 Federalist Way, Fairfax VA 22030",11839,,,,Federalist,Way,,,,,Fairfax,VA,22030,,,, 44 | "400 Monroe St, Hoboken, NJ 07030",400,,,,Monroe,St,,,,,Hoboken,NJ,07030,,,, 45 | "101 West End Avenue, New York NY 10023",101,W,,,End,Ave,,,,,"New York",NY,10023,,,,"predir is TIGER artifact" 46 | "2900 4TH AVE, BILLINGS MT 
59101",2900,,,,4th,Ave,,N,,,Billings,MT,59101,,,,"returns 2 results" 47 | "158 N SCOTT STREET, JOLIET IL 60432",158,N,,,Scott,St,,,,,Joliet,IL,60432,,,, 48 | "1207 NETWORK CENTRE DR, EFFINGHAM IL 62401",1207,,,,"Network Centre",Dr,,,,,Effingham,IL,62401,,,, 49 | "3555 SOUTHERN HILLS DR, SIOUX CITY IA 51106",3555,,,,"Southern Hills",Dr,,,,,"Sioux City",IA,51106,,,, 50 | "300 E 3RD ST, NORTH PLATTE NE 69101",300,E,,,3rd,St,,,,,"North Platte",NE,69101,,,, 51 | "115 N WEBB RD, GRAND ISLAND NE 68803",115,N,,,Webb,Rd,,,,,"Grand Island",NE,68803,,,, 52 | "415 VALLEY VIEW DR, SCOTTSBLUFF NE 69361",501,,,,"Valley View",Dr,,,,,"Scottsbluff",NE,69361,,,,"find nearest corner" 53 | -------------------------------------------------------------------------------- /test/data/db-test.csv: -------------------------------------------------------------------------------- 1 | address,number,street,city,state,zip,lon,lat,count,comment 2 | "93 NORTH 9TH STREET, BROOKLYN NY 11211",93,N 9th St,Brooklyn,NY,11211,-73.958096,40.720064,1, 3 | "380 WESTMINSTER ST, PROVIDENCE RI 02903",380,Westminster St,Providence,RI,02903,-71.415171,41.821004,1, 4 | "177 MAIN STREET, LITTLETON NH 03561",177,Main St,Littleton,NH,03561,-71.776393,44.307299,1,range 5 | "202 HARLOW ST, BANGOR ME 04401",202,Harlow St,Bangor,ME,04401,-68.773934,44.805202,1, 6 | "46 FRONT STREET, WATERVILLE, ME 04901",46,Front St,Waterville,ME,04901,-69.628598,44.550988,1, 7 | "22 SUSSEX ST, HACKENSACK NJ 07601",22,Sussex St,Hackensack,NJ,07601,-74.04821,40.880328,1, 8 | "75 OAK STREET, PATCHOGUE NY 11772",75,Oak St,Patchogue,NY,11772,-73.01036,40.768522,1, 9 | "1 CLINTON AVE, ALBANY NY 12207",1,Clinton Ave,Albany,NY,12207,-73.750031,42.654244,1, 10 | "7242 ROUTE 9, PLATTSBURGH NY 12901",7242,US Hwy 9,Plattsburgh,NY,12901,-73.428066,44.735338,1, 11 | "520 5TH AVE, MCKEESPORT PA 15132",520,5th Ave,McKeesport,PA,15132,-79.861023,40.351228,1, 12 | "122 W 3RD STREET, GREENSBURG PA 15601",122,W 3rd 
St,Greensburg,PA,15601,-79.546244,40.299681,1, 13 | "901 UNIVERSITY DR, STATE COLLEGE PA 16801",901,University Dr,State College,PA,16801,-77.844056,40.797191,1, 14 | "240 W 3RD ST, WILLIAMSPORT PA 17701",240,W 3rd St,Williamsport,PA,17701,-77.005601,41.238969,1, 15 | "41 N 4TH ST, ALLENTOWN PA 18102",41,N 4th St,Allentown,PA,18102,-75.466113,40.605368,1, 16 | "2221 W. MARKET STREET, POTTSVILLE PA 17901",2221,W Market St,Pottsville,PA,17901,-76.226401,40.674702,1, 17 | "337 BRIGHTSEAT ROAD, LANDOVER MD 20785",337,Brightseat Rd,Hyattsville,MD,20785,-76.850995,38.892762,1,canonical place 18 | "101 CHESAPEAKE BLVD, ELKTON MD 21921",109,Chesapeake Blvd,Elkton,MD,21921,-75.786853,39.6045,1,find nearest corner 19 | "2875 SABRE ST, VIRGINIA BEACH VA 23452",2809,Sabre St,Virginia Beach,VA,23452,-76.067835,36.822959,1,find nearest corner 20 | "324 COMMERCE ROAD, FARMVILLE VA 23901",324,Commerce Rd,Farmville,VA,23901,-78.423296,37.273311,1,fixed in TIGER 2010 21 | "1480 EAST MAIN STREET, WYTHEVILLE VA 24382",1168,E Main St,Wytheville,VA,24382,-81.069279,36.951346,1,nearby address; TIGER omission 22 | "116 N JEFFERSON STREET, ROANOKE VA 24016",116,N Jefferson St,Roanoke,VA,24016,-79.940537,37.275163,1, 23 | "50 MCDOWELL STREET, WELCH WV 24801",50,Mc Dowell St,Welch,WV,24801,-81.585586,37.433465,1, 24 | "146 EAST FIRST AVE, WILLIAMSON WV 25661",200,E 1st Ave,Williamson,WV,25661,-82.277886,37.670798,1,find nearest corner 25 | "1925 E MAIN ST, ALBEMARLE NC 28001",1925,E Main St,Albemarle,NC,28001,-80.163859,35.348818,1, 26 | "1013 SPRING LANE, SANFORD NC 27330",1013,Spring Ln,Sanford,NC,27330,-79.198776,35.487444,1, 27 | "145 ROWAN STREET, FAYETTEVILLE NC 28301",145,Rowan St,Fayetteville,NC,28301,-78.878696,35.057767,1, 28 | "1420 MCCARTHY BLVD, NEW BERN NC 28562",1399,McCarthy Blvd,New Bern,NC,28562,-77.094901,35.097183,1,broken in TIGER 2010 29 | "115 ENTERPRISE COURT, GREENWOOD SC 29649",115,Enterprise Ct,Greenwood,SC,29649,-82.164828,34.216732,1, 30 | "732 W 2ND ST, TIFTON GA 
31794",732,W 2nd St,Tifton,GA,31794,-83.523812,31.457889,1,ZIP was fixed in TIGER 2010 31 | "97 WEST OAK AVE, PANAMA CITY FL 32401",95,W Oak Ave,Panama City,FL,32401,-85.661436,30.154306,1,broken in TIGER 2010 32 | "2276 WILTON DR, WILTON MANORS FL 33305",2276,Wilton Dr,Fort Lauderdale,FL,33305,-80.137273,26.156993,1,canonical place 33 | "203 SOUTH WALNUT ST, FLORENCE AL 35630",203,S Walnut St,Florence,AL,35630,-87.670768,34.800112,1, 34 | "108 CENTER POINTE DR, CLARKSVILLE TN 37040",108,Center Pointe Dr,Clarksville,TN,37040,-87.30888,36.56967,1, 35 | "1800 OLD TROY RD, UNION CITY TN 38261",1800,Old Troy Rd,Union City,TN,38261,-89.083201,36.416592,1, 36 | "931 OLD SMITHVILLE HWY, MCMINNVILLE TN 37110",931,Old Smithville Rd,McMinnville,TN,37110,-85.788518,35.701731,1, 37 | "1301 GREENE STREET, MARIETTA OH 45750",1301,Greene St,Marietta,OH,45750,-81.424821,39.426052,1, 38 | "602 SOUTH MICHIGAN ST, SOUTH BEND IN 46601",598,S Michigan St,South Bend,IN,46601,-86.25025,41.670964,1,broken in TIGER 2010 39 | "500 NORTH A STREET, RICHMOND IN 47374",500,N A St,Richmond,IN,47374,-84.89517,39.830625,1, 40 | "317 SOUTH DRAKE ROAD, KALAMAZOO MI 49009",317,S Drake Rd,Kalamazoo,MI,49009,-85.648132,42.288772,1, 41 | "105 Amity Way, Wayne PA 19087",105,Amity Dr,Wayne,PA,19087,-75.455425,40.076446,1, 42 | "305 W 45th St, New York NY 10036",305,W 45 St,New York,NY,10036,-73.991106,40.760371,1, 43 | "11839 Federalist Way, Fairfax VA 22030",11839,Federalist Way,Fairfax,VA,22030,-77.353695,38.849858,1, 44 | "400 Monroe St, Hoboken, NJ 07030",400,Monroe St,Hoboken,NJ,07030,-74.038654,40.743789,1, 45 | "101 West End Avenue, New York NY 10023",101,W End Ave,New York,NY,10023,-73.987822,40.775325,1,predir is TIGER artifact 46 | "2900 4TH AVE, BILLINGS MT 59101",2900,4th Ave N,Billings,MT,59101,-108.51073,45.783452,2,returns 2 results 47 | "158 N SCOTT STREET, JOLIET IL 60432",158,N Scott St,Joliet,IL,60432,-88.080083,41.526353,1, 48 | "1207 NETWORK CENTRE DR, EFFINGHAM IL 62401",1207,Network 
Centre Dr,Effingham,IL,62401,-88.526702,39.143248,1, 49 | "3555 SOUTHERN HILLS DR, SIOUX CITY IA 51106",3555,Southern Hills Dr,Sioux City,IA,51106,-96.353014,42.449259,1, 50 | "300 E 3RD ST, NORTH PLATTE NE 69101",300,E 3rd St,North Platte,NE,69101,-100.761028,41.135235,1, 51 | "115 N WEBB RD, GRAND ISLAND NE 68803",115,N Webb Rd,Grand Island,NE,68803,-98.378361,40.917627,1, 52 | "415 VALLEY VIEW DR, SCOTTSBLUFF NE 69361",501,Valley View Dr,Scottsbluff,NE,69361,-103.656078,41.879011,1,find nearest corner 53 | "4018 W Ustick Rd, Meridian ID",4018,W Ustick Rd,Meridian,ID,83646,-116.443792,43.634096,1,fixed in TIGER 2010 54 | "2518 S Pacific Hwy, Medford OR",2518,S Pacific Hwy,Medford,OR,97501,-122.855426,42.307241,1,fixed in TIGER 2010 55 | "1111 River Rd Apt A17, Edgewater NJ 07020",1111,River Rd,Edgewater,NJ,07020,-73.972261,40.830852,1,FIXME: parsing 56 | "460 West St, Amherst MA 01002-2964",460,West St,Amherst,MA,01002,-72.520228,42.34014,1,address is all abbreviations 57 | "23 2nd St, Brooklyn NY",23,2nd St,Brooklyn,NY,11231,-73.993897,40.67895,1,regression caused it to point to East Otto 58 | "23 2nd St, Brooklyn, New York",23,2nd St,Brooklyn,NY,11231,-73.993897,40.67895,1,regression caused it to point to Manhattan 59 | "100 Central Park W, 10023",100,Central Park W,New York,NY,10023,-73.975461,40.776899,1,the usual Central Park West parsing issues 60 | "100 Central Park W, New York",100,Central Park W,New York,NY,10023,-73.975461,40.776899,1,the usual Central Park West parsing issues 61 | -------------------------------------------------------------------------------- /test/data/locations.csv: -------------------------------------------------------------------------------- 1 | name,address 2 | "Home","2026 21st St. 
N, Arlington, VA 22201" 3 | "Work","2200 Wilson Blvd., Arlington, VA 22201" 4 | "RTI","1506 N Main St., Royal Oak, MI 48067" -------------------------------------------------------------------------------- /test/database.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift '../lib' 2 | 3 | require 'test/unit' 4 | require 'geocoder/us/database' 5 | require 'fastercsv' 6 | 7 | Base = File.dirname(__FILE__) 8 | Debug = false 9 | 10 | module Geocoder::US 11 | Database_File = ( 12 | (ARGV[0] and !ARGV[0].empty?) ? ARGV[0] : nil) 13 | end 14 | 15 | class TestDatabase < Test::Unit::TestCase 16 | def get_db 17 | Geocoder::US::Database.new(Geocoder::US::Database_File, {:debug => Debug}) 18 | end 19 | 20 | # def get_international_db 21 | # Geocoder::US::Database.new("/Users/katechapman/Desktop/geonames1.db", {:debug => true}) 22 | # end 23 | 24 | def setup 25 | @db = get_db 26 | #@db_intl = get_international_db 27 | #assert_not_nil @db_intl 28 | assert_not_nil @db 29 | end 30 | 31 | def test_load 32 | return if @db.nil? 33 | assert_kind_of Geocoder::US::Database, @db 34 | end 35 | 36 | def test_zip 37 | return if @db.nil? 38 | [ {:city=>"Chicago", :zip=>"60601", :state=>"IL", :precision=>:zip, 39 | :fips_county=>"17031", :lon=>-87.622130,:lat=>41.885310, :score => 0.714}, 40 | {:city=>"Philadelphia", :zip=>"19019", :state=>"PA", :precision=>:zip, 41 | :fips_county=>"42101", :lon=>-75.11787, :lat=>40.001811, :score => 0.714} 42 | ].each {|record| 43 | result = @db.geocode(record[:zip]) 44 | assert_equal result.length, 1 45 | record.keys.each {|key| assert_equal record[key], result[0][key]} 46 | } 47 | 48 | end 49 | 50 | # def test_international_place 51 | # return if @db_intl.nil? 
52 | # [ {:city=>"Paris", :state=>"FR"}, 53 | # {:city=>"Paris", :state=>"FR"} 54 | # ].each {|record| 55 | # result = @db_intl.geocode(record) 56 | # assert_equal result.length, 1 57 | # record.keys.each {|key| assert_equal record[key], result[0][key]} 58 | # } 59 | # end 60 | 61 | def test_place 62 | return if @db.nil? 63 | [ {:city=>"Chicago", :state=>"IL", :precision=>:city, :fips_county=>"17031", :score => 0.857}, 64 | {:city=>"Philadelphia", :state=>"PA", :precision=>:city, :fips_county=>"42101", :score => 0.857} 65 | ].each {|record| 66 | result = @db.geocode(record[:city] + ", " + record[:state]) 67 | assert_equal result.length, 1 68 | record.keys.each {|key| assert_equal record[key], result[0][key]} 69 | } 70 | 71 | end 72 | 73 | # def test_international_place 74 | # return if @db_intl.nil? 75 | # [ {:city=>"Kabul", :state=>"AF", :precision=>:city}, 76 | # {:city=>"Paris", :state=>"FR", :precision=>:city} 77 | # ].each {|record| 78 | # result = @db_intl.geocode({:city => record[:city] , :state => record[:state]}) 79 | # puts result 80 | # assert_equal result.length, 1 81 | # record.keys.each {|key| assert_equal record[key], result[0][key]} 82 | # } 83 | # end 84 | 85 | 86 | def test_sample 87 | return if @db.nil? 88 | FasterCSV.foreach(Base + "/data/db-test.csv", {:headers=>true}) do |row| 89 | result = @db.geocode(row[0], true) 90 | result[0][:count] = result.map{|a|[a[:lat], a[:lon]]}.to_set.length 91 | fields = row.headers - ["comment", "address"] 92 | fields.each {|f| 93 | sample = row[f] || "" 94 | given = result[0][f.to_sym] || "" 95 | sample = sample.to_f if given.kind_of? Float or given.kind_of? 
Fixnum 96 | assert_equal sample, given, "row: #{row.inspect}\nfield: #{f.inspect} sample: #{sample.inspect}, given: #{given.inspect}" 97 | 98 | } 99 | end 100 | end 101 | 102 | def test_city_with_street_type_in_name 103 | result = @db.geocode("Mountain View, CA") 104 | assert_equal result.length, 1 105 | assert_equal result[0][:city], "Mountain View" # (and not "Mountain View Acres, CA") 106 | assert_equal result[0][:state], "CA" 107 | end 108 | 109 | def test_should_get_street_number_correctly 110 | result = @db.geocode("460 West St, Amherst MA 01002-2964", true) 111 | assert_equal '460', result[0][:number] 112 | end 113 | 114 | def test_should_geocode_with_hash 115 | result = @db.geocode({:street => "2200 Wilson Blvd", :city => "Arlington", :region => "VA", :postal_code => "22201"}, true) 116 | result2 = @db.geocode("2200 Wilson Blvd, Arlington, VA 22201") 117 | assert_equal result2,result 118 | end 119 | 120 | def test_should_work_with_partial_hash 121 | result = @db.geocode({:street => "2200 Wilson Blvd", :postal_code => "22201"}) 122 | assert_equal result[0][:precision],:range 123 | end 124 | 125 | def test_weird_edge_case_explosion 126 | result = @db.geocode({:street => "1410 Spring Hill Rd", :postal_code => "20221"}) 127 | result1 = @db.geocode(:street => "402 Valley View Ave", :postal_code => "12345") 128 | assert_equal result[0][:precision],:zip 129 | end 130 | 131 | def test_city_state_together 132 | result = @db.geocode({:city => "Richmond", :state => "IN"}) 133 | assert_equal result[0][:precision],:city 134 | end 135 | 136 | def test_state_street_together 137 | result = @db.geocode({:region => "VA", :street => "14333 Lee Jackson Memorial Hwy"}) 138 | #assert_equal result[0][:precision],:range 139 | end 140 | 141 | def test_intersection 142 | result = @db.geocode("Decatur St and Bryant St, San Francisco, CA 94103") 143 | assert_equal result[0][:precision], :intersection 144 | end 145 | 146 | end 147 | 
-------------------------------------------------------------------------------- /test/generate.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | require 'test/unit' 4 | require 'geocoder/us/database' 5 | require 'fastercsv' 6 | 7 | db = Geocoder::US::Database.new("/mnt/tiger2008/geocoder.db", 8 | "/home/sderle/geocoder/lib/libsqlite3_geocoder.so") 9 | 10 | if ARGV.length == 1 11 | result = db.geocode(ARGV[0], 0, 50) 12 | p result 13 | else 14 | FasterCSV.open(ARGV[1], "w", {:headers => true, :write_headers => true}) do |output| 15 | FasterCSV.foreach(ARGV[0], {:headers => true}) do |row| 16 | result = db.geocode(row[0]) 17 | count = result.map{|a|[a[:lat], a[:lon]]}.to_set.length 18 | if !result.empty? 19 | row.headers[1..13].each_with_index {|f,i| 20 | if result[0][f.to_sym] != row[i+1] 21 | print "#{row[0]} !#{f} -> #{result[0][f]} != #{row[i+1]}\n" 22 | end 23 | } 24 | result[0][:count] = count 25 | result[0][:address] = row[0] 26 | result[0][:comment] = row[-1] 27 | columns = row.headers.map{|col|col.to_sym} 28 | output << result[0].values_at(*columns) 29 | else 30 | print "!!! 
#{row[0]}\n" 31 | end 32 | end 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /test/numbers.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift '../lib' 2 | 3 | require 'test/unit' 4 | require 'geocoder/us/numbers' 5 | 6 | include Geocoder::US 7 | 8 | class TestAddress < Test::Unit::TestCase 9 | def test_number_to_cardinal 10 | assert_equal 'one', Cardinals[1] 11 | assert_equal 'ten', Cardinals[10] 12 | assert_equal 'twelve', Cardinals[12] 13 | assert_equal 'eighty-seven', Cardinals[87] 14 | end 15 | 16 | def test_cardinal_to_number 17 | assert_equal 1, Cardinals['one'] 18 | assert_equal 1, Cardinals['One'] 19 | assert_equal 10, Cardinals['ten'] 20 | assert_equal 12, Cardinals['twelve'] 21 | assert_equal 87, Cardinals['eighty-seven'] 22 | assert_equal 87, Cardinals['eighty seven'] 23 | assert_equal 87, Cardinals['eightyseven'] 24 | end 25 | 26 | def test_number_to_ordinal 27 | assert_equal 'first', Ordinals[1] 28 | assert_equal 'second', Ordinals[2] 29 | assert_equal 'tenth', Ordinals[10] 30 | assert_equal 'twelfth', Ordinals[12] 31 | assert_equal 'twentieth', Ordinals[20] 32 | assert_equal 'twenty-second', Ordinals[22] 33 | assert_equal 'eighty-seventh', Ordinals[87] 34 | end 35 | 36 | def test_ordinal_to_number 37 | assert_equal 1, Ordinals['first'] 38 | assert_equal 1, Ordinals['First'] 39 | assert_equal 10, Ordinals['tenth'] 40 | assert_equal 12, Ordinals['twelfth'] 41 | assert_equal 73, Ordinals['seventy-third'] 42 | assert_equal 74, Ordinals['seventy fourth'] 43 | assert_equal 75, Ordinals['seventyfifth'] 44 | assert_equal nil, Ordinals['seventy-eleventh'] 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /test/run.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | $LOAD_PATH.unshift 'tests' 4 | $LOAD_PATH.unshift 'lib' 5 | 6 | 
require 'test/unit' 7 | require 'numbers' 8 | require 'constants' 9 | require 'address' 10 | require 'database' 11 | 12 | --------------------------------------------------------------------------------