├── .gitignore
├── History.txt
├── Makefile
├── Manifest.txt
├── README.rdoc
├── REST.rdoc
├── TODO.txt
├── build
├── build_indexes
├── rebuild_cluster
├── sql
│ ├── cluster.sql
│ ├── convert.sql
│ ├── create.sql
│ ├── index.sql
│ ├── place.sql
│ └── setup.sql
├── tiger2009_import
└── tiger_import
├── conf
├── geocoder-us
│ ├── geocoder.ru
│ └── unicorn.rb
└── init
│ └── geocoder-us.conf
├── debian
├── README.Debian
├── changelog
├── compat
├── control
├── copyright
├── default
├── docs
├── geocoder-us.postinst
├── geocoder-us.prerm
├── rules
└── source
│ └── format
├── demos
├── api
│ ├── server.rb
│ └── views
│ │ └── index.erb
├── cli.rb
├── demo
│ ├── app
│ │ ├── ext
│ │ │ └── geocodewrap.rb
│ │ └── views
│ │ │ ├── index.builder
│ │ │ └── index.erb
│ ├── config.ru
│ ├── config
│ │ ├── bootstraps.rb
│ │ └── geoenvironment.rb
│ ├── geocoder_helper.rb
│ ├── geocom_geocode.rb
│ ├── main.rb
│ ├── rakefile.rb
│ └── tmp
│ │ └── restart.txt
├── parse.rb
└── simpledemo
│ ├── views
│ ├── index.builder
│ └── index.erb
│ └── ws.rb
├── doc
├── Makefile
├── html4css1.css
├── lookup.rst
├── parsing.rst
└── voidspace.css
├── gemspec
├── lib
└── geocoder
│ ├── us.rb
│ └── us
│ ├── address.rb
│ ├── constants.rb
│ ├── database.rb
│ ├── metaphone.rb
│ ├── numbers.rb
│ └── rest.rb
├── navteq
├── README
├── convert.sql
├── navteq_import
└── prepare.sql
├── setup.rb
├── src
├── Makefile
├── README
├── liblwgeom
│ ├── Makefile
│ ├── box2d.c
│ ├── lex.yy.c
│ ├── liblwgeom.h
│ ├── lwalgorithm.c
│ ├── lwalgorithm.h
│ ├── lwcircstring.c
│ ├── lwcollection.c
│ ├── lwcompound.c
│ ├── lwcurvepoly.c
│ ├── lwgeom.c
│ ├── lwgeom_api.c
│ ├── lwgparse.c
│ ├── lwgunparse.c
│ ├── lwline.c
│ ├── lwmcurve.c
│ ├── lwmline.c
│ ├── lwmpoint.c
│ ├── lwmpoly.c
│ ├── lwmsurface.c
│ ├── lwpoint.c
│ ├── lwpoly.c
│ ├── lwsegmentize.c
│ ├── lwutil.c
│ ├── measures.c
│ ├── postgis_config.h
│ ├── ptarray.c
│ ├── vsprintf.c
│ ├── wktparse.h
│ ├── wktparse.lex
│ ├── wktparse.tab.c
│ ├── wktparse.tab.h
│ └── wktparse.y
├── libsqlite3_geocoder
│ ├── Makefile
│ ├── Makefile.nix
│ ├── Makefile.redhat
│ ├── extension.c
│ ├── extension.h
│ ├── levenshtein.c
│ ├── metaphon.c
│ ├── util.c
│ └── wkb_compress.c
├── metaphone
│ ├── Makefile
│ ├── README
│ ├── extension.c
│ └── metaphon.c
└── shp2sqlite
│ ├── Makefile
│ ├── Makefile.nix
│ ├── Makefile.redhat
│ ├── dbfopen.c
│ ├── getopt.c
│ ├── getopt.h
│ ├── shapefil.h
│ ├── shp2sqlite.c
│ └── shpopen.c
└── test
├── address.rb
├── benchmark.rb
├── constants.rb
├── data
├── address-sample.csv
├── db-test.csv
└── locations.csv
├── database.rb
├── generate.rb
├── numbers.rb
└── run.rb
/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.gem
4 | pkg/
5 | bin/shp2sqlite
6 | src/shp2sqlite/shp2sqlite
7 | src/liblwgeom/liblwgeom.a
8 | doc/*.html
9 |
--------------------------------------------------------------------------------
/History.txt:
--------------------------------------------------------------------------------
1 | === 1.0.0 / 2009-06-02
2 |
3 | * 1 major enhancement
4 |
5 | * Birthday!
6 |
7 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | make -C src install
3 | gem build gemspec
4 |
5 | test: all
6 | ruby -Ilib tests/run.rb
7 |
8 | install: all
9 | # gem install *.gem
10 |
11 | clean:
12 | make -C src clean
13 | rm -f lib/geocoder/us/sqlite3.so
14 | rm -f *.gem
15 |
--------------------------------------------------------------------------------
/Manifest.txt:
--------------------------------------------------------------------------------
1 | History.txt
2 | Manifest.txt
3 | README.rdoc
4 | Rakefile
5 | lib/geocoder/us/database.rb
6 | lib/geocoder/us/numbers.rb
7 | lib/geocoder/us/address.rb
8 | lib/geocoder/us/constants.rb
9 | tests/database.rb
10 | tests/numbers.rb
11 | tests/generate.rb
12 | tests/run.rb
13 | tests/address.rb
14 | tests/benchmark.rb
15 | tests/constants.rb
16 | tests/data/address-sample.csv
17 | tests/data/locations.csv
18 | tests/data/db-test.csv
19 |
--------------------------------------------------------------------------------
/README.rdoc:
--------------------------------------------------------------------------------
1 | = Geocoder::US
2 |
3 | Geocoder::US 2.0 is a software package designed to geocode US street
4 | addresses. Although it is primarily intended for use with the US Census
5 | Bureau's free TIGER/Line dataset, it uses an abstract US address data model
6 | that can be employed with other sources of US street address range data.
7 |
8 | Geocoder::US 2.0 implements a Ruby interface to parse US street addresses, and
9 | perform fuzzy lookup against an SQLite 3 database. Geocoder::US is designed to
10 | return the best matches found, with geographic coordinates interpolated from
11 | the street range dataset. Geocoder::US will fill in missing information, and
12 | it knows about standard and common non-standard postal abbreviations, ordinal
13 | versus cardinal numbers, and more.
14 |
15 | Geocoder::US 2.0 is shipped with a free US ZIP code data set, compiled from
16 | public domain sources.
17 |
18 | == Synopsis
19 |
20 | >> require 'geocoder/us'
21 | >> db = Geocoder::US::Database.new("/opt/tiger/geocoder.db")
22 | >> p db.geocode("1600 Pennsylvania Av, Washington DC")
23 |
24 | [{:pretyp=>"", :street=>"Pennsylvania", :sufdir=>"NW", :zip=>"20502",
25 | :lon=>-77.037528, :number=>"1600", :fips_county=>"11001", :predir=>"",
26 | :precision=>:range, :city=>"Washington", :lat=>38.898746, :suftyp=>"Ave",
27 | :state=>"DC", :prequal=>"", :sufqual=>"", :score=>0.906, :prenum=>""}]
28 |
29 | == Prerequisites
30 |
31 | To build Geocoder::US, you will need gcc/g++, make, bash or equivalent, the
32 | standard *NIX 'unzip' utility, and the SQLite 3 executable and development
33 | files installed on your system.
34 |
35 | To use the Ruby interface, you will need the 'Text' gem installed from
36 | rubyforge. To run the tests, you will also need the 'fastercsv' gem.
37 |
38 | Additionally, you will need a custom build of the 'sqlite3-ruby' gem that
39 | supports loading extension modules in SQLite. You can get a patched version of
40 | this gem from http://github.com/schuyler/sqlite3-ruby/. Until the sqlite3-ruby
41 | maintainers roll in the relevant patch, you will need *this* version.
42 |
43 | *NOTE*: If you do not have /usr/include/sqlite3ext.h installed, then your
44 | sqlite3 binaries are probably not configured to support dynamic extension
45 | loading. If not, you *must* compile and install SQLite from source, or rebuild
46 | your system packages. This is not believed to be a problem on Debian/Ubuntu,
47 | but is known to be a problem with Red Hat/CentOS.
48 |
49 | *NOTE*: If you *do* have to install from source, make sure that the
50 | source-installed 'sqlite3' program is in your path before proceeding (and not
51 | the system-installed version), using `which sqlite3`. Also, be sure that you've
52 | added your source install prefix (usually /usr/local) to /etc/ld.so.conf (or
53 | its moral equivalent) and that you've run /sbin/ldconfig.
54 |
55 | == Thread safety
56 |
57 | SQLite 3 is not designed for concurrent use of a single database handle across
58 | multiple threads. Therefore, to prevent segfaults, Geocoder::US::Database
59 | implements a global mutex that wraps all database access. The use of this mutex
60 | will ensure stability in multi-threaded applications, but incurs a performance
61 | penalty. However, since the database is read-only from Ruby, there's no reason
62 | in principle why each thread in an app can't have its own database handle.
63 |
64 | To disable the mutex for better performance, you can do the following:
65 |
66 | * Read the following and make sure you understand them:
67 | * http://www.sqlite.org/faq.html#q6
68 | * http://www.sqlite.org/cvstrac/wiki?p=MultiThreading
69 | * Make sure you have compiled SQLite 3 with thread safety enabled.
70 | * Instantiate a separate Geocoder::US::Database object for *each* thread
71 | in your Ruby script, and pass :threadsafe => true to new() to disable mutex
72 | synchronization.
73 |
74 | Per the SQLite 3 documentation, do *not* attempt to retain a
75 | Geocoder::US::Database object across a fork! "Problems will result if you do."
76 |
77 | == Building Geocoder::US
78 |
79 | Unpack the source and run 'make'. This will compile the SQLite 3 extension
80 | needed by Geocoder::US, the Shapefile import utility, and the Geocoder-US
81 | gem.
82 |
83 | You can run 'make install' as root to install the gem systemwide.
84 |
85 | == Generating a Geocoder::US Database
86 |
87 | Build the package from source as described above. Generating the database
88 | involves three basic steps:
89 |
90 | * Import the Shapefile data into an SQLite database.
91 | * Build the database indexes.
92 | * Optionally, rebuild the database to cluster indexed rows.
93 |
94 | We will presume that you are building a Geocoder::US database from TIGER/Line,
95 | and that you have obtained the complete set of TIGER/Line ZIP files, and put
96 | the entire tree in /opt/tiger. Please adjust these instructions as needed.
97 |
98 | A full TIGER/Line database import takes ten hours to run on a normal Amazon
99 | EC2 instance, and takes up a little over 5 gigabytes after all is said and
100 | done. You will need to have at least 12 gigabytes of free disk space *after*
101 | downloading the TIGER/Line dataset, if you are building the full database.
102 |
103 | === Import TIGER/Line
104 |
105 | From inside the Geocoder::US source tree, run the following:
106 |
107 | $ bin/tiger_import /opt/tiger/geocoder.db /opt/tiger
108 |
109 | This will unpack each TIGER/Line ZIP file to a temporary directory, and
110 | perform the extract/transform/load sequence to incrementally build the
111 | database. The process takes about 10-12 hours on a normal Amazon EC2 instance,
112 | or about 5 CPU hours flat out on a modern PC. Note that not all TIGER/Line
113 | source files contain address range information, so you will see error messages
114 | for some counties, but this is normal.
115 |
116 | If you only want to import specific counties, you can pipe a list of
117 | TIGER/Line county directories to tiger_import on stdin. For example,
118 | the following will install just the data for the state of Delaware:
119 |
120 | $ ls -d /opt/tiger/10_DELAWARE/1* | bin/tiger_import ~/delaware.db
121 |
122 | The tiger_import process uses a binary utility, shp2sqlite, which is derived
123 | from shp2pgsql, which ships with PostGIS. The shp2sqlite utility converts
124 | .shp and .dbf files into SQL suitable for import into SQLite. This SQL
125 | is then piped into the sqlite3 command line tool, where it is loaded into
126 | temporary tables, and then a set of static SQL statements (kept in the sql/
127 | directory) are used to transform this data and import it into the database
128 | itself.
129 |
130 | === Build metaphones using Ruby metaphone
131 |
132 | $ bin/rebuild_metaphones /opt/tiger/geocoder.db
133 |
134 | This creates the metaphones using Ruby's metaphone function and will produce better geocoding results.
135 |
136 | === Build the indexes
137 |
138 | After the database import is complete, you will want to construct the database
139 | indexes:
140 |
141 | $ bin/build_indexes /opt/tiger/geocoder.db
142 |
143 | This process takes 25 minutes on an EC2 instance (8 CPU minutes), but it's a
144 | *lot* faster than building the indexes incrementally during the import
145 | process. Basically, this process simply feeds SQL statements to the sqlite3
146 | utility to construct the indexes on the existing database.
147 |
148 | === Cluster the database tables (optional)
149 |
150 | As a final optional step, you can cluster the database tables according to
151 | their indexes, which will make the database smaller, and lookups faster. This
152 | process will take an hour or two, and may be a micro-optimization.
153 |
154 | $ bin/rebuild_cluster /opt/tiger/geocoder.db
155 |
156 | You will need as much free disk space to run rebuild_cluster as the database
157 | takes up, because the process essentially reconstructs the database in a new
158 | file, and then it renames the new database over top of the old.
159 |
160 | == Running the unit tests
161 |
162 | From within the source tree, you can run the following:
163 |
164 | $ ruby tests/run.rb
165 |
166 | This tests the libraries, except for the database routines. If you have a
167 | database built, you can run the test harness like so:
168 |
169 | $ ruby tests/run.rb /opt/tiger/geocoder.db
170 |
171 | The full test suite may take 30 or so seconds to run completely.
172 |
173 | == License
174 |
175 | Geocoder::US 2.0 was based on earlier work by Schuyler Erle on
176 | a Perl module of the same name. You can find it at
177 | http://search.cpan.org/~sderle/.
178 |
179 | Geocoder::US 2.0 was written by Schuyler Erle, of Entropy Free LLC,
180 | with the gracious support of FortiusOne, Inc. Please send bug reports,
181 | patches, kudos, etc. to patches at geocoder.us.
182 |
183 | Copyright (c) 2009 FortiusOne, Inc.
184 |
185 | This program is free software: you can redistribute it and/or modify
186 | it under the terms of the GNU General Public License as published by
187 | the Free Software Foundation, either version 3 of the License, or
188 | (at your option) any later version.
189 |
190 | This program is distributed in the hope that it will be useful,
191 | but WITHOUT ANY WARRANTY; without even the implied warranty of
192 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
193 | GNU General Public License for more details.
194 |
195 | You should have received a copy of the GNU General Public License
196 | along with this program. If not, see <http://www.gnu.org/licenses/>.
197 |
198 |
--------------------------------------------------------------------------------
/REST.rdoc:
--------------------------------------------------------------------------------
1 | GET /1.0/geocode/address.json
2 |
3 | The geocode/address endpoint returns the interpolated latitude and longitude of
4 | a US street address or street intersection. When given a US city or ZIP code,
5 | the approximate center point of that place will be returned instead.
6 |
7 | The geocoder attempts to return the most accurate possible result, including,
8 | where possible, correcting the given street type, city, or postal code, and
9 | identifying and correcting misspellings in the street or city name in the given
10 | address.
11 |
12 | Currently, address geocoding only works in the United States.
13 |
14 | Parameters:
15 |
16 | q = a string containing a US street address.
17 |
18 | Returns a GeoJSON feature collection:
19 |
20 | {
21 | "type": "FeatureCollection",
22 | "features": [
23 | {
24 | "type": "Feature",
25 | "properties": {
26 | "number": "41",
27 | "street": "Decatur St",
28 | "city": "San Francisco",
29 | "state": "CA",
30 | "zip": "94103",
31 | "fips_county": "06075",
32 | "score": 1.0,
33 | "precision":"range"
34 | },
35 | "geometry": {
36 | "type": "Point",
37 | "coordinates": [-122.406032, 37.772502]
38 | }
39 | }
40 | ],
41 | "address":"41 Decatur St, San Francisco CA 94103"
42 | }
43 |
44 |
45 | Each address match in the feature collection contains some combination of the
46 | following properties:
47 |
48 | number
49 | The building number of the address. When a building number is not
50 | included in a range stored in the address database, the nearest
51 | known building number will be returned in its place.
52 |
53 | street
54 | The name of the street found in the database that matches the address,
55 | given in a normalized form.
56 |
57 | street1 / street2
58 | When an address is parsed as an intersection, the intersecting streets
59 | are returned as `street1` and `street2` in place of the `number` and
60 | `street` fields.
61 |
62 | city
63 | The city matching the given address. In the US, this is typically
64 | determined from the matching ZIP code, so, for ZIP codes that cover
65 | more than one named place, the results may be different from what you
66 | expect, but will still be suitable for postal addressing.
67 |
68 | state
69 | The two letter postal abbreviation of the state containing the matching
70 | address.
71 |
72 | zip
73 | In the US, the five digit ZIP code of the matching address.
74 |
75 | plus4
76 | In the US, the ZIP+4 extension parsed from the address, if any. This
77 | extension is not actually used in the geocoding process, but is
78 | returned for convenience.
79 |
80 | fips_county
81 | In the US, the FIPS 6-4 code of the county containing the address.
82 |
83 | prenum / sufnum
84 | If the building number has a non-numeric prefix, it will be returned in
85 | `prenum`. Ditto `sufnum` for non-numeric suffixes.
86 |
87 | precision
88 | The qualitative precision of the geocode. The value will be one of
89 | `intersection`, `range`, `street`, `zip`, or `city`.
90 |
91 | score
92 | The percentage of text match between the given address and the geocoded
93 | result, expressed as a float between 0 and 1. A higher score indicates
94 | a closer match. Results with a score below 0.5 should be regarded with
95 | care.
96 |
--------------------------------------------------------------------------------
/TODO.txt:
--------------------------------------------------------------------------------
1 | 1. Check interpolate measure: scale longitude or not?
2 | 5. Intersections...
3 | - import ALL linestrings (even those without ranges)
4 | - throw away internal points on lines that don't have ranges
5 | 7. Documentation (*)
6 | 8. Make SQLite memory cache size an option to the Database constructor
7 | 9. Precision and accuracy measure
8 | 10. Street line set back
9 |
--------------------------------------------------------------------------------
/build/build_indexes:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | BASE=$(dirname $0)
4 | PATH=$PATH:$BASE/bin
5 | SQL="$BASE/../sql"
6 |
7 | # Just run the SQL that constructs the indexes.
8 | sqlite3 $1 < ${SQL}/index.sql
9 |
--------------------------------------------------------------------------------
/build/rebuild_cluster:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | BASE=$(dirname $0)
4 | PATH=$PATH:$BASE/bin
5 | SQL="$BASE/../sql"
6 |
7 | OLD_DB=$1
8 | DATABASE=${OLD_DB}.$$
9 |
10 | [ -r $DATABASE ] && echo "$DATABASE already exists." && exit -1
11 | [ ! -r $OLD_DB ] && echo "Can't read $OLD_DB." && exit -1
12 |
13 | # Create a shiny new database, attach the old one,
14 | # extract the data from it, and then index that.
15 | # Finally, overwrite the old database with the new one.
16 | ( cat ${SQL}/create.sql && \
17 | echo "ATTACH DATABASE '${OLD_DB}' AS old;" && \
18 | cat ${SQL}/cluster.sql && \
19 | echo "DETACH DATABASE old;" && \
20 | cat ${SQL}/index.sql && \
21 | echo "ANALYZE;" ) | sqlite3 $DATABASE \
22 | && mv $DATABASE $OLD_DB
23 |
--------------------------------------------------------------------------------
/build/sql/cluster.sql:
--------------------------------------------------------------------------------
1 | .echo on
2 | -- tune various pragmas (no journal, no fsync, big cache) to make SQLite faster
3 | PRAGMA temp_store=MEMORY;
4 | PRAGMA journal_mode=OFF;
5 | PRAGMA synchronous=OFF;
6 | PRAGMA cache_size=500000;
7 | PRAGMA count_changes=0;
8 | BEGIN TRANSACTION;
9 | -- order the contents of each table by their indexes to reduce
10 | -- the number of disk pages that need to be read on each query.
11 | INSERT INTO place SELECT * FROM old.place ORDER BY zip, priority;
12 | INSERT INTO edge SELECT * FROM old.edge ORDER BY tlid;
13 | INSERT INTO feature SELECT * FROM old.feature ORDER BY street_phone, zip;
14 | INSERT INTO feature_edge SELECT * FROM old.feature_edge ORDER BY fid;
15 | INSERT INTO range SELECT * FROM old.range ORDER BY tlid;
16 | COMMIT;
17 |
--------------------------------------------------------------------------------
/build/sql/convert.sql:
--------------------------------------------------------------------------------
1 | BEGIN;
2 | -- start by indexing the temporary tables created from the input data.
3 | CREATE INDEX featnames_tlid ON tiger_featnames (tlid);
4 | CREATE INDEX addr_tlid ON tiger_addr (tlid);
5 | CREATE INDEX edges_tlid ON tiger_edges (tlid);
6 |
7 | -- generate a summary table matching each edge to one or more ZIPs
8 | -- for those edges that are streets and have a name
9 | CREATE TEMPORARY TABLE linezip AS
10 | SELECT DISTINCT tlid, zip FROM (
11 | SELECT tlid, zip FROM tiger_addr a
12 | UNION
13 | SELECT tlid, zipr AS zip FROM tiger_edges e
14 | WHERE e.mtfcc LIKE 'S%' AND zipr <> "" AND zipr IS NOT NULL
15 | UNION
16 | SELECT tlid, zipl AS zip FROM tiger_edges e
17 | WHERE e.mtfcc LIKE 'S%' AND zipl <> "" AND zipl IS NOT NULL
18 | ) AS whatever;
19 |
20 | CREATE INDEX linezip_tlid ON linezip (tlid);
21 |
22 | -- generate features from the featnames table for each desired edge
23 | -- computing the metaphone hash of the name in the process.
24 |
25 | -- CREATE TEMPORARY TABLE sqlite_sequence (
26 | -- name VARCHAR(255),
27 | -- seq INTEGER);
28 |
29 | CREATE TEMPORARY TABLE feature_bin (
30 | fid INTEGER PRIMARY KEY AUTOINCREMENT,
31 | street VARCHAR(100),
32 | street_phone VARCHAR(5),
33 | paflag BOOLEAN,
34 | zip CHAR(5));
35 |
36 | INSERT OR IGNORE INTO sqlite_sequence (name, seq) VALUES ('feature_bin',0);
37 | UPDATE sqlite_sequence
38 | SET seq=(SELECT max(fid) FROM feature)
39 | WHERE name="feature_bin";
40 |
41 | INSERT INTO feature_bin
42 | SELECT DISTINCT NULL, fullname, metaphone(name,5), paflag, zip
43 | FROM linezip l, tiger_featnames f
44 | WHERE l.tlid=f.tlid AND name <> "" AND name IS NOT NULL;
45 |
46 | CREATE INDEX feature_bin_idx ON feature_bin (street, zip);
47 |
48 | INSERT INTO feature_edge
49 | SELECT DISTINCT fid, f.tlid
50 | FROM linezip l, tiger_featnames f, feature_bin b
51 | WHERE l.tlid=f.tlid AND l.zip=b.zip
52 | AND f.fullname=b.street AND f.paflag=b.paflag;
53 |
54 | -- SELECT min(fid),max(fid) FROM feature_bin;
55 |
56 | INSERT INTO feature
57 | SELECT * FROM feature_bin;
58 |
59 | -- generate edges from the edges table for each desired edge, running
60 | -- a simple compression on the WKB geometry (because they're all
61 | -- linestrings).
62 | INSERT OR IGNORE INTO edge
63 | SELECT l.tlid, compress_wkb_line(the_geom) FROM
64 | (SELECT DISTINCT tlid FROM linezip) AS l, tiger_edges e
65 | WHERE l.tlid=e.tlid AND fullname <> "" AND fullname IS NOT NULL;
66 |
67 | -- generate all ranges from the addr table, stripping off any non-digit
68 | -- prefixes and putting them in a separate column.
69 | INSERT INTO range
70 | SELECT tlid, digit_suffix(fromhn), digit_suffix(tohn),
71 | nondigit_prefix(fromhn), zip, side
72 | FROM tiger_addr;
73 | END;
74 |
75 | DROP TABLE feature_bin;
76 | DROP TABLE linezip;
77 | DROP TABLE tiger_addr;
78 | DROP TABLE tiger_featnames;
79 | DROP TABLE tiger_edges;
80 |
81 |
--------------------------------------------------------------------------------
/build/sql/create.sql:
--------------------------------------------------------------------------------
1 | -- initialize the database tables.
2 | -- 'place' contains the gazetteer of place names.
3 | CREATE TABLE place(
4 | zip CHAR(5),
5 | city VARCHAR(100),
6 | state CHAR(2),
7 | city_phone VARCHAR(5),
8 | lat NUMERIC(9,6),
9 | lon NUMERIC(9,6),
10 | status CHAR(1),
11 | fips_class CHAR(2),
12 | fips_place CHAR(7),
13 | fips_county CHAR(5),
14 | priority char(1));
15 | -- 'edge' stores the line geometries and their IDs.
16 | CREATE TABLE edge (
17 | tlid INTEGER(10) PRIMARY KEY,
18 | geometry BLOB);
19 | -- 'feature' stores the name(s) and ZIP(s) of each edge.
20 | CREATE TABLE feature (
21 | fid INTEGER PRIMARY KEY,
22 | street VARCHAR(100),
23 | street_phone VARCHAR(5),
24 | paflag BOOLEAN,
25 | zip CHAR(5));
26 | -- 'feature_edge' links each edge to a feature.
27 | CREATE TABLE feature_edge (
28 | fid INTEGER,
29 | tlid INTEGER);
30 | -- 'range' stores the address range(s) for each edge.
31 | CREATE TABLE range (
32 | tlid INTEGER(10),
33 | fromhn INTEGER(6),
34 | tohn INTEGER(6),
35 | prenum VARCHAR(12),
36 | zip CHAR(5),
37 | side CHAR(1));
38 |
--------------------------------------------------------------------------------
/build/sql/index.sql:
--------------------------------------------------------------------------------
1 | .echo on
2 | PRAGMA temp_store=MEMORY;
3 | PRAGMA journal_mode=MEMORY;
4 | PRAGMA synchronous=OFF;
5 | PRAGMA cache_size=500000;
6 | PRAGMA count_changes=0;
7 | -- create indexes for all the relevant ways each table is queried.
8 | CREATE INDEX place_city_phone_state_idx ON place (city_phone, state);
9 | CREATE INDEX place_zip_priority_idx ON place (zip, priority);
10 | CREATE INDEX feature_street_phone_zip_idx ON feature (street_phone, zip);
11 | CREATE INDEX feature_edge_fid_idx ON feature_edge (fid);
12 | CREATE INDEX range_tlid_idx ON range (tlid);
13 |
--------------------------------------------------------------------------------
/build/sql/setup.sql:
--------------------------------------------------------------------------------
1 | -- create temporary tables to hold the TIGER/Line data before it's
2 | -- transformed and loaded into the permanent tables.
3 | --
4 | -- this file was made by running 'shp2pgsql -p' on each of the
5 | -- TIGER/Line shapefiles and then massaging the result by hand.
6 | --
7 | PRAGMA temp_store=MEMORY;
8 | PRAGMA journal_mode=MEMORY;
9 | PRAGMA synchronous=OFF;
10 | PRAGMA cache_size=500000;
11 | PRAGMA count_changes=0;
12 | CREATE TEMPORARY TABLE "tiger_edges" (
13 | "statefp" varchar(2),
14 | "countyfp" varchar(3),
15 | "tlid" int8,
16 | "tfidl" int8,
17 | "tfidr" int8,
18 | "mtfcc" varchar(5),
19 | "fullname" varchar(100),
20 | "smid" varchar(22),
21 | "lfromadd" varchar(12),
22 | "ltoadd" varchar(12),
23 | "rfromadd" varchar(12),
24 | "rtoadd" varchar(12),
25 | "zipl" varchar(5),
26 | "zipr" varchar(5),
27 | "featcat" varchar(1),
28 | "hydroflg" varchar(1),
29 | "railflg" varchar(1),
30 | "roadflg" varchar(1),
31 | "olfflg" varchar(1),
32 | "passflg" varchar(1),
33 | "divroad" varchar(1),
34 | "exttyp" varchar(1),
35 | "ttyp" varchar(1),
36 | "deckedroad" varchar(1),
37 | "artpath" varchar(1),
38 | "persist" varchar(1),
39 | "gcseflg" varchar(1),
40 | "offsetl" varchar(1),
41 | "offsetr" varchar(1),
42 | "tnidf" int8,
43 | "tnidt" int8,
44 | "the_geom" blob
45 | );
46 | -- SELECT AddGeometryColumn('','edges','the_geom','-1','MULTILINESTRING',2);
47 | CREATE TEMPORARY TABLE "tiger_featnames" (
48 | "tlid" int8,
49 | "fullname" varchar(100),
50 | "name" varchar(100),
51 | "predirabrv" varchar(15),
52 | "pretypabrv" varchar(50),
53 | "prequalabr" varchar(15),
54 | "sufdirabrv" varchar(15),
55 | "suftypabrv" varchar(50),
56 | "sufqualabr" varchar(15),
57 | "predir" varchar(2),
58 | "pretyp" varchar(3),
59 | "prequal" varchar(2),
60 | "sufdir" varchar(2),
61 | "suftyp" varchar(3),
62 | "sufqual" varchar(2),
63 | "linearid" varchar(22),
64 | "mtfcc" varchar(5),
65 | "paflag" varchar(1));
66 | CREATE TEMPORARY TABLE "tiger_addr" (
67 | "tlid" int8,
68 | "fromhn" varchar(12),
69 | "tohn" varchar(12),
70 | "side" varchar(1),
71 | "zip" varchar(5),
72 | "plus4" varchar(4),
73 | "fromtyp" varchar(1),
74 | "totyp" varchar(1),
75 | "fromarmid" int4,
76 | "toarmid" int4,
77 | "arid" varchar(22),
78 | "mtfcc" varchar(5));
79 |
--------------------------------------------------------------------------------
/build/tiger2009_import:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | TMP="/tmp/tiger-import.$$"
4 | SHPS="edges"
5 | DBFS="featnames addr"
6 | BASE=$(dirname $0)
7 | PATH=$PATH:$BASE
8 | SQL="$BASE/../sql"
9 | HELPER_LIB="$BASE/../lib/geocoder/us/sqlite3.so"
10 | DATABASE=$1
11 | shift
12 |
13 | mkdir -p $TMP || exit 1
14 |
15 | # Initialize the database if it doesn't exist.
16 | [ ! -r $DATABASE ] && cat ${SQL}/{create,place}.sql | sqlite3 $DATABASE
17 |
18 | # Marshal the county directories to import.
19 | #
20 | # If no directory was given on the command-line, read a list from STDIN.
21 | if [ x"$1" = x"" ]; then
22 | cat
23 | else
24 | # Otherwise, find all of the contents of each state directory.
25 | ls -d $1/[0-9]* | while read state; do
26 | ls -d ${state}/[0-9]*
27 | done
28 | fi | while read county; do
29 | echo "--- $county"
30 | # Unpack the county files into the temp directory.
31 | for file in $SHPS $DBFS; do
32 | ZIP=$(ls ${county}/*_${file}.zip 2>/dev/null)
33 | SHP=$(ls ${county}/*_${file}.* 2>/dev/null)
34 | if [ x"$ZIP" != x"" ]; then
35 | unzip -q $ZIP -d $TMP
36 | elif [ x"$SHP" != x"" ]; then
37 | ln -s $SHP $TMP
38 | fi
39 | done
40 | # Generate an SQL stream to feed into the sqlite3 binary.
41 | # Start by loading the helper libs and initializing the temporary tables
42 | # that will hold the TIGER data before ETL.
43 | (echo ".load $HELPER_LIB" && \
44 | cat ${SQL}/setup.sql && \
45 | for file in $SHPS; do
46 | # Convert each Shapefile into SQL statements.
47 | shp2sqlite -aS ${TMP}/*_${file}.shp tiger_${file}
48 | done && \
49 | for file in $DBFS; do
50 | # Convert each DBF into SQL statements likewise.
51 | shp2sqlite -an ${TMP}/*_${file}.dbf tiger_${file}
52 | done && \
53 | cat ${SQL}/convert.sql) | sqlite3 $DATABASE
54 | # Finally, do the transform/load phase (convert.sql)
55 | # and clean up the temporary files.
56 | rm -f $TMP/*
57 | done 2>&1 | tee import-$$.log
58 | rm -rf $TMP
59 |
60 |
--------------------------------------------------------------------------------
/build/tiger_import:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | TMP="/tmp/tiger-import.$$"
4 | SHPS="edges"
5 | DBFS="featnames addr"
6 | BASE=$(dirname $0)
7 | PATH=$PATH:$BASE
8 | SQL="$BASE/sql"
9 | HELPER_LIB="$BASE/../lib/geocoder/us/sqlite3.so"
10 | DATABASE=$1
11 | SOURCE=$2
12 | shift
13 | shift
14 |
15 | mkdir -p $TMP || exit 1
16 |
17 | # Initialize the database if it doesn't exist.
18 | #[ ! -r $DATABASE ] && cat ${SQL}/{create,place}.sql | sqlite3 $DATABASE
19 | [ ! -r $DATABASE ] && cat ${SQL}/create.sql | sqlite3 $DATABASE
20 |
21 | # Marshal the county directories to import.
22 | #
23 | # If an extra argument follows the database and source directory, read a list of county IDs from STDIN.
24 | if [ x"$1" != x"" ]; then
25 | cat
26 | else
27 | # Otherwise, find all of the IDs from the contents of the directory structure.
28 | ls $SOURCE/**/tl_*_edges.zip | while read file; do
29 | file=$(basename $file)
30 | code=${file##tl_????_}
31 | echo ${code%%_edges.zip}
32 | done
33 | fi | sort | while read code; do
34 | echo "--- $code"
35 | # Unpack the county files into the temp directory.
36 | for file in $SHPS $DBFS; do
37 | ZIP=$(ls $SOURCE/**/*_${code}_${file}.zip 2>/dev/null)
38 | SHP=$(ls $SOURCE/**/*_${code}_${file}.* 2>/dev/null)
39 | if [ x"$ZIP" != x"" ]; then
40 | unzip -q $ZIP -d $TMP
41 | elif [ x"$SHP" != x"" ]; then
42 | ln -s $SHP $TMP
43 | fi
44 | done
45 | # Generate an SQL stream to feed into the sqlite3 binary.
46 | # Start by loading the helper libs and initializing the temporary tables
47 | # that will hold the TIGER data before ETL.
48 | (echo ".load $HELPER_LIB" && \
49 | cat ${SQL}/setup.sql && \
50 | for file in $SHPS; do
51 | # Convert each Shapefile into SQL statements.
52 | shp2sqlite -aS ${TMP}/*_${file}.shp tiger_${file}
53 | done && \
54 | for file in $DBFS; do
55 | # Convert each DBF into SQL statements likewise.
56 | shp2sqlite -an ${TMP}/*_${file}.dbf tiger_${file}
57 | done && \
58 | cat ${SQL}/convert.sql) | sqlite3 $DATABASE
59 | # Finally, do the transform/load phase (convert.sql)
60 | # and clean up the temporary files.
61 | rm -f $TMP/*
62 | done 2>&1 | tee import-$$.log
63 | rm -rf $TMP
64 |
65 |
--------------------------------------------------------------------------------
/conf/geocoder-us/geocoder.ru:
--------------------------------------------------------------------------------
# Rackup file for the geocoder-us REST service: loads geocoder/us/rest and
# hands Sinatra::Application to the Rack server.
require 'sinatra'
# Switch off Sinatra's :run and :reload settings before the application code
# is loaded.
disable :run, :reload
require 'geocoder/us/rest'
run Sinatra::Application
5 |
--------------------------------------------------------------------------------
/conf/geocoder-us/unicorn.rb:
--------------------------------------------------------------------------------
# Unicorn configuration for the geocoder-us REST service.

# Number of workers and the user/group they run as.
worker_processes 4
user "www-data", "www-data"

# Listening socket, PID file and log files.
listen "/var/run/geocoder-us/unicorn.sock", :backlog => 64
pid "/var/run/geocoder-us/unicorn.pid"
stderr_path "/var/log/geocoder-us/geocoder-err.log"
stdout_path "/var/log/geocoder-us/geocoder-out.log"

# For debugging purposes every worker also listens on its own loopback port:
# 40000 plus the worker number.
after_fork do |master, child|
  debug_port = 40000 + child.nr
  master.listen("127.0.0.1:#{debug_port}",
                :tries => 1, :delay => 5, :tcp_nopush => true)
end
13 |
--------------------------------------------------------------------------------
/conf/init/geocoder-us.conf:
--------------------------------------------------------------------------------
# Upstart job for the geocoder.us REST server.
description "geocoder.us"

start on runlevel [2345]
stop on runlevel [!2345]

respawn
script
    # Site settings (exports GEOCODER_DB).
    . /etc/default/geocoder-us
    # exec replaces the wrapper shell, so Upstart supervises (and respawns)
    # the unicorn process itself instead of an intermediate sh.
    exec unicorn -c /etc/geocoder-us/unicorn.rb /etc/geocoder-us/geocoder.ru
end script
11 |
--------------------------------------------------------------------------------
/debian/README.Debian:
--------------------------------------------------------------------------------
1 | geocoder-us for Debian
2 | ----------------------
3 |
4 | The Geocoder::US package is a Ruby library that uses a database built from the
5 | US Census Bureau's TIGER/Line data to interpolate a latitude/longitude
6 | coordinate for a given US street address.
7 |
8 | Binary shared objects
9 | ---------------------
10 |
11 | The Geocoder::US module depends on being able to load a native extension module
12 | in its SQLite driver. For this reason, a version of libsqlite-ruby >= 1.3.0 is
13 | needed. The module is built and included in the .deb as `sqlite3.so`, and it is
14 | installed in the same directory as the Ruby modules. This may not be ideal, but
15 | this makes it easy for the Geocoder::US library to find it there; otherwise, a
16 | configuration option would be necessary.
17 |
18 | REST API server
19 | ---------------
20 |
21 | The library's API centers on a single method 'geocode' to the
22 | Geocoder::US::Database class that takes an address string and returns a list of
23 | dicts containing the most likely matches with coordinates.
24 |
25 | The `geocode` method is wrapped in a very simple Sinatra application with a single
26 | endpoint `/geocode` and a single argument `q`, which returns the result of the
27 | geocode method in JSON format.
28 |
29 | The Sinatra web framework does not support running as a daemon on its own, so
30 | the Unicorn web server is used as a container for the application. This package
31 | creates an `/etc/geocoder-us` directory containing two files:
32 |
33 | `/etc/geocoder-us/geocoder.ru` is the "rackup" adapter between Unicorn and
34 | Sinatra and should probably not be changed. This file doesn't have to live in
35 | /etc, but I couldn't figure out where else to put it.
36 |
37 | `/etc/geocoder-us/unicorn.rb` contains the configuration options to run the
38 | Unicorn server. This file as packaged runs the REST server as the www-data user
39 | on the UNIX socket `/var/run/geocoder-us/unicorn.sock`. It *probably* doesn't
40 | need to be changed, but if the server starts doing weird things, different
41 | options to control Unicorn's behavior can be set here.
42 |
43 | The package creates `/var/log/geocoder-us` and `/var/run/geocoder-us`
44 | directories for the Unicorn log files and PID file, respectively, and chowns
45 | them to www-data.
46 |
47 | An Upstart job is also included in `/etc/init/geocoder-us.conf`. It sources
48 | `/etc/default/geocoder-us` before starting Unicorn and respawns the server if
49 | it exits.
50 |
51 | Where to put the database
52 | -------------------------
53 |
54 | The location of the database file should be set in `/etc/default/geocoder-us`.
55 | The package creates a `/var/lib/geocoder-us` directory and configures the
56 | database location by default to be `/var/lib/geocoder-us/geocoder.db`. If you
57 | have an EBS volume containing a file called `geocoder.db`, for example, you can
58 | just mount the volume at `/var/lib/geocoder-us` and then start the server and
59 | all will be well.
60 |
--------------------------------------------------------------------------------
/debian/compat:
--------------------------------------------------------------------------------
1 | 7
2 |
--------------------------------------------------------------------------------
/debian/control:
--------------------------------------------------------------------------------
1 | Source: geocoder-us
2 | Section: ruby
3 | Priority: extra
4 | Maintainer: SimpleGeo Nerds
5 | Uploaders: Schuyler Erle
6 | Build-Depends: debhelper (>= 7), libsqlite3-dev, ruby1.8, cdbs, ruby-pkg-tools
7 | Standards-Version: 3.9.1
8 | Homepage: http://github.com/simplegeo/geocoder/
9 |
10 | Package: geocoder-us
11 | Architecture: any
12 | Depends: ${misc:Depends}, ${shlibs:Depends}, ruby1.8, libsqlite3-ruby (>= 1.3.0), libsinatra-ruby, libjson-ruby, unicorn
13 | Description: A US address geocoder.
14 | A US address geocoder. Requires a suitable database.
15 |
--------------------------------------------------------------------------------
/debian/copyright:
--------------------------------------------------------------------------------
1 | This work was packaged for Debian by:
2 |
3 | Schuyler Erle on Sat, 07 Aug 2010 00:51:40 +0000
4 |
5 | It was downloaded from http://github.com/simplegeo/geocoder/
6 |
7 | Upstream Author(s):
8 |
9 | Schuyler Erle
10 |
11 | Copyright:
12 |
13 | (c) 2009 FortiusOne, Inc.
14 |
15 | License:
16 |
17 | This program is free software: you can redistribute it and/or modify
18 | it under the terms of the GNU General Public License as published by
19 | the Free Software Foundation, either version 3 of the License, or
20 | (at your option) any later version.
21 |
22 | This package is distributed in the hope that it will be useful,
23 | but WITHOUT ANY WARRANTY; without even the implied warranty of
24 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 | GNU General Public License for more details.
26 |
27 | You should have received a copy of the GNU General Public License
28 | along with this program. If not, see <http://www.gnu.org/licenses/>.
29 |
30 |
31 | The Debian packaging is:
32 |
33 | Copyright (C) 2010 SimpleGeo, Inc.
34 |
35 | and is licensed under the GPL version 3, see `/usr/share/common-licenses/GPL-3'.
36 |
--------------------------------------------------------------------------------
/debian/default:
--------------------------------------------------------------------------------
1 | # Defaults for geocoder-us upstart job
2 | # sourced by /etc/init/geocoder-us.conf
3 | # installed at /etc/default/geocoder-us by maintainer scripts
4 |
5 | # Set the location of the geocoder database.
6 | export GEOCODER_DB="/var/lib/geocoder-us/geocoder.db"
7 |
--------------------------------------------------------------------------------
/debian/docs:
--------------------------------------------------------------------------------
1 | History.txt
2 | Manifest.txt
3 | README.rdoc
4 | TODO.txt
5 | TODO.txt
6 |
--------------------------------------------------------------------------------
/debian/geocoder-us.postinst:
--------------------------------------------------------------------------------
#!/bin/sh
# postinst script for geocoder-us
#
# see: dh_installdeb(1)

set -e

# summary of how this script can be called:
#        * <postinst> `configure' <most-recently-configured-version>
#        * <old-postinst> `abort-upgrade' <new version>
#        * <conflictor's-postinst> `abort-remove' `in-favour' <package>
#          <new-version>
#        * <postinst> `abort-remove'
#        * <deconfigured's-postinst> `abort-deconfigure' `in-favour'
#          <failed-install-package> <version> `removing'
#          <conflicting-package> <version>
# for details, see http://www.debian.org/doc/debian-policy/ or
# the debian-policy package


case "$1" in
    configure)
        # make sure that the unicorn workers (which run as www-data) can write
        # their socket, PID file and logs
        chown www-data /var/run/geocoder-us
        chown www-data /var/log/geocoder-us
        # start the Upstart job; because of `set -e` a failure here must not
        # abort the package configuration
        start geocoder-us || /bin/true
    ;;

    abort-upgrade|abort-remove|abort-deconfigure)
    ;;

    *)
        echo "postinst called with unknown argument \`$1'" >&2
        exit 1
    ;;
esac

# dh_installdeb will replace this with shell code automatically
# generated by other debhelper scripts.

#DEBHELPER#

exit 0
44 |
--------------------------------------------------------------------------------
/debian/geocoder-us.prerm:
--------------------------------------------------------------------------------
#!/bin/sh
# prerm script for geocoder-us
#
# see: dh_installdeb(1)

set -e

case "$1" in
    remove|deconfigure)
        # Stop the Upstart job. Under `set -e` a failure (for example when the
        # job is not running) must not abort the removal.
        stop geocoder-us || true
    ;;
    upgrade)
    ;;
    failed-upgrade)
    ;;
    *)
        # Unknown arguments are reported on stderr, but the script still
        # exits successfully.
        echo "prerm called with unknown argument \`$1'" >&2
        exit 0
    ;;
esac

# dh_installdeb will replace this with shell code automatically
# generated by other debhelper scripts.

#DEBHELPER#

exit 0
25 |
--------------------------------------------------------------------------------
/debian/rules:
--------------------------------------------------------------------------------
#!/usr/bin/make -f
# Debian build rules for geocoder-us (CDBS debhelper rules plus the
# ruby-pkg-tools setup.rb class).

include /usr/share/cdbs/1/rules/debhelper.mk
include /usr/share/ruby-pkg-tools/1/class/ruby-setup-rb.mk


# Add here any variable or target overrides you need.

# Build the SQLite helper extension and copy it next to the Ruby sources as
# sqlite3.so. $(MAKE) is used for the recursive call so that flags and the
# jobserver of the parent make are passed down.
build/geocoder-us::
	$(MAKE) -C $(CURDIR)/src/libsqlite3_geocoder
	install -m 0644 $(CURDIR)/src/libsqlite3_geocoder/*.so \
		$(CURDIR)/lib/geocoder/us/sqlite3.so

# Create the data, runtime and log directories inside the package tree.
install/geocoder-us::
	install -d -m 0755 $(DEB_DESTDIR)var/lib/geocoder-us \
		$(DEB_DESTDIR)var/run/geocoder-us \
		$(DEB_DESTDIR)var/log/geocoder-us
18 |
19 |
--------------------------------------------------------------------------------
/debian/source/format:
--------------------------------------------------------------------------------
1 | 3.0 (quilt)
2 |
--------------------------------------------------------------------------------
/demos/api/server.rb:
--------------------------------------------------------------------------------
require 'rubygems'
require 'sinatra'
require 'geocoder/us/database'
require 'json'

# Minimal Sinatra demo for the geocoder:
#
#   GET /geocode.json?q=ADDRESS   matches for ADDRESS as JSON (400 without q)
#   GET /?q=ADDRESS               the same lookup rendered through views/index.erb

set :port, 8080

# One database handle shared by all requests. It is kept in a constant
# because class variables at the top level (the former @@db) raise a
# RuntimeError on Ruby 3.0 and later.
GEOCODER = Geocoder::US::Database.new("/home/sderle/geocoder/california.db")

get '/geocode.json' do
  if params[:q]
    # Label the response as JSON instead of Sinatra's default text/html.
    content_type :json
    GEOCODER.geocode(params[:q]).to_json
  else
    status 400
    "parameter 'q' is missing"
  end
end

get '/' do
  # @records stays nil when no query was given; the view checks for that.
  @records = GEOCODER.geocode(params[:q]) unless params[:q].nil?
  erb :index
end
22 |
--------------------------------------------------------------------------------
/demos/api/views/index.erb:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
11 |
12 |
13 | Geocoder Demo
14 |
15 |
20 |
21 | <% unless @records.nil? %>
22 |
23 |
24 | Match |
25 | Lat |
26 | Lon |
27 | # |
28 | Qual |
29 | Dir |
30 | Type |
31 | Street |
32 | Type |
33 | Dir |
34 | Qual |
35 | City |
36 | St |
37 | ZIP |
38 | |
39 |
40 | <% for record in @records %>
41 |
42 | <%= format("%.2f", record[:score]*100) %>% |
43 | <%= record[:lat].to_s %> |
44 | <%= record[:lon].to_s %> |
45 | <%= record[:prefix] if record[:prefix] %><%= record[:number] %> |
46 | <%= record[:pretyp] %> |
47 | <%= record[:predir] %> |
48 | <%= record[:prequal] %> |
49 | <%= record[:street] %> |
50 | <%= record[:suftyp] %> |
51 | <%= record[:sufdir] %> |
52 | <%= record[:sufqual] %> |
53 | <%= record[:city] %> |
54 | <%= record[:state] %> |
55 | <%= record[:zip] %> |
56 | map |
58 |
59 | <% end %>
60 |
61 | <% end %>
62 |
63 |
64 |
--------------------------------------------------------------------------------
/demos/cli.rb:
--------------------------------------------------------------------------------
require 'geocoder/us/database'
require 'pp'

# Command-line demo: geocodes the address given as the first argument,
# pretty-prints every match and then prints the coordinates of the first one.
abort "usage: #{$0} ADDRESS" if ARGV.empty?

db = Geocoder::US::Database.new("/mnt/tiger2010/geocoder.db", :debug=>true)
result = db.geocode(ARGV[0])
pp(result)

# An address without any match yields an empty result; report that instead
# of failing with NoMethodError on nil.
best = result.first
abort "no match found" if best.nil?

# The longitude is negated so that it can be printed as degrees west.
print "#{best[:lat]} N, #{-best[:lon]} W\n"
8 |
--------------------------------------------------------------------------------
/demos/demo/app/ext/geocodewrap.rb:
--------------------------------------------------------------------------------
require 'rubygems'
require 'date'
require 'fileutils'
require 'geocoder/us/database'
require 'logger'

module Sinatra
  # Sinatra extension that adds the demo's geocoding routes to an application:
  #
  #   GET  /       geocodes params[:address], if present
  #   POST /batch  geocodes column 1 of every data row of a CSV document that
  #                is sent either as the raw request body ("*" between rows,
  #                "|" between columns) or as the file upload :uploaded_csv
  #
  # Both routes render XML, Atom, JSON or HTML depending on params[:format].
  # FasterCSV and JSON are expected to be loaded by the application
  # configuration before a request is served.
  module GeocodeWrap
    attr_accessor :db

    # Called by Sinatra when the extension is registered on +app+.
    def self.registered(app)
      options = {:cache_size => 100000}
      @@db = Geocoder::US::Database.new("/Users/katechapman/usgeocode.db", options)
      # Usage statistics go to a rotating log (10 files of 1,024,000 bytes).
      stats = Logger.new("geocoderstats.log", 10, 1024000)

      app.get '/' do
        unless params[:address].nil?
          begin
            @records = @@db.geocode params[:address]
            stats.debug "Geocoded: 1, Failed: 0, Geocoded At: #{DateTime.now}"
          rescue StandardError => e
            # StandardError only: signals and SystemExit must not be swallowed.
            stats.debug "Geocoded: 1, Failed: 1, Geocoded At: #{DateTime.now}"
            puts e.message
          end
        end

        case params[:format]
        when /xml/
          builder :index
        when /atom/
          builder :atom
        when /json/
          @records.to_json
        else
          erb :index
        end
      end

      app.post '/batch' do
        failed_codes = 0
        total_codes = 0
        puts Time.now
        if params[:uploaded_csv].nil?
          csv_file = request.env["rack.input"].read
          csv = FasterCSV.parse(csv_file, :row_sep => "*", :col_sep => "|")
        else
          upload = params[:uploaded_csv]
          # The client picks the file name. Keep only its last path component
          # so that names such as "../../etc/passwd" cannot leave uploads/.
          filename = File.basename(upload[:filename].to_s)
          halt 400, "invalid file name" if ['', '.', '..'].include?(filename)
          destination = File.join('uploads', filename)
          FileUtils.mkdir_p('uploads/')
          FileUtils.mv(upload[:tempfile].path, destination)
          @filename = filename.gsub(/\.csv/, "")
          # File.read closes the file again; a bare open() leaked the handle.
          csv = FasterCSV.parse(File.read(destination))
        end
        headers = csv[0]

        @records = csv.map do |record|
          # Rows equal to the header row are neither geocoded nor counted.
          next if record == headers
          total_codes += 1
          begin
            result = @@db.geocode record[1]
            if result.empty?
              # Keep unmatched rows in the output with a placeholder entry.
              result[0] = {:lon => nil, :lat => nil, :precision => 'unmatched', :score => 0}
              failed_codes += 1
            end
            # Tag the best match with the row's first column, keyed by that
            # column's header.
            result.first.merge(headers[0] => record[0])
          rescue StandardError => e
            failed_codes += 1
            puts e.message
            next
          end
        end.compact
        puts Time.now
        stats.debug "Geocoded: #{total_codes}, Failed: #{failed_codes},Geocoded At: #{DateTime.now}"

        case params[:format]
        when /xml/
          builder :index
        when /atom/
          builder :atom
        when /json/
          @records.to_json
        else
          erb :index
        end
      end
    end
  end
  register GeocodeWrap
end
85 |
--------------------------------------------------------------------------------
/demos/demo/app/views/index.builder:
--------------------------------------------------------------------------------
# Builder template: writes a <locations> document with one <location> element
# per entry of @records (an empty <locations> element when @records is nil).
# Each location carries the score as a percentage followed by the raw fields.
fields = %w{lat lon number prefix pretyp predir prequal street suftyp sufdir sufqual city state zip}

xml.locations do
  (@records.nil? ? [] : @records).each do |match|
    xml.location do
      xml.score format("%.2f", match[:score]*100)
      fields.each { |name| xml.tag! name, match[name.to_sym] }
    end
  end
end
13 |
14 |
--------------------------------------------------------------------------------
/demos/demo/app/views/index.erb:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
11 |
12 |
13 | Geocoder Demo
14 |
15 |
19 |
23 |
24 | <% unless @records.nil? %>
25 |
26 |
27 | Match |
28 | Precision |
29 | Lat |
30 | Lon |
31 | # |
32 | Qual |
33 | Dir |
34 | Type |
35 | Street |
36 | Type |
37 | Dir |
38 | Qual |
39 | City |
40 | St |
41 | ZIP |
42 | |
43 |
44 | <% for record in @records %>
45 |
46 | <%= format("%.2f", record[:score]*100) %>% |
47 | <%= record[:precision].to_s %> |
48 | <%= record[:lat].to_s %> |
49 | <%= record[:lon].to_s %> |
50 | <%= record[:prefix] if record[:prefix] %><%= record[:number] %> |
51 | <%= record[:pretyp] %> |
52 | <%= record[:predir] %> |
53 | <%= record[:prequal] %> |
54 | <%= record[:street] %> |
55 | <%= record[:suftyp] %> |
56 | <%= record[:sufdir] %> |
57 | <%= record[:sufqual] %> |
58 | <%= record[:city] %> |
59 | <%= record[:state] %> |
60 | <%= record[:zip] %> |
61 | map |
63 |
64 | <% end %>
65 |
66 | <% end %>
67 | <% unless @filename.nil? %>
68 | Atom Feed
69 | <% end %>
70 |
71 |
72 |
--------------------------------------------------------------------------------
/demos/demo/config.ru:
--------------------------------------------------------------------------------
# Rackup file for the demo application: serves GeocomGeocode::GeocodeServer.
require 'rubygems'
require 'sinatra'


# Before the application is loaded, set :run to false and take :env from the
# RACK_ENV environment variable.
# NOTE(review): `default_options` looks like an API of early Sinatra releases;
# confirm it exists in the Sinatra version this demo is run with.
Sinatra::Application.default_options.merge!(
  :run => false,
  :env => ENV['RACK_ENV']
)
require 'geocom_geocode'
run GeocomGeocode::GeocodeServer
11 |
12 |
13 |
--------------------------------------------------------------------------------
/demos/demo/config/bootstraps.rb:
--------------------------------------------------------------------------------
1 | require 'rubygems'
2 |
# Small bootstrap layer for the demo application. It collects the
# configuration (gems, framework settings, data store), loads
# config/geoenvironment.rb and then requires the application's own code.
module BootStraps

  # Records every method call made on it (through method_missing) so that the
  # same calls can be replayed later on the real application class.
  class Framework

    def initialize
      # method name => list of argument lists, in call order
      @methods = {}
    end

    # Replays every recorded call on +app+, e.g. a recorded
    # <tt>set :port, 4567</tt> becomes <tt>app.set(:port, 4567)</tt>.
    def apply_settings!(app)
      @methods.each_pair do |method, calls|
        calls.each { |arg_set| app.send(method, *arg_set) }
      end
    end

    # Stores the call instead of raising NoMethodError. Returns the list of
    # argument lists recorded so far for +method+.
    def method_missing(method, *args)
      (@methods[method] ||= []) << args
    end
  end


  # Holds an optional block that knows how to connect to the data store.
  class DataStore
    # Registers +block+ as the action performed by #connect.
    def connect_action(&block)
      @connect_action = block
    end

    #TODO raise UndefinedConnectAction
    # Runs the registered connect action and returns its result; does nothing
    # (returns nil) when no action was registered.
    def connect
      @connect_action.call if @connect_action
    end
  end

  # Settings object that is yielded to Initializer.configure.
  class Configuration
    attr_accessor :db, :global, :default_env, :vendor_dir, :lib_paths, :framework, :vendored
    attr_reader :gems

    def initialize
      @framework = Framework.new
      # Start with an empty DataStore so that config.db.connect is always
      # callable; it is a no-op until a connect_action is registered.
      @db = DataStore.new
      @gems = {}
      @global = {}
      @default_env = 'production'
      @vendor_dir = File.join(root, 'vendor')
      @lib_paths = []
      @vendored = false
    end

    # Current environment name: RACK_ENV, which is set to default_env first
    # when it is not set yet.
    def env
      ENV['RACK_ENV'] ||= default_env
    end

    def env=(val)
      ENV['RACK_ENV'] = val
    end

    # Application root: the parent of the directory containing this file.
    def root
      File.join(File.expand_path(File.dirname(__FILE__)), "..")
    end

    # Activates and requires a gem. Takes the same arguments as Kernel#gem:
    # the gem name followed by optional version requirements. The last
    # requirement (nil when none was given) is remembered in #gems.
    def gem(*args)
      name = args.first
      # Without this check the gem name itself was stored as its version.
      @gems[name] = args.size > 1 ? args.last : nil

      #its concievable that vendored could be changed mid config
      use_vendor if vendored
      Kernel.send(:gem, *args)
      require name
    end

    private

    # Makes RubyGems look into the vendor directory first.
    def use_vendor
      prepend_gem_path!(vendor_dir)
      # Drop RubyGems' cached paths so the new GEM_PATH takes effect.
      Gem.clear_paths
    end

    # Puts +path+ in front of the entries already in GEM_PATH instead of
    # replacing them. Safe to call repeatedly: +path+ is never listed twice.
    def prepend_gem_path!(path)
      current = ENV['GEM_PATH'].to_s.split(File::PATH_SEPARATOR)
      ENV['GEM_PATH'] = ([path] + (current - [path])).join(File::PATH_SEPARATOR)
    end
  end

  # Entry point: owns the single Configuration instance and boots the app.
  class Initializer
    # Class-level instance variable rather than a class variable, so the
    # state is not shared through the inheritance tree.
    @config = Configuration.new

    class << self
      # Yields the configuration once and then freezes it; later calls are
      # ignored.
      def configure
        unless @config.frozen?
          yield @config
          @config.freeze
        end
      end

      def config
        @config
      end

      # Loads config/geoenvironment.rb and then every Ruby file below lib/
      # and app/ext/.
      def boot!
        require File.join(@config.root, 'config', 'geoenvironment.rb')
        require_libs
      end


      private

      def require_libs
        [
          subdir_expansion('lib'),
          subdir_expansion(File.join('app','ext'))
        ].each do |p|
          require_all(p)
        end
      end

      # Requires every file matching the glob +path+. The list is sorted
      # because Dir[] does not guarantee an order on every platform.
      def require_all(path)
        Dir[path].sort.each { |f| require f }
      end

      def subdir_expansion(subdir)
        File.join(@config.root, subdir, '**', '*.rb')
      end
    end
  end
end
126 |
127 | BootStraps::Initializer.boot!
128 | Straps = BootStraps::Initializer.config
129 |
130 |
131 |
--------------------------------------------------------------------------------
/demos/demo/config/geoenvironment.rb:
--------------------------------------------------------------------------------
1 |
# Environment configuration for the demo application: which gems to load and
# which settings to record for the Sinatra application.
BootStraps::Initializer.configure do |conf|

  #Use the vendor directory
  conf.vendored    = true
  conf.default_env = 'production'

  %w{sinatra fastercsv json}.each { |lib| conf.gem lib }

  # Settings recorded on the framework object, in this order.
  sinatra = conf.framework
  [
    [:root,         conf.root],
    [:environment,  conf.env],
    [:raise_errors, true],
    [:views,        File.join('app','views')],
    [:server,       'mongrel'],
    [:static,       true],
    [:logging,      true],
    [:port,         4567],
    [:lock,         false]
  ].each { |key, value| sinatra.set key, value }

end
26 |
--------------------------------------------------------------------------------
/demos/demo/geocoder_helper.rb:
--------------------------------------------------------------------------------
1 | require 'rubygems'
2 | require 'geocoder/us/database'
3 | require 'fastercsv'
4 | require 'json'
5 |
6 |
7 |
# NOTE(review): this empty `initialize` is defined at the top level, outside
# any class, so it becomes a private method of Object and replaces the
# default Object#initialize. It looks like leftover scaffolding for a helper
# class that was never written; confirm nothing relies on it and remove it.
def initialize



end
13 |
--------------------------------------------------------------------------------
/demos/demo/geocom_geocode.rb:
--------------------------------------------------------------------------------
1 | require 'config/bootstraps'
2 |
module GeocomGeocode
  # Modular Sinatra application for the demo. The routes come from the
  # Sinatra::GeocodeWrap extension; the settings recorded in the Straps
  # configuration are replayed on this class at configure time.
  class GeocodeServer < Sinatra::Base
    register Sinatra::GeocodeWrap

    configure { Straps.framework.apply_settings!(self) }
  end
end
11 |
--------------------------------------------------------------------------------
/demos/demo/main.rb:
--------------------------------------------------------------------------------
# Stand-alone entry point for the demo application.
require 'geocom_geocode'

# Start the application through Sinatra's own run! method.
GeocomGeocode::GeocodeServer.run!
4 |
--------------------------------------------------------------------------------
/demos/demo/rakefile.rb:
--------------------------------------------------------------------------------
require 'rake'

task :boot_env do
  # Load the application environment.
  require 'config/bootstraps'
end

namespace :db do
  task :connect => :boot_env do
    # Run the connect action of the configured data store.
    BootStraps::Initializer.config.db.connect
  end

  task :migrate => :connect do
    # Run the ActiveRecord migrations from db/migrate/ verbosely, with the
    # ActiveRecord log going to STDOUT.
    ActiveRecord::Base.logger = Logger.new(STDOUT)
    ActiveRecord::Migration.verbose = true
    ActiveRecord::Migrator.migrate('db/migrate/', nil)
  end
end
--------------------------------------------------------------------------------
/demos/demo/tmp/restart.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simplegeo/geocoder/5c7f678a1abe79c77c36153fbcee32e4dec24e53/demos/demo/tmp/restart.txt
--------------------------------------------------------------------------------
/demos/parse.rb:
--------------------------------------------------------------------------------
require 'geocoder/us/address'
require 'pp'

# Build an Address from the first command-line argument and pretty-print it.
address = Geocoder::US::Address.new(ARGV[0])
pp address
5 |
--------------------------------------------------------------------------------
/demos/simpledemo/views/index.builder:
--------------------------------------------------------------------------------
# Builder template: writes a <locations> document with one <location> element
# per entry of @records (an empty <locations> element when @records is nil).
# Each location carries the score as a percentage followed by the raw fields.
fields = %w{lat lon number prefix pretyp predir prequal street suftyp sufdir sufqual city state zip}

xml.locations do
  (@records.nil? ? [] : @records).each do |match|
    xml.location do
      xml.score format("%.2f", match[:score]*100)
      fields.each { |name| xml.tag! name, match[name.to_sym] }
    end
  end
end
13 |
14 |
--------------------------------------------------------------------------------
/demos/simpledemo/views/index.erb:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
11 |
12 |
13 | Geocoder Demo
14 |
15 |
19 |
23 |
24 | <% unless @records.nil? %>
25 |
26 |
27 | Match |
28 | Lat |
29 | Lon |
30 | # |
31 | Qual |
32 | Dir |
33 | Type |
34 | Street |
35 | Type |
36 | Dir |
37 | Qual |
38 | City |
39 | St |
40 | ZIP |
41 | |
42 |
43 | <% for record in @records %>
44 |
45 | <%= format("%.2f", record[:score]*100) %>% |
46 | <%= record[:lat].to_s %> |
47 | <%= record[:lon].to_s %> |
48 | <%= record[:prefix] if record[:prefix] %><%= record[:number] %> |
49 | <%= record[:pretyp] %> |
50 | <%= record[:predir] %> |
51 | <%= record[:prequal] %> |
52 | <%= record[:street] %> |
53 | <%= record[:suftyp] %> |
54 | <%= record[:sufdir] %> |
55 | <%= record[:sufqual] %> |
56 | <%= record[:city] %> |
57 | <%= record[:state] %> |
58 | <%= record[:zip] %> |
59 | map |
61 |
62 | <% end %>
63 |
64 | <% end %>
65 | <% unless @filename.nil? %>
66 | Atom Feed
67 | <% end %>
68 |
69 |
70 |
--------------------------------------------------------------------------------
/demos/simpledemo/ws.rb:
--------------------------------------------------------------------------------
require 'rubygems'
require 'sinatra'
require 'geocoder/us/database'
require 'fastercsv'
require 'json'
require 'open-uri'
require 'uri'

# Simple Sinatra demo for the geocoder:
#
#   GET  /?address=ADDRESS     geocodes one address
#   GET  /link.FORMAT          geocodes column 1 of a CSV document taken from
#                              ?url=HTTP_URL or from uploads/FILENAME.csv
#   POST /batch                geocodes column 1 of the CSV document in the
#                              request body ("*" between rows, "|" between
#                              columns)

set :port, 8080

# One database handle shared by all requests. It is kept in a constant
# because class variables at the top level (the former @@db) raise a
# RuntimeError on Ruby 3.0 and later.
GEOCODER = Geocoder::US::Database.new("/fortiusone/geocoder/geocoder.db")

get '/' do
  @records = GEOCODER.geocode(params[:address]) unless params[:address].nil?

  case params[:format]
  when /xml/
    builder :index
  when /atom/
    builder :atom
  else
    erb :index
  end
end

get '/link.:format' do
  # params[:url] is tested directly: params.include?(:url) is always false
  # on Sinatra versions that store the parameter names as strings.
  if params[:url]
    begin
      uri = URI.parse(params[:url])
    rescue URI::InvalidURIError
      halt 400, "parameter 'url' is not a valid URL"
    end
    # Only fetch real HTTP(S) URLs. Passing the raw parameter to Kernel#open
    # would also run "|command" strings and read arbitrary local files.
    halt 400, "parameter 'url' must be an http(s) URL" unless uri.is_a?(URI::HTTP)
    csv_text = uri.open { |io| io.read }
  else
    # Keep only the last path component so that the name cannot leave
    # the uploads/ directory.
    name = File.basename(params[:filename].to_s)
    csv_text = File.read(File.join('uploads', "#{name}.csv"))
  end
  csv = FasterCSV.parse(csv_text)
  headers = csv[0]

  @records = csv.map do |record|
    next if record == headers
    begin
      # Best match only; rows without a match yield nil and are dropped by
      # the compact below.
      GEOCODER.geocode(record[1]).first
    rescue StandardError => e
      puts e.message
      next
    end
  end.compact

  case params[:format]
  when /atom/
    builder :atom
  when /xml/
    builder :index
  else
    erb :index
  end
end


post '/batch' do
  csv_file = request.env["rack.input"].read
  csv = FasterCSV.parse(csv_file, :row_sep => "*", :col_sep => "|")
  headers = csv[0]

  @records = csv.map do |record|
    next if record == headers
    begin
      best = GEOCODER.geocode(record[1]).first
      # Rows without any match are left out of the result.
      next if best.nil?
      # Tag the match with the row's first column, keyed by that column's
      # header.
      best.merge(headers[0] => record[0])
    rescue StandardError => e
      puts e.message
      next
    end
  end.compact

  case params[:format]
  when /xml/
    builder :index
  when /atom/
    builder :atom
  when /json/
    content_type :json
    @records.to_json
  else
    erb :index
  end
end
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
# Builds the HTML documentation from the reStructuredText sources.

# all and clean are commands, not files: keep them working even if a file
# with one of these names appears in this directory.
.PHONY: all clean

# Remove a half-written .html file when rst2html fails, so that the next run
# does not treat it as up to date.
.DELETE_ON_ERROR:

all: lookup.html parsing.html

%.html: %.rst voidspace.css
	rst2html --stylesheet-path=voidspace.css --no-compact-lists $< > $@

clean:
	rm -f *.html
8 |
--------------------------------------------------------------------------------
/doc/html4css1.css:
--------------------------------------------------------------------------------
1 | /*
2 | :Author: David Goodger
3 | :Contact: goodger@users.sourceforge.net
4 | :Date: $Date: 2005-12-18 01:56:14 +0100 (Sun, 18 Dec 2005) $
5 | :Revision: $Revision: 4224 $
6 | :Copyright: This stylesheet has been placed in the public domain.
7 |
8 | Default cascading style sheet for the HTML output of Docutils.
9 |
10 | See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to
11 | customize this style sheet.
12 | */
13 |
14 | /* used to remove borders from tables and images */
15 | .borderless, table.borderless td, table.borderless th {
16 | border: 0 }
17 |
18 | table.borderless td, table.borderless th {
19 | /* Override padding for "table.docutils td" with "! important".
20 | The right padding separates the table cells. */
21 | padding: 0 0.5em 0 0 ! important }
22 |
23 | .first {
24 | /* Override more specific margin styles with "! important". */
25 | margin-top: 0 ! important }
26 |
27 | .last, .with-subtitle {
28 | margin-bottom: 0 ! important }
29 |
30 | .hidden {
31 | display: none }
32 |
33 | a.toc-backref {
34 | text-decoration: none ;
35 | color: black }
36 |
37 | blockquote.epigraph {
38 | margin: 2em 5em ; }
39 |
40 | dl.docutils dd {
41 | margin-bottom: 0.5em }
42 |
43 | /* Uncomment (and remove this text!) to get bold-faced definition list terms
44 | dl.docutils dt {
45 | font-weight: bold }
46 | */
47 |
48 | div.abstract {
49 | margin: 2em 5em }
50 |
51 | div.abstract p.topic-title {
52 | font-weight: bold ;
53 | text-align: center }
54 |
55 | div.admonition, div.attention, div.caution, div.danger, div.error,
56 | div.hint, div.important, div.note, div.tip, div.warning {
57 | margin: 2em ;
58 | border: medium outset ;
59 | padding: 1em }
60 |
61 | div.admonition p.admonition-title, div.hint p.admonition-title,
62 | div.important p.admonition-title, div.note p.admonition-title,
63 | div.tip p.admonition-title {
64 | font-weight: bold ;
65 | font-family: sans-serif }
66 |
67 | div.attention p.admonition-title, div.caution p.admonition-title,
68 | div.danger p.admonition-title, div.error p.admonition-title,
69 | div.warning p.admonition-title {
70 | color: red ;
71 | font-weight: bold ;
72 | font-family: sans-serif }
73 |
74 | /* Uncomment (and remove this text!) to get reduced vertical space in
75 | compound paragraphs.
76 | div.compound .compound-first, div.compound .compound-middle {
77 | margin-bottom: 0.5em }
78 |
79 | div.compound .compound-last, div.compound .compound-middle {
80 | margin-top: 0.5em }
81 | */
82 |
83 | div.dedication {
84 | margin: 2em 5em ;
85 | text-align: center ;
86 | font-style: italic }
87 |
88 | div.dedication p.topic-title {
89 | font-weight: bold ;
90 | font-style: normal }
91 |
92 | div.figure {
93 | margin-left: 2em ;
94 | margin-right: 2em }
95 |
96 | div.footer, div.header {
97 | clear: both;
98 | font-size: smaller }
99 |
100 | div.line-block {
101 | display: block ;
102 | margin-top: 1em ;
103 | margin-bottom: 1em }
104 |
105 | div.line-block div.line-block {
106 | margin-top: 0 ;
107 | margin-bottom: 0 ;
108 | margin-left: 1.5em }
109 |
110 | div.sidebar {
111 | margin-left: 1em ;
112 | border: medium outset ;
113 | padding: 1em ;
114 | background-color: #ffffee ;
115 | width: 40% ;
116 | float: right ;
117 | clear: right }
118 |
119 | div.sidebar p.rubric {
120 | font-family: sans-serif ;
121 | font-size: medium }
122 |
123 | div.system-messages {
124 | margin: 5em }
125 |
126 | div.system-messages h1 {
127 | color: red }
128 |
129 | div.system-message {
130 | border: medium outset ;
131 | padding: 1em }
132 |
133 | div.system-message p.system-message-title {
134 | color: red ;
135 | font-weight: bold }
136 |
137 | div.topic {
138 | margin: 2em }
139 |
140 | h1.section-subtitle, h2.section-subtitle, h3.section-subtitle,
141 | h4.section-subtitle, h5.section-subtitle, h6.section-subtitle {
142 | margin-top: 0.4em }
143 |
144 | h1.title {
145 | text-align: center }
146 |
147 | h2.subtitle {
148 | text-align: center }
149 |
150 | hr.docutils {
151 | width: 75% }
152 |
153 | img.align-left {
154 | clear: left }
155 |
156 | img.align-right {
157 | clear: right }
158 |
159 | ol.simple, ul.simple {
160 | margin-bottom: 1em }
161 |
162 | ol.arabic {
163 | list-style: decimal }
164 |
165 | ol.loweralpha {
166 | list-style: lower-alpha }
167 |
168 | ol.upperalpha {
169 | list-style: upper-alpha }
170 |
171 | ol.lowerroman {
172 | list-style: lower-roman }
173 |
174 | ol.upperroman {
175 | list-style: upper-roman }
176 |
177 | p.attribution {
178 | text-align: right ;
179 | margin-left: 50% }
180 |
181 | p.caption {
182 | font-style: italic }
183 |
184 | p.credits {
185 | font-style: italic ;
186 | font-size: smaller }
187 |
188 | p.label {
189 | white-space: nowrap }
190 |
191 | p.rubric {
192 | font-weight: bold ;
193 | font-size: larger ;
194 | color: maroon ;
195 | text-align: center }
196 |
197 | p.sidebar-title {
198 | font-family: sans-serif ;
199 | font-weight: bold ;
200 | font-size: larger }
201 |
202 | p.sidebar-subtitle {
203 | font-family: sans-serif ;
204 | font-weight: bold }
205 |
206 | p.topic-title {
207 | font-weight: bold }
208 |
209 | pre.address {
210 | margin-bottom: 0 ;
211 | margin-top: 0 ;
212 | font-family: serif ;
213 | font-size: 100% }
214 |
215 | pre.literal-block, pre.doctest-block {
216 | margin-left: 2em ;
217 | margin-right: 2em ;
218 | background-color: #eeeeee }
219 |
220 | span.classifier {
221 | font-family: sans-serif ;
222 | font-style: oblique }
223 |
224 | span.classifier-delimiter {
225 | font-family: sans-serif ;
226 | font-weight: bold }
227 |
228 | span.interpreted {
229 | font-family: sans-serif }
230 |
231 | span.option {
232 | white-space: nowrap }
233 |
234 | span.pre {
235 | white-space: pre }
236 |
237 | span.problematic {
238 | color: red }
239 |
240 | span.section-subtitle {
241 | /* font-size relative to parent (h1..h6 element) */
242 | font-size: 80% }
243 |
244 | table.citation {
245 | border-left: solid 1px gray;
246 | margin-left: 1px }
247 |
248 | table.docinfo {
249 | margin: 2em 4em }
250 |
251 | table.docutils {
252 | margin-top: 0.5em ;
253 | margin-bottom: 0.5em }
254 |
255 | table.footnote {
256 | border-left: solid 1px black;
257 | margin-left: 1px }
258 |
259 | table.docutils td, table.docutils th,
260 | table.docinfo td, table.docinfo th {
261 | padding-left: 0.5em ;
262 | padding-right: 0.5em ;
263 | vertical-align: top }
264 |
265 | table.docutils th.field-name, table.docinfo th.docinfo-name {
266 | font-weight: bold ;
267 | text-align: left ;
268 | white-space: nowrap ;
269 | padding-left: 0 }
270 |
271 | h1 tt.docutils, h2 tt.docutils, h3 tt.docutils,
272 | h4 tt.docutils, h5 tt.docutils, h6 tt.docutils {
273 | font-size: 100% }
274 |
275 | tt.docutils {
276 | background-color: #eeeeee }
277 |
278 | ul.auto-toc {
279 | list-style-type: none }
280 |
--------------------------------------------------------------------------------
/doc/lookup.rst:
--------------------------------------------------------------------------------
1 | .. _lookup:
2 |
3 | ===================================
4 | Geocoder.us Address Lookup Strategy
5 | ===================================
6 |
7 | :Author: Schuyler Erle
8 | :Contact: schuyler at geocoder dot us
9 | :Created: 2009/03/13
10 | :Edited: 2009/03/14
11 |
12 | Definitions
13 | -----------
14 |
15 | Edge
16 | Database representation of a street segment, consisting of a linestring
17 | geometry and an edge ID. Each edge relates to many ranges and many features
18 | through its ID.
19 |
20 | Feature
21 | Database representation of a named street, consisting of street name
22 | and modifier elements, a reference ZIP code, and a primary/alternate flag.
23 |
24 | Range
25 | Database representation of a range of address numbers on a given
26 | street, consisting of range start and end numbers, an optional prefix
27 | ending with a non-numeric character, and a delivery ZIP code for that
28 | range.
29 |
30 | Place
31 | Database representation of a ZIP code, consisting of a city name,
32 | state abbreviation, a ZIP code, and a primary/alternate flag.
33 |
34 | Address record
35 | A set consisting of exactly one edge, one feature, and one range, related
36 | through the edge ID.
37 |
38 | Address query
39 | An ordered set of {Number Prefix, Number, Directional Prefix, Type Prefix,
40 | Qualifier Prefix, Street Name, Qualifier Suffix, Type Suffix, Directional
41 | Suffix, City, State, ZIP}. All of the elements are optional except Number and
42 | Street Name. Either ZIP or City must also be present. The State element
43 | and all of the prefix and suffix elements are assumed to be normalized to
44 | standard postal abbreviations.
45 |
46 | Address string
47 | A string including some or all of the elements of an address.
48 |
49 | Address Lookup Strategy
50 | -----------------------
51 |
52 | 1. Given an address query, initialize an empty set of candidate places,
53 | and an empty set of candidate address records.
54 |
55 | #. If a ZIP was given, look up `the place from the ZIP`_, and add the
56 | place, if any, to the candidate place set.
57 |
58 | #. If a city was given, look up all `the places matching the metaphone hash
59 | of the city name`_, and add them, if any, to the candidate place set.
60 |
61 | #. Generate a unique set of ZIPs from the set of candidate places, since a ZIP
62 | may have one or more names associated with it.
63 |
64 | #. Generate `a list of candidate address records`_ by fetching all the street
65 | features matching the metaphone hash of the street name and one of the ZIPs
66 | in the query set, along with the ranges matching the edge ID of each
67 | feature, where the given number is in the range. The edge does not
68 | need to be fetched yet.
69 |
70 | #. If the lookup generates no results, optionally generate `more candidate
71 | records`_ by looking up all the street features matching the metaphone hash
72 | of the street name, along with the ranges matching the edge ID of each
73 | feature, where the given number is in the range. This may be a very time
74 | consuming database query, because some street names are quite common.
75 |
76 | #. Score each of the candidate records as follows:
77 |
78 | a. Score one point for every provided element of the address query that it
79 | matches exactly.
80 | #. Optionally, compute the scaled Damerau-Levenshtein distance (or
81 | alternately the simple Levenshtein distance) between each provided
82 | element of the address query and the corresponding element in the
83 | candidate. Score one minus the scaled distance, which yields a fraction
84 | of a point.
85 | #. Score one point if the parity of starting range number matches the parity
86 | of the queried address number.
87 | #. Note that the maximum possible score is equal to the number of provided
88 | elements in the address query. Divide the score by the maximum possible.
89 | This is the confidence value of the candidate.
90 |
91 | #. Sort the candidate address records by confidence. Retain only the records
92 | that share the highest confidence as candidates.
93 |
94 | #. Fetch `the edges and primary feature names`_ matching the edge IDs of
95 | the remaining candidate address records.
96 |
97 | #. For each remaining candidate record:
98 |
99 | a. Replace the candidate record feature elements with those of the
100 | primary feature name for that edge.
101 | #. Fetch `all of the ranges for the edge ID`_ of the candidate, sorted by
102 | starting number.
103 | #. Compute the sum of the differences of the starting and ending house
104 | number for each range. This is the total number width of the edge.
105 | #. Take the difference between the candidate starting number and the lowest
106 | starting number, add the difference between the queried number and the
107 | candidate starting number, and divide by the total number width. This is
108 | the interpolation distance.
109 | #. Optionally, find the local UTM zone and project the edge into it.
110 | #. Find the point along the line at the interpolation distance.
111 | #. If the edge was projected, unproject the point.
112 | #. Assign the point as the geocoded location of the query to the candidate
113 | record.
114 |
115 | #. Construct a set of result ZIPs from the remaining candidates, and look up
116 | `the primary name and state for each ZIP`_ in the set. Assign the matching
117 | primary city and state to each candidate.
118 |
119 | #. Return the set of candidate records as the result of the query.
120 |
121 | SQL Statements
122 | --------------
123 |
124 | the place from the ZIP
125 | ~~~~~~~~~~~~~~~~~~~~~~~
126 |
127 | ::
128 |
129 | SELECT * FROM place WHERE zip = '...';
130 |
131 | the places matching the metaphone hash of the city name
132 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
133 |
134 | ::
135 |
136 | SELECT * FROM place WHERE city_phone = metaphone('...');
137 |
138 | a list of candidate address records
139 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
140 |
141 | ::
142 |
143 | SELECT feature.*, range.* FROM feature, range
144 | WHERE name_phone = metaphone('...') AND feature.zip IN (...)
145 | AND range.tlid = feature.tlid
146 | AND fromhn <= ... AND tohn >= ...;
147 |
148 | more candidate records
149 | ~~~~~~~~~~~~~~~~~~~~~~
150 |
151 | ::
152 |
153 | SELECT feature.*, range.* FROM feature, range
154 | WHERE name_phone = metaphone('...')
155 | AND range.tlid = feature.tlid
156 | AND fromhn <= ... AND tohn >= ...;
157 |
158 | the edges and primary feature names
159 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
160 |
161 | ::
162 |
163 | SELECT feature.*, edge.* FROM feature, edge
164 | WHERE feature.tlid = ... AND paflag = 'P'
165 | AND edge.tlid = feature.tlid;
166 |
167 | -- or
168 |
169 | SELECT feature.*, edge.* FROM feature, edge
170 | WHERE feature.tlid IN (...)
171 | AND paflag = 'P'
172 | AND edge.tlid = feature.tlid;
173 |
174 | all of the ranges for the edge ID
175 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
176 |
177 | ::
178 |
179 | SELECT * FROM range WHERE range.tlid = ...;
180 |
181 | -- or
182 |
183 | SELECT * FROM range WHERE range.tlid IN (...);
184 |
185 | the primary name and state for each ZIP
186 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
187 |
188 |
189 | ::
190 |
191 | SELECT * FROM place WHERE zip IN (...) AND paflag = 'P';
192 |
193 | = 30 =
194 |
--------------------------------------------------------------------------------
/doc/parsing.rst:
--------------------------------------------------------------------------------
1 | .. _parsing:
2 |
3 | ====================================
4 | Geocoder.us Address Parsing Strategy
5 | ====================================
6 |
7 | :Author: Schuyler Erle
8 | :Contact: schuyler at geocoder dot us
9 | :Created: 2009/03/18
10 | :Edited: 2009/03/18
11 |
12 | Structured address components
13 | -----------------------------
14 |
15 | Unless otherwise labeled as "required", all components of a structured address
16 | are optional.
17 |
18 | prenum
19 | The alphanumeric prefix portion of a house or building number. (e.g. "32-"
20 | in "32-20 Jackson St")
21 |
22 | number
23 | The house or building number component. Required.
24 |
25 | sufnum
26 | The alphanumeric suffix portion of a house or building number. (e.g. "23B
27 | Baker St")
28 |
29 | fraction
30 | The fractional portion of a house or building number. (e.g. "23 1/2 Baker
31 | St")
32 |
33 | predir
34 | The prefixed street directional component. (e.g. "N", "SW")
35 |
36 | prequal
37 | The prefixed street qualifier component. (e.g. "Old", "Business")
38 |
39 | pretyp
40 | The prefixed street type component. (e.g. "US Hwy")
41 |
42 | street
43 | The main portion of the street name. Required.
44 |
45 | suftyp
46 | The suffixed street type component. (e.g. "Rd", "Ave")
47 |
48 | sufqual
49 | The suffixed street qualifier component.
50 |
51 | sufdir
52 | The suffixed street directional component.
53 |
54 | unittyp
55 | The unit type, if any. (e.g. "Fl", "Apt", "Ste")
56 |
57 | unit
58 | The unit identifier, if any.
59 |
60 | city
61 | The name of the city or locale.
62 |
63 | state
64 | The two letter postal state code.
65 |
66 | zip
67 | The zero padded, five digit ZIP postal code.
68 |
69 | plus4
70 | The zero padded, four digit ZIP+4 postal extension.
71 |
72 | Parsing Strategy
73 | ----------------
74 |
75 | Each component will have a regular expression, and a maximum
76 | count. Components are ordered from first to last.
77 |
78 | Those components drawn from finite lists - directionals, qualifiers,
79 | types, and states - will have regular expressions composed of the union of
80 | the corresponding list.
81 |
82 | A *parse* will consist of a component state, a penalty count, a list of
83 | component strings and a counter for each component.
84 |
85 | 1. Initialize an input stack, consisting of a single blank parse.
86 |
87 | #. Split the address string on whitespace into tokens.
88 |
89 | #. For each token:
90 |
91 | A. For each component:
92 |
93 | i. Test the token against the regular expression.
94 | #. If the regexp matches, add the component name to a list of matching
95 | components.
96 |
97 | #. Initialize an empty output stack.
98 |
99 | #. For each parse in the input stack:
100 |
101 | i. Copy the current parse, increment the penalty count on the new parse,
102 | and add it to the output stack.
103 | #. For each matching component for the current token:
104 |
105 | a. If the component state for this parse is later than the
106 | matching component, continue to the next matching component.
107 | #. If the component count for this parse state is equal to the
108 | maximum count for the component, continue to the next matching
109 | component.
110 | #. Otherwise, copy the parse state, and append the token to the
111 | component string, with a leading space, if necessary.
112 | #. Increment the matching component counter for the current parse.
113 | #. Set the component state of the current parse to the matching
114 | component.
115 | #. Push the new parse on to the output stack.
116 |
117 | #. Replace the input stack with the output stack.
118 |
119 | #. Post-process number prefix/suffixes and ZIP+4 extensions.
120 |
121 | #. Score each parse by the number of components with non-empty strings,
122 | minus the penalty count of the parse.
123 |
124 | #. Return the sorted list of parsed string lists.
125 |
126 |
--------------------------------------------------------------------------------
/doc/voidspace.css:
--------------------------------------------------------------------------------
1 | /*
2 | :Authors: Ian Bicking, Michael Foord
3 | :Contact: fuzzyman@voidspace.org.uk
4 | :Date: 2005/08/26
5 | :Version: 0.1.0
6 | :Copyright: This stylesheet has been placed in the public domain.
7 | :Modified By: Schuyler Erle, for geocoder.us, 2008-03-14
8 |
9 | Stylesheet for Docutils.
10 | Based on ``blue_box.css`` by Ian Bicking
11 | and ``html4css1.css`` revision 1.46.
12 | */
13 |
14 | @import url(html4css1.css);
15 |
16 | /* changes made by SDE */
17 | body {
18 | font-family: Arial, sans-serif;
19 | margin-left: 10%;
20 | margin-right: 10%;
21 | }
22 |
23 | p { text-align: justify; }
24 | dt { font-style: italic; }
25 | /* end changes */
26 |
27 | em, i {
28 | /* Typically serif fonts have much nicer italics */
29 | font-family: Times New Roman, Times, serif;
30 | }
31 |
/* Link targets are drawn in blue (this rule used to be declared twice). */
a.target {
  color: blue;
}
39 |
40 | a.toc-backref {
41 | text-decoration: none;
42 | color: black;
43 | }
44 |
45 | a.toc-backref:hover {
46 | background-color: inherit;
47 | }
48 |
49 | a:hover {
50 | background-color: #cccccc;
51 | }
52 |
53 | div.attention, div.caution, div.danger, div.error, div.hint,
54 | div.important, div.note, div.tip, div.warning {
55 | background-color: #cccccc;
56 | padding: 3px;
57 | width: 80%;
58 | }
59 |
60 | div.admonition p.admonition-title, div.hint p.admonition-title,
61 | div.important p.admonition-title, div.note p.admonition-title,
62 | div.tip p.admonition-title {
63 | text-align: center;
64 | background-color: #999999;
65 | display: block;
66 | margin: 0;
67 | }
68 |
69 | div.attention p.admonition-title, div.caution p.admonition-title,
70 | div.danger p.admonition-title, div.error p.admonition-title,
71 | div.warning p.admonition-title {
72 | color: #cc0000;
73 | font-family: sans-serif;
74 | text-align: center;
75 | background-color: #999999;
76 | display: block;
77 | margin: 0;
78 | }
79 |
/* Common look for every heading level: boxed, sans-serif, rounded corners. */
h1, h2, h3, h4, h5, h6 {
  font-family: Helvetica, Arial, sans-serif;
  border: thin solid black;
  /* Rounded borders: the vendor-prefixed property covers old Mozilla
     builds, the standard property covers every other browser. */
  -moz-border-radius: 8px;
  border-radius: 8px;
  padding: 4px;
}
87 |
88 | h1 {
89 | background-color: #444499;
90 | color: #ffffff;
91 | border: medium solid black;
92 | }
93 |
94 | h1 a.toc-backref, h2 a.toc-backref {
95 | color: #ffffff;
96 | }
97 |
98 | h2 {
99 | background-color: #666666;
100 | color: #ffffff;
101 | border: medium solid black;
102 | }
103 |
104 | h3, h4, h5, h6 {
105 | background-color: #cccccc;
106 | color: #000000;
107 | }
108 |
109 | h3 a.toc-backref, h4 a.toc-backref, h5 a.toc-backref,
110 | h6 a.toc-backref {
111 | color: #000000;
112 | }
113 |
/* Document title: centred, heavier border and a larger corner radius
   than the ordinary headings. */
h1.title {
  text-align: center;
  background-color: #444499;
  color: #eeeeee;
  border: thick solid black;
  /* Vendor-prefixed form for old Mozilla plus the standard property. */
  -moz-border-radius: 20px;
  border-radius: 20px;
}
121 |
122 | table.footnote {
123 | padding-left: 0.5ex;
124 | }
125 |
126 | table.citation {
127 | padding-left: 0.5ex
128 | }
129 |
130 | pre.literal-block, pre.doctest-block {
131 | border: thin black solid;
132 | padding: 5px;
133 | }
134 |
135 | .image img { border-style : solid;
136 | border-width : 2px;
137 | }
138 |
139 | h1 tt, h2 tt, h3 tt, h4 tt, h5 tt, h6 tt {
140 | font-size: 100%;
141 | }
142 |
143 | code, tt {
144 | color: #000066;
145 | }
146 |
147 |
148 |
--------------------------------------------------------------------------------
/gemspec:
--------------------------------------------------------------------------------
# Gem specification for the Geocoder-US library.
#
# NOTE(review): lib/geocoder/us.rb declares Geocoder::US::VERSION = "2.0.0"
# while this file says "2.0.1pre" -- confirm which one is authoritative.
Gem::Specification.new do |s|
  s.name             = 'Geocoder-US'
  s.version          = "2.0.1pre"
  s.author           = "Schuyler Erle"
  s.email            = 'geocoder@entropyfree.com'
  s.description      = "US address geocoding based on TIGER/Line."
  s.summary          = "US address geocoding based on TIGER/Line."
  s.homepage         = "http://geocoder.us/"
  # The main entry point, everything under lib/geocoder/us/, and the tests.
  s.files            = ["lib/geocoder/us.rb"] + Dir["lib/geocoder/us/*"] + Dir["tests/*"]
  s.require_path     = "lib"
  # test_files is a list attribute; pass an Array like the other list
  # attributes instead of relying on a bare String being coerced.
  s.test_files       = ["tests/run.rb"]
  # has_rdoc= is deprecated and ignored by RubyGems, so it is no longer set.
  s.extra_rdoc_files = ["README.rdoc"]
end
15 |
--------------------------------------------------------------------------------
/lib/geocoder/us.rb:
--------------------------------------------------------------------------------
1 | require "geocoder/us/database"
2 | require "geocoder/us/address"
3 |
4 | # Imports the Geocoder::US::Database and Geocoder::US::Address
5 | # modules.
6 | #
7 | # General usage is as follows:
8 | #
9 | # >> require 'geocoder/us'
10 | # >> db = Geocoder::US::Database.new("/opt/tiger/geocoder.db")
11 | # >> p db.geocode("1600 Pennsylvania Av, Washington DC")
12 | #
13 | # [{:pretyp=>"", :street=>"Pennsylvania", :sufdir=>"NW", :zip=>"20502",
14 | # :lon=>-77.037528, :number=>"1600", :fips_county=>"11001", :predir=>"",
15 | # :precision=>:range, :city=>"Washington", :lat=>38.898746, :suftyp=>"Ave",
16 | # :state=>"DC", :prequal=>"", :sufqual=>"", :score=>0.906, :prenum=>""}]
17 | #
18 | # See Geocoder::US::Database and README.rdoc for more details.
19 | module Geocoder::US
20 | VERSION = "2.0.0"
21 | end
22 |
--------------------------------------------------------------------------------
/lib/geocoder/us/metaphone.rb:
--------------------------------------------------------------------------------
module Text # :nodoc:
  module Metaphone

    module Rules # :nodoc:all

      # Ordered substitution table. Each entry is a [pattern, replacement]
      # pair; the pairs are applied to the whole word one after another.
      #
      STANDARD = [
        # Regexp, replacement
        [ /([bcdfhjklmnpqrstvwxyz])\1+/,
                           '\1' ],  # Remove doubled consonants except g.
                                    # [PHP] remove c from regexp.
        [ /^ae/,            'E' ],
        [ /^[gkp]n/,        'N' ],
        [ /^wr/,            'R' ],
        [ /^x/,             'S' ],
        [ /^wh/,            'W' ],
        [ /mb$/,            'M' ],  # [PHP] remove $ from regexp.
        [ /(?!^)sch/,      'SK' ],
        [ /th/,             '0' ],
        [ /t?ch|sh/,        'X' ],
        [ /c(?=ia)/,        'X' ],
        [ /[st](?=i[ao])/,  'X' ],
        [ /s?c(?=[iey])/,   'S' ],
        [ /[cq]/,           'K' ],
        [ /dg(?=[iey])/,    'J' ],
        [ /d/,              'T' ],
        [ /g(?=h[^aeiou])/, ''  ],
        [ /gn(ed)?/,        'N' ],
        [ /([^g]|^)g(?=[iey])/,
                          '\1J' ],
        [ /g+/,             'K' ],
        [ /ph/,             'F' ],
        [ /([aeiou])h(?=\b|[^aeiou])/,
                           '\1' ],
        [ /[wy](?![aeiou])/, '' ],
        [ /z/,              'S' ],
        [ /v/,              'F' ],
        [ /(?!^)[aeiou]+/,  ''  ],
      ]

      # Variant table reproducing the 'buggy' alternate implementation used
      # by PHP etc.: entries 0 and 6 of the standard table are swapped out.
      #
      BUGGY = STANDARD.dup
      BUGGY[0] = [ /([bdfhjklmnpqrstvwxyz])\1+/, '\1' ]
      BUGGY[6] = [ /mb/, 'M' ]
    end

    # Converts +str+ to its Metaphone code. The string is trimmed and split on
    # whitespace, every word is encoded on its own, and the codes are joined
    # back together with single spaces. Only the letters A-Z take part in the
    # encoding, so language-specific processing has to happen beforehand.
    #
    # Passing :buggy => true in +options+ selects the alternate rule table.
    #
    def metaphone(str, options={})
      words = str.strip.split(/\s+/)
      codes = words.map { |word| metaphone_word(word, options) }
      codes.join(' ')
    end

    private

    # Encodes a single word: lower-cases it, drops everything outside a-z,
    # runs it through the selected rule table in order, and upper-cases
    # the result.
    def metaphone_word(w, options={})
      table = options[:buggy] ? Rules::BUGGY : Rules::STANDARD
      letters = w.downcase.gsub(/[^a-z]/, '')
      encoded = table.inject(letters) do |text, (pattern, replacement)|
        text.gsub(pattern, replacement)
      end
      encoded.upcase
    end

    extend self

  end
end
75 |
--------------------------------------------------------------------------------
/lib/geocoder/us/numbers.rb:
--------------------------------------------------------------------------------
module Geocoder
end

module Geocoder::US
  # The NumberMap class provides a means for mapping ordinal
  # and cardinal number words to digits and back.
  #
  # Words are appended in counting order with <<. Each word is stored under
  # a normalised key (lower case, non-word characters removed) pointing at
  # its position, and the position is stored pointing back at the word:
  #
  #   map = NumberMap[%w[zero one two]]
  #   map["Two"]   # => 2
  #   map[2]       # => "two"
  #
  # #regexp matches any of the stored words inside a longer string. It is
  # rebuilt lazily after every <<, so words added after construction are
  # matched as well.
  class NumberMap < Hash
    attr_writer :regexp

    # Builds a map from an ordered list of number words; the word at index
    # +i+ stands for the number +i+.
    def self.[](array)
      nmap = new(array)
      nmap.build_match
      nmap
    end

    # +array+ is an optional ordered list of words to start from. The
    # argument may be omitted or empty.
    def initialize(array = [])
      super()
      @count = 0
      @words = []
      @regexp = nil
      array.each { |item| self << item }
    end

    # Regexp matching any stored word, case-insensitively and on word
    # boundaries. Built on first use and again after the map has changed.
    def regexp
      @regexp ||= build_match
    end

    # (Re)builds and returns the matching regexp.
    #
    # * Only the words take part. The Integer keys are left out because a
    #   matched digit string such as "5" is not a key of the map (the key is
    #   the Integer 5), so matching digits could only yield a nil lookup.
    # * Longer words come first so that "twenty-one" wins over "twenty".
    # * The separator inside a compound word is optional and may be a hyphen
    #   or whitespace, which agrees with the normalisation done by #clean.
    def build_match
      patterns = @words.uniq.
        sort_by { |word| [-word.length, word] }.
        map { |word| word_pattern(word) }.
        reject { |pattern| pattern.empty? }
      @regexp =
        if patterns.empty?
          /(?!)/ # an empty map matches nothing
        else
          Regexp.new('\b(' + patterns.join("|") + ')\b', Regexp::IGNORECASE)
        end
    end

    # Normalises a lookup key: Strings are lower-cased and stripped of
    # non-word characters, anything else is returned untouched.
    def clean(key)
      key.is_a?(String) ? key.downcase.gsub(/\W/, "") : key
    end

    # Appends +item+ as the word for the next number and returns the new
    # number of entries.
    def <<(item)
      store clean(item), @count
      store @count, item
      @words << item if item.is_a?(String)
      @regexp = nil # force a rebuild so the regexp never goes stale
      @count += 1
    end

    # Looks up either a word (normalised first) or a number.
    def [](key)
      super(clean(key))
    end

    private

    # Regexp source for one word, e.g. "twenty-one" => "twenty[-\s]?one".
    def word_pattern(word)
      parts = word.split(/\W+/).reject { |part| part.empty? }
      parts.map { |part| Regexp.escape(part) }.join('[-\s]?')
    end
  end

  # The Cardinals constant maps digits to cardinal number words and back.
  Cardinals = NumberMap[%w[
    zero one two three four five six seven eight nine ten
    eleven twelve thirteen fourteen fifteen sixteen seventeen
    eighteen nineteen
  ]]
  Cardinal_Tens = %w[ twenty thirty forty fifty sixty seventy eighty ninety ]
  # Extend the map from 20 to 99: each tens word followed by its nine compounds.
  Cardinal_Tens.each {|tens|
    Cardinals << tens
    (1..9).each {|n| Cardinals << "#{tens}-#{Cardinals[n]}" }
  }

  # The Ordinals constant maps digits to ordinal number words and back.
  Ordinals = NumberMap[%w[
    zeroth first second third fourth fifth sixth seventh eighth ninth
    tenth eleventh twelfth thirteenth fourteenth fifteenth sixteenth
    seventeenth eighteenth nineteenth
  ]]
  # "twenty" => "twentieth", then "twenty-first" .. "twenty-ninth", and so on.
  Cardinal_Tens.each {|tens|
    Ordinals << tens.gsub("y","ieth")
    (1..9).each {|n| Ordinals << "#{tens}-#{Ordinals[n]}" }
  }
end
59 |
--------------------------------------------------------------------------------
/lib/geocoder/us/rest.rb:
--------------------------------------------------------------------------------
1 | require 'sinatra'
2 | require 'geocoder/us/database'
3 | require 'json'
4 |
# The geocoder database is opened once at boot, from GEOCODER_DB or the first
# command line argument, and kept as a Sinatra setting. A class variable
# (@@db) cannot be used here: accessing a class variable from the top level
# raises RuntimeError on Ruby 3.0 and later.
set :db, Geocoder::US::Database.new(ENV["GEOCODER_DB"] || ARGV[0])
set :port, 8081

# GET /geocode?q=<address>
#
# Geocodes the address in +q+ and answers with a GeoJSON-style
# FeatureCollection holding one Point feature per result. Responds with
# HTTP 400 when +q+ is missing.
get '/geocode' do
  query = params[:q]
  halt 400, "parameter 'q' is missing" unless query

  features = settings.db.geocode(query).map do |result|
    # Longitude and latitude move out of the properties into the geometry.
    coords = [result.delete(:lon), result.delete(:lat)]
    result.keys.each do |key|
      value = result[key]
      # Re-encode every byte as a Unicode code point so the value is valid
      # UTF-8 for the JSON generator.
      result[key] = value.unpack("C*").pack("U*") if value.is_a?(String) # utf8
    end
    {
      :type => "Feature",
      :properties => result,
      :geometry => {
        :type => "Point",
        :coordinates => coords
      }
    }
  end

  # The body is JSON on both the success and the generator-error path.
  content_type :json
  begin
    {
      :type => "FeatureCollection",
      :address => query,
      :features => features
    }.to_json
  rescue JSON::GeneratorError
    {
      :type => "FeatureCollection",
      :error => "JSON::GeneratorError",
      :features => []
    }.to_json
  end
end

# GET /health -- trivial liveness probe.
get '/health' do
  "All is well."
end
50 |
--------------------------------------------------------------------------------
/navteq/README:
--------------------------------------------------------------------------------
1 | The navteq_import script in this directory is designed to be used with Navteq's
2 | local_streets layer. It works basically like tiger_import, except that you
3 | provide either a list of .zip files containing the local_streets.* files on the
4 | command line, or via standard input.
5 |
--------------------------------------------------------------------------------
/navteq/convert.sql:
--------------------------------------------------------------------------------
-- Converts the rows staged in local_streets into the geocoder's feature,
-- edge and range tables. Runs as a single transaction.
BEGIN;
-- IF NOT EXISTS keeps a re-run against an existing staging table from
-- failing on the index.
CREATE INDEX IF NOT EXISTS navteq_link_id ON local_streets (link_id);

-- One row per (link, ZIP): the right-side and left-side postcodes of every
-- named, addressed street link.
CREATE TEMPORARY TABLE linezip AS
    SELECT DISTINCT tlid, zip FROM (
        SELECT link_id AS tlid, r_postcode AS zip FROM local_streets
            WHERE addr_type IS NOT NULL AND st_name IS NOT NULL
                  AND r_postcode IS NOT NULL
        UNION
        SELECT link_id AS tlid, l_postcode AS zip FROM local_streets
            WHERE addr_type IS NOT NULL AND st_name IS NOT NULL
                  AND l_postcode IS NOT NULL
    ) AS whatever;

-- Street name records, one per (link, ZIP), all flagged 'P'.
INSERT INTO feature
    SELECT l.tlid, st_nm_base, metaphone(st_nm_base,5), st_nm_pref, st_typ_bef,
           NULL, st_nm_suff, st_typ_aft, NULL, 'P', zip
        FROM linezip l, local_streets f
        WHERE l.tlid=f.link_id AND st_name IS NOT NULL;

-- Compressed line geometry, one row per link; links already present are
-- left alone.
INSERT OR IGNORE INTO edge
    SELECT l.tlid, compress_wkb_line(the_geom) FROM
        (SELECT DISTINCT tlid FROM linezip) AS l, local_streets f
        WHERE l.tlid=f.link_id AND st_name IS NOT NULL;

-- Address ranges for the left ('L') and right ('R') side of each link.
-- NOTE(review): digit_suffix / nondigit_prefix presumably split an address
-- into its numeric part and any leading prefix -- confirm against the
-- helper library.
INSERT INTO range
    SELECT link_id, digit_suffix(l_refaddr), digit_suffix(l_nrefaddr),
           nondigit_prefix(l_refaddr), l_postcode, 'L'
        FROM linezip l, local_streets f
        WHERE l.tlid=f.link_id AND l_refaddr IS NOT NULL
    UNION
    SELECT link_id, digit_suffix(r_refaddr), digit_suffix(r_nrefaddr),
           nondigit_prefix(r_refaddr), r_postcode, 'R'
        FROM linezip l, local_streets f
        WHERE l.tlid=f.link_id AND r_refaddr IS NOT NULL;

-- The staging table is not temporary. Drop it once converted; otherwise the
-- next file imported into the same database would append to the old rows
-- and every earlier link would be inserted into feature and range again.
DROP TABLE local_streets;

END;
38 |
--------------------------------------------------------------------------------
/navteq/navteq_import:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Import Navteq local_streets shapefiles into a geocoder SQLite database.
#
# Usage: navteq_import DATABASE [FILE...]
#
# Each FILE is either a .zip archive (the .zip extension may be omitted) or
# one member of an unpacked shapefile set. When no FILE is given the list is
# read from standard input, one name per line. Output is copied to
# import-<pid>.log in the current directory.

TMP="/tmp/navteq-import.$$"
SHPS="local_streets"
DBFS=""
BASE=$(dirname "$0")
PATH=$PATH:$BASE/../bin
SQL="$BASE/../sql"
HELPER_LIB="$BASE/../lib/geocoder/us/sqlite3.so"
DATABASE=$1

if [ -z "$DATABASE" ]; then
  echo "usage: $(basename "$0") DATABASE [FILE...]" >&2
  exit 1
fi
shift

mkdir -p "$TMP" || exit 1

# Create the schema only when the database does not exist yet.
[ ! -r "$DATABASE" ] && cat "${SQL}/create.sql" "${SQL}/place.sql" | sqlite3 "$DATABASE"

if [ x"$1" = x"" ]; then
  cat
else
  ls "$@"
fi | while read -r county; do
  echo "--- $county"
  # Normalise to exactly one .zip suffix so that both "foo" and "foo.zip"
  # resolve to the archive that was just tested for readability.
  zipfile="${county%.zip}.zip"
  if [ -r "$zipfile" ]; then
    unzip -q "$zipfile" -d "$TMP"
  else
    # Not an archive: copy every file sharing the shapefile's base name.
    cp "${county%.*}".* "$TMP"
  fi
  # Feed one SQL stream to sqlite3: load the helper library, create the
  # staging table, load the shapefile data, then convert it.
  (echo ".load $HELPER_LIB" && \
   cat "${BASE}/prepare.sql" && \
   for file in $SHPS; do
     shp2sqlite -aS "${TMP}/${file}.shp" "${file}"
   done && \
   for file in $DBFS; do
     shp2sqlite -an "${TMP}/${file}.dbf" "${file}"
   done && \
   cat "${BASE}/convert.sql") | sqlite3 "$DATABASE"
  rm -f "$TMP"/*
done 2>&1 | tee "import-$$.log"
rm -rf "$TMP"
40 |
--------------------------------------------------------------------------------
/navteq/prepare.sql:
--------------------------------------------------------------------------------
1 | PRAGMA temp_store=MEMORY;
2 | PRAGMA journal_mode=MEMORY;
3 | PRAGMA synchronous=OFF;
4 | PRAGMA cache_size=250000;
5 | PRAGMA count_changes=0;
6 | BEGIN;
7 | CREATE TABLE "local_streets" (gid integer PRIMARY KEY,
8 | "the_geom" blob,
9 | "link_id" integer,
10 | "st_name" varchar(80),
11 | "feat_id" integer,
12 | "st_langcd" varchar(3),
13 | "num_stnmes" integer,
14 | "st_nm_pref" varchar(2),
15 | "st_typ_bef" varchar(30),
16 | "st_nm_base" varchar(35),
17 | "st_nm_suff" varchar(2),
18 | "st_typ_aft" varchar(30),
19 | "st_typ_att" varchar(1),
20 | "addr_type" varchar(1),
21 | "l_refaddr" varchar(10),
22 | "l_nrefaddr" varchar(10),
23 | "l_addrsch" varchar(1),
24 | "l_addrform" varchar(1),
25 | "r_refaddr" varchar(10),
26 | "r_nrefaddr" varchar(10),
27 | "r_addrsch" varchar(1),
28 | "r_addrform" varchar(1),
29 | "ref_in_id" integer,
30 | "nref_in_id" integer,
31 | "n_shapepnt" integer,
32 | "func_class" varchar(1),
33 | "speed_cat" varchar(1),
34 | "fr_spd_lim" integer,
35 | "to_spd_lim" integer,
36 | "to_lanes" integer,
37 | "from_lanes" integer,
38 | "enh_geom" varchar(1),
39 | "lane_cat" varchar(1),
40 | "divider" varchar(1),
41 | "dir_travel" varchar(1),
42 | "l_area_id" integer,
43 | "r_area_id" integer,
44 | "l_postcode" varchar(11),
45 | "r_postcode" varchar(11),
46 | "l_numzones" integer,
47 | "r_numzones" integer,
48 | "num_ad_rng" integer,
49 | "ar_auto" varchar(1),
50 | "ar_bus" varchar(1),
51 | "ar_taxis" varchar(1),
52 | "ar_carpool" varchar(1),
53 | "ar_pedest" varchar(1),
54 | "ar_trucks" varchar(1),
55 | "ar_traff" varchar(1),
56 | "ar_deliv" varchar(1),
57 | "ar_emerveh" varchar(1),
58 | "paved" varchar(1),
59 | "private" varchar(1),
60 | "frontage" varchar(1),
61 | "bridge" varchar(1),
62 | "tunnel" varchar(1),
63 | "ramp" varchar(1),
64 | "tollway" varchar(1),
65 | "poiaccess" varchar(1),
66 | "contracc" varchar(1),
67 | "roundabout" varchar(1),
68 | "interinter" varchar(1),
69 | "undeftraff" varchar(1),
70 | "ferry_type" varchar(1),
71 | "multidigit" varchar(1),
72 | "maxattr" varchar(1),
73 | "spectrfig" varchar(1),
74 | "indescrib" varchar(1),
75 | "manoeuvre" varchar(1),
76 | "dividerleg" varchar(1),
77 | "inprocdata" varchar(1),
78 | "full_geom" varchar(1),
79 | "urban" varchar(1),
80 | "route_type" varchar(1),
81 | "dironsign" varchar(1),
82 | "explicatbl" varchar(1),
83 | "nameonrdsn" varchar(1),
84 | "postalname" varchar(1),
85 | "stalename" varchar(1),
86 | "vanityname" varchar(1),
87 | "junctionnm" varchar(1),
88 | "exitname" varchar(1),
89 | "scenic_rt" varchar(1),
90 | "scenic_nm" varchar(1));
91 | --SELECT AddGeometryColumn('','local_streets','the_geom','-1','MULTILINESTRING',2);
92 | END;
93 |
--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
# Top-level driver: builds the SQLite extension library, liblwgeom and
# shp2sqlite by recursing into their directories.

# None of these targets names a real file; declaring them phony keeps them
# running even if a file called "all", "clean" or "install" ever appears.
.PHONY: all clean install

all:
	$(MAKE) -C libsqlite3_geocoder
	$(MAKE) -C liblwgeom
	$(MAKE) -C shp2sqlite

clean:
	$(MAKE) -C libsqlite3_geocoder clean
	$(MAKE) -C liblwgeom clean
	$(MAKE) -C shp2sqlite clean

# Copies the built extension next to the Ruby sources as sqlite3.so and
# installs shp2sqlite.
install: all
	cp libsqlite3_geocoder/*.so ../lib/geocoder/us/sqlite3.so
	$(MAKE) -C shp2sqlite install
14 |
--------------------------------------------------------------------------------
/src/README:
--------------------------------------------------------------------------------
1 | What's in this directory
2 | ------------------------
3 |
4 | shp2sqlite/
5 | A fork of shp2pgsql that generates SQLite 3 compatible
6 | output. Used for import.
7 | liblwgeom/
8 | Required by shp2sqlite for converting Shapefiles to WKB.
9 | libsqlite3_geocoder/
10 | Not actually the geocoder itself, but a library of
11 | extensions to SQLite 3 to facilitate geocoding.
12 | metaphone/
13 | Unused in this project. The metaphone functions have
14 | been rolled into libsqlite3_geocoder.
15 |
--------------------------------------------------------------------------------
/src/liblwgeom/Makefile:
--------------------------------------------------------------------------------
1 | # **********************************************************************
2 | # * $Id: Makefile.in
3 | # *
4 | # * PostGIS - Spatial Types for PostgreSQL
5 | # * http://postgis.refractions.net
6 | # * Copyright 2008 Mark Cave-Ayland
7 | # *
8 | # * This is free software; you can redistribute and/or modify it under
9 | # * the terms of the GNU General Public Licence. See the COPYING file.
10 | # *
11 | # **********************************************************************
12 |
CC=gcc
CFLAGS=-g -O2 -fPIC -DPIC -Wall -Wmissing-prototypes

YACC=yacc
LEX=flex

# Standalone LWGEOM objects
SA_OBJS = \
	measures.o \
	box2d.o \
	ptarray.o \
	lwgeom_api.o \
	lwgeom.o \
	lwpoint.o \
	lwline.o \
	lwpoly.o \
	lwmpoint.o \
	lwmline.o \
	lwmpoly.o \
	lwcollection.o \
	lwcircstring.o \
	lwcompound.o \
	lwcurvepoly.o \
	lwmcurve.o \
	lwmsurface.o \
	lwutil.o \
	lwalgorithm.o \
	lwgunparse.o \
	lwgparse.o \
	lwsegmentize.o \
	wktparse.tab.o \
	lex.yy.o \
	vsprintf.o

SA_HEADERS = \
	liblwgeom.h \
	lwalgorithm.h

# These targets never create a file of the same name.
.PHONY: all clean check

all: liblwgeom.a

# The static library is just an archive of the standalone objects.
liblwgeom.a: $(SA_OBJS)
	ar rs liblwgeom.a $(SA_OBJS)

clean:
	rm -f $(SA_OBJS)
	rm -f liblwgeom.a

# Use $(MAKE) so flags such as -j / -n propagate to the sub-make.
check: liblwgeom.a
	$(MAKE) -C cunit check

# Command to build each of the .o files.
# The headers are prerequisites of the objects (not of the archive) so that
# editing a header actually recompiles the code that includes it; $< is still
# the .c file because it is listed first.
$(SA_OBJS): %.o: %.c $(SA_HEADERS)
	$(CC) $(CFLAGS) -c -o $@ $<

# Commands to generate the lexer and parser from input files
wktparse.tab.c: wktparse.y
	$(YACC) -vd -p lwg_parse_yy wktparse.y
	mv -f y.tab.c wktparse.tab.c
	mv -f y.tab.h wktparse.tab.h

lex.yy.c: wktparse.lex wktparse.tab.c
	$(LEX) -Plwg_parse_yy -i -f -o'lex.yy.c' wktparse.lex
75 |
76 |
--------------------------------------------------------------------------------
/src/liblwgeom/box2d.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 |
6 | #include "liblwgeom.h"
7 |
/* Absolute tolerance used by FPeq; defined here only if nothing earlier
 * (e.g. an included header) has already defined it. */
#ifndef EPSILON
#define EPSILON 1.0E-06
#endif
/* Approximate equality: true when A and B differ by at most EPSILON. */
#ifndef FPeq
#define FPeq(A,B) (fabs((A) - (B)) <= EPSILON)
#endif
14 |
15 |
16 | /* Expand given box of 'd' units in all directions */
17 | void
18 | expand_box2d(BOX2DFLOAT4 *box, double d)
19 | {
20 | box->xmin -= d;
21 | box->ymin -= d;
22 |
23 | box->xmax += d;
24 | box->ymax += d;
25 | }
26 |
27 |
28 | /*
29 | * This has been changed in PostGIS 1.1.2 to
30 | * check exact equality of values (rather then using
31 | * the FPeq macro taking into account coordinate drifts).
32 | */
33 | char
34 | box2d_same(BOX2DFLOAT4 *box1, BOX2DFLOAT4 *box2)
35 | {
36 | return( (box1->xmax==box2->xmax) &&
37 | (box1->xmin==box2->xmin) &&
38 | (box1->ymax==box2->ymax) &&
39 | (box1->ymin==box2->ymin));
40 | #if 0
41 | return(FPeq(box1->xmax, box2->xmax) &&
42 | FPeq(box1->xmin, box2->xmin) &&
43 | FPeq(box1->ymax, box2->ymax) &&
44 | FPeq(box1->ymin, box2->ymin));
45 | #endif
46 | }
47 |
48 | BOX2DFLOAT4 *
49 | box2d_clone(const BOX2DFLOAT4 *in)
50 | {
51 | BOX2DFLOAT4 *ret = lwalloc(sizeof(BOX2DFLOAT4));
52 | memcpy(ret, in, sizeof(BOX2DFLOAT4));
53 | return ret;
54 | }
55 |
--------------------------------------------------------------------------------
/src/liblwgeom/lwalgorithm.h:
--------------------------------------------------------------------------------
1 | /**********************************************************************
2 | * $Id: lwalgorithm.h 3688 2009-02-11 21:48:13Z pramsey $
3 | *
4 | * PostGIS - Spatial Types for PostgreSQL
5 | * http://postgis.refractions.net
6 | * Copyright 2008 Paul Ramsey
7 | *
8 | * This is free software; you can redistribute and/or modify it under
9 | * the terms of the GNU General Public Licence. See the COPYING file.
10 | *
11 | **********************************************************************/
12 |
13 | #include
14 | #include "liblwgeom.h"
15 |
/*
 * Codes describing how two segments relate.
 * NOTE(review): presumably the values returned by lw_segment_intersects()
 * declared below -- confirm against lwalgorithm.c.
 */
enum CG_SEGMENT_INTERSECTION_TYPE {
	SEG_ERROR = -1,
	SEG_NO_INTERSECTION = 0,
	SEG_COLINEAR = 1,
	SEG_CROSS_LEFT = 2,
	SEG_CROSS_RIGHT = 3,
	SEG_TOUCH_LEFT = 4,
	SEG_TOUCH_RIGHT = 5
};
25 |
/*
 * Segment-level predicates (implemented outside this header).
 * Note that lw_segment_envelope_intersects takes its points by value while
 * the other two take pointers.
 */
double lw_segment_side(POINT2D *p1, POINT2D *p2, POINT2D *q);
int lw_segment_intersects(POINT2D *p1, POINT2D *p2, POINT2D *q1, POINT2D *q2);
int lw_segment_envelope_intersects(POINT2D p1, POINT2D p2, POINT2D q1, POINT2D q2);
29 |
30 |
/*
 * Codes describing how one line crosses another. LEFT variants are the
 * negation of the matching RIGHT variants.
 * NOTE(review): presumably the values returned by
 * lwline_crossing_direction() declared below -- confirm against lwalgorithm.c.
 */
enum CG_LINE_CROSS_TYPE {
	LINE_NO_CROSS = 0,
	LINE_CROSS_LEFT = -1,
	LINE_CROSS_RIGHT = 1,
	LINE_MULTICROSS_END_LEFT = -2,
	LINE_MULTICROSS_END_RIGHT = 2,
	LINE_MULTICROSS_END_SAME_FIRST_LEFT = -3,
	LINE_MULTICROSS_END_SAME_FIRST_RIGHT = 3
};
40 |
/* Line crossing test (implemented outside this header). */
int lwline_crossing_direction(LWLINE *l1, LWLINE *l2);

/* Per-ordinate access, interpolation and clipping helpers. */
double lwpoint_get_ordinate(const POINT4D *p, int ordinate);
void lwpoint_set_ordinate(POINT4D *p, int ordinate, double value);
int lwpoint_interpolate(const POINT4D *p1, const POINT4D *p2, POINT4D *p, int ndims, int ordinate, double interpolation_value);
LWCOLLECTION *lwline_clip_to_ordinate_range(LWLINE *line, int ordinate, double from, double to);
LWCOLLECTION *lwmline_clip_to_ordinate_range(LWMLINE *mline, int ordinate, double from, double to);

/* GeoHash helpers. */
int lwgeom_geohash_precision(BOX3D bbox, BOX3D *bounds);
char *lwgeom_geohash(const LWGEOM *lwgeom, int precision);
char *geohash_point(double longitude, double latitude, int precision);
52 |
53 |
--------------------------------------------------------------------------------
/src/liblwgeom/lwcompound.c:
--------------------------------------------------------------------------------
1 | /**********************************************************************
2 | * $Id: lwcompound.c 3639 2009-02-04 00:28:37Z pramsey $
3 | *
4 | * PostGIS - Spatial Types for PostgreSQL
5 | * http://postgis.refractions.net
6 | * Copyright 2001-2006 Refractions Research Inc.
7 | *
8 | * This is free software; you can redistribute and/or modify it under
9 | * the terms of the GNU General Public Licence. See the COPYING file.
10 | *
11 | **********************************************************************/
12 |
13 | #include
14 | #include
15 | #include
16 | #include "liblwgeom.h"
17 |
18 | LWCOMPOUND *
19 | lwcompound_deserialize(uchar *serialized)
20 | {
21 | LWCOMPOUND *result;
22 | LWGEOM_INSPECTED *insp;
23 | int type = lwgeom_getType(serialized[0]);
24 | int i;
25 |
26 | if(type != COMPOUNDTYPE)
27 | {
28 | lwerror("lwcompound_deserialize called on non compound: %d", type);
29 | return NULL;
30 | }
31 |
32 | insp = lwgeom_inspect(serialized);
33 |
34 | result = lwalloc(sizeof(LWCOMPOUND));
35 | result->type = insp->type;
36 | result->SRID = insp->SRID;
37 | result->ngeoms = insp->ngeometries;
38 | result->geoms = lwalloc(sizeof(LWGEOM *)*insp->ngeometries);
39 |
40 | if(lwgeom_hasBBOX(serialized[0]))
41 | {
42 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4));
43 | memcpy(result->bbox, serialized + 1, sizeof(BOX2DFLOAT4));
44 | }
45 | else result->bbox = NULL;
46 |
47 | for(i = 0; i < insp->ngeometries; i++)
48 | {
49 | if(lwgeom_getType(insp->sub_geoms[i][0]) == LINETYPE)
50 | result->geoms[i] = (LWGEOM *)lwline_deserialize(insp->sub_geoms[i]);
51 | else
52 | result->geoms[i] = (LWGEOM *)lwcircstring_deserialize(insp->sub_geoms[i]);
53 | if(TYPE_NDIMS(result->geoms[i]->type) != TYPE_NDIMS(result->type))
54 | {
55 | lwerror("Mixed dimensions (compound: %d, line/circularstring %d:%d)",
56 | TYPE_NDIMS(result->type), i,
57 | TYPE_NDIMS(result->geoms[i]->type)
58 | );
59 | lwfree(result);
60 | return NULL;
61 | }
62 | }
63 | return result;
64 | }
65 |
66 | /*
67 | * Add 'what' to this string at position 'where'
68 | * where=0 == prepend
69 | * where=-1 == append
70 | * Returns a COMPOUND or a GEOMETRYCOLLECTION
71 | */
72 | LWGEOM *
73 | lwcompound_add(const LWCOMPOUND *to, uint32 where, const LWGEOM *what)
74 | {
75 | LWCOLLECTION *col;
76 | LWGEOM **geoms;
77 | int newtype;
78 |
79 | LWDEBUG(2, "lwcompound_add called.");
80 |
81 | if(where != -1 && where != 0)
82 | {
83 | lwerror("lwcompound_add only supports 0 or -1 as a second argument, not %d", where);
84 | return NULL;
85 | }
86 |
87 | /* dimensions compatibility are checked by caller */
88 |
89 | /* Construct geoms array */
90 | geoms = lwalloc(sizeof(LWGEOM *)*2);
91 | if(where == -1) /* append */
92 | {
93 | geoms[0] = lwgeom_clone((LWGEOM *)to);
94 | geoms[1] = lwgeom_clone(what);
95 | }
96 | else /* prepend */
97 | {
98 | geoms[0] = lwgeom_clone(what);
99 | geoms[1] = lwgeom_clone((LWGEOM *)to);
100 | }
101 |
102 | /* reset SRID and wantbbox flag from component types */
103 | geoms[0]->SRID = geoms[1]->SRID = -1;
104 | TYPE_SETHASSRID(geoms[0]->type, 0);
105 | TYPE_SETHASSRID(geoms[1]->type, 0);
106 | TYPE_SETHASBBOX(geoms[0]->type, 0);
107 | TYPE_SETHASBBOX(geoms[1]->type, 0);
108 |
109 | /* Find appropriate geom type */
110 | if(TYPE_GETTYPE(what->type) == LINETYPE || TYPE_GETTYPE(what->type) == CIRCSTRINGTYPE) newtype = COMPOUNDTYPE;
111 | else newtype = COLLECTIONTYPE;
112 |
113 | col = lwcollection_construct(newtype,
114 | to->SRID, NULL, 2, geoms);
115 |
116 | return (LWGEOM *)col;
117 | }
118 |
119 |
--------------------------------------------------------------------------------
/src/liblwgeom/lwcurvepoly.c:
--------------------------------------------------------------------------------
1 | /**********************************************************************
2 | * $Id: lwcurvepoly.c 3639 2009-02-04 00:28:37Z pramsey $
3 | *
4 | * PostGIS - Spatial Types for PostgreSQL
5 | * http://postgis.refractions.net
6 | * Copyright 2001-2006 Refractions Research Inc.
7 | *
8 | * This is free software; you can redistribute and/or modify it under
9 | * the terms of the GNU General Public Licence. See the COPYING file.
10 | *
11 | **********************************************************************/
12 |
13 | /* basic LWCURVEPOLY manipulation */
14 |
15 | #include
16 | #include
17 | #include
18 | #include "liblwgeom.h"
19 |
20 |
21 | LWCURVEPOLY *
22 | lwcurvepoly_deserialize(uchar *srl)
23 | {
24 | LWCURVEPOLY *result;
25 | LWGEOM_INSPECTED *insp;
26 | int type = lwgeom_getType(srl[0]);
27 | int i;
28 |
29 | LWDEBUG(3, "lwcurvepoly_deserialize called.");
30 |
31 | if(type != CURVEPOLYTYPE)
32 | {
33 | lwerror("lwcurvepoly_deserialize called on NON curvepoly: %d",
34 | type);
35 | return NULL;
36 | }
37 |
38 | insp = lwgeom_inspect(srl);
39 |
40 | result = lwalloc(sizeof(LWCURVEPOLY));
41 | result->type = insp->type;
42 | result->SRID = insp->SRID;
43 | result->nrings = insp->ngeometries;
44 | result->rings = lwalloc(sizeof(LWGEOM *)*insp->ngeometries);
45 |
46 | if(lwgeom_hasBBOX(srl[0]))
47 | {
48 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4));
49 | memcpy(result->bbox, srl + 1, sizeof(BOX2DFLOAT4));
50 | }
51 | else result->bbox = NULL;
52 |
53 | for(i = 0; i < insp->ngeometries; i++)
54 | {
55 | result->rings[i] = lwgeom_deserialize(insp->sub_geoms[i]);
56 | if(lwgeom_getType(result->rings[i]->type) != CIRCSTRINGTYPE
57 | && lwgeom_getType(result->rings[i]->type) != LINETYPE)
58 | {
59 | lwerror("Only Circularstrings and Linestrings are currently supported as rings, not %s (%d)", lwgeom_typename(result->rings[i]->type), result->rings[i]->type);
60 | lwfree(result);
61 | lwfree(insp);
62 | return NULL;
63 | }
64 | if(TYPE_NDIMS(result->rings[i]->type) != TYPE_NDIMS(result->type))
65 | {
66 | lwerror("Mixed dimensions (curvepoly %d, ring %d)",
67 | TYPE_NDIMS(result->type), i,
68 | TYPE_NDIMS(result->rings[i]->type));
69 | lwfree(result);
70 | lwfree(insp);
71 | return NULL;
72 | }
73 | }
74 | return result;
75 | }
76 |
77 | LWGEOM *
78 | lwcurvepoly_add(const LWCURVEPOLY *to, uint32 where, const LWGEOM *what)
79 | {
80 | /* TODO */
81 | lwerror("lwcurvepoly_add not yet implemented.");
82 | return NULL;
83 | }
84 |
85 |
86 |
87 |
--------------------------------------------------------------------------------
/src/liblwgeom/lwgunparse.c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simplegeo/geocoder/5c7f678a1abe79c77c36153fbcee32e4dec24e53/src/liblwgeom/lwgunparse.c
--------------------------------------------------------------------------------
/src/liblwgeom/lwmcurve.c:
--------------------------------------------------------------------------------
1 | /**********************************************************************
2 | * $Id: lwmcurve.c 3639 2009-02-04 00:28:37Z pramsey $
3 | *
4 | * PostGIS - Spatial Types for PostgreSQL
5 | * http://postgis.refractions.net
6 | * Copyright 2001-2006 Refractions Research Inc.
7 | *
8 | * This is free software; you can redistribute and/or modify it under
9 | * the terms of the GNU General Public Licence. See the COPYING file.
10 | *
11 | **********************************************************************/
12 |
13 | #include
14 | #include
15 | #include
16 | #include "liblwgeom.h"
17 |
18 | LWMCURVE *
19 | lwmcurve_deserialize(uchar *srl)
20 | {
21 | LWMCURVE *result;
22 | LWGEOM_INSPECTED *insp;
23 | int stype;
24 | int type = lwgeom_getType(srl[0]);
25 | int i;
26 |
27 | if(type != MULTICURVETYPE)
28 | {
29 | lwerror("lwmcurve_deserialize called on NON multicurve: %d", type);
30 | return NULL;
31 | }
32 |
33 | insp = lwgeom_inspect(srl);
34 |
35 | result = lwalloc(sizeof(LWMCURVE));
36 | result->type = insp->type;
37 | result->SRID = insp->SRID;
38 | result->ngeoms = insp->ngeometries;
39 | result->geoms = lwalloc(sizeof(LWGEOM *)*insp->ngeometries);
40 |
41 | if(lwgeom_hasBBOX(srl[0]))
42 | {
43 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4));
44 | memcpy(result->bbox, srl+1, sizeof(BOX2DFLOAT4));
45 | }
46 | else result->bbox = NULL;
47 |
48 | for(i = 0; i < insp->ngeometries; i++)
49 | {
50 | stype = lwgeom_getType(insp->sub_geoms[i][0]);
51 | if(stype == CIRCSTRINGTYPE)
52 | {
53 | result->geoms[i] = (LWGEOM *)lwcircstring_deserialize(insp->sub_geoms[i]);
54 | }
55 | else if(stype == LINETYPE)
56 | {
57 | result->geoms[i] = (LWGEOM *)lwline_deserialize(insp->sub_geoms[i]);
58 | }
59 | else
60 | {
61 | lwerror("Only Circular and Line strings are currenly permitted in a MultiCurve.");
62 | lwfree(result);
63 | lwfree(insp);
64 | return NULL;
65 | }
66 |
67 | if(TYPE_NDIMS(result->geoms[i]->type) != TYPE_NDIMS(result->type))
68 | {
69 | lwerror("Mixed dimensions (multicurve: %d, curve %d:%d)",
70 | TYPE_NDIMS(result->type), i,
71 | TYPE_NDIMS(result->geoms[i]->type));
72 | lwfree(result);
73 | lwfree(insp);
74 | return NULL;
75 | }
76 | }
77 | return result;
78 | }
79 |
80 | /*
81 | * Add 'what' to this multicurve at position 'where'.
82 | * where=0 == prepend
83 | * where=-1 == append
84 | * Returns a MULTICURVE or a COLLECTION
85 | */
86 | LWGEOM *
87 | lwmcurve_add(const LWMCURVE *to, uint32 where, const LWGEOM *what)
88 | {
89 | LWCOLLECTION *col;
90 | LWGEOM **geoms;
91 | int newtype;
92 | uint32 i;
93 |
94 | if(where == -1) where = to->ngeoms;
95 | else if(where < -1 || where > to->ngeoms)
96 | {
97 | lwerror("lwmcurve_add: add position out of range %d..%d",
98 | -1, to->ngeoms);
99 | return NULL;
100 | }
101 |
102 | /* dimensions compatibility are checked by caller */
103 |
104 | /* Construct geoms array */
105 | geoms = lwalloc(sizeof(LWGEOM *)*(to->ngeoms+1));
106 | for(i = 0; i < where; i++)
107 | {
108 | geoms[i] = lwgeom_clone((LWGEOM *)to->geoms[i]);
109 | }
110 | geoms[where] = lwgeom_clone(what);
111 | for(i = where; i < to->ngeoms; i++)
112 | {
113 | geoms[i+1] = lwgeom_clone((LWGEOM *)to->geoms[i]);
114 | }
115 |
116 | if(TYPE_GETTYPE(what->type) == CIRCSTRINGTYPE) newtype = MULTICURVETYPE;
117 | else newtype = COLLECTIONTYPE;
118 |
119 | col = lwcollection_construct(newtype,
120 | to->SRID, NULL,
121 | to->ngeoms + 1, geoms);
122 |
123 | return (LWGEOM *)col;
124 | }
125 |
126 |
--------------------------------------------------------------------------------
/src/liblwgeom/lwmline.c:
--------------------------------------------------------------------------------
1 | /**********************************************************************
2 | * $Id: lwmline.c 3639 2009-02-04 00:28:37Z pramsey $
3 | *
4 | * PostGIS - Spatial Types for PostgreSQL
5 | * http://postgis.refractions.net
6 | * Copyright 2001-2006 Refractions Research Inc.
7 | *
8 | * This is free software; you can redistribute and/or modify it under
9 | * the terms of the GNU General Public Licence. See the COPYING file.
10 | *
11 | **********************************************************************/
12 |
13 | #include
14 | #include
15 | #include
16 | #include "liblwgeom.h"
17 |
18 | void
19 | lwmline_release(LWMLINE *lwmline)
20 | {
21 | lwgeom_release(lwmline_as_lwgeom(lwmline));
22 | }
23 |
24 | LWMLINE *
25 | lwmline_deserialize(uchar *srl)
26 | {
27 | LWMLINE *result;
28 | LWGEOM_INSPECTED *insp;
29 | int type = lwgeom_getType(srl[0]);
30 | int i;
31 |
32 | if ( type != MULTILINETYPE )
33 | {
34 | lwerror("lwmline_deserialize called on NON multiline: %d",
35 | type);
36 | return NULL;
37 | }
38 |
39 | insp = lwgeom_inspect(srl);
40 |
41 | result = lwalloc(sizeof(LWMLINE));
42 | result->type = insp->type;
43 | result->SRID = insp->SRID;
44 | result->ngeoms = insp->ngeometries;
45 | result->geoms = lwalloc(sizeof(LWLINE *)*insp->ngeometries);
46 |
47 | if (lwgeom_hasBBOX(srl[0]))
48 | {
49 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4));
50 | memcpy(result->bbox, srl+1, sizeof(BOX2DFLOAT4));
51 | }
52 | else result->bbox = NULL;
53 |
54 |
55 | for (i=0; ingeometries; i++)
56 | {
57 | result->geoms[i] = lwline_deserialize(insp->sub_geoms[i]);
58 | if ( TYPE_NDIMS(result->geoms[i]->type) != TYPE_NDIMS(result->type) )
59 | {
60 | lwerror("Mixed dimensions (multiline:%d, line%d:%d)",
61 | TYPE_NDIMS(result->type), i,
62 | TYPE_NDIMS(result->geoms[i]->type)
63 | );
64 | return NULL;
65 | }
66 | }
67 |
68 | return result;
69 | }
70 |
71 | /*
72 | * Add 'what' to this multiline at position 'where'.
73 | * where=0 == prepend
74 | * where=-1 == append
75 | * Returns a MULTILINE or a COLLECTION
76 | */
77 | LWGEOM *
78 | lwmline_add(const LWMLINE *to, uint32 where, const LWGEOM *what)
79 | {
80 | LWCOLLECTION *col;
81 | LWGEOM **geoms;
82 | int newtype;
83 | uint32 i;
84 |
85 | if ( where == -1 ) where = to->ngeoms;
86 | else if ( where < -1 || where > to->ngeoms )
87 | {
88 | lwerror("lwmline_add: add position out of range %d..%d",
89 | -1, to->ngeoms);
90 | return NULL;
91 | }
92 |
93 | /* dimensions compatibility are checked by caller */
94 |
95 | /* Construct geoms array */
96 | geoms = lwalloc(sizeof(LWGEOM *)*(to->ngeoms+1));
97 | for (i=0; igeoms[i]);
100 | }
101 | geoms[where] = lwgeom_clone(what);
102 | for (i=where; ingeoms; i++)
103 | {
104 | geoms[i+1] = lwgeom_clone((LWGEOM *)to->geoms[i]);
105 | }
106 |
107 | if ( TYPE_GETTYPE(what->type) == LINETYPE ) newtype = MULTILINETYPE;
108 | else newtype = COLLECTIONTYPE;
109 |
110 | col = lwcollection_construct(newtype,
111 | to->SRID, NULL,
112 | to->ngeoms+1, geoms);
113 |
114 | return (LWGEOM *)col;
115 |
116 | }
117 |
118 | void lwmline_free(LWMLINE *mline)
119 | {
120 | int i;
121 | if( mline->bbox )
122 | {
123 | lwfree(mline->bbox);
124 | }
125 | for ( i = 0; i < mline->ngeoms; i++ )
126 | {
127 | if( mline->geoms[i] ) {
128 | lwline_free(mline->geoms[i]);
129 | }
130 | }
131 | if( mline->geoms )
132 | {
133 | lwfree(mline->geoms);
134 | }
135 | lwfree(mline);
136 |
137 | };
138 |
--------------------------------------------------------------------------------
/src/liblwgeom/lwmpoint.c:
--------------------------------------------------------------------------------
1 | /**********************************************************************
2 | * $Id: lwmpoint.c 3639 2009-02-04 00:28:37Z pramsey $
3 | *
4 | * PostGIS - Spatial Types for PostgreSQL
5 | * http://postgis.refractions.net
6 | * Copyright 2001-2006 Refractions Research Inc.
7 | *
8 | * This is free software; you can redistribute and/or modify it under
9 | * the terms of the GNU General Public Licence. See the COPYING file.
10 | *
11 | **********************************************************************/
12 |
13 | #include
14 | #include
15 | #include
16 | #include "liblwgeom.h"
17 |
18 | void
19 | lwmpoint_release(LWMPOINT *lwmpoint)
20 | {
21 | lwgeom_release(lwmpoint_as_lwgeom(lwmpoint));
22 | }
23 |
24 |
25 | LWMPOINT *
26 | lwmpoint_deserialize(uchar *srl)
27 | {
28 | LWMPOINT *result;
29 | LWGEOM_INSPECTED *insp;
30 | int type = lwgeom_getType(srl[0]);
31 | int i;
32 |
33 | if ( type != MULTIPOINTTYPE )
34 | {
35 | lwerror("lwmpoint_deserialize called on NON multipoint: %d",
36 | type);
37 | return NULL;
38 | }
39 |
40 | insp = lwgeom_inspect(srl);
41 |
42 | result = lwalloc(sizeof(LWMPOINT));
43 | result->type = insp->type;
44 | result->SRID = insp->SRID;
45 | result->ngeoms = insp->ngeometries;
46 | result->geoms = lwalloc(sizeof(LWPOINT *)*result->ngeoms);
47 |
48 | if (lwgeom_hasBBOX(srl[0]))
49 | {
50 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4));
51 | memcpy(result->bbox, srl+1, sizeof(BOX2DFLOAT4));
52 | }
53 | else result->bbox = NULL;
54 |
55 | for (i=0; ingeometries; i++)
56 | {
57 | result->geoms[i] = lwpoint_deserialize(insp->sub_geoms[i]);
58 | if ( TYPE_NDIMS(result->geoms[i]->type) != TYPE_NDIMS(result->type) )
59 | {
60 | lwerror("Mixed dimensions (multipoint:%d, point%d:%d)",
61 | TYPE_NDIMS(result->type), i,
62 | TYPE_NDIMS(result->geoms[i]->type)
63 | );
64 | return NULL;
65 | }
66 | }
67 |
68 | return result;
69 | }
70 |
71 | /*
72 | * Add 'what' to this multipoint at position 'where'.
73 | * where=0 == prepend
74 | * where=-1 == append
75 | * Returns a MULTIPOINT or a COLLECTION
76 | */
77 | LWGEOM *
78 | lwmpoint_add(const LWMPOINT *to, uint32 where, const LWGEOM *what)
79 | {
80 | LWCOLLECTION *col;
81 | LWGEOM **geoms;
82 | int newtype;
83 | uint32 i;
84 |
85 | if ( where == -1 ) where = to->ngeoms;
86 | else if ( where < -1 || where > to->ngeoms )
87 | {
88 | lwerror("lwmpoint_add: add position out of range %d..%d",
89 | -1, to->ngeoms);
90 | return NULL;
91 | }
92 |
93 | /* dimensions compatibility are checked by caller */
94 |
95 | /* Construct geoms array */
96 | geoms = lwalloc(sizeof(LWGEOM *)*(to->ngeoms+1));
97 | for (i=0; igeoms[i]);
100 | }
101 | geoms[where] = lwgeom_clone(what);
102 | for (i=where; ingeoms; i++)
103 | {
104 | geoms[i+1] = lwgeom_clone((LWGEOM *)to->geoms[i]);
105 | }
106 |
107 | if ( TYPE_GETTYPE(what->type) == POINTTYPE ) newtype = MULTIPOINTTYPE;
108 | else newtype = COLLECTIONTYPE;
109 |
110 | col = lwcollection_construct(newtype,
111 | to->SRID, NULL,
112 | to->ngeoms+1, geoms);
113 |
114 | return (LWGEOM *)col;
115 |
116 | }
117 |
118 | void lwmpoint_free(LWMPOINT *mpt)
119 | {
120 | int i;
121 | if( mpt->bbox )
122 | {
123 | lwfree(mpt->bbox);
124 | }
125 | for ( i = 0; i < mpt->ngeoms; i++ )
126 | {
127 | if( mpt->geoms[i] ) {
128 | lwpoint_free(mpt->geoms[i]);
129 | }
130 | }
131 | if( mpt->geoms )
132 | {
133 | lwfree(mpt->geoms);
134 | }
135 | lwfree(mpt);
136 |
137 | };
138 |
139 |
--------------------------------------------------------------------------------
/src/liblwgeom/lwmpoly.c:
--------------------------------------------------------------------------------
1 | /**********************************************************************
2 | * $Id: lwmpoly.c 3639 2009-02-04 00:28:37Z pramsey $
3 | *
4 | * PostGIS - Spatial Types for PostgreSQL
5 | * http://postgis.refractions.net
6 | * Copyright 2001-2006 Refractions Research Inc.
7 | *
8 | * This is free software; you can redistribute and/or modify it under
9 | * the terms of the GNU General Public Licence. See the COPYING file.
10 | *
11 | **********************************************************************/
12 |
13 | #include
14 | #include
15 | #include
16 | #include "liblwgeom.h"
17 |
18 |
19 | void
20 | lwmpoly_release(LWMPOLY *lwmpoly)
21 | {
22 | lwgeom_release(lwmpoly_as_lwgeom(lwmpoly));
23 | }
24 |
25 |
26 | LWMPOLY *
27 | lwmpoly_deserialize(uchar *srl)
28 | {
29 | LWMPOLY *result;
30 | LWGEOM_INSPECTED *insp;
31 | int type = lwgeom_getType(srl[0]);
32 | int i;
33 |
34 | LWDEBUG(2, "lwmpoly_deserialize called");
35 |
36 | if ( type != MULTIPOLYGONTYPE )
37 | {
38 | lwerror("lwmpoly_deserialize called on NON multipoly: %d",
39 | type);
40 | return NULL;
41 | }
42 |
43 | insp = lwgeom_inspect(srl);
44 |
45 | result = lwalloc(sizeof(LWMPOLY));
46 | result->type = insp->type;
47 | result->SRID = insp->SRID;
48 | result->ngeoms = insp->ngeometries;
49 | result->geoms = lwalloc(sizeof(LWPOLY *)*insp->ngeometries);
50 |
51 | if (lwgeom_hasBBOX(srl[0]))
52 | {
53 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4));
54 | memcpy(result->bbox, srl+1, sizeof(BOX2DFLOAT4));
55 | }
56 | else result->bbox = NULL;
57 |
58 | for (i=0; ingeometries; i++)
59 | {
60 | result->geoms[i] = lwpoly_deserialize(insp->sub_geoms[i]);
61 | if ( TYPE_NDIMS(result->geoms[i]->type) != TYPE_NDIMS(result->type) )
62 | {
63 | lwerror("Mixed dimensions (multipoly:%d, poly%d:%d)",
64 | TYPE_NDIMS(result->type), i,
65 | TYPE_NDIMS(result->geoms[i]->type)
66 | );
67 | return NULL;
68 | }
69 | }
70 |
71 | return result;
72 | }
73 |
74 | /*
75 | * Add 'what' to this multiline at position 'where'.
76 | * where=0 == prepend
77 | * where=-1 == append
78 | * Returns a MULTIPOLY or a COLLECTION
79 | */
80 | LWGEOM *
81 | lwmpoly_add(const LWMPOLY *to, uint32 where, const LWGEOM *what)
82 | {
83 | LWCOLLECTION *col;
84 | LWGEOM **geoms;
85 | int newtype;
86 | uint32 i;
87 |
88 | if ( where == -1 ) where = to->ngeoms;
89 | else if ( where < -1 || where > to->ngeoms )
90 | {
91 | lwerror("lwmline_add: add position out of range %d..%d",
92 | -1, to->ngeoms);
93 | return NULL;
94 | }
95 |
96 | /* dimensions compatibility are checked by caller */
97 |
98 | /* Construct geoms array */
99 | geoms = lwalloc(sizeof(LWGEOM *)*(to->ngeoms+1));
100 | for (i=0; igeoms[i]);
103 | }
104 | geoms[where] = lwgeom_clone(what);
105 | for (i=where; ingeoms; i++)
106 | {
107 | geoms[i+1] = lwgeom_clone((LWGEOM *)to->geoms[i]);
108 | }
109 |
110 | if ( TYPE_GETTYPE(what->type) == POLYGONTYPE ) newtype = MULTIPOLYGONTYPE;
111 | else newtype = COLLECTIONTYPE;
112 |
113 | col = lwcollection_construct(newtype,
114 | to->SRID, NULL,
115 | to->ngeoms+1, geoms);
116 |
117 | return (LWGEOM *)col;
118 |
119 | }
120 |
121 | void lwmpoly_free(LWMPOLY *mpoly)
122 | {
123 | int i;
124 | if( mpoly->bbox )
125 | {
126 | lwfree(mpoly->bbox);
127 | }
128 | for ( i = 0; i < mpoly->ngeoms; i++ )
129 | {
130 | if( mpoly->geoms[i] ) {
131 | lwpoly_free(mpoly->geoms[i]);
132 | }
133 | }
134 | if( mpoly->geoms )
135 | {
136 | lwfree(mpoly->geoms);
137 | }
138 | lwfree(mpoly);
139 |
140 | };
141 |
142 |
--------------------------------------------------------------------------------
/src/liblwgeom/lwmsurface.c:
--------------------------------------------------------------------------------
1 | /**********************************************************************
2 | * $Id: lwmsurface.c 3639 2009-02-04 00:28:37Z pramsey $
3 | *
4 | * PostGIS - Spatial Types for PostgreSQL
5 | * http://postgis.refractions.net
6 | * Copyright 2001-2006 Refractions Research Inc.
7 | *
8 | * This is free software; you can redistribute and/or modify it under
9 | * the terms of the GNU General Public Licence. See the COPYING file.
10 | *
11 | **********************************************************************/
12 |
13 | #include
14 | #include
15 | #include
16 | #include "liblwgeom.h"
17 |
18 |
19 | LWMSURFACE *
20 | lwmsurface_deserialize(uchar *srl)
21 | {
22 | LWMSURFACE *result;
23 | LWGEOM_INSPECTED *insp;
24 | int stype;
25 | int type = lwgeom_getType(srl[0]);
26 | int i;
27 |
28 | LWDEBUG(2, "lwmsurface_deserialize called");
29 |
30 | if(type != MULTISURFACETYPE)
31 | {
32 | lwerror("lwmsurface_deserialize called on a non-multisurface: %d", type);
33 | return NULL;
34 | }
35 |
36 | insp = lwgeom_inspect(srl);
37 |
38 | result = lwalloc(sizeof(LWMSURFACE));
39 | result->type = insp->type;
40 | result->SRID = insp->SRID;
41 | result->ngeoms = insp->ngeometries;
42 | result->geoms = lwalloc(sizeof(LWPOLY *)*insp->ngeometries);
43 |
44 | if(lwgeom_hasBBOX(srl[0]))
45 | {
46 | result->bbox = lwalloc(sizeof(BOX2DFLOAT4));
47 | memcpy(result->bbox, srl + 1, sizeof(BOX2DFLOAT4));
48 | }
49 | else result->bbox = NULL;
50 |
51 | for(i = 0; i < insp->ngeometries; i++)
52 | {
53 | stype = lwgeom_getType(insp->sub_geoms[i][0]);
54 | if(stype == POLYGONTYPE)
55 | {
56 | result->geoms[i] = (LWGEOM *)lwpoly_deserialize(insp->sub_geoms[i]);
57 | }
58 | else if(stype == CURVEPOLYTYPE)
59 | {
60 | result->geoms[i] = (LWGEOM *)lwcurvepoly_deserialize(insp->sub_geoms[i]);
61 | }
62 | else
63 | {
64 | lwerror("Only Polygons and Curved Polygons are supported in a MultiSurface.");
65 | lwfree(result);
66 | lwfree(insp);
67 | return NULL;
68 | }
69 |
70 | if(TYPE_NDIMS(result->geoms[i]->type) != TYPE_NDIMS(result->type))
71 | {
72 | lwerror("Mixed dimensions (multisurface: %d, surface %d:%d",
73 | TYPE_NDIMS(result->type), i,
74 | TYPE_NDIMS(result->geoms[i]->type));
75 | lwfree(result);
76 | lwfree(insp);
77 | return NULL;
78 | }
79 | }
80 | return result;
81 | }
82 |
83 | /*
84 | * Add 'what' to this multisurface at position 'where'
85 | * where=0 == prepend
86 | * where=-1 == append
87 | * Returns a MULTISURFACE or a COLLECTION
88 | */
89 | LWGEOM *
90 | lwmsurface_add(const LWMSURFACE *to, uint32 where, const LWGEOM *what)
91 | {
92 | LWCOLLECTION *col;
93 | LWGEOM **geoms;
94 | int newtype;
95 | uint32 i;
96 |
97 | if(where == -1) where = to->ngeoms;
98 | else if(where < -1 || where > to->ngeoms)
99 | {
100 | lwerror("lwmsurface_add: add position out of range %d..%d",
101 | -1, to->ngeoms);
102 | return NULL;
103 | }
104 |
105 | /* dimensions compatibility are checked by caller */
106 |
107 | /* Construct geoms array */
108 | geoms = lwalloc(sizeof(LWGEOM *)*(to->ngeoms+1));
109 | for(i = 0; i < where; i++)
110 | {
111 | geoms[i] = lwgeom_clone((LWGEOM *)to->geoms[i]);
112 | }
113 | geoms[where] = lwgeom_clone(what);
114 | for(i = where; i < to->ngeoms; i++)
115 | {
116 | geoms[i+1] = lwgeom_clone((LWGEOM *)to->geoms[i]);
117 | }
118 |
119 | if(TYPE_GETTYPE(what->type) == POLYGONTYPE
120 | || TYPE_GETTYPE(what->type) == CURVEPOLYTYPE)
121 | newtype = MULTISURFACETYPE;
122 | else newtype = COLLECTIONTYPE;
123 |
124 | col = lwcollection_construct(newtype,
125 | to->SRID, NULL, to->ngeoms + 1, geoms);
126 |
127 | return (LWGEOM *)col;
128 | }
129 |
130 |
--------------------------------------------------------------------------------
/src/liblwgeom/lwutil.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 |
6 |
7 | /* Global variables */
8 | #include "liblwgeom.h"
9 |
/* Forward declarations of the bootstrap handlers defined later in this file. */
void *init_allocator(size_t size);
void init_freeor(void *mem);
void *init_reallocator(void *mem, size_t size);
void init_noticereporter(const char *fmt, va_list ap);
void init_errorreporter(const char *fmt, va_list ap);

/*
 * Handler function pointers. Each one starts out aimed at its init_*
 * bootstrap handler, which calls lwgeom_init_allocators() and then
 * re-dispatches through the same pointer.
 */
lwallocator lwalloc_var = init_allocator;
lwreallocator lwrealloc_var = init_reallocator;
lwfreeor lwfree_var = init_freeor;
lwreporter lwnotice_var = init_noticereporter;
lwreporter lwerror_var = init_errorreporter;
21 |
/*
 * Human-readable geometry type names, 16 entries (indexes 0..15).
 * Indexes 10..12 are placeholders flagged "Invalid Type".
 * NOTE(review): presumably indexed by the numeric geometry type code --
 * confirm against the lookup function that reads this table.
 */
static char *lwgeomTypeName[] = {
	"Unknown",
	"Point",
	"Line",
	"Polygon",
	"MultiPoint",
	"MultiLine",
	"MultiPolygon",
	"GeometryCollection",
	"CircularString",
	"CompoundString",
	"Invalid Type",  /* POINTTYPEI */
	"Invalid Type",  /* LINETYPEI */
	"Invalid Type",  /* POLYTYPEI */
	"CurvePolygon",
	"MultiCurve",
	"MultiSurface"
};
40 |
41 |
42 | /*
43 | * lwnotice/lwerror handlers
44 | *
45 | * Since variadic functions cannot pass their parameters directly, we need
46 | * wrappers for these functions to convert the arguments into a va_list
47 | * structure.
48 | */
49 |
/*
 * printf-style notice entry point: packs the variadic arguments into a
 * va_list and forwards them to the currently installed notice handler.
 */
void
lwnotice(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	lwnotice_var(fmt, args);  /* dispatch to the installed handler */
	va_end(args);
}
62 |
/*
 * printf-style error entry point: packs the variadic arguments into a
 * va_list and forwards them to the currently installed error handler.
 */
void
lwerror(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	lwerror_var(fmt, args);  /* dispatch to the installed handler */
	va_end(args);
}
75 |
76 | /*
77 | * Initialisation allocators
78 | *
79 | * These are used the first time any of the allocators are called
80 | * to enable executables/libraries that link into liblwgeom to
81 | * be able to set up their own allocators. This is mainly useful
82 | * for older PostgreSQL versions that don't have functions that
83 | * are called upon startup.
84 | */
85 |
86 | void *
87 | init_allocator(size_t size)
88 | {
89 | lwgeom_init_allocators();
90 |
91 | return lwalloc_var(size);
92 | }
93 |
94 | void
95 | init_freeor(void *mem)
96 | {
97 | lwgeom_init_allocators();
98 |
99 | lwfree_var(mem);
100 | }
101 |
102 | void *
103 | init_reallocator(void *mem, size_t size)
104 | {
105 | lwgeom_init_allocators();
106 |
107 | return lwrealloc_var(mem, size);
108 | }
109 |
/*
 * Bootstrap notice reporter: runs lwgeom_init_allocators() and then forwards
 * the message to the reporter held in lwnotice_var.
 */
void
init_noticereporter(const char *fmt, va_list ap)
{
    lwgeom_init_allocators();
    lwnotice_var(fmt, ap);
}
117 |
/*
 * Bootstrap error reporter: runs lwgeom_init_allocators() and then forwards
 * the message to the reporter held in lwerror_var.
 */
void
init_errorreporter(const char *fmt, va_list ap)
{
    lwgeom_init_allocators();
    lwerror_var(fmt, ap);
}
125 |
126 |
127 | /*
128 | * Default allocators
129 | *
130 | * We include some default allocators that use malloc/free/realloc
131 | * along with stdout/stderr since this is the most common use case
132 | *
133 | */
134 |
/* Default allocator: a plain malloc() of `size` bytes. */
void *
default_allocator(size_t size)
{
    return malloc(size);
}
141 |
/* Default free handler: releases `mem` with free(). */
void
default_freeor(void *mem)
{
free(mem);
}
147 |
/* Default reallocator: a plain realloc() of `mem` to `size` bytes. */
void *
default_reallocator(void *mem, size_t size)
{
    return realloc(mem, size);
}
154 |
/*
 * Default notice reporter: formats the message and prints it, followed by a
 * newline, on stdout.
 *
 * fmt - printf-style format string
 * ap  - arguments for fmt; owned by the caller
 *
 * The va_list is started and ended by the variadic wrapper (lwnotice), so it
 * must not be va_end()ed here.  Success is judged by the buffer pointer, not
 * by the character count: a count of 0 is just an empty message and the
 * buffer still has to be freed.
 */
void
default_noticereporter(const char *fmt, va_list ap)
{
    char *msg = NULL;
    int len;

    len = lw_vasprintf(&msg, fmt, ap);
    if (msg == NULL)
        return;                 /* allocation failed: nothing to print */

    if (len >= 0)
        printf("%s\n", msg);
    free(msg);
}
172 |
/*
 * Default error reporter: formats the message, prints it with a newline on
 * stderr and terminates the process with exit status 1.
 *
 * fmt - printf-style format string
 * ap  - arguments for fmt; owned by the caller
 *
 * The va_list belongs to the variadic wrapper (lwerror), so it is not
 * va_end()ed here.  The process exits even when the message cannot be
 * formatted; in that case the raw format string is printed instead, so an
 * error is never silently dropped.
 */
void
default_errorreporter(const char *fmt, va_list ap)
{
    char *msg = NULL;
    int len;

    len = lw_vasprintf(&msg, fmt, ap);
    if (msg != NULL && len >= 0)
        fprintf(stderr, "%s\n", msg);
    else
        fprintf(stderr, "%s\n", fmt);   /* formatting failed: report raw text */

    free(msg);
    exit(1);
}
191 |
192 |
193 | /*
194 | * This function should be called from lwgeom_init_allocators() by programs
195 | * which wish to use the default allocators above
196 | */
197 |
198 | void lwgeom_install_default_allocators(void)
199 | {
200 | lwalloc_var = default_allocator;
201 | lwrealloc_var = default_reallocator;
202 | lwfree_var = default_freeor;
203 | lwerror_var = default_errorreporter;
204 | lwnotice_var = default_noticereporter;
205 | }
206 |
207 |
208 | const char *
209 | lwgeom_typename(int type)
210 | {
211 | /* something went wrong somewhere */
212 | if ( type < 0 || type > 15 ) {
213 | /* assert(0); */
214 | return "Invalid type";
215 | }
216 | return lwgeomTypeName[type];
217 | }
218 |
/*
 * Allocate `size` bytes through the installed allocator (lwalloc_var).
 * Returns whatever that allocator returns.
 */
void *
lwalloc(size_t size)
{
    void *mem = lwalloc_var(size);

    /* size_t is not an int: print it as unsigned long rather than with %d */
    LWDEBUGF(5, "lwalloc: %lu@%p", (unsigned long) size, mem);
    return mem;
}
226 |
/*
 * Resize `mem` to `size` bytes through the installed reallocator
 * (lwrealloc_var).  Returns whatever that reallocator returns.
 */
void *
lwrealloc(void *mem, size_t size)
{
    /* size_t is not an int: print it as unsigned long rather than with %d */
    LWDEBUGF(5, "lwrealloc: %lu@%p", (unsigned long) size, mem);
    return lwrealloc_var(mem, size);
}
233 |
234 | void
235 | lwfree(void *mem)
236 | {
237 | lwfree_var(mem);
238 | }
239 |
/*
 * Strip the trailing zeros from the fractional part of a %f formatted
 * number, in place.  If every fractional digit is a zero the decimal point
 * is removed as well ("1.500" -> "1.5", "2.000" -> "2").  A string without
 * a '.' is left untouched.
 */
void
trim_trailing_zeros(char *str)
{
    char *dot;
    char *cut;

    LWDEBUGF(3, "input: %s", str);

    dot = strchr(str, '.');
    if ( dot == NULL ) return; /* no dot, no decimal digits */

    LWDEBUGF(3, "ptr: %s", dot);

    /* Walk back from the terminator over the run of trailing zeros,
     * never stepping onto the dot itself. */
    cut = dot + strlen(dot);
    while ( cut - 1 > dot && cut[-1] == '0' )
        --cut;

    if ( *cut != '\0' )
    {
        /* Nothing but zeros after the dot: drop the dot too. */
        if ( cut == dot + 1 )
            *dot = '\0';
        else
            *cut = '\0';
    }

    LWDEBUGF(3, "output: %s", str);
}
272 |
273 |
/*
 * Returns a new string which contains a maximum of maxlength characters
 * taken from str[startpos..endpos] (0-based, inclusive).  If the range is
 * longer than maxlength, the first or last characters are replaced by "..."
 * as appropriate.
 *
 * truncdirection selects which end is cut:
 *   0 - start truncation (characters are removed from the beginning)
 *   1 - end truncation (characters are removed from the end)
 * Any other value yields an empty string.
 *
 * The result is allocated with lwalloc() and must be released by the
 * caller.  NULL is returned if the allocation fails.  A negative maxlength
 * is treated as 0, and a NULL str or an inverted range (endpos < startpos)
 * yields an empty string; both cases previously could write past the end of
 * the output buffer.
 */

char *lwmessage_truncate(char *str, int startpos, int endpos, int maxlength, int truncdirection)
{
    char *output;
    int span;

    if (maxlength < 0)
        maxlength = 0;

    /* Room for maxlength characters, or "..." when maxlength < 3, plus NUL */
    output = lwalloc(maxlength + 4);
    if (output == NULL)
        return NULL;
    output[0] = '\0';

    if (truncdirection != 0 && truncdirection != 1)
        return output;
    if (str == NULL || endpos < startpos)
        return output;

    span = endpos - startpos + 1;

    /* The requested range fits: copy it unchanged */
    if (span <= maxlength)
    {
        strncat(output, str + startpos, span);
        return output;
    }

    /* maxlength is too small to hold any text; just output "..." */
    if (maxlength < 3)
    {
        strncat(output, "...", 3);
        return output;
    }

    if (truncdirection == 0)
    {
        /* "..." prefix followed by the last maxlength - 3 characters */
        strncat(output, "...", 3);
        strncat(output, str + endpos + 1 - (maxlength - 3), maxlength - 3);
    }
    else
    {
        /* First maxlength - 3 characters followed by a "..." suffix */
        strncat(output, str + startpos, maxlength - 3);
        strncat(output, "...", 3);
    }

    return output;
}
349 |
350 |
/*
 * Report the byte order of the running machine by inspecting the first byte
 * of an int holding 1.
 *
 * Returns 0 on a big-endian (XDR) machine, 1 on a little-endian (NDR) one.
 */
char
getMachineEndian(void)
{
    static int probe = 1; /* must stay 1: its lowest-addressed byte is the answer */
    const char *first_byte = (const char *) &probe;

    return *first_byte;
}
360 |
361 |
/* Raise an lwerror() when the two SRIDs differ; otherwise do nothing. */
void
errorIfSRIDMismatch(int srid1, int srid2)
{
    if ( srid1 == srid2 )
        return;

    lwerror("Operation on mixed SRID geometries");
}
370 |
--------------------------------------------------------------------------------
/src/liblwgeom/postgis_config.h:
--------------------------------------------------------------------------------
1 | /* postgis_config.h. Generated from postgis_config.h.in by configure. */
2 | /* postgis_config.h.in. Generated from configure.ac by autoheader. */
3 |
4 | /* Define to 1 if you have the <dlfcn.h> header file. */
5 | #define HAVE_DLFCN_H 1
6 |
7 | /* Defined if libiconv headers and library are present */
8 | #define HAVE_ICONV 0
9 |
10 | /* Define to 1 if you have the <inttypes.h> header file. */
11 | #define HAVE_INTTYPES_H 1
12 |
13 | /* Define to 1 if you have the `geos_c' library (-lgeos_c). */
14 | #define HAVE_LIBGEOS_C 1
15 |
16 | /* Define to 1 if you have the `pq' library (-lpq). */
17 | #define HAVE_LIBPQ 0
18 |
19 | /* Define to 1 if you have the `proj' library (-lproj). */
20 | #define HAVE_LIBPROJ 0
21 |
22 | /* Define to 1 if you have the <memory.h> header file. */
23 | #define HAVE_MEMORY_H 1
24 |
25 | /* Define to 1 if you have the <stdint.h> header file. */
26 | #define HAVE_STDINT_H 1
27 |
28 | /* Define to 1 if you have the <stdlib.h> header file. */
29 | #define HAVE_STDLIB_H 1
30 |
31 | /* Define to 1 if you have the <strings.h> header file. */
32 | #define HAVE_STRINGS_H 1
33 |
34 | /* Define to 1 if you have the <string.h> header file. */
35 | #define HAVE_STRING_H 1
36 |
37 | /* Define to 1 if you have the <sys/stat.h> header file. */
38 | #define HAVE_SYS_STAT_H 1
39 |
40 | /* Define to 1 if you have the <sys/types.h> header file. */
41 | #define HAVE_SYS_TYPES_H 1
42 |
43 | /* Define to 1 if you have the <unistd.h> header file. */
44 | #define HAVE_UNISTD_H 1
45 |
46 | /* Enable caching of bounding box within geometries */
47 | #define POSTGIS_AUTOCACHE_BBOX 0
48 |
49 | /* PostGIS build date */
50 | #define POSTGIS_BUILD_DATE "2009-03-09 15:11:36"
51 |
52 | /* PostGIS library debug level (0=disabled) */
53 | #define POSTGIS_DEBUG_LEVEL 0
54 |
55 | /* GEOS library version */
56 | #define POSTGIS_GEOS_VERSION 30
57 |
58 | /* PostGIS library version */
59 | #define POSTGIS_LIB_VERSION "1.4.0SVN"
60 |
61 | /* PostGIS major version */
62 | #define POSTGIS_MAJOR_VERSION "1"
63 |
64 | /* PostGIS micro version */
65 | #define POSTGIS_MICRO_VERSION "0SVN"
66 |
67 | /* PostGIS minor version */
68 | #define POSTGIS_MINOR_VERSION "4"
69 |
70 | /* PostgreSQL server version */
71 | #define POSTGIS_PGSQL_VERSION 83
72 |
73 | /* Enable GEOS profiling (0=disabled) */
74 | #define POSTGIS_PROFILE 0
75 |
76 | /* PROJ library version */
77 | #define POSTGIS_PROJ_VERSION 46
78 |
79 | /* PostGIS scripts version */
80 | #define POSTGIS_SCRIPTS_VERSION "1.4.0SVN"
81 |
82 | /* Enable use of ANALYZE statistics */
83 | #define POSTGIS_USE_STATS 1
84 |
85 | /* PostGIS version */
86 | #define POSTGIS_VERSION "1.4 USE_GEOS=1 USE_PROJ=1 USE_STATS=1"
87 |
88 | /* Define to 1 if you have the ANSI C header files. */
89 | #define STDC_HEADERS 1
90 |
91 | /* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a
92 | `char[]'. */
93 | #define YYTEXT_POINTER 1
94 |
--------------------------------------------------------------------------------
/src/liblwgeom/vsprintf.c:
--------------------------------------------------------------------------------
1 | /* Like vsprintf but provides a pointer to malloc'd storage, which must
2 | be freed by the caller.
3 | Copyright (C) 1994, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
4 |
5 | This program is free software; you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation; either version 2, or (at your option)
8 | any later version.
9 |
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with this program; if not, write to the Free Software
17 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 |
19 | #ifdef HAVE_CONFIG_H
20 | # include
21 | #endif
22 |
23 | #include
24 | #include
25 | #include
26 |
27 | #if __STDC__
28 | # include
29 | #else
30 | # include
31 | #endif
32 |
33 | #include
34 |
35 | #ifdef TEST
36 | int global_total_width;
37 | #endif
38 |
39 |
int lw_vasprintf (char **result, const char *format, va_list args);
int lw_asprintf
#if __STDC__
     (char **result, const char *format, ...);
#else
     (result, va_alist);
     char **result;
     va_dcl
#endif


/*
 * Worker for lw_vasprintf().
 *
 * Walks the format string once to compute an upper bound on the length of
 * the formatted output, allocates that many bytes with malloc(), and then
 * formats into the buffer with vsprintf().
 *
 * result - receives the malloc'd buffer (NULL if the allocation failed)
 * format - printf-style format string
 * args   - pointer to the caller's va_list; a private copy is used for the
 *          sizing pass, the original is consumed by vsprintf()
 *
 * Returns the value of vsprintf(), or 0 if the allocation failed.
 *
 * NOTE(review): the sizing pass reads every integer conversion as `int`
 * regardless of any length modifier, exactly as before.
 */
static int
int_vasprintf (char **result, const char *format, va_list *args)
{
  const char *p = format;
  /* Add one to make sure that it is never zero, which might cause malloc
     to return NULL.  */
  int total_width = strlen (format) + 1;
  va_list ap;

  /* va_list may be an array or struct type; copy it the supported way
     instead of with memcpy.  */
  __va_copy (ap, *args);

  while (*p != '\0')
    {
      if (*p++ != '%')
        continue;

      /* Flags.  strchr() also matches the terminating NUL, so test for it
         explicitly or the scan walks off the end of the format.  */
      while (*p != '\0' && strchr ("-+ #0", *p))
        ++p;

      /* Field width.  */
      if (*p == '*')
        {
          ++p;
          total_width += abs (va_arg (ap, int));
        }
      else
        total_width += strtoul (p, (char **) &p, 10);

      /* Precision.  */
      if (*p == '.')
        {
          ++p;
          if (*p == '*')
            {
              ++p;
              total_width += abs (va_arg (ap, int));
            }
          else
            total_width += strtoul (p, (char **) &p, 10);
        }

      /* Length modifiers (same NUL caveat as for the flags).  */
      while (*p != '\0' && strchr ("hlLjtz", *p))
        ++p;

      /* Should be big enough for any format specifier except %s
         and floats.  */
      total_width += 30;
      switch (*p)
        {
        case 'd':
        case 'i':
        case 'o':
        case 'u':
        case 'x':
        case 'X':
        case 'c':
          (void) va_arg (ap, int);
          break;
        case 'f':
          {
            double arg = va_arg (ap, double);
            if (arg >= 1.0 || arg <= -1.0)
              /* Since an ieee double can have an exponent of 307, we'll
                 make the buffer wide enough to cover the gross case. */
              total_width += 307;
          }
          break;
        case 'e':
        case 'E':
        case 'g':
        case 'G':
          (void) va_arg (ap, double);
          break;
        case 's':
          total_width += strlen (va_arg (ap, char *));
          break;
        case 'p':
        case 'n':
          (void) va_arg (ap, char *);
          break;
        }

      /* Step over the conversion character, but never over the NUL of a
         format that ends in the middle of a specifier.  */
      if (*p != '\0')
        p++;
    }
  va_end (ap);
#ifdef TEST
  global_total_width = total_width;
#endif
  *result = malloc (total_width);
  if (*result != NULL)
    return vsprintf (*result, format, *args);
  else
    return 0;
}

/*
 * Like vsprintf(), but stores the output in freshly malloc'd storage.
 *
 * result - receives the buffer; the caller must free() it
 * format - printf-style format string
 * args   - arguments for format; left for the caller to va_end()
 *
 * Returns the value of vsprintf(), or 0 if the allocation failed.
 */
int
lw_vasprintf (char **result, const char *format, va_list args)
{
  va_list temp;
  int done;

  /* Use va_copy for compatibility with both 32 and 64 bit args */
  __va_copy (temp, args);
  done = int_vasprintf (result, format, &temp);
  va_end (temp);

  return done;
}

/*
 * Like sprintf(), but stores the output in freshly malloc'd storage that
 * the caller must free().  Returns the value produced by lw_vasprintf().
 */
int
lw_asprintf
#if __STDC__
     (char **result, const char *format, ...)
#else
     (result, va_alist)
     char **result;
     va_dcl
#endif
{
  va_list args;
  int done;

#if __STDC__
  va_start (args, format);
#else
  char *format;
  va_start (args);
  format = va_arg (args, char *);
#endif
  done = lw_vasprintf (result, format, args);
  va_end (args);

  return done;
}
180 |
--------------------------------------------------------------------------------
/src/liblwgeom/wktparse.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Written by Ralph Mason ralph.mason<at>telogis.com
3 | *
4 | * Copyright Telogis 2004
5 | * www.telogis.com
6 | *
7 | */
8 |
9 | #ifndef _WKTPARSE_H
10 | #define _WKTPARSE_H
11 |
12 | #include
13 |
14 |
/*
 * Stand-alone declarations of the types this header needs.  They are only
 * compiled when _LIBLWGEOM_H is not defined.
 * NOTE(review): presumably liblwgeom.h supplies the same types when it has
 * been included first -- confirm the two sets stay in sync.
 */
#ifndef _LIBLWGEOM_H
typedef unsigned char uchar;

/* A serialized geometry buffer together with its size. */
typedef struct serialized_lwgeom {
	uchar *lwgeom;
	int size;
} SERIALIZED_LWGEOM;

/* Result record filled in by parse_lwg() / parse_lwgi(). */
typedef struct struct_lwgeom_parser_result
{
	const char *wkinput;
	SERIALIZED_LWGEOM *serialized_lwgeom;
	int size;
	const char *message;
	int errlocation;
} LWGEOM_PARSER_RESULT;

/* Result record filled in by unparse_WKT() / unparse_WKB(). */
typedef struct struct_lwgeom_unparser_result
{
	uchar *serialized_lwgeom;
	char *wkoutput;
	int size;
	const char *message;
	int errlocation;
} LWGEOM_UNPARSER_RESULT;
#endif
/* Callback signatures accepted by the parse/unparse entry points below. */
typedef void* (*allocator)(size_t size);
typedef void (*freeor)(void* mem);
typedef void (*report_error)(const char* string, ...);

/*typedef unsigned long int4;*/

/* How much memory is allocated at a time(bytes) for tuples */
#define ALLOC_CHUNKS 8192

/* to shrink ints less than 0x7f to 1 byte */
/* #define SHRINK_INTS */

/* Geometry type codes 1-7 */
#define POINTTYPE 1
#define LINETYPE 2
#define POLYGONTYPE 3
#define MULTIPOINTTYPE 4
#define MULTILINETYPE 5
#define MULTIPOLYGONTYPE 6
#define COLLECTIONTYPE 7

/* Extended lwgeom integer types */
#define POINTTYPEI 10
#define LINETYPEI 11
#define POLYGONTYPEI 12

/* Curve type codes; note they are not contiguous (8, 9, then 13-15) */
#define CIRCSTRINGTYPE 8
#define COMPOUNDTYPE 9
#define CURVEPOLYTYPE 13
#define MULTICURVETYPE 14
#define MULTISURFACETYPE 15
71 |
extern int srid;

/*

 These functions are used by the
 generated parser and are not meant
 for public use

*/

void set_srid(double srid);
void alloc_lwgeom(int srid);

void alloc_point_2d(double x,double y);
void alloc_point_3d(double x,double y,double z);
void alloc_point_4d(double x,double y,double z,double m);

void alloc_point(void);
void alloc_linestring(void);
void alloc_linestring_closed(void);
void alloc_circularstring(void);
void alloc_circularstring_closed(void);
void alloc_polygon(void);
void alloc_compoundcurve(void);
void alloc_curvepolygon(void);
void alloc_multipoint(void);
void alloc_multilinestring(void);
void alloc_multicurve(void);
void alloc_multipolygon(void);
void alloc_multisurface(void);
/* NOTE(review): "geomerty" is misspelled, but this is the declared name;
   renaming it would require changing the definition and every caller. */
void alloc_geomertycollection(void);
/* NOTE(review): declared with empty parentheses, unlike its (void) siblings. */
void alloc_empty();
void alloc_counter(void);


void pop(void);
void popc(void);

void alloc_wkb(const char* parser);

/*
	Use these functions to parse and unparse lwgeoms
	You are responsible for freeing the returned memory.
*/

int parse_lwg(LWGEOM_PARSER_RESULT *lwg_parser_result, const char* wkt, int flags, allocator allocfunc,report_error errfunc);
int parse_lwgi(LWGEOM_PARSER_RESULT *lwg_parser_result, const char* wkt, int flags, allocator allocfunc,report_error errfunc);
int unparse_WKT(LWGEOM_UNPARSER_RESULT *lwg_unparser_result, uchar* serialized, allocator alloc, freeor free, int flags);
int unparse_WKB(LWGEOM_UNPARSER_RESULT *lwg_unparser_result, uchar* serialized, allocator alloc, freeor free, int flags, char endian, uchar hexform);
/* Entry points of the generated parser and its error/notice hooks. */
int lwg_parse_yyparse(void);
int lwg_parse_yyerror(char* s);
void lwg_parse_yynotice(char* s);
124 |
125 |
126 | #endif /* _WKTPARSE_H */
127 |
--------------------------------------------------------------------------------
/src/liblwgeom/wktparse.lex:
--------------------------------------------------------------------------------
1 | /*
2 | * Written by Ralph Mason ralph.mason<at>telogis.com
3 | *
4 | * Copyright Telogis 2004
5 | * www.telogis.com
6 | *
7 | */
8 |
9 | %x vals_ok
10 | %{
11 | #include "wktparse.tab.h"
12 | #include
13 | #include /* need stdlib for atof() definition */
14 |
15 | void init_parser(const char *src);
16 | void close_parser(void);
17 | int lwg_parse_yywrap(void);
18 | int lwg_parse_yylex(void);
19 |
20 | static YY_BUFFER_STATE buf_state;
21 | void init_parser(const char *src) { BEGIN(0);buf_state = lwg_parse_yy_scan_string(src); }
22 | void close_parser() { lwg_parse_yy_delete_buffer(buf_state); }
23 | int lwg_parse_yywrap(void){ return 1; }
24 |
25 | /* Macro to keep track of the current parse position */
26 | #define UPDATE_YYLLOC() (lwg_parse_yylloc.last_column += yyleng)
27 |
28 | %}
29 |
30 | %%
31 |
32 | [-|\+]?[0-9]+(\.[0-9]+)?([Ee](\+|-)?[0-9]+)? { lwg_parse_yylval.value=atof(lwg_parse_yytext); UPDATE_YYLLOC(); return VALUE; }
33 | [-|\+]?(\.[0-9]+)([Ee](\+|-)?[0-9]+)? { lwg_parse_yylval.value=atof(lwg_parse_yytext); UPDATE_YYLLOC(); return VALUE; }
34 |
35 | 00[0-9A-F]* { lwg_parse_yylval.wkb=lwg_parse_yytext; return WKB;}
36 | 01[0-9A-F]* { lwg_parse_yylval.wkb=lwg_parse_yytext; return WKB;}
37 |
 /* Geometry keywords and punctuation.  Every rule is prefixed with <*> so
    it is active in all start conditions, and each action except the final
    catch-all advances the location via UPDATE_YYLLOC().  SRID and "("
    switch to vals_ok; ";" switches back to the initial condition. */
<*>POINT { UPDATE_YYLLOC(); return POINT; }
<*>POINTM { UPDATE_YYLLOC(); return POINTM; }
<*>LINESTRING { UPDATE_YYLLOC(); return LINESTRING; }
<*>LINESTRINGM { UPDATE_YYLLOC(); return LINESTRINGM; }
<*>CIRCULARSTRING { UPDATE_YYLLOC(); return CIRCULARSTRING; }
<*>CIRCULARSTRINGM { UPDATE_YYLLOC(); return CIRCULARSTRINGM; }
<*>POLYGON { UPDATE_YYLLOC(); return POLYGON; }
<*>POLYGONM { UPDATE_YYLLOC(); return POLYGONM; }
<*>COMPOUNDCURVE { UPDATE_YYLLOC(); return COMPOUNDCURVE; }
<*>COMPOUNDCURVEM { UPDATE_YYLLOC(); return COMPOUNDCURVEM; }
<*>CURVEPOLYGON { UPDATE_YYLLOC(); return CURVEPOLYGON; }
<*>CURVEPOLYGONM { UPDATE_YYLLOC(); return CURVEPOLYGONM; }
<*>MULTIPOINT { UPDATE_YYLLOC(); return MULTIPOINT; }
<*>MULTIPOINTM { UPDATE_YYLLOC(); return MULTIPOINTM; }
<*>MULTILINESTRING { UPDATE_YYLLOC(); return MULTILINESTRING; }
<*>MULTILINESTRINGM { UPDATE_YYLLOC(); return MULTILINESTRINGM; }
<*>MULTICURVE { UPDATE_YYLLOC(); return MULTICURVE; }
<*>MULTICURVEM { UPDATE_YYLLOC(); return MULTICURVEM; }
<*>MULTIPOLYGON { UPDATE_YYLLOC(); return MULTIPOLYGON; }
<*>MULTIPOLYGONM { UPDATE_YYLLOC(); return MULTIPOLYGONM; }
<*>MULTISURFACE { UPDATE_YYLLOC(); return MULTISURFACE; }
<*>MULTISURFACEM { UPDATE_YYLLOC(); return MULTISURFACEM; }
<*>GEOMETRYCOLLECTION { UPDATE_YYLLOC(); return GEOMETRYCOLLECTION; }
<*>GEOMETRYCOLLECTIONM { UPDATE_YYLLOC(); return GEOMETRYCOLLECTIONM; }
<*>SRID { BEGIN(vals_ok); UPDATE_YYLLOC(); return SRID; }
<*>EMPTY { UPDATE_YYLLOC(); return EMPTY; }

<*>\( { BEGIN(vals_ok); UPDATE_YYLLOC(); return LPAREN; }
<*>\) { UPDATE_YYLLOC(); return RPAREN; }
<*>, { UPDATE_YYLLOC(); return COMMA ; }
<*>= { UPDATE_YYLLOC(); return EQUALS ; }
<*>; { BEGIN(0); UPDATE_YYLLOC(); return SEMICOLON; }
<*>[ \t\n\r]+ /*eat whitespace*/ { UPDATE_YYLLOC(); }
 /* Anything else is handed to the parser as a single-character token. */
<*>. { return lwg_parse_yytext[0]; }
72 |
73 | %%
74 |
75 |
--------------------------------------------------------------------------------
/src/liblwgeom/wktparse.tab.h:
--------------------------------------------------------------------------------
1 | /* A Bison parser, made by GNU Bison 2.3. */
2 |
3 | /* Skeleton interface for Bison's Yacc-like parsers in C
4 |
5 | Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
6 | Free Software Foundation, Inc.
7 |
8 | This program is free software; you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation; either version 2, or (at your option)
11 | any later version.
12 |
13 | This program is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
19 | along with this program; if not, write to the Free Software
20 | Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 | Boston, MA 02110-1301, USA. */
22 |
23 | /* As a special exception, you may create a larger work that contains
24 | part or all of the Bison parser skeleton and distribute that work
25 | under terms of your choice, so long as that work isn't itself a
26 | parser generator using the skeleton or a modified version thereof
27 | as a parser skeleton. Alternatively, if you modify or redistribute
28 | the parser skeleton itself, you may (at your option) remove this
29 | special exception, which will cause the skeleton and the resulting
30 | Bison output files to be licensed under the GNU General Public
31 | License without this special exception.
32 |
33 | This special exception was added by the Free Software Foundation in
34 | version 2.2 of Bison. */
35 |
36 | /* Tokens. */
37 | #ifndef YYTOKENTYPE
38 | # define YYTOKENTYPE
39 | /* Put the tokens into the symbol table, so that GDB and other debuggers
40 | know about them. */
41 | enum yytokentype {
42 | POINT = 258,
43 | LINESTRING = 259,
44 | POLYGON = 260,
45 | MULTIPOINT = 261,
46 | MULTILINESTRING = 262,
47 | MULTIPOLYGON = 263,
48 | GEOMETRYCOLLECTION = 264,
49 | CIRCULARSTRING = 265,
50 | COMPOUNDCURVE = 266,
51 | CURVEPOLYGON = 267,
52 | MULTICURVE = 268,
53 | MULTISURFACE = 269,
54 | POINTM = 270,
55 | LINESTRINGM = 271,
56 | POLYGONM = 272,
57 | MULTIPOINTM = 273,
58 | MULTILINESTRINGM = 274,
59 | MULTIPOLYGONM = 275,
60 | GEOMETRYCOLLECTIONM = 276,
61 | CIRCULARSTRINGM = 277,
62 | COMPOUNDCURVEM = 278,
63 | CURVEPOLYGONM = 279,
64 | MULTICURVEM = 280,
65 | MULTISURFACEM = 281,
66 | SRID = 282,
67 | EMPTY = 283,
68 | VALUE = 284,
69 | LPAREN = 285,
70 | RPAREN = 286,
71 | COMMA = 287,
72 | EQUALS = 288,
73 | SEMICOLON = 289,
74 | WKB = 290
75 | };
76 | #endif
77 | /* Tokens. */
78 | #define POINT 258
79 | #define LINESTRING 259
80 | #define POLYGON 260
81 | #define MULTIPOINT 261
82 | #define MULTILINESTRING 262
83 | #define MULTIPOLYGON 263
84 | #define GEOMETRYCOLLECTION 264
85 | #define CIRCULARSTRING 265
86 | #define COMPOUNDCURVE 266
87 | #define CURVEPOLYGON 267
88 | #define MULTICURVE 268
89 | #define MULTISURFACE 269
90 | #define POINTM 270
91 | #define LINESTRINGM 271
92 | #define POLYGONM 272
93 | #define MULTIPOINTM 273
94 | #define MULTILINESTRINGM 274
95 | #define MULTIPOLYGONM 275
96 | #define GEOMETRYCOLLECTIONM 276
97 | #define CIRCULARSTRINGM 277
98 | #define COMPOUNDCURVEM 278
99 | #define CURVEPOLYGONM 279
100 | #define MULTICURVEM 280
101 | #define MULTISURFACEM 281
102 | #define SRID 282
103 | #define EMPTY 283
104 | #define VALUE 284
105 | #define LPAREN 285
106 | #define RPAREN 286
107 | #define COMMA 287
108 | #define EQUALS 288
109 | #define SEMICOLON 289
110 | #define WKB 290
111 |
112 |
113 |
114 |
115 | #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
116 | typedef union YYSTYPE
117 | #line 22 "wktparse.y"
118 | {
119 | double value;
120 | const char* wkb;
121 | }
122 | /* Line 1489 of yacc.c. */
123 | #line 124 "y.tab.h"
124 | YYSTYPE;
125 | # define yystype YYSTYPE /* obsolescent; will be withdrawn */
126 | # define YYSTYPE_IS_DECLARED 1
127 | # define YYSTYPE_IS_TRIVIAL 1
128 | #endif
129 |
130 | extern YYSTYPE lwg_parse_yylval;
131 |
132 | #if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
133 | typedef struct YYLTYPE
134 | {
135 | int first_line;
136 | int first_column;
137 | int last_line;
138 | int last_column;
139 | } YYLTYPE;
140 | # define yyltype YYLTYPE /* obsolescent; will be withdrawn */
141 | # define YYLTYPE_IS_DECLARED 1
142 | # define YYLTYPE_IS_TRIVIAL 1
143 | #endif
144 |
145 | extern YYLTYPE lwg_parse_yylloc;
146 |
--------------------------------------------------------------------------------
/src/libsqlite3_geocoder/Makefile:
--------------------------------------------------------------------------------
# Builds the SQLite3 geocoder extension as a shared library.
all: libsqlite3_geocoder.so

CC=gcc
CFLAGS=-fPIC

# None of these targets produces a file of the same name.
.PHONY: all test test_wkb_compress test_levenshtein clean

# Libraries are listed after the objects that use them: linkers resolve
# symbols left to right, so naming -lsqlite3/-lm first can leave them unused.
libsqlite3_geocoder.so: extension.o wkb_compress.o util.o metaphon.o levenshtein.o
	$(CC) $(CFLAGS) -shared $^ -o $@ -lsqlite3 -lm

test: test_wkb_compress test_levenshtein

# Each test binary is the module compiled on its own with -DTEST.
test_wkb_compress: wkb_compress.c
	$(CC) -DTEST -o wkb_compress $^

test_levenshtein: levenshtein.c
	$(CC) -DTEST -o levenshtein $^

clean:
	rm -f *.o *.so wkb_compress levenshtein
17 |
--------------------------------------------------------------------------------
/src/libsqlite3_geocoder/Makefile.nix:
--------------------------------------------------------------------------------
# Builds the SQLite3 geocoder extension as a shared library, using the
# environment's $(CC) and default flags.
all: libsqlite3_geocoder.so

# None of these targets produces a file of the same name.
.PHONY: all test test_wkb_compress test_levenshtein clean

libsqlite3_geocoder.so: extension.o wkb_compress.o util.o metaphon.o levenshtein.o
	$(CC) -shared $^ -o $@

test: test_wkb_compress test_levenshtein

# Each test binary is the module compiled on its own with -DTEST.
test_wkb_compress: wkb_compress.c
	$(CC) -DTEST -o wkb_compress $^

test_levenshtein: levenshtein.c
	$(CC) -DTEST -o levenshtein $^

clean:
	rm -f *.o *.so wkb_compress levenshtein
16 |
--------------------------------------------------------------------------------
/src/libsqlite3_geocoder/Makefile.redhat:
--------------------------------------------------------------------------------
# Builds the SQLite3 geocoder extension as a shared library from
# position-independent objects.
all: libsqlite3_geocoder.so

CFLAGS=-fPIC

# None of these targets produces a file of the same name.
.PHONY: all test test_wkb_compress test_levenshtein clean

libsqlite3_geocoder.so: extension.o wkb_compress.o util.o metaphon.o levenshtein.o
	$(CC) $(CFLAGS) -shared $^ -o $@

test: test_wkb_compress test_levenshtein

# Each test binary is the module compiled on its own with -DTEST.
test_wkb_compress: wkb_compress.c
	$(CC) -DTEST -o wkb_compress $^

test_levenshtein: levenshtein.c
	$(CC) -DTEST -o levenshtein $^

clean:
	rm -f *.o *.so wkb_compress levenshtein
16 |
--------------------------------------------------------------------------------
/src/libsqlite3_geocoder/extension.c:
--------------------------------------------------------------------------------
1 | # include
2 | # include
3 | # include
4 | # include
5 | # include
6 |
7 | # include "extension.h"
8 |
9 | static SQLITE_EXTENSION_INIT1;
10 |
11 | static void
12 | sqlite3_metaphone (sqlite3_context *context, int argc, sqlite3_value **argv) {
13 | const unsigned char *input = sqlite3_value_text(argv[0]);
14 | int max_phones = 0;
15 | char *output;
16 | int len;
17 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
18 | sqlite3_result_null(context);
19 | return;
20 | }
21 | if (argc > 1)
22 | max_phones = sqlite3_value_int(argv[1]);
23 | if (max_phones <= 0)
24 | max_phones = strlen(input);
25 | output = sqlite3_malloc((max_phones+1)*sizeof(char));
26 | len = metaphone(input, output, max_phones);
27 | sqlite3_result_text(context, output, len, sqlite3_free);
28 | }
29 |
30 | static void
31 | sqlite3_levenshtein (sqlite3_context *context, int argc, sqlite3_value **argv) {
32 | const unsigned char *s1 = sqlite3_value_text(argv[0]),
33 | *s2 = sqlite3_value_text(argv[1]);
34 | double dist;
35 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL ||
36 | sqlite3_value_type(argv[1]) == SQLITE_NULL) {
37 | sqlite3_result_null(context);
38 | return;
39 | }
40 | dist = levenshtein_distance(s1, s2);
41 | sqlite3_result_double(context, dist);
42 | }
43 |
44 | static void
45 | sqlite3_digit_suffix (sqlite3_context *context,
46 | int argc, sqlite3_value **argv) {
47 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
48 | sqlite3_result_null(context);
49 | return;
50 | }
51 | const unsigned char *input = sqlite3_value_text(argv[0]);
52 | char *output = sqlite3_malloc((strlen(input)+1) * sizeof(char));
53 | size_t len = digit_suffix(input, output);
54 | sqlite3_result_text(context, output, len, sqlite3_free);
55 | }
56 |
57 | static void
58 | sqlite3_nondigit_prefix (sqlite3_context *context,
59 | int argc, sqlite3_value **argv) {
60 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
61 | sqlite3_result_null(context);
62 | return;
63 | }
64 | const unsigned char *input = sqlite3_value_text(argv[0]);
65 | char *output = sqlite3_malloc((strlen(input)+1) * sizeof(char));
66 | size_t len = nondigit_prefix(input, output);
67 | sqlite3_result_text(context, output, len, sqlite3_free);
68 | }
69 |
70 |
71 | static void
72 | sqlite3_compress_wkb_line (sqlite3_context *context,
73 | int argc, sqlite3_value **argv) {
74 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
75 | sqlite3_result_null(context);
76 | return;
77 | }
78 | unsigned long input_len = sqlite3_value_bytes(argv[0]);
79 | const void *input = sqlite3_value_blob(argv[0]);
80 | unsigned long output_len = ceil((input_len-9)/8.0) * 4;
81 | unsigned long len = 0;
82 | void *output = sqlite3_malloc(output_len);
83 | len = compress_wkb_line(output, input, input_len);
84 | assert(len == output_len);
85 | sqlite3_result_blob(context, output, len, sqlite3_free);
86 | }
87 |
88 | static void
89 | sqlite3_uncompress_wkb_line (sqlite3_context *context,
90 | int argc, sqlite3_value **argv) {
91 | unsigned long input_len = sqlite3_value_bytes(argv[0]);
92 | const void *input = sqlite3_value_blob(argv[0]);
93 | unsigned long output_len = input_len*2+9;
94 | unsigned long len = 0;
95 | void *output = sqlite3_malloc(output_len);
96 | len = uncompress_wkb_line(output, input, input_len);
97 | assert(len == output_len);
98 | sqlite3_result_blob(context, output, len, sqlite3_free);
99 | }
100 |
101 | int sqlite3_extension_init (sqlite3 * db, char **pzErrMsg,
102 | const sqlite3_api_routines *pApi) {
103 | SQLITE_EXTENSION_INIT2(pApi);
104 |
105 | sqlite3_create_function(db, "metaphone", 1, SQLITE_ANY,
106 | NULL, sqlite3_metaphone, NULL, NULL);
107 | sqlite3_create_function(db, "metaphone", 2, SQLITE_ANY,
108 | NULL, sqlite3_metaphone, NULL, NULL);
109 |
110 | sqlite3_create_function(db, "levenshtein", 2, SQLITE_ANY,
111 | NULL, sqlite3_levenshtein, NULL, NULL);
112 | sqlite3_create_function(db, "compress_wkb_line", 1, SQLITE_ANY,
113 | NULL, sqlite3_compress_wkb_line, NULL, NULL);
114 | sqlite3_create_function(db, "uncompress_wkb_line", 1, SQLITE_ANY,
115 | NULL, sqlite3_uncompress_wkb_line, NULL, NULL);
116 | sqlite3_create_function(db, "digit_suffix", 1, SQLITE_ANY,
117 | NULL, sqlite3_digit_suffix, NULL, NULL);
118 | sqlite3_create_function(db, "nondigit_prefix", 1, SQLITE_ANY,
119 | NULL, sqlite3_nondigit_prefix, NULL, NULL);
120 | return 0;
121 | }
122 |
--------------------------------------------------------------------------------
/src/libsqlite3_geocoder/extension.h:
--------------------------------------------------------------------------------
#ifndef SQLITE3_GEOCODER
#define SQLITE3_GEOCODER

/* uint32_t, used by the WKB compression prototypes below */
#include <stdint.h>

/* Phonetic encoding of Word into Metaph, at most max_phones characters. */
int metaphone(const char *Word, char *Metaph, int max_phones);

/* Like metaphone(), but input that starts with a digit is copied through
 * as its leading run of digits instead of being phonetically encoded. */
int address_metaphone(const char *input, char *output, int max_phones);

/* Edit distance between s1 and s2 divided by the longer string's length. */
double levenshtein_distance (const unsigned char *s1, const unsigned char *s2);

/* Index of the last non-digit character in string, or -1 if there is none. */
signed int rindex_nondigit (const char *string);

/* Copies the trailing run of digits of input into output; returns its length. */
signed int digit_suffix (const char *input, char *output);

/* Copies everything before the trailing run of digits; returns its length. */
signed int nondigit_prefix (const char *input, char *output);

/* Packs the doubles that follow the 9-byte header of src into 32-bit
 * integers (value * 1000000); returns the number of bytes written to dest. */
uint32_t compress_wkb_line (void *dest, const void *src, uint32_t len);

/* Inverse of compress_wkb_line(): writes a 9-byte header followed by one
 * double per 32-bit integer; returns the number of bytes written to dest. */
uint32_t uncompress_wkb_line (void *dest, const void *src, uint32_t len);

#endif
14 |
--------------------------------------------------------------------------------
/src/libsqlite3_geocoder/levenshtein.c:
--------------------------------------------------------------------------------
1 | # include
# define STRLEN_MAX 256
# define min(x, y) ((x) < (y) ? (x) : (y))
# define max(x, y) ((x) > (y) ? (x) : (y))
/* Mask that clears bit 5, folding ASCII upper and lower case together. */
# define NO_CASE (~(unsigned char)32)
# define eql(x, y) (((x) & NO_CASE) == ((y) & NO_CASE))

/* Shared scratch matrix: keeps 256 KB off the stack, but makes the
 * function non-reentrant. */
static int d[STRLEN_MAX][STRLEN_MAX]; // this isn't thread safe

/*
 * Normalized edit distance between two NUL-terminated strings.
 *
 * Counts insertions, deletions, substitutions and transpositions of
 * adjacent characters, comparing characters with the eql() macro, and
 * divides the count by the length of the longer string. Only the first
 * STRLEN_MAX-1 bytes of each string are considered.
 *
 * Returns 0.0 when both strings are empty (previously this divided zero by
 * zero and returned NaN).
 */
double levenshtein_distance (const unsigned char *s1, const unsigned char *s2) {
    const size_t len1 = min(strlen((const char *) s1), STRLEN_MAX-1),
                 len2 = min(strlen((const char *) s2), STRLEN_MAX-1);
    size_t i, j;
    int cost;

    /* Nothing to compare: identical by definition, and avoids 0/0 below. */
    if (len1 == 0 && len2 == 0)
        return 0.0;

    /* First row/column: distance from the empty prefix. */
    d[0][0] = 0;
    for (i = 1; i <= len1; ++i) d[i][0] = (int) i;
    for (j = 1; j <= len2; ++j) d[0][j] = (int) j;

    for (i = 1; i <= len1; ++i) {
        for (j = 1; j <= len2; ++j) {
            cost = (eql(s1[i-1], s2[j-1]) ? 0 : 1);
            d[i][j] = min(min(
                d[i-1][j  ] + 1,      /* deletion */
                d[i  ][j-1] + 1),     /* insertion */
                d[i-1][j-1] + cost);  /* substitution */
            if (i > 1 && j > 1 && eql(s1[i-1], s2[j-2]) && eql(s1[i-2], s2[j-1])) {
                d[i][j] = min( d[i][j],
                               d[i-2][j-2] + cost );  /* transposition */
            }
        }
    }
    return (d[len1][len2] / (double) max(len1, len2));
}
32 |
#ifdef TEST
#include <stdio.h>

/* Command-line harness: prints the normalized distance between the two
 * arguments as a percentage. Returns -1 when fewer than two are given. */
int main (int argc, char **argv) {
    if (argc < 3) {
        fprintf(stderr, "usage: %s STRING1 STRING2\n", argv[0]);
        return -1;
    }
    printf("%.1f%%\n",
           levenshtein_distance((const unsigned char *) argv[1],
                                (const unsigned char *) argv[2]) * 100);
    return 0;
}

#endif
43 |
--------------------------------------------------------------------------------
/src/libsqlite3_geocoder/util.c:
--------------------------------------------------------------------------------
1 | # include
2 | # include
3 |
/*
 * Metaphone variant for address tokens.
 *
 * If input starts with a digit, its leading run of digits (at most
 * max_phones of them) is copied to output unchanged and NUL-terminated;
 * otherwise the work is delegated to metaphone(). output must have room
 * for max_phones + 1 bytes.
 *
 * Returns 1 for digit input, or whatever metaphone() returns.
 * NOTE(review): the digit branch returns 1 rather than the number of
 * characters written -- confirm what callers expect before relying on it.
 */
int address_metaphone(const char *input, char *output, int max_phones) {
    int i;

    /* isdigit() is only defined for unsigned char values and EOF, so cast
     * before calling it; isdigit('\0') is false, which also ends the loop. */
    if (!isdigit((unsigned char) input[0]))
        return metaphone(input, output, max_phones);

    for (i = 0; i < max_phones && isdigit((unsigned char) input[i]); i++)
        output[i] = input[i];
    output[i] = '\0';
    return 1;
}
16 |
/*
 * Returns the index of the last character of string that is not a decimal
 * digit, or -1 if the string is empty or consists only of digits.
 */
signed int rindex_nondigit (const char *string) {
    signed int i = strlen(string);

    /* Walk backwards over the trailing digits. The cast keeps isdigit()
     * well-defined for bytes with the high bit set. */
    while (--i >= 0 && isdigit((unsigned char) string[i]))
        ;
    return i;
}
23 |
/*
 * Copies the trailing run of decimal digits of input (possibly empty) into
 * output, NUL-terminated, and returns the number of characters copied.
 * output must be large enough to hold the suffix plus the terminator.
 */
signed int digit_suffix (const char *input, char *output) {
    /* The suffix starts right after the last non-digit character. */
    const char *suffix = input + rindex_nondigit(input) + 1;

    strcpy(output, suffix);
    return strlen(output);
}
29 |
/*
 * Copies everything in input up to and including its last non-digit
 * character into output, NUL-terminated, and returns the number of
 * characters copied. output must have room for that many bytes plus the
 * terminator.
 *
 * When input is empty or all digits the prefix is empty: output is set to
 * "" and 0 is returned (previously output was left untouched in this case).
 */
signed int nondigit_prefix (const char *input, char *output) {
    /* rindex_nondigit() yields -1 when there is no non-digit, giving 0 here. */
    signed int len = rindex_nondigit(input) + 1;

    if (len > 0)
        memcpy(output, input, len);
    output[len] = '\0';
    return len;
}
38 |
--------------------------------------------------------------------------------
/src/libsqlite3_geocoder/wkb_compress.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
/*
 * Packs the doubles stored in src after a 9-byte header into 32-bit
 * integers in dest: each value is multiplied by 1000000 and converted to
 * int32_t (fractions beyond that are discarded by the conversion).
 *
 * src/len  input buffer and its size in bytes
 * dest     output buffer; needs 4 bytes per (started) 8-byte input value
 *
 * Returns the number of bytes written to dest, or 0 when len is 0.
 *
 * Values are moved with memcpy() because the data starts at byte offset 9
 * and is therefore not aligned for direct double/int32_t access. If the
 * input ends in the middle of a double, the missing bytes are read as zero
 * instead of reading past the end of src.
 */
uint32_t compress_wkb_line (void *dest, const void *src, uint32_t len) {
    const unsigned char *in = (const unsigned char *) src;
    unsigned char *out = (unsigned char *) dest;
    uint32_t d, s;

    if (!len) return 0;
    for (s = 9, d = 0; s < len; d += 4, s += 8) {
        double value = 0.0;
        int32_t fixed;
        uint32_t avail = len - s;

        memcpy(&value, in + s, avail < sizeof(value) ? avail : sizeof(value));
        value *= 1000000;
        fixed = (int32_t) value;
        memcpy(out + d, &fixed, sizeof(fixed));
    }
    return d;
}
15 |
/*
 * Inverse of compress_wkb_line(): expands 32-bit integers from src into
 * doubles (value / 1000000) in dest, preceded by a 9-byte line header.
 *
 * src/len  input buffer and its size in bytes
 * dest     output buffer; needs 9 + 2*len bytes
 *
 * Returns the number of bytes written to dest, or 0 when len is 0.
 *
 * The header is: byte-order flag 1, geometry type 2 and the point count,
 * both written as little-endian 32-bit integers. The point count is
 * derived from the input (two values per point) rather than the
 * previously hard-coded 6. Trailing bytes that do not form a complete
 * 4-byte integer are ignored so that the write never exceeds 9 + 2*len
 * bytes. Values are moved with memcpy() because offset 9 is not aligned
 * for direct double access.
 */
uint32_t uncompress_wkb_line (void *dest, const void *src, uint32_t len) {
    const unsigned char *in = (const unsigned char *) src;
    unsigned char *out = (unsigned char *) dest;
    uint32_t d, s;
    uint32_t npoints = len / 8;   /* two 4-byte ordinates per point */

    if (!len) return 0;

    out[0] = 1;                                  /* little-endian flag */
    out[1] = 2; out[2] = 0; out[3] = 0; out[4] = 0;  /* type 2 */
    out[5] = (unsigned char) (npoints & 0xff);
    out[6] = (unsigned char) ((npoints >> 8) & 0xff);
    out[7] = (unsigned char) ((npoints >> 16) & 0xff);
    out[8] = (unsigned char) ((npoints >> 24) & 0xff);

    for (s = 0, d = 9; len - s >= 4; s += 4, d += 8) {
        int32_t fixed;
        double value;

        memcpy(&fixed, in + s, sizeof(fixed));
        value = (double) fixed;
        value /= 1000000;
        memcpy(out + d, &value, sizeof(value));
    }
    return d;
}
28 |
29 |
#ifdef TEST

#include <stdio.h>
#include <string.h>

/*
 * Round-trip harness: reads hex-encoded WKB lines from stdin, one per
 * input line, compresses and uncompresses each, and prints the byte counts
 * and the recovered coordinates.
 */
int main (int argc, char **argv) {
    char hex[1024], *scan;
    char wkb[512];
    unsigned long len, clen;
    unsigned int byte;
    int consumed;
    double coord;

    /* Loop on fgets() itself: testing feof() first would process the last
     * line twice. */
    while (fgets(hex, sizeof(hex), stdin) != NULL) {
        /* Decode two hex digits at a time into a temporary so that only
         * one byte of wkb is written per step. */
        for (scan = hex, len = 0; *scan && len < sizeof(wkb); scan += consumed, len++) {
            if (sscanf(scan, "%2x%n", &byte, &consumed) != 1) break;
            wkb[len] = (char) byte;
        }
        clen = compress_wkb_line(hex, wkb, len);
        printf("before: %lu, after: %lu\n", len, clen);
        len = uncompress_wkb_line(wkb, hex, clen);
        printf("before: %lu, after: %lu\n", clen, len);
        for (scan = wkb + 9; scan < wkb + len; scan += 8) {
            /* offset 9 is unaligned for double, so copy the value out */
            memcpy(&coord, scan, sizeof(coord));
            printf("%.6f ", coord);
        }
        printf("\n");
    }
    return 0;
}

#endif
55 |
--------------------------------------------------------------------------------
/src/metaphone/Makefile:
--------------------------------------------------------------------------------
# Objects linked into a shared library must be position-independent.
CFLAGS += -fPIC

# These targets do not name files.
.PHONY: all clean

all: metaphone.so

metaphone.so: extension.o metaphon.o
	$(CC) -fPIC -shared $^ -o $@

clean:
	rm -f *.o *.so
8 |
--------------------------------------------------------------------------------
/src/metaphone/README:
--------------------------------------------------------------------------------
1 | = SQLite 3 Metaphone extension =
2 |
 * This library implements the Metaphone algorithm, originally developed by
   Lawrence Philips, as an SQLite 3 extension function:
5 |
6 | http://en.wikipedia.org/wiki/Metaphone
7 |
 * This code is based around the original public domain implementation in
   C by Gary A. Parker, as provided by Sadi Evren Seker:
10 |
11 | http://www.shedai.net/c/new/METAPHON.C
12 |
13 | * Like SQLite and the Phillips implementation of Metaphone, this code
14 | is provided in the public domain, in the hope that it will be useful.
15 |
16 | * To compile the code, simply run `make`. You must have GNU Make and GCC
17 | installed.
18 |
19 | * The module implements one function, metaphone(), which takes a string
20 | to convert to a metaphone representation as its first argument, and an
21 | optional second argument to specify the maximum length of the output.
22 |
23 | * To use the code, run `sqlite3` and enter the following:
24 |
25 | {{{
26 | sqlite> .load /path/to/metaphone.so
27 | -- you can use ./metaphone.so if the .so is in your
28 | -- current directory, or just metaphone.so if it's
29 | -- somewhere in your library path.
30 | sqlite> select metaphone("Schuyler");
31 | SKLR
32 | sqlite> select metaphone("Schuyler", 3);
33 | SKL
34 | sqlite> select metaphone("Skyler");
35 | SKLR
36 | sqlite> select metaphone("Skylar");
37 | SKLR
38 | sqlite> select metaphone("SQLite rules!");
39 | SKLTRLS
40 | sqlite> select metaphone("SQLite roolz!!!1!");
41 | SKLTRLS
42 | }}}
43 |
44 | Questions? Comments? Complaints? Approbation? Email schuyler@nocat.net.
45 |
46 | Schuyler Erle
47 | 9 March 2008
48 |
49 | =30=
50 |
--------------------------------------------------------------------------------
/src/metaphone/extension.c:
--------------------------------------------------------------------------------
1 | # include
2 | # include
3 | # include
4 | # include
5 |
6 | static SQLITE_EXTENSION_INIT1;
7 |
8 | static void
9 | sqlite3_metaphone (sqlite3_context *context, int argc, sqlite3_value **argv) {
10 | const unsigned char *input = sqlite3_value_text(argv[0]);
11 | int max_phones = 0;
12 | char *output;
13 | int len;
14 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
15 | sqlite3_result_null(context);
16 | return;
17 | }
18 | if (argc > 1)
19 | max_phones = sqlite3_value_int(argv[1]);
20 | if (max_phones <= 0)
21 | max_phones = strlen(input);
22 | output = sqlite3_malloc((max_phones+1)*sizeof(char));
23 | len = metaphone(input, output, max_phones);
24 | sqlite3_result_text(context, output, len, SQLITE_TRANSIENT);
25 | }
26 |
27 | int sqlite3_extension_init (sqlite3 * db, char **pzErrMsg,
28 | const sqlite3_api_routines *pApi) {
29 | SQLITE_EXTENSION_INIT2(pApi);
30 | sqlite3_create_function(db, "metaphone", 1, SQLITE_ANY,
31 | NULL, sqlite3_metaphone, NULL, NULL);
32 | sqlite3_create_function(db, "metaphone", 2, SQLITE_ANY,
33 | NULL, sqlite3_metaphone, NULL, NULL);
34 | return 0;
35 | }
36 |
37 |
38 |
--------------------------------------------------------------------------------
/src/metaphone/metaphon.c:
--------------------------------------------------------------------------------
1 | /* +++Customized by SDE for sqlite3 use 09-Mar-2009 */
2 | /* +++File obtained from http://www.shedai.net/c/new/METAPHON.C */
3 | /* +++Date previously modified: 05-Jul-1997 */
4 |
5 | /*
6 | ** METAPHON.C - Phonetic string matching
7 | **
8 | ** The Metaphone algorithm was developed by Lawrence Phillips. Like the
9 | ** Soundex algorithm, it compares words that sound alike but are spelled
10 | ** differently. Metaphone was designed to overcome difficulties encountered
11 | ** with Soundex.
12 | **
13 | ** This implementation was written by Gary A. Parker and originally published
14 | ** in the June/July, 1991 (vol. 5 nr. 4) issue of C Gazette. As published,
15 | ** this code was explicitly placed in the public domain by the author.
16 | */
17 |
18 | #include
19 | #include /* strlen() */
20 | #include
21 | #define malloc(x) sqlite3_malloc((x))
22 | #define free(x) sqlite3_free((x))
23 |
24 | /*
25 | ** Character coding array
26 | */
27 |
static const char vsvfn[26] = {
      1,16,4,16,9,2,4,16,9,2,0,2,2,2,1,4,0,2,4,4,1,0,0,0,8,0};
/*    A  B C  D E F G  H I J K L M N O P Q R S T U V W X Y Z      */

/*
** Look up the coding flags for a character. Anything outside 'A'..'Z'
** (in particular the '\0' padding around the word) has no flags; without
** this range check the table would be indexed out of bounds.
*/
static int letter_flags(int c)
{
    return (c >= 'A' && c <= 'Z') ? vsvfn[c - 'A'] : 0;
}

/*
** Macros to access the character coding array
*/

#define vowel(x)  (letter_flags(x) & 1)   /* AEIOU    */
#define same(x)   (letter_flags(x) & 2)   /* FJLMNR   */
#define varson(x) (letter_flags(x) & 4)   /* CGPST    */
#define frontv(x) (letter_flags(x) & 8)   /* EIY      */
#define noghf(x)  (letter_flags(x) & 16)  /* BDH      */

/*
** The rules below look back as far as *(n - 4), so the working copy of
** the word is preceded by this many '\0' bytes.
*/
#define METAPHONE_FRONT_PAD 4

/*
** Compute the Metaphone code of Word into Metaph.
**
**   Word        NUL-terminated input; non-alphabetic characters are
**               dropped and letters are upper-cased before encoding.
**   Metaph      output buffer, at least max_phones + 1 bytes.
**   max_phones  maximum number of code characters to produce.
**
** Returns the length of the code written to Metaph. If Word contains no
** alphabetic characters, Metaph is set to "" and 1 is returned
** (NOTE(review): kept as in the original; confirm callers rely on 1 here).
** If the working buffer cannot be allocated, Metaph is set to "" and 0 is
** returned.
*/
int metaphone(const char *Word, char *Metaph, int max_phones)
{
    char *n, *n_start, *n_end;  /* Pointers to string               */
    char *metaph_start = Metaph, *metaph_end;
                                /* Pointers to metaph               */
    /* front padding + word + two trailing '\0's + one spare byte   */
    int ntrans_len = strlen(Word) + METAPHONE_FRONT_PAD + 3;
    char *ntrans = (char *)malloc(sizeof(char) * ntrans_len);
                                /* Word with uppercase letters      */
    int KSflag;                 /* State flag for X translation     */

    if (ntrans == NULL) {
        Metaph[0] = '\0';
        return 0;
    }

    /*
    ** Pad the front with '\0's so look-behind never leaves the buffer
    */

    memset(ntrans, 0, METAPHONE_FRONT_PAD);

    /*
    ** Copy word to internal buffer, dropping non-alphabetic characters
    ** and converting to upper case. (The casts keep isalpha()/toupper()
    ** well-defined for bytes with the high bit set.)
    */

    for (n = ntrans + METAPHONE_FRONT_PAD, n_end = ntrans + ntrans_len - 2;
         *Word && n < n_end; ++Word)
    {
        if (isalpha((unsigned char)*Word))
            *n++ = toupper((unsigned char)*Word);
    }

    if (n == ntrans + METAPHONE_FRONT_PAD) {
        free(ntrans);
        Metaph[0]='\0';
        return 1;               /* Return if zero characters        */
    }
    else    n_end = n;          /* Set end of string pointer        */

    /*
    ** Pad with '\0's at the rear; rewind to the first letter
    */

    *n++ = '\0';
    *n   = '\0';
    n    = ntrans + METAPHONE_FRONT_PAD;

    /*
    ** Check for PN, KN, GN, AE, WR, WH, and X at start
    */

    switch (*n)
    {
    case 'P':
    case 'K':
    case 'G':
        if ('N' == *(n + 1))
            *n++ = '\0';
        break;

    case 'A':
        if ('E' == *(n + 1))
            *n++ = '\0';
        break;

    case 'W':
        if ('R' == *(n + 1))
            *n++ = '\0';
        else if ('H' == *(n + 1))
        {
            *(n + 1) = *n;
            *n++ = '\0';
        }
        break;

    case 'X':
        *n = 'S';
        break;
    }

    /*
    ** Now loop through the string, stopping at the end of the string
    ** or when the computed Metaphone code is max_phones characters long.
    ** The loop also visits the terminating '\0' at n_end, which matches
    ** no rule.
    */

    KSflag = 0;                 /* State flag for KS translation    */
    for (metaph_end = Metaph + max_phones, n_start = n;
         n <= n_end && Metaph < metaph_end; ++n)
    {
        if (KSflag)
        {
            KSflag = 0;
            *Metaph++ = *n;
        }
        else
        {
            /* Drop duplicates except for CC    */

            if (*(n - 1) == *n && *n != 'C')
                continue;

            /* Check for F J L M N R  or first letter vowel */

            if (same(*n) || (n == n_start && vowel(*n)))
                *Metaph++ = *n;
            else switch (*n)
            {
            case 'B':
                if (n < n_end || *(n - 1) != 'M')
                    *Metaph++ = *n;
                break;

            case 'C':
                if (*(n - 1) != 'S' || !frontv(*(n + 1)))
                {
                    if ('I' == *(n + 1) && 'A' == *(n + 2))
                        *Metaph++ = 'X';
                    else if (frontv(*(n + 1)))
                        *Metaph++ = 'S';
                    else if ('H' == *(n + 1))
                        *Metaph++ = ((n == n_start &&
                                      !vowel(*(n + 2))) ||
                                     'S' == *(n - 1)) ? 'K' : 'X';
                    else    *Metaph++ = 'K';
                }
                break;

            case 'D':
                *Metaph++ = ('G' == *(n + 1) && frontv(*(n + 2))) ?
                            'J' : 'T';
                break;

            case 'G':
                if ((*(n + 1) != 'H' || vowel(*(n + 2))) &&
                    (*(n + 1) != 'N' || ((n + 1) < n_end &&
                     (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
                    (*(n - 1) != 'D' || !frontv(*(n + 1))))
                {
                    *Metaph++ = (frontv(*(n + 1)) &&
                                 *(n + 2) != 'G') ? 'J' : 'K';
                }
                else if ('H' == *(n + 1) && !noghf(*(n - 3)) &&
                         *(n - 4) != 'H')
                {
                    *Metaph++ = 'F';
                }
                break;

            case 'H':
                if (!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
                    vowel(*(n + 1))))
                {
                    *Metaph++ = 'H';
                }
                break;

            case 'K':
                if (*(n - 1) != 'C')
                    *Metaph++ = 'K';
                break;

            case 'P':
                *Metaph++ = ('H' == *(n + 1)) ? 'F' : 'P';
                break;

            case 'Q':
                *Metaph++ = 'K';
                break;

            case 'S':
                *Metaph++ = ('H' == *(n + 1) || ('I' == *(n + 1) &&
                             ('O' == *(n + 2) || 'A' == *(n + 2)))) ?
                            'X' : 'S';
                break;

            case 'T':
                if ('I' == *(n + 1) && ('O' == *(n + 2) ||
                    'A' == *(n + 2)))
                {
                    *Metaph++ = 'X';
                }
                else if ('H' == *(n + 1))
                    *Metaph++ = 'O';
                else if (*(n + 1) != 'C' || *(n + 2) != 'H')
                    *Metaph++ = 'T';
                break;

            case 'V':
                *Metaph++ = 'F';
                break;

            case 'W':
            case 'Y':
                if (vowel(*(n + 1)))
                    *Metaph++ = *n;
                break;

            case 'X':
                if (n == n_start)
                    *Metaph++ = 'S';
                else
                {
                    *Metaph++ = 'K';
                    KSflag = 1;
                }
                break;

            case 'Z':
                *Metaph++ = 'S';
                break;
            }
        }
    }

    *Metaph = '\0';
    free(ntrans);
    return strlen(metaph_start);
}
251 |
252 |
--------------------------------------------------------------------------------
/src/shp2sqlite/Makefile:
--------------------------------------------------------------------------------
1 | # **********************************************************************
2 | # * $Id: Makefile.in
3 | # *
4 | # * PostGIS - Spatial Types for PostgreSQL
5 | # * http://postgis.refractions.net
6 | # * Copyright 2008 Mark Cave-Ayland
7 | # *
8 | # * This is free software; you can redistribute and/or modify it under
9 | # * the terms of the GNU General Public Licence. See the COPYING file.
10 | # *
11 | # **********************************************************************
12 |
13 |
CFLAGS=-g -O2 -fPIC -DPIC -Wall -Wmissing-prototypes

# Filenames with extension as determined by the OS
SHP2SQLITE=shp2sqlite
LIBLWGEOM=../liblwgeom/liblwgeom.a

# iconv flags
ICONV_LDFLAGS=-lc

# These targets do not name files.
.PHONY: all install clean

all: $(SHP2SQLITE)

# $(MAKE) instead of a literal "make" so that command-line flags and
# parallel-build settings are passed on to the sub-make.
$(LIBLWGEOM):
	$(MAKE) -C ../liblwgeom

$(SHP2SQLITE): shpopen.o dbfopen.o getopt.o shp2sqlite.o $(LIBLWGEOM)
	$(CC) $(CFLAGS) $^ $(ICONV_LDFLAGS) -lm -o $@

install: all
	@cp $(SHP2SQLITE) ../../build/

clean:
	@rm -f *.o $(SHP2SQLITE)
36 |
37 |
--------------------------------------------------------------------------------
/src/shp2sqlite/Makefile.nix:
--------------------------------------------------------------------------------
1 | # **********************************************************************
2 | # * $Id: Makefile.in
3 | # *
4 | # * PostGIS - Spatial Types for PostgreSQL
5 | # * http://postgis.refractions.net
6 | # * Copyright 2008 Mark Cave-Ayland
7 | # *
8 | # * This is free software; you can redistribute and/or modify it under
9 | # * the terms of the GNU General Public Licence. See the COPYING file.
10 | # *
11 | # **********************************************************************
12 |
13 |
CFLAGS=-g -O2 -fPIC -DPIC -Wall -Wmissing-prototypes

# Filenames with extension as determined by the OS
SHP2SQLITE=shp2sqlite
LIBLWGEOM=../liblwgeom/liblwgeom.a

# iconv flags
ICONV_LDFLAGS=-lc

# These targets do not name files.
.PHONY: all install clean

all: $(SHP2SQLITE)

# $(MAKE) instead of a literal "make" so that command-line flags and
# parallel-build settings are passed on to the sub-make.
$(LIBLWGEOM):
	$(MAKE) -C ../liblwgeom

$(SHP2SQLITE): shpopen.o dbfopen.o getopt.o shp2sqlite.o $(LIBLWGEOM)
	$(CC) $(CFLAGS) $^ $(ICONV_LDFLAGS) -lm -o $@

install: all
	@cp $(SHP2SQLITE) ../../bin

clean:
	@rm -f *.o $(SHP2SQLITE)
36 |
37 |
--------------------------------------------------------------------------------
/src/shp2sqlite/Makefile.redhat:
--------------------------------------------------------------------------------
1 | # **********************************************************************
2 | # * $Id: Makefile.in
3 | # *
4 | # * PostGIS - Spatial Types for PostgreSQL
5 | # * http://postgis.refractions.net
6 | # * Copyright 2008 Mark Cave-Ayland
7 | # *
8 | # * This is free software; you can redistribute and/or modify it under
9 | # * the terms of the GNU General Public Licence. See the COPYING file.
10 | # *
11 | # **********************************************************************
CC=gcc
CFLAGS=-g -O2 -fPIC -DPIC -Wall -Wmissing-prototypes

# Filenames with extension as determined by the OS
SHP2SQLITE=shp2sqlite
LIBLWGEOM=../liblwgeom/liblwgeom.a

# iconv flags
ICONV_LDFLAGS=-lc

# These targets do not name files.
.PHONY: all install clean

all: $(SHP2SQLITE)

# $(MAKE) instead of a literal "make" so that command-line flags and
# parallel-build settings are passed on to the sub-make.
$(LIBLWGEOM):
	$(MAKE) -C ../liblwgeom

$(SHP2SQLITE): shpopen.o dbfopen.o getopt.o shp2sqlite.o $(LIBLWGEOM)
	$(CC) $(CFLAGS) $^ $(ICONV_LDFLAGS) -lm -o $@

install: all
	@cp $(SHP2SQLITE) ../../bin

clean:
	@rm -f *.o $(SHP2SQLITE)
35 |
36 |
--------------------------------------------------------------------------------
/src/shp2sqlite/getopt.h:
--------------------------------------------------------------------------------
1 | /* Declarations for getopt.
2 | Copyright (C) 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
3 |
4 | This program is free software; you can redistribute it and/or modify it
5 | under the terms of the GNU General Public License as published by the
6 | Free Software Foundation; either version 2, or (at your option) any
7 | later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program; if not, write to the Free Software
16 | Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
17 |
18 | #ifndef _GETOPT_H
19 | #define _GETOPT_H 1
20 |
21 | #ifdef __cplusplus
22 | extern "C" {
23 | #endif
24 |
25 | /* For communication from `getopt' to the caller.
26 | When `getopt' finds an option that takes an argument,
27 | the argument value is returned here.
28 | Also, when `ordering' is RETURN_IN_ORDER,
29 | each non-option ARGV-element is returned here. */
30 |
31 | extern char *optarg;
32 |
33 | /* Index in ARGV of the next element to be scanned.
34 | This is used for communication to and from the caller
35 | and for communication between successive calls to `getopt'.
36 |
37 | On entry to `getopt', zero means this is the first call; initialize.
38 |
39 | When `getopt' returns EOF, this is the index of the first of the
40 | non-option elements that the caller should itself scan.
41 |
42 | Otherwise, `optind' communicates from one call to the next
43 | how much of ARGV has been scanned so far. */
44 |
45 | extern int optind;
46 |
47 | /* Callers store zero here to inhibit the error message `getopt' prints
48 | for unrecognized options. */
49 |
50 | extern int opterr;
51 |
52 | /* Set to an option character which was unrecognized. */
53 |
54 | extern int optopt;
55 |
56 | /* Describe the long-named options requested by the application.
57 | The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
58 | of `struct option' terminated by an element containing a name which is
59 | zero.
60 |
61 | The field `has_arg' is:
62 | no_argument (or 0) if the option does not take an argument,
63 | required_argument (or 1) if the option requires an argument,
64 | optional_argument (or 2) if the option takes an optional argument.
65 |
66 | If the field `flag' is not NULL, it points to a variable that is set
67 | to the value given in the field `val' when the option is found, but
68 | left unchanged if the option is not found.
69 |
70 | To have a long-named option do something other than set an `int' to
71 | a compiled-in constant, such as set a value from `optarg', set the
72 | option's `flag' field to zero and its `val' field to a nonzero
73 | value (the equivalent single-letter option character, if there is
74 | one). For long options that have a zero `flag' field, `getopt'
75 | returns the contents of the `val' field. */
76 |
/* One entry of the LONG_OPTIONS vector passed to getopt_long and
   getopt_long_only; the vector ends with an entry whose name is zero.  */
struct option
{
  /* Name of the long option.  */
#if __STDC__
  const char *name;
#else
  char *name;
#endif
  /* has_arg can't be an enum because some compilers complain about
     type mismatches in all the code that assumes it is an int.  */
  /* One of no_argument, required_argument or optional_argument.  */
  int has_arg;
  /* If not NULL, the variable that is set to `val' when the option is
     found (and left unchanged otherwise).  */
  int *flag;
  /* Value stored through `flag'; when `flag' is zero, the value that
     `getopt' returns for this option.  */
  int val;
};
90 |
91 | /* Names for the values of the `has_arg' field of `struct option'. */
92 |
93 | #define no_argument 0
94 | #define required_argument 1
95 | #define optional_argument 2
96 |
97 | #if __STDC__ || defined(PROTO)
98 | #if defined(__GNU_LIBRARY__)
99 | /* Many other libraries have conflicting prototypes for getopt, with
100 | differences in the consts, in stdlib.h. To avoid compilation
101 | errors, only prototype getopt for the GNU C library. */
102 | extern int pgis_getopt (int argc, char *const *argv, const char *shortopts);
103 | #endif /* not __GNU_LIBRARY__ */
104 | extern int pgis_getopt_long (int argc, char *const *argv, const char *shortopts,
105 | const struct option *longopts, int *longind);
106 | extern int pgis_getopt_long_only (int argc, char *const *argv,
107 | const char *shortopts,
108 | const struct option *longopts, int *longind);
109 |
110 | /* Internal only. Users should not call this directly. */
111 | extern int _pgis_getopt_internal (int argc, char *const *argv,
112 | const char *shortopts,
113 | const struct option *longopts, int *longind,
114 | int long_only);
115 | #else /* not __STDC__ */
116 | extern int pgis_getopt ();
117 | extern int pgis_getopt_long ();
118 | extern int pgis_getopt_long_only ();
119 |
120 | extern int _pgis_getopt_internal ();
121 | #endif /* not __STDC__ */
122 |
123 | #ifdef __cplusplus
124 | }
125 | #endif
126 |
127 | #endif /* _GETOPT_H */
128 |
--------------------------------------------------------------------------------
/test/address.rb:
--------------------------------------------------------------------------------
1 | $LOAD_PATH.unshift '../lib'
2 |
3 | require 'test/unit'
4 | require 'set'
5 | require 'geocoder/us/address'
6 |
7 | include Geocoder::US
8 |
# Unit tests for Geocoder::US::Address: text clean-up, number expansion,
# free-text parsing into number/street/city/state/zip, and construction
# from a hash of pre-parsed fields.
class TestAddress < Test::Unit::TestCase
  def test_new
    addr = Address.new("1600 Pennsylvania Av., Washington DC")
    assert_equal "1600 Pennsylvania Av, Washington DC", addr.text
  end

  # Each fixture is [expected cleaned text, raw input].
  def test_clean
    fixtures = [
      [ "cleaned text", "cleaned: text!" ],
      [ "cleaned-text 2", "cleaned-text: #2?" ],
      [ "it's working 1/2", "~it's working 1/2~" ],
      [ "it's working, yes", "it's working, yes...?" ],
      [ "it's working & well", "it's working & well?" ]
    ]
    fixtures.each do |output, given|
      assert_equal output, Address.new(given).text
    end
  end

  # Any one spelling of a number should expand to all three.
  def test_expand_numbers
    num_list = ["5", "fifth", "five"]
    num_list.each do |n|
      addr = Address.new(n)
      assert_equal num_list, addr.expand_numbers(n).to_a.sort
    end
  end

  # Each fixture is [input text, expected city, expected state, expected zip].
  def test_city_parse
    places = [
      [ "New York, NY", "New York", "NY", "" ],
      [ "NY", "", "NY", "" ],
      [ "New York", "New York", "NY", "" ],
      [ "Philadelphia", "Philadelphia", "", "" ],
      [ "Philadelphia PA", "Philadelphia", "PA", "" ],
      [ "Philadelphia, PA", "Philadelphia", "PA", "" ],
      [ "Philadelphia, Pennsylvania", "Philadelphia", "PA", "" ],
      [ "Philadelphia, Pennsylvania 19131", "Philadelphia", "PA", "19131" ],
      [ "Philadelphia 19131", "Philadelphia", "", "19131" ],
      [ "Pennsylvania 19131", "Pennsylvania", "PA", "19131" ], # kind of a misfeature
      [ "19131", "", "", "19131" ],
      [ "19131-9999", "", "", "19131" ],
    ]
    places.each do |fixture|
      addr = Address.new fixture[0]
      [:city, :state, :zip].zip(fixture[1..3]).each do |key, val|
        result = addr.send key
        # Scalar results are wrapped so both shapes can be checked alike.
        result = [result.downcase] unless result.kind_of? Array
        if result.empty?
          assert_equal val, "", key.to_s + " test no result " + fixture.join("/")
        else
          assert result.member?(val.downcase), key.to_s + " test " + result.inspect + fixture.join("/")
        end
      end
    end
  end

  def test_po_box
    addr_po = Address.new "PO Box 1111 Herndon VA 20171"
    # assert's second argument is the failure message, not an expected value.
    assert addr_po.po_box?, "expected a PO Box address to be detected"
  end

  # Each fixture maps :text (the input) to the expected value of every other
  # key; array-valued results only need to contain the expected value.
  def test_parse
    addrs = [
      {:text   => "1600 Pennsylvania Av., Washington DC 20050",
       :number => "1600",
       :street => "Pennsylvania Ave",
       :city   => "Washington",
       :state  => "DC",
       :zip    => "20050"},

      {:text   => "1600 Pennsylvania, Washington DC",
       :number => "1600",
       :street => "Pennsylvania",
       :city   => "Washington",
       :state  => "DC"},

      {:text   => "1600 Pennsylvania Washington DC",
       :number => "1600",
       :street => "Pennsylvania Washington",
       :city   => "Pennsylvania Washington", # FIXME
       :state  => "DC"},

      {:text   => "1600 Pennsylvania Washington",
       :number => "1600",
       :street => "Pennsylvania",
       :city   => "Washington",
       :state  => "WA"}, # FIXME

      {:text   => "1600 Pennsylvania 20050",
       :number => "1600",
       :street => "Pennsylvania", # FIXME
       :zip    => "20050"},

      {:text   => "1600 Pennsylvania Av, 20050-9999",
       :number => "1600",
       :street => "Pennsylvania Ave",
       :zip    => "20050"},

      {:text   => "1005 Gravenstein Highway North, Sebastopol CA",
       :number => "1005",
       :street => "Gravenstein Hwy N",
       :city   => "Sebastopol",
       :state  => "CA"},

      {:text   => "100 N 7th St, Brooklyn",
       :number => "100",
       :street => "N 7 St",
       :city   => "Brooklyn"},

      {:text   => "100 N Seventh St, Brooklyn",
       :number => "100",
       :street => "N 7 St",
       :city   => "Brooklyn"},

      {:text   => "100 Central Park West, New York, NY",
       :number => "100",
       :street => "Central Park W",
       :city   => "New York",
       :state  => "NY"},

      {:text   => "100 Central Park West, 10010",
       :number => "100",
       :street => "Central Park W",
       :zip    => "10010"},

      {:text   => "1400 Avenue of the Americas, New York, NY 10019",
       :number => "1400",
       :street => "Ave of the Americas",
       :city   => "New York",
       :state  => "NY"},

      {:text   => "1400 Avenue of the Americas, New York",
       :number => "1400",
       :street => "Ave of the Americas",
       :city   => "New York"},

      {:text   => "1400 Ave of the Americas, New York",
       :number => "1400",
       :street => "Ave of the Americas",
       :city   => "New York"},

      {:text   => "1400 Av of the Americas, New York",
       :number => "1400",
       :street => "Ave of the Americas",
       :city   => "New York"},

      {:text   => "1400 Av of the Americas New York",
       :number => "1400",
       :street => "Ave of the Americas",
       :city   => "New York"},

    ]
    addrs.each do |fixture|
      text = fixture.delete(:text)
      addr = Address.new(text)
      fixture.each do |key, val|
        result = addr.send key
        if result.kind_of? Array
          result.map! {|str| str.downcase}
          assert result.member?(val.downcase), "#{text} (#{key}) = #{result.inspect}"
        else
          assert_equal val, result, "#{text} (#{key}) = #{result.inspect}"
        end
      end
    end
  end

  # Addresses given as a hash of fields; :parsed_street and :final_number are
  # the expected results rather than inputs the test reads back.
  def test_skip_parse
    addresses = [
      {:street => "1233 Main St", :city => "Springfield", :region => "VA", :postal_code => "12345", :final_number => "1233", :parsed_street => "main st"},
      {:street => "somewhere Ln", :city => "Somewhere", :region => "WI", :postal_code => "22222", :number => "402", :parsed_street => "somewhere ln", :final_number => "402"},
    ]
    addresses.each do |preparsed_address|
      address_for_geocode = Address.new preparsed_address
      assert_equal preparsed_address[:parsed_street], address_for_geocode.street[0]
      assert_equal preparsed_address[:final_number], address_for_geocode.number
      assert_equal preparsed_address[:city], address_for_geocode.city[0]
      assert_equal preparsed_address[:region], address_for_geocode.state
      assert_equal preparsed_address[:postal_code], address_for_geocode.zip
    end
  end

  # A full state name given as :region should come back abbreviated.
  def test_states_abbreviated_in_skip_parse
    addresses = [
      {:street => "123 Main St", :city => "Springfield", :region => "Virginia", :postal_code => "12345", :state_abbrev => "VA"},
      {:street => "402 Somewhere Ln", :city => "Somewhere", :region => "WI", :postal_code => "22222", :state_abbrev => "WI"},
    ]
    addresses.each do |preparsed_address|
      address_for_geocode = Address.new preparsed_address
      assert_equal preparsed_address[:state_abbrev], address_for_geocode.state
    end
  end

  # A hash carrying only free text under :address.
  def test_address_hash
    addresses = [
      {:address => "Herndon, VA", :place_check => ["herndon"]},
      {:address => "Arlington, VA", :place_check => ["arlington"]}
    ]
    addresses.each do |preparsed_address|
      address_for_geocode = Address.new preparsed_address
      assert_equal preparsed_address[:place_check], address_for_geocode.city
    end
  end

  def test_partial_address
    addresses = [
      {:street => "2200 Wilson Blvd", :postal_code => "22201"},
    ]
    addresses.each do |preparsed_address|
      address_for_geocode = Address.new preparsed_address
      assert_equal preparsed_address[:postal_code], address_for_geocode.zip
    end
  end

  # The :country value is expected to be reported as the state.
  def test_country_parse
    addresses = [
      {:city => "Paris", :country => "FR"},
    ]
    addresses.each do |preparsed_address|
      address_for_geocode = Address.new preparsed_address
      assert_equal preparsed_address[:country], address_for_geocode.state
    end
  end

end
237 |
--------------------------------------------------------------------------------
/test/benchmark.rb:
--------------------------------------------------------------------------------
#!/usr/bin/ruby

require 'test/unit'
require 'geocoder/us/database'
require 'benchmark'
include Benchmark         # we need the CAPTION and FMTSTR constants

# Database to benchmark against. Set GEOCODER_DB to override the default
# location. (An environment variable is used rather than a command-line
# argument because test/unit is loaded and may interpret ARGV itself.)
db_path = ENV.fetch("GEOCODER_DB", "/mnt/tiger2008/geocoder.db")
db = Geocoder::US::Database.new(db_path)

n = 50   # iterations per measurement
s = "1005 Gravenstein Hwy N, Sebastopol CA 95472"
a = Geocoder::US::Address.new(s)

# Show the result once so the timings below can be sanity-checked.
print db.geocode(s)

# bmbm runs a rehearsal pass before the measured pass.
Benchmark.bmbm do |x|
  x.report("parse max_penalty=0") { n.times { a.parse(0) } }
  x.report("parse max_penalty=1") { n.times { a.parse(1) } }
  x.report("geocode") { n.times { db.geocode(s) } }
end
21 |
--------------------------------------------------------------------------------
/test/constants.rb:
--------------------------------------------------------------------------------
1 | $LOAD_PATH.unshift '../lib'
2 |
3 | require 'test/unit'
4 | require 'geocoder/us/constants'
5 |
6 | include Geocoder::US
7 |
8 | class TestConstants < Test::Unit::TestCase
9 | def initialize (*args)
10 | @map = Map[
11 | "Abbreviation" => "abbr",
12 | "Two words" => "2words",
13 | "Some three words" => "3words"
14 | ]
15 | super(*args)
16 | end
17 | def test_class_constructor
18 | assert_kind_of Map, @map
19 | assert_kind_of Hash, @map
20 | end
21 | def test_key
22 | assert @map.key?( "Abbreviation" )
23 | assert @map.key?( "abbreviation" )
24 | assert !(@map.key? "abbreviation?")
25 | assert @map.key?( "abbr" )
26 | assert @map.key?( "Two words" )
27 | assert @map.key?( "2words" )
28 | end
29 | def test_fetch
30 | assert_equal "abbr", @map["Abbreviation"]
31 | assert_equal "abbr", @map["abbreviation"]
32 | assert_nil @map["abbreviation?"]
33 | assert_equal "abbr", @map["abbr"]
34 | assert_equal "2words", @map["Two words"]
35 | assert_equal "2words", @map["2words"]
36 | end
37 | # def test_partial
38 | # assert @map.partial?( "Abbreviation" )
39 | # assert @map.partial?( "Two" )
40 | # assert @map.partial?( "two" )
41 | # assert !(@map.partial? "words")
42 | # assert @map.partial?( "Some" )
43 | # assert !(@map.partial? "words")
44 | # assert @map.partial?( "Some three" )
45 | # assert @map.partial?( "SOME THREE WORDS" )
46 | # end
47 | def test_constants
48 | assert_kind_of Map, Directional
49 | assert_kind_of Map, Prefix_Qualifier
50 | assert_kind_of Map, Suffix_Qualifier
51 | assert_kind_of Map, Prefix_Type
52 | assert_kind_of Map, Suffix_Type
53 | assert_kind_of Map, Unit_Type
54 | assert_kind_of Map, Name_Abbr
55 | assert_kind_of Map, State
56 | end
57 | end
58 |
--------------------------------------------------------------------------------
/test/data/address-sample.csv:
--------------------------------------------------------------------------------
1 | address,number,predir,prequal,pretyp,street,suftyp,sufqual,sufdir,unittyp,unit,city,state,zip,lon,lat,count,comment
2 | "93 NORTH 9TH STREET, BROOKLYN NY 11211",93,N,,,9th,St,,,,,Brooklyn,NY,11211,,,,
3 | "380 WESTMINSTER ST, PROVIDENCE RI 02903",380,,,,Westminster,St,,,,,Providence,RI,02903,,,,
4 | "177 MAIN STREET, LITTLETON NH 03561",177,,,,Main,St,,,,,Littleton,NH,03561,,,,
5 | "202 HARLOW ST, BANGOR ME 04401",202,,,,Harlow,St,,,,,Bangor,ME,04401,,,,
6 | "46 FRONT STREET, WATERVILLE, ME 04901",46,,,,Front,St,,,,,Waterville,ME,04901,,,,
7 | "22 SUSSEX ST, HACKENSACK NJ 07601",22,,,,Sussex,St,,,,,Hackensack,NJ,07601,,,,
8 | "75 OAK STREET, PATCHOGUE NY 11772",75,,,,Oak,St,,,,,Patchogue,NY,11772,,,,
9 | "1 CLINTON AVE, ALBANY NY 12207",1,,,,Clinton,Ave,,,,,Albany,NY,12207,,,,
10 | "7242 ROUTE 9, PLATTSBURGH NY 12901",7242,,,US Hwy,9,,,,,,Plattsburgh,NY,12901,,,,
11 | "520 5TH AVE, MCKEESPORT PA 15132",520,,,,5th,Ave,,,,,McKeesport,PA,15132,,,,
12 | "122 W 3RD STREET, GREENSBURG PA 15601",122,W,,,3rd,St,,,,,Greensburg,PA,15601,,,,
13 | "901 UNIVERSITY DR, STATE COLLEGE PA 16801",901,,,,University,Dr,,,,,"State College",PA,16801,,,,
14 | "240 W 3RD ST, WILLIAMSPORT PA 17701",240,W,,,3rd,St,,,,,Williamsport,PA,17701,,,,
15 | "41 N 4TH ST, ALLENTOWN PA 18102",41,N,,,4th,St,,,,,Allentown,PA,18102,,,,
16 | "2221 W. MARKET STREET, POTTSVILLE PA 17901",2221,W,,,Market,St,,,,,Pottsville,PA,17901,,,,
17 | "337 BRIGHTSEAT ROAD, LANDOVER MD 20785",337,,,,Brightseat,Rd,,,,,Hyattsville,MD,20785,,,,"canonical place"
18 | "101 CHESAPEAKE BLVD, ELKTON MD 21921",103,,,,Chesapeake,Blvd,,,,,Elkton,MD,21921,,,,"find nearest corner"
19 | "2875 SABRE ST, VIRGINIA BEACH VA 23452",2809,,,,Sabre,St,,,,,"Virginia Beach",VA,23452,,,,"find nearest corner"
20 | "324 COMMERCE ROAD, FARMVILLE VA 23901",324,,,,Commerce,St,,,,,Clarksville,VA,23927,,,,"nearby address; might be TIGER omission"
21 | "1480 EAST MAIN STREET, WYTHEVILLE VA 24382",1480,W,,,Main,St,,,,,Wytheville,VA,24382,,,,"nearby address; TIGER omission"
22 | "116 N JEFFERSON STREET, ROANOKE VA 24016",116,N,,,Jefferson,St,,,,,Roanoke,VA,24016,,,,
23 | "50 MCDOWELL STREET, WELCH WV 24801",50,,,,"Mc Dowell",St,,,,,Welch,WV,24801,,,,
24 | "146 EAST FIRST AVE, WILLIAMSON WV 25661",200,E,,,1st,Ave,,,,,Williamson,WV,25661,,,,"find nearest corner"
25 | "1925 E MAIN ST, ALBEMARLE NC 28001",1925,E,,,Main,St,,,,,Albemarle,NC,28001,,,,
26 | "1013 SPRING LANE, SANFORD NC 27330",1013,,,,Spring,Ln,,,,,Sanford,NC,27330,,,,
27 | "145 ROWAN STREET, FAYETTEVILLE NC 28301",145,,,,Rowan,St,,,,,Fayetteville,NC,28301,,,,
28 | "1420 MCCARTHY BLVD, NEW BERN NC 28562",1420,,,,McCarthy,Blvd,,,,,"New Bern",NC,28562,,,,
29 | "115 ENTERPRISE COURT, GREENWOOD SC 29649",115,,,,Enterprise,Ct,,,,,Greenwood,SC,29649,,,,
30 | "732 W 2ND ST, TIFTON GA 31794",732,,,,2nd,St,,W,,,Tifton,GA,31793,,,,"TIGER artifact"
31 | "97 WEST OAK AVE, PANAMA CITY FL 32401",97,,,,Oak,Ave,,,,,"Panama City",FL,32401,,,,"predir is TIGER artifact"
32 | "2276 WILTON DR, WILTON MANORS FL 33305",2276,,,,Wilton,Dr,,,,,"Fort Lauderdale",FL,33305,,,,"canonical place"
33 | "203 SOUTH WALNUT ST, FLORENCE AL 35630",203,S,,,Walnut,St,,,,,Florence,AL,35630,,,,
34 | "108 CENTER POINTE DR, CLARKSVILLE TN 37040",108,,,,"Center Pointe",Dr,,,,,Clarksville,TN,37040,,,,
35 | "1800 OLD TROY RD, UNION CITY TN 38261",1800,,Old,,Troy,Rd,,,,,"Union City",TN,38261,,,,
36 | "931 OLD SMITHVILLE HWY, MCMINNVILLE TN 37110",931,,Old,,Smithville,Rd,,,,,McMinnville,TN,37110,,,,
37 | "1301 GREENE STREET, MARIETTA OH 45750",1301,,,,Greene,St,,,,,Marietta,OH,45750,,,,
38 | "602 SOUTH MICHIGAN ST, SOUTH BEND IN 46601",602,S,,,Michigan,St,,,,,"South Bend",IN,46601,,,,
39 | "500 NORTH A STREET, RICHMOND IN 47374",500,N,,,A,St,,,,,Richmond,IN,47374,,,,
40 | "317 SOUTH DRAKE ROAD, KALAMAZOO MI 49009",317,S,,,Drake,Rd,,,,,Kalamazoo,MI,49009,,,,
41 | "105 Amity Way, Wayne PA 19087",105,,,,Amity,Dr,,,,,Wayne,PA,19087,,,,
42 | "305 W 45th St, New York NY 10036",305,W,,,45,St,,,,,"New York",NY,10036,,,,
43 | "11839 Federalist Way, Fairfax VA 22030",11839,,,,Federalist,Way,,,,,Fairfax,VA,22030,,,,
44 | "400 Monroe St, Hoboken, NJ 07030",400,,,,Monroe,St,,,,,Hoboken,NJ,07030,,,,
45 | "101 West End Avenue, New York NY 10023",101,W,,,End,Ave,,,,,"New York",NY,10023,,,,"predir is TIGER artifact"
46 | "2900 4TH AVE, BILLINGS MT 59101",2900,,,,4th,Ave,,N,,,Billings,MT,59101,,,,"returns 2 results"
47 | "158 N SCOTT STREET, JOLIET IL 60432",158,N,,,Scott,St,,,,,Joliet,IL,60432,,,,
48 | "1207 NETWORK CENTRE DR, EFFINGHAM IL 62401",1207,,,,"Network Centre",Dr,,,,,Effingham,IL,62401,,,,
49 | "3555 SOUTHERN HILLS DR, SIOUX CITY IA 51106",3555,,,,"Southern Hills",Dr,,,,,"Sioux City",IA,51106,,,,
50 | "300 E 3RD ST, NORTH PLATTE NE 69101",300,E,,,3rd,St,,,,,"North Platte",NE,69101,,,,
51 | "115 N WEBB RD, GRAND ISLAND NE 68803",115,N,,,Webb,Rd,,,,,"Grand Island",NE,68803,,,,
52 | "415 VALLEY VIEW DR, SCOTTSBLUFF NE 69361",501,,,,"Valley View",Dr,,,,,"Scottsbluff",NE,69361,,,,"find nearest corner"
53 |
--------------------------------------------------------------------------------
/test/data/db-test.csv:
--------------------------------------------------------------------------------
1 | address,number,street,city,state,zip,lon,lat,count,comment
2 | "93 NORTH 9TH STREET, BROOKLYN NY 11211",93,N 9th St,Brooklyn,NY,11211,-73.958096,40.720064,1,
3 | "380 WESTMINSTER ST, PROVIDENCE RI 02903",380,Westminster St,Providence,RI,02903,-71.415171,41.821004,1,
4 | "177 MAIN STREET, LITTLETON NH 03561",177,Main St,Littleton,NH,03561,-71.776393,44.307299,1,range
5 | "202 HARLOW ST, BANGOR ME 04401",202,Harlow St,Bangor,ME,04401,-68.773934,44.805202,1,
6 | "46 FRONT STREET, WATERVILLE, ME 04901",46,Front St,Waterville,ME,04901,-69.628598,44.550988,1,
7 | "22 SUSSEX ST, HACKENSACK NJ 07601",22,Sussex St,Hackensack,NJ,07601,-74.04821,40.880328,1,
8 | "75 OAK STREET, PATCHOGUE NY 11772",75,Oak St,Patchogue,NY,11772,-73.01036,40.768522,1,
9 | "1 CLINTON AVE, ALBANY NY 12207",1,Clinton Ave,Albany,NY,12207,-73.750031,42.654244,1,
10 | "7242 ROUTE 9, PLATTSBURGH NY 12901",7242,US Hwy 9,Plattsburgh,NY,12901,-73.428066,44.735338,1,
11 | "520 5TH AVE, MCKEESPORT PA 15132",520,5th Ave,McKeesport,PA,15132,-79.861023,40.351228,1,
12 | "122 W 3RD STREET, GREENSBURG PA 15601",122,W 3rd St,Greensburg,PA,15601,-79.546244,40.299681,1,
13 | "901 UNIVERSITY DR, STATE COLLEGE PA 16801",901,University Dr,State College,PA,16801,-77.844056,40.797191,1,
14 | "240 W 3RD ST, WILLIAMSPORT PA 17701",240,W 3rd St,Williamsport,PA,17701,-77.005601,41.238969,1,
15 | "41 N 4TH ST, ALLENTOWN PA 18102",41,N 4th St,Allentown,PA,18102,-75.466113,40.605368,1,
16 | "2221 W. MARKET STREET, POTTSVILLE PA 17901",2221,W Market St,Pottsville,PA,17901,-76.226401,40.674702,1,
17 | "337 BRIGHTSEAT ROAD, LANDOVER MD 20785",337,Brightseat Rd,Hyattsville,MD,20785,-76.850995,38.892762,1,canonical place
18 | "101 CHESAPEAKE BLVD, ELKTON MD 21921",109,Chesapeake Blvd,Elkton,MD,21921,-75.786853,39.6045,1,find nearest corner
19 | "2875 SABRE ST, VIRGINIA BEACH VA 23452",2809,Sabre St,Virginia Beach,VA,23452,-76.067835,36.822959,1,find nearest corner
20 | "324 COMMERCE ROAD, FARMVILLE VA 23901",324,Commerce Rd,Farmville,VA,23901,-78.423296,37.273311,1,fixed in TIGER 2010
21 | "1480 EAST MAIN STREET, WYTHEVILLE VA 24382",1168,E Main St,Wytheville,VA,24382,-81.069279,36.951346,1,nearby address; TIGER omission
22 | "116 N JEFFERSON STREET, ROANOKE VA 24016",116,N Jefferson St,Roanoke,VA,24016,-79.940537,37.275163,1,
23 | "50 MCDOWELL STREET, WELCH WV 24801",50,Mc Dowell St,Welch,WV,24801,-81.585586,37.433465,1,
24 | "146 EAST FIRST AVE, WILLIAMSON WV 25661",200,E 1st Ave,Williamson,WV,25661,-82.277886,37.670798,1,find nearest corner
25 | "1925 E MAIN ST, ALBEMARLE NC 28001",1925,E Main St,Albemarle,NC,28001,-80.163859,35.348818,1,
26 | "1013 SPRING LANE, SANFORD NC 27330",1013,Spring Ln,Sanford,NC,27330,-79.198776,35.487444,1,
27 | "145 ROWAN STREET, FAYETTEVILLE NC 28301",145,Rowan St,Fayetteville,NC,28301,-78.878696,35.057767,1,
28 | "1420 MCCARTHY BLVD, NEW BERN NC 28562",1399,McCarthy Blvd,New Bern,NC,28562,-77.094901,35.097183,1,broken in TIGER 2010
29 | "115 ENTERPRISE COURT, GREENWOOD SC 29649",115,Enterprise Ct,Greenwood,SC,29649,-82.164828,34.216732,1,
30 | "732 W 2ND ST, TIFTON GA 31794",732,W 2nd St,Tifton,GA,31794,-83.523812,31.457889,1,ZIP was fixed in TIGER 2010
31 | "97 WEST OAK AVE, PANAMA CITY FL 32401",95,W Oak Ave,Panama City,FL,32401,-85.661436,30.154306,1,broken in TIGER 2010
32 | "2276 WILTON DR, WILTON MANORS FL 33305",2276,Wilton Dr,Fort Lauderdale,FL,33305,-80.137273,26.156993,1,canonical place
33 | "203 SOUTH WALNUT ST, FLORENCE AL 35630",203,S Walnut St,Florence,AL,35630,-87.670768,34.800112,1,
34 | "108 CENTER POINTE DR, CLARKSVILLE TN 37040",108,Center Pointe Dr,Clarksville,TN,37040,-87.30888,36.56967,1,
35 | "1800 OLD TROY RD, UNION CITY TN 38261",1800,Old Troy Rd,Union City,TN,38261,-89.083201,36.416592,1,
36 | "931 OLD SMITHVILLE HWY, MCMINNVILLE TN 37110",931,Old Smithville Rd,McMinnville,TN,37110,-85.788518,35.701731,1,
37 | "1301 GREENE STREET, MARIETTA OH 45750",1301,Greene St,Marietta,OH,45750,-81.424821,39.426052,1,
38 | "602 SOUTH MICHIGAN ST, SOUTH BEND IN 46601",598,S Michigan St,South Bend,IN,46601,-86.25025,41.670964,1,broken in TIGER 2010
39 | "500 NORTH A STREET, RICHMOND IN 47374",500,N A St,Richmond,IN,47374,-84.89517,39.830625,1,
40 | "317 SOUTH DRAKE ROAD, KALAMAZOO MI 49009",317,S Drake Rd,Kalamazoo,MI,49009,-85.648132,42.288772,1,
41 | "105 Amity Way, Wayne PA 19087",105,Amity Dr,Wayne,PA,19087,-75.455425,40.076446,1,
42 | "305 W 45th St, New York NY 10036",305,W 45 St,New York,NY,10036,-73.991106,40.760371,1,
43 | "11839 Federalist Way, Fairfax VA 22030",11839,Federalist Way,Fairfax,VA,22030,-77.353695,38.849858,1,
44 | "400 Monroe St, Hoboken, NJ 07030",400,Monroe St,Hoboken,NJ,07030,-74.038654,40.743789,1,
45 | "101 West End Avenue, New York NY 10023",101,W End Ave,New York,NY,10023,-73.987822,40.775325,1,predir is TIGER artifact
46 | "2900 4TH AVE, BILLINGS MT 59101",2900,4th Ave N,Billings,MT,59101,-108.51073,45.783452,2,returns 2 results
47 | "158 N SCOTT STREET, JOLIET IL 60432",158,N Scott St,Joliet,IL,60432,-88.080083,41.526353,1,
48 | "1207 NETWORK CENTRE DR, EFFINGHAM IL 62401",1207,Network Centre Dr,Effingham,IL,62401,-88.526702,39.143248,1,
49 | "3555 SOUTHERN HILLS DR, SIOUX CITY IA 51106",3555,Southern Hills Dr,Sioux City,IA,51106,-96.353014,42.449259,1,
50 | "300 E 3RD ST, NORTH PLATTE NE 69101",300,E 3rd St,North Platte,NE,69101,-100.761028,41.135235,1,
51 | "115 N WEBB RD, GRAND ISLAND NE 68803",115,N Webb Rd,Grand Island,NE,68803,-98.378361,40.917627,1,
52 | "415 VALLEY VIEW DR, SCOTTSBLUFF NE 69361",501,Valley View Dr,Scottsbluff,NE,69361,-103.656078,41.879011,1,find nearest corner
53 | "4018 W Ustick Rd, Meridian ID",4018,W Ustick Rd,Meridian,ID,83646,-116.443792,43.634096,1,fixed in TIGER 2010
54 | "2518 S Pacific Hwy, Medford OR",2518,S Pacific Hwy,Medford,OR,97501,-122.855426,42.307241,1,fixed in TIGER 2010
55 | "1111 River Rd Apt A17, Edgewater NJ 07020",1111,River Rd,Edgewater,NJ,07020,-73.972261,40.830852,1,FIXME: parsing
56 | "460 West St, Amherst MA 01002-2964",460,West St,Amherst,MA,01002,-72.520228,42.34014,1,address is all abbreviations
57 | "23 2nd St, Brooklyn NY",23,2nd St,Brooklyn,NY,11231,-73.993897,40.67895,1,regression caused it to point to East Otto
58 | "23 2nd St, Brooklyn, New York",23,2nd St,Brooklyn,NY,11231,-73.993897,40.67895,1,regression caused it to point to Manhattan
59 | "100 Central Park W, 10023",100,Central Park W,New York,NY,10023,-73.975461,40.776899,1,the usual Central Park West parsing issues
60 | "100 Central Park W, New York",100,Central Park W,New York,NY,10023,-73.975461,40.776899,1,the usual Central Park West parsing issues
61 |
--------------------------------------------------------------------------------
/test/data/locations.csv:
--------------------------------------------------------------------------------
1 | name,address
2 | "Home","2026 21st St. N, Arlington, VA 22201"
3 | "Work","2200 Wilson Blvd., Arlington, VA 22201"
4 | "RTI","1506 N Main St., Royal Oak, MI 48067"
--------------------------------------------------------------------------------
/test/database.rb:
--------------------------------------------------------------------------------
1 | $LOAD_PATH.unshift '../lib'
2 |
3 | require 'test/unit'
4 | require 'geocoder/us/database'
5 | require 'fastercsv'
6 |
7 | Base = File.dirname(__FILE__)
8 | Debug = false
9 |
10 | module Geocoder::US
11 | Database_File = (
12 | (ARGV[0] and !ARGV[0].empty?) ? ARGV[0] : nil)
13 | end
14 |
15 | class TestDatabase < Test::Unit::TestCase
16 | def get_db
17 | Geocoder::US::Database.new(Geocoder::US::Database_File, {:debug => Debug})
18 | end
19 |
20 | # def get_international_db
21 | # Geocoder::US::Database.new("/Users/katechapman/Desktop/geonames1.db", {:debug => true})
22 | # end
23 |
24 | def setup
25 | @db = get_db
26 | #@db_intl = get_international_db
27 | #assert_not_nil @db_intl
28 | assert_not_nil @db
29 | end
30 |
31 | def test_load
32 | return if @db.nil?
33 | assert_kind_of Geocoder::US::Database, @db
34 | end
35 |
36 | def test_zip
37 | return if @db.nil?
38 | [ {:city=>"Chicago", :zip=>"60601", :state=>"IL", :precision=>:zip,
39 | :fips_county=>"17031", :lon=>-87.622130,:lat=>41.885310, :score => 0.714},
40 | {:city=>"Philadelphia", :zip=>"19019", :state=>"PA", :precision=>:zip,
41 | :fips_county=>"42101", :lon=>-75.11787, :lat=>40.001811, :score => 0.714}
42 | ].each {|record|
43 | result = @db.geocode(record[:zip])
44 | assert_equal result.length, 1
45 | record.keys.each {|key| assert_equal record[key], result[0][key]}
46 | }
47 |
48 | end
49 |
50 | # def test_international_place
51 | # return if @db_intl.nil?
52 | # [ {:city=>"Paris", :state=>"FR"},
53 | # {:city=>"Paris", :state=>"FR"}
54 | # ].each {|record|
55 | # result = @db_intl.geocode(record)
56 | # assert_equal result.length, 1
57 | # record.keys.each {|key| assert_equal record[key], result[0][key]}
58 | # }
59 | # end
60 |
61 | def test_place
62 | return if @db.nil?
63 | [ {:city=>"Chicago", :state=>"IL", :precision=>:city, :fips_county=>"17031", :score => 0.857},
64 | {:city=>"Philadelphia", :state=>"PA", :precision=>:city, :fips_county=>"42101", :score => 0.857}
65 | ].each {|record|
66 | result = @db.geocode(record[:city] + ", " + record[:state])
67 | assert_equal result.length, 1
68 | record.keys.each {|key| assert_equal record[key], result[0][key]}
69 | }
70 |
71 | end
72 |
73 | # def test_international_place
74 | # return if @db_intl.nil?
75 | # [ {:city=>"Kabul", :state=>"AF", :precision=>:city},
76 | # {:city=>"Paris", :state=>"FR", :precision=>:city}
77 | # ].each {|record|
78 | # result = @db_intl.geocode({:city => record[:city] , :state => record[:state]})
79 | # puts result
80 | # assert_equal result.length, 1
81 | # record.keys.each {|key| assert_equal record[key], result[0][key]}
82 | # }
83 | # end
84 |
85 |
86 | def test_sample
87 | return if @db.nil?
88 | FasterCSV.foreach(Base + "/data/db-test.csv", {:headers=>true}) do |row|
89 | result = @db.geocode(row[0], true)
90 | result[0][:count] = result.map{|a|[a[:lat], a[:lon]]}.to_set.length
91 | fields = row.headers - ["comment", "address"]
92 | fields.each {|f|
93 | sample = row[f] || ""
94 | given = result[0][f.to_sym] || ""
95 | sample = sample.to_f if given.kind_of? Float or given.kind_of? Fixnum
96 | assert_equal sample, given, "row: #{row.inspect}\nfield: #{f.inspect} sample: #{sample.inspect}, given: #{given.inspect}"
97 |
98 | }
99 | end
100 | end
101 |
102 | def test_city_with_street_type_in_name
103 | result = @db.geocode("Mountain View, CA")
104 | assert_equal result.length, 1
105 | assert_equal result[0][:city], "Mountain View" # (and not "Mountain View Acres, CA")
106 | assert_equal result[0][:state], "CA"
107 | end
108 |
109 | def test_should_get_street_number_correctly
110 | result = @db.geocode("460 West St, Amherst MA 01002-2964", true)
111 | assert_equal '460', result[0][:number]
112 | end
113 |
114 | def test_should_geocode_with_hash
115 | result = @db.geocode({:street => "2200 Wilson Blvd", :city => "Arlington", :region => "VA", :postal_code => "22201"}, true)
116 | result2 = @db.geocode("2200 Wilson Blvd, Arlington, VA 22201")
117 | assert_equal result2,result
118 | end
119 |
120 | def test_should_work_with_partial_hash
121 | result = @db.geocode({:street => "2200 Wilson Blvd", :postal_code => "22201"})
122 | assert_equal result[0][:precision],:range
123 | end
124 |
125 | def test_weird_edge_case_explosion
126 | result = @db.geocode({:street => "1410 Spring Hill Rd", :postal_code => "20221"})
127 | result1 = @db.geocode(:street => "402 Valley View Ave", :postal_code => "12345")
128 | assert_equal result[0][:precision],:zip
129 | end
130 |
131 | def test_city_state_together
132 | result = @db.geocode({:city => "Richmond", :state => "IN"})
133 | assert_equal result[0][:precision],:city
134 | end
135 |
136 | def test_state_street_together
137 | result = @db.geocode({:region => "VA", :street => "14333 Lee Jackson Memorial Hwy"})
138 | #assert_equal result[0][:precision],:range
139 | end
140 |
141 | def test_intersection
142 | result = @db.geocode("Decatur St and Bryant St, San Francisco, CA 94103")
143 | assert_equal result[0][:precision], :intersection
144 | end
145 |
146 | end
147 |
--------------------------------------------------------------------------------
/test/generate.rb:
--------------------------------------------------------------------------------
1 | #!/usr/bin/ruby
2 |
3 | require 'test/unit'
4 | require 'geocoder/us/database'
5 | require 'fastercsv'
6 |
7 | db = Geocoder::US::Database.new("/mnt/tiger2008/geocoder.db",
8 | "/home/sderle/geocoder/lib/libsqlite3_geocoder.so")
9 |
10 | if ARGV.length == 1
11 | result = db.geocode(ARGV[0], 0, 50)
12 | p result
13 | else
14 | FasterCSV.open(ARGV[1], "w", {:headers => true, :write_headers => true}) do |output|
15 | FasterCSV.foreach(ARGV[0], {:headers => true}) do |row|
16 | result = db.geocode(row[0])
17 | count = result.map{|a|[a[:lat], a[:lon]]}.to_set.length
18 | if !result.empty?
19 | row.headers[1..13].each_with_index {|f,i|
20 | if result[0][f.to_sym] != row[i+1]
21 | print "#{row[0]} !#{f} -> #{result[0][f]} != #{row[i+1]}\n"
22 | end
23 | }
24 | result[0][:count] = count
25 | result[0][:address] = row[0]
26 | result[0][:comment] = row[-1]
27 | columns = row.headers.map{|col|col.to_sym}
28 | output << result[0].values_at(*columns)
29 | else
30 | print "!!! #{row[0]}\n"
31 | end
32 | end
33 | end
34 | end
35 |
--------------------------------------------------------------------------------
/test/numbers.rb:
--------------------------------------------------------------------------------
1 | $LOAD_PATH.unshift '../lib'
2 |
3 | require 'test/unit'
4 | require 'geocoder/us/numbers'
5 |
6 | include Geocoder::US
7 |
8 | class TestAddress < Test::Unit::TestCase
9 | def test_number_to_cardinal
10 | assert_equal 'one', Cardinals[1]
11 | assert_equal 'ten', Cardinals[10]
12 | assert_equal 'twelve', Cardinals[12]
13 | assert_equal 'eighty-seven', Cardinals[87]
14 | end
15 |
16 | def test_cardinal_to_number
17 | assert_equal 1, Cardinals['one']
18 | assert_equal 1, Cardinals['One']
19 | assert_equal 10, Cardinals['ten']
20 | assert_equal 12, Cardinals['twelve']
21 | assert_equal 87, Cardinals['eighty-seven']
22 | assert_equal 87, Cardinals['eighty seven']
23 | assert_equal 87, Cardinals['eightyseven']
24 | end
25 |
26 | def test_number_to_ordinal
27 | assert_equal 'first', Ordinals[1]
28 | assert_equal 'second', Ordinals[2]
29 | assert_equal 'tenth', Ordinals[10]
30 | assert_equal 'twelfth', Ordinals[12]
31 | assert_equal 'twentieth', Ordinals[20]
32 | assert_equal 'twenty-second', Ordinals[22]
33 | assert_equal 'eighty-seventh', Ordinals[87]
34 | end
35 |
36 | def test_ordinal_to_number
37 | assert_equal 1, Ordinals['first']
38 | assert_equal 1, Ordinals['First']
39 | assert_equal 10, Ordinals['tenth']
40 | assert_equal 12, Ordinals['twelfth']
41 | assert_equal 73, Ordinals['seventy-third']
42 | assert_equal 74, Ordinals['seventy fourth']
43 | assert_equal 75, Ordinals['seventyfifth']
44 | assert_equal nil, Ordinals['seventy-eleventh']
45 | end
46 | end
47 |
--------------------------------------------------------------------------------
/test/run.rb:
--------------------------------------------------------------------------------
1 | #!/usr/bin/ruby
2 |
3 | $LOAD_PATH.unshift 'tests'
4 | $LOAD_PATH.unshift 'lib'
5 |
6 | require 'test/unit'
7 | require 'numbers'
8 | require 'constants'
9 | require 'address'
10 | require 'database'
11 |
12 |
--------------------------------------------------------------------------------