├── .gitignore ├── LICENSE.txt ├── MANIFEST.in ├── Pipfile ├── Pipfile.lock ├── README.md ├── bin ├── address2coordinate.py ├── coordinate2address.py ├── finalize_geocoder.py ├── geocoder_service.py ├── import_openaddress_data.py ├── postal_service.py └── prepare_osm.py ├── example_setup.sh ├── osmgeocoder ├── __init__.py ├── data │ ├── config-example.json │ ├── imposm_mapping.yml │ ├── sql │ │ ├── geocoder │ │ │ ├── 001-wordlist_for_text_prediction.sql │ │ │ ├── 002-text_prediction.sql │ │ │ ├── 003-forward_geocoding.sql │ │ │ └── 004-reverse_geocoding.sql │ │ ├── optimize │ │ │ ├── 001-osm_house_number_to_struct_house.sql │ │ │ ├── 002-osm_buildings_to_struct_house.sql │ │ │ ├── 003-struct_house_index.sql │ │ │ ├── 004-fill_street_only_entries.sql │ │ │ ├── 005-fill_postcode_only_entries.sql │ │ │ ├── 006-extract_cities.sql │ │ │ ├── 007-index_cities.sql │ │ │ ├── 008-update_struct_house_with_city.sql │ │ │ ├── 009-extract_streets.sql │ │ │ ├── 010-index_streets.sql │ │ │ ├── 011-update_struct_house_with_street.sql │ │ │ ├── 012-geometry_for_streets.sql │ │ │ ├── 013-geometry_for_cities.sql │ │ │ ├── 014-cleanup_struct_house_table.sql │ │ │ ├── 015-cluster_struct_house_table.sql │ │ │ ├── 016-add_indices_to_struct_tables.sql │ │ │ ├── 017-update_city_names_from_osm.sql │ │ │ └── 018-manual_city_name_updates.sql │ │ └── prepare │ │ │ └── 001-custom_types.sql │ └── worldwide.yml ├── format.py ├── forward.py ├── geocoder.py └── reverse.py ├── requirements.txt ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | out.png 2 | dump.json 3 | __pycache__ 4 | *.pyc 5 | .idea/ 6 | .vscode/ 7 | *.egg-info 8 | dist/ 9 | build/ 10 | .mypy_cache/ 11 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018-2019, Johannes Schriewer 4 | All 
rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include osmgeocoder/data/sql/geocoder/*.sql 2 | include osmgeocoder/data/sql/optimize/*.sql 3 | include osmgeocoder/data/sql/prepare/*.sql 4 | include osmgeocoder/data/config-example.json 5 | include osmgeocoder/data/imposm_mapping.yml 6 | include osmgeocoder/data/worldwide.yml 7 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | psycopg2-binary = "*" 10 | pyproj = "*" 11 | requests = "*" 12 | pystache = "*" 13 | python-geohash = "*" 14 | PyYAML = "*" 15 | Shapely = {extras = ["vectorized"],version = "*"} 16 | 17 | [requires] 18 | python_version = "3.7" 19 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "da224315959e9b472172243305ee1ff28fd3033db128363d0206d3b9dbaa37b4" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.7" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "certifi": { 20 | "hashes": [ 21 | "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d", 22 | "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412" 23 | ], 24 | "markers": "python_version >= '3.6'", 25 | "version": "==2022.6.15" 26 | }, 27 | "charset-normalizer": { 28 | "hashes": [ 29 | "sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5", 30 | 
"sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413" 31 | ], 32 | "markers": "python_version >= '3.6'", 33 | "version": "==2.1.0" 34 | }, 35 | "idna": { 36 | "hashes": [ 37 | "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", 38 | "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" 39 | ], 40 | "markers": "python_version >= '3.5'", 41 | "version": "==3.3" 42 | }, 43 | "numpy": { 44 | "hashes": [ 45 | "sha256:1408c3527a74a0209c781ac82bde2182b0f0bf54dea6e6a363fe0cc4488a7ce7", 46 | "sha256:173f28921b15d341afadf6c3898a34f20a0569e4ad5435297ba262ee8941e77b", 47 | "sha256:1865fdf51446839ca3fffaab172461f2b781163f6f395f1aed256b1ddc253622", 48 | "sha256:3119daed207e9410eaf57dcf9591fdc68045f60483d94956bee0bfdcba790953", 49 | "sha256:35590b9c33c0f1c9732b3231bb6a72d1e4f77872390c47d50a615686ae7ed3fd", 50 | "sha256:37e5ebebb0eb54c5b4a9b04e6f3018e16b8ef257d26c8945925ba8105008e645", 51 | "sha256:37ece2bd095e9781a7156852e43d18044fd0d742934833335599c583618181b9", 52 | "sha256:3ab67966c8d45d55a2bdf40701536af6443763907086c0a6d1232688e27e5447", 53 | "sha256:47f10ab202fe4d8495ff484b5561c65dd59177949ca07975663f4494f7269e3e", 54 | "sha256:55df0f7483b822855af67e38fb3a526e787adf189383b4934305565d71c4b148", 55 | "sha256:5d732d17b8a9061540a10fda5bfeabca5785700ab5469a5e9b93aca5e2d3a5fb", 56 | "sha256:68b69f52e6545af010b76516f5daaef6173e73353e3295c5cb9f96c35d755641", 57 | "sha256:7e8229f3687cdadba2c4faef39204feb51ef7c1a9b669247d49a24f3e2e1617c", 58 | "sha256:8002574a6b46ac3b5739a003b5233376aeac5163e5dcd43dd7ad062f3e186129", 59 | "sha256:876f60de09734fbcb4e27a97c9a286b51284df1326b1ac5f1bf0ad3678236b22", 60 | "sha256:9ce242162015b7e88092dccd0e854548c0926b75c7924a3495e02c6067aba1f5", 61 | "sha256:a35c4e64dfca659fe4d0f1421fc0f05b8ed1ca8c46fb73d9e5a7f175f85696bb", 62 | "sha256:aeba539285dcf0a1ba755945865ec61240ede5432df41d6e29fab305f4384db2", 63 | "sha256:b15c3f1ed08df4980e02cc79ee058b788a3d0bef2fb3c9ca90bb8cbd5b8a3a04", 64 | 
"sha256:c2f91f88230042a130ceb1b496932aa717dcbd665350beb821534c5c7e15881c", 65 | "sha256:d748ef349bfef2e1194b59da37ed5a29c19ea8d7e6342019921ba2ba4fd8b624", 66 | "sha256:e0d7447679ae9a7124385ccf0ea990bb85bb869cef217e2ea6c844b6a6855073" 67 | ], 68 | "version": "==1.23.1" 69 | }, 70 | "psycopg2-binary": { 71 | "hashes": [ 72 | "sha256:01310cf4cf26db9aea5158c217caa92d291f0500051a6469ac52166e1a16f5b7", 73 | "sha256:083a55275f09a62b8ca4902dd11f4b33075b743cf0d360419e2051a8a5d5ff76", 74 | "sha256:090f3348c0ab2cceb6dfbe6bf721ef61262ddf518cd6cc6ecc7d334996d64efa", 75 | "sha256:0a29729145aaaf1ad8bafe663131890e2111f13416b60e460dae0a96af5905c9", 76 | "sha256:0c9d5450c566c80c396b7402895c4369a410cab5a82707b11aee1e624da7d004", 77 | "sha256:10bb90fb4d523a2aa67773d4ff2b833ec00857f5912bafcfd5f5414e45280fb1", 78 | "sha256:12b11322ea00ad8db8c46f18b7dfc47ae215e4df55b46c67a94b4effbaec7094", 79 | "sha256:152f09f57417b831418304c7f30d727dc83a12761627bb826951692cc6491e57", 80 | "sha256:15803fa813ea05bef089fa78835118b5434204f3a17cb9f1e5dbfd0b9deea5af", 81 | "sha256:15c4e4cfa45f5a60599d9cec5f46cd7b1b29d86a6390ec23e8eebaae84e64554", 82 | "sha256:183a517a3a63503f70f808b58bfbf962f23d73b6dccddae5aa56152ef2bcb232", 83 | "sha256:1f14c8b0942714eb3c74e1e71700cbbcb415acbc311c730370e70c578a44a25c", 84 | "sha256:1f6b813106a3abdf7b03640d36e24669234120c72e91d5cbaeb87c5f7c36c65b", 85 | "sha256:280b0bb5cbfe8039205c7981cceb006156a675362a00fe29b16fbc264e242834", 86 | "sha256:2d872e3c9d5d075a2e104540965a1cf898b52274a5923936e5bfddb58c59c7c2", 87 | "sha256:2f9ffd643bc7349eeb664eba8864d9e01f057880f510e4681ba40a6532f93c71", 88 | "sha256:3303f8807f342641851578ee7ed1f3efc9802d00a6f83c101d21c608cb864460", 89 | "sha256:35168209c9d51b145e459e05c31a9eaeffa9a6b0fd61689b48e07464ffd1a83e", 90 | "sha256:3a79d622f5206d695d7824cbf609a4f5b88ea6d6dab5f7c147fc6d333a8787e4", 91 | "sha256:404224e5fef3b193f892abdbf8961ce20e0b6642886cfe1fe1923f41aaa75c9d", 92 | "sha256:46f0e0a6b5fa5851bbd9ab1bc805eef362d3a230fbdfbc209f4a236d0a7a990d", 
93 | "sha256:47133f3f872faf28c1e87d4357220e809dfd3fa7c64295a4a148bcd1e6e34ec9", 94 | "sha256:526ea0378246d9b080148f2d6681229f4b5964543c170dd10bf4faaab6e0d27f", 95 | "sha256:53293533fcbb94c202b7c800a12c873cfe24599656b341f56e71dd2b557be063", 96 | "sha256:539b28661b71da7c0e428692438efbcd048ca21ea81af618d845e06ebfd29478", 97 | "sha256:57804fc02ca3ce0dbfbef35c4b3a4a774da66d66ea20f4bda601294ad2ea6092", 98 | "sha256:63638d875be8c2784cfc952c9ac34e2b50e43f9f0a0660b65e2a87d656b3116c", 99 | "sha256:6472a178e291b59e7f16ab49ec8b4f3bdada0a879c68d3817ff0963e722a82ce", 100 | "sha256:68641a34023d306be959101b345732360fc2ea4938982309b786f7be1b43a4a1", 101 | "sha256:6e82d38390a03da28c7985b394ec3f56873174e2c88130e6966cb1c946508e65", 102 | "sha256:761df5313dc15da1502b21453642d7599d26be88bff659382f8f9747c7ebea4e", 103 | "sha256:7af0dd86ddb2f8af5da57a976d27cd2cd15510518d582b478fbb2292428710b4", 104 | "sha256:7b1e9b80afca7b7a386ef087db614faebbf8839b7f4db5eb107d0f1a53225029", 105 | "sha256:874a52ecab70af13e899f7847b3e074eeb16ebac5615665db33bce8a1009cf33", 106 | "sha256:887dd9aac71765ac0d0bac1d0d4b4f2c99d5f5c1382d8b770404f0f3d0ce8a39", 107 | "sha256:8b344adbb9a862de0c635f4f0425b7958bf5a4b927c8594e6e8d261775796d53", 108 | "sha256:8fc53f9af09426a61db9ba357865c77f26076d48669f2e1bb24d85a22fb52307", 109 | "sha256:91920527dea30175cc02a1099f331aa8c1ba39bf8b7762b7b56cbf54bc5cce42", 110 | "sha256:93cd1967a18aa0edd4b95b1dfd554cf15af657cb606280996d393dadc88c3c35", 111 | "sha256:99485cab9ba0fa9b84f1f9e1fef106f44a46ef6afdeec8885e0b88d0772b49e8", 112 | "sha256:9d29409b625a143649d03d0fd7b57e4b92e0ecad9726ba682244b73be91d2fdb", 113 | "sha256:a29b3ca4ec9defec6d42bf5feb36bb5817ba3c0230dd83b4edf4bf02684cd0ae", 114 | "sha256:a9e1f75f96ea388fbcef36c70640c4efbe4650658f3d6a2967b4cc70e907352e", 115 | "sha256:accfe7e982411da3178ec690baaceaad3c278652998b2c45828aaac66cd8285f", 116 | "sha256:adf20d9a67e0b6393eac162eb81fb10bc9130a80540f4df7e7355c2dd4af9fba", 117 | 
"sha256:af9813db73395fb1fc211bac696faea4ca9ef53f32dc0cfa27e4e7cf766dcf24", 118 | "sha256:b1c8068513f5b158cf7e29c43a77eb34b407db29aca749d3eb9293ee0d3103ca", 119 | "sha256:bda845b664bb6c91446ca9609fc69f7db6c334ec5e4adc87571c34e4f47b7ddb", 120 | "sha256:c381bda330ddf2fccbafab789d83ebc6c53db126e4383e73794c74eedce855ef", 121 | "sha256:c3ae8e75eb7160851e59adc77b3a19a976e50622e44fd4fd47b8b18208189d42", 122 | "sha256:d1c1b569ecafe3a69380a94e6ae09a4789bbb23666f3d3a08d06bbd2451f5ef1", 123 | "sha256:def68d7c21984b0f8218e8a15d514f714d96904265164f75f8d3a70f9c295667", 124 | "sha256:dffc08ca91c9ac09008870c9eb77b00a46b3378719584059c034b8945e26b272", 125 | "sha256:e3699852e22aa68c10de06524a3721ade969abf382da95884e6a10ff798f9281", 126 | "sha256:e847774f8ffd5b398a75bc1c18fbb56564cda3d629fe68fd81971fece2d3c67e", 127 | "sha256:ffb7a888a047696e7f8240d649b43fb3644f14f0ee229077e7f6b9f9081635bd" 128 | ], 129 | "index": "pypi", 130 | "version": "==2.9.3" 131 | }, 132 | "pyproj": { 133 | "hashes": [ 134 | "sha256:120d45ed73144c65e9677dc73ba8a531c495d179dd9f9f0471ac5acc02d7ac4b", 135 | "sha256:140fa649fedd04f680a39f8ad339799a55cb1c49f6a84e1b32b97e49646647aa", 136 | "sha256:1adc9ccd1bf04998493b6a2e87e60656c75ab790653b36cfe351e9ef214828ed", 137 | "sha256:1ef1bfbe2dcc558c7a98e2f1836abdcd630390f3160724a6f4f5c818b2be0ad5", 138 | "sha256:2fef9c1e339f25c57f6ae0558b5ab1bbdf7994529a30d8d7504fc6302ea51c03", 139 | "sha256:3cc4771403db54494e1e55bca8e6d33cde322f8cf0ed39f1557ff109c66d2cd1", 140 | "sha256:42eea10afc750fccd1c5c4ba56de29ab791ab4d83c1f7db72705566282ac5396", 141 | "sha256:45487942c19c5a8b09c91964ea3201f4e094518e34743cae373889a36e3d9260", 142 | "sha256:473961faef7a9fd723c5d432f65220ea6ab3854e606bf84b4d409a75a4261c78", 143 | "sha256:52efb681647dfac185cc655a709bc0caaf910031a0390f816f5fc8ce150cbedc", 144 | "sha256:531ea36519fa7b581466d4b6ab32f66ae4dadd9499d726352f71ee5e19c3d1c5", 145 | "sha256:56b0f9ee2c5b2520b18db30a393a7b86130cf527ddbb8c96e7f3c837474a9d79", 146 | 
"sha256:5ab0d6e38fda7c13726afacaf62e9f9dd858089d67910471758afd9cb24e0ecd", 147 | "sha256:5ca5f32b56210429b367ca4f9a57ffe67975c487af82e179a24370879a3daf68", 148 | "sha256:5dac03d4338a4c8bd0f69144c527474f517b4cbd7d2d8c532cd8937799723248", 149 | "sha256:5f92d8f6514516124abb714dce912b20867831162cfff9fae2678ef07b6fcf0f", 150 | "sha256:67025e37598a6bbed2c9c6c9e4c911f6dd39315d3e1148ead935a5c4d64309d5", 151 | "sha256:797ad5655d484feac14b0fbb4a4efeaac0cf780a223046e2465494c767fd1c3b", 152 | "sha256:aba199704c824fb84ab64927e7bc9ef71e603e483130ec0f7e09e97259b8f61f", 153 | "sha256:aed1a3c0cd4182425f91b48d5db39f459bc2fe0d88017ead6425a1bc85faee33", 154 | "sha256:b3d8e14d91cc95fb3dbc03a9d0588ac58326803eefa5bbb0978d109de3304fbe", 155 | "sha256:b59c08aea13ee428cf8a919212d55c036cc94784805ed77c8f31a4d1f541058c", 156 | "sha256:c99f7b5757a28040a2dd4a28c9805fdf13eef79a796f4a566ab5cb362d10630d" 157 | ], 158 | "index": "pypi", 159 | "version": "==3.3.1" 160 | }, 161 | "pystache": { 162 | "hashes": [ 163 | "sha256:93bf92b2149a4c4b58d12142e2c4c6dd5c08d89e4c95afccd4b6efe2ee1d470d" 164 | ], 165 | "index": "pypi", 166 | "version": "==0.6.0" 167 | }, 168 | "python-geohash": { 169 | "hashes": [ 170 | "sha256:05a21fcf4eda1a5eddbd291890ade23fc5ddaa6bb98f2ee23d2d384ed14f086d" 171 | ], 172 | "index": "pypi", 173 | "version": "==0.8.5" 174 | }, 175 | "pyyaml": { 176 | "hashes": [ 177 | "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", 178 | "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", 179 | "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", 180 | "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b", 181 | "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4", 182 | "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07", 183 | "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba", 184 | 
"sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9", 185 | "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", 186 | "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", 187 | "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", 188 | "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", 189 | "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", 190 | "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", 191 | "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", 192 | "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", 193 | "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", 194 | "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", 195 | "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", 196 | "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", 197 | "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", 198 | "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", 199 | "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", 200 | "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", 201 | "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", 202 | "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", 203 | "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", 204 | "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", 205 | "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", 206 | "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", 207 | "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", 208 | 
"sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", 209 | "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" 210 | ], 211 | "index": "pypi", 212 | "version": "==6.0" 213 | }, 214 | "requests": { 215 | "hashes": [ 216 | "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983", 217 | "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349" 218 | ], 219 | "index": "pypi", 220 | "version": "==2.28.1" 221 | }, 222 | "shapely": { 223 | "extras": [ 224 | "vectorized" 225 | ], 226 | "hashes": [ 227 | "sha256:0c0fd457ce477b1dced507a72f1e2084c9191bfcb8a1e09886990ebd02acf024", 228 | "sha256:137f1369630408024a62ff79a437a5657e6c5b76b9cd352dde704b425acdb298", 229 | "sha256:15a856fbb588ad5d042784e00918c662902776452008c771ecba2ff615cd197a", 230 | "sha256:1d95842cc6bbbeab673061b63e70b07be9a375c15a60f4098f8fbd29f43af1b4", 231 | "sha256:256bdf8080bb7bb504d47b2c76919ecebab9708cc1b26266b3ec32b42448f642", 232 | "sha256:2e02da2e988e74d61f15c720f9f613fab51942aae2dfeacdcb78eadece00e1f3", 233 | "sha256:3423299254deec075e79fb7dc7909d702104e4167149de7f45510c3a6342eeea", 234 | "sha256:3a40bf497b57a6625b83996aed10ce2233bca0e5471b8af771b186d681433ac5", 235 | "sha256:44d2832c1b706bf43101fda92831a083467cc4b4923a7ed17319ab599c1025d8", 236 | "sha256:5254240eefc44139ab0d128faf671635d8bdd9c23955ee063d4d6b8f20073ae0", 237 | "sha256:56413f7d32c70b63f239eb0865b24c0c61029e38757de456cc4ab3c416559a0b", 238 | "sha256:572af9d5006fd5e3213e37ee548912b0341fb26724d6dc8a4e3950c10197ebb6", 239 | "sha256:62056e64b12b6d483d79f8e34bf058d2fe734d51c9227c1713705399434eff3b", 240 | "sha256:68c8e18dc9dc8a198c3addc8c9596f64137101f566f04b96ecfca0b214cb8b12", 241 | "sha256:6bdc7728f1e5df430d8c588661f79f1eed4a2728c8b689e12707cfec217f68f8", 242 | "sha256:6fcb28836ae93809de1dde73c03c9c24bab0ba2b2bf419ddb2aeb72c96d110e9", 243 | "sha256:75042e8039c79dd01f102bb288beace9dc2f49fc44a2dea875f9b697aa8cd30d", 244 | 
"sha256:78966332a89813b237de357a03f612fd451a871fe6e26c12b6b71645fe8eee39", 245 | "sha256:7c8eda45085ccdd7f9805ea4a93fdd5eb0b6039a61d5f0cefb960487e6dc17a1", 246 | "sha256:7c9e3400b716c51ba43eea1678c28272580114e009b6c78cdd00c44df3e325fa", 247 | "sha256:840be3f27a1152851c54b968f2e12d718c9f13b7acd51c482e58a70f60f29e31", 248 | "sha256:8e3ed52a081da58eb4a885c157c594876633dbd4eb283f13ba5bf39c82322d76", 249 | "sha256:8fe641f1f61b3d43dd61b5a85d2ef023e6e19bf8f204a5160a1cb1ec645cbc09", 250 | "sha256:a58e1f362f2091743e5e13212f5d5d16251a4bb63dd0ed587c652d3be9620d3a", 251 | "sha256:a60861b5ca2c488ebcdc706eca94d325c26d1567921c74acc83df5e6913590c7", 252 | "sha256:beee3949ddf381735049cfa6532fb234d5d20a5be910c4f2fb7c7295fd7960e3", 253 | "sha256:c0a0d7752b145343838bd36ed09382d85f5befe426832d7384c5b051c147acbd", 254 | "sha256:c60f3758212ec480675b820b13035dda8af8f7cc560d2cc67999b2717fb8faef", 255 | "sha256:ce0b5c5f7acbccf98b3460eecaa40e9b18272b2a734f74fcddf1d7696e047e95", 256 | "sha256:cec89a5617c0137f4678282e983c3d63bf838fb00cdf318cc555b4d8409f7130", 257 | "sha256:d3f3fac625690f01f35af665649e993f15f924e740b5c0ac0376900655815521", 258 | "sha256:d74de394684d66e25e780b0359fda85be7766af85940fa2dfad728b1a815c71f", 259 | "sha256:e07b0bd2a0e61a8afd4d1c1bd23f3550b711f01274ffb53de99358fd781eefd8", 260 | "sha256:f12695662c3ad1e6031b3de98f191963d0f09de6d1a4988acd907405644032ba" 261 | ], 262 | "index": "pypi", 263 | "version": "==1.8.2" 264 | }, 265 | "urllib3": { 266 | "hashes": [ 267 | "sha256:8298d6d56d39be0e3bc13c1c97d133f9b45d797169a0e11cdd0e0489d786f7ec", 268 | "sha256:879ba4d1e89654d9769ce13121e0f94310ea32e8d2f8cf587b77c08bbcdb30d6" 269 | ], 270 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'", 271 | "version": "==1.26.10" 272 | } 273 | }, 274 | "develop": {} 275 | } 276 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # OSMGeocoder 2 | 3 | Python implementation for an OSM / Openaddresses.io Geocoder. 4 | 5 | This geocoder is implemented in PostgreSQL DB functions as much as possible; there is a simple API and an example flask app included. 6 | 7 | You will need PostgreSQL 9.5+ (or 11.0+ for OpenAddresses.io support) with PostGIS installed as well as some disk space and data-files from OpenStreetMap and (optionally) OpenAddresses.io. 8 | 9 | Data import will be done via [Omniscale's imposm3](https://github.com/omniscale/imposm3) and a supplied python script to import the openaddresses.io data. 10 | 11 | Optionally you can use the [libpostal machine learning address classifier](https://github.com/openvenues/libpostal) to parse addresses supplied as input to the forward geocoder. 12 | 13 | For formatting the addresses from the reverse geocoder the `worldwide.yml` from [OpenCageData address-formatting repository](https://github.com/OpenCageData/address-formatting) is used to format the address according to customs in the country that is being geocoded. 14 | 15 | See `README.md` in the [repository](https://github.com/dunkelstern/osmgeocoder) for more information. 
16 | 17 | ## Changelog 18 | 19 | ### v1.0 20 | 21 | - Initial release, reverse geocoding works, forward geocoding is slow 22 | 23 | ### v2.0 24 | 25 | **Warning:** DB Format changed, you'll have to re-import data 26 | 27 | - Fixed forward geocoding speed 28 | - Fixed import scripts to be more resilient 29 | - Made Openaddresses.io completely optional 30 | - Restored compatibility with older 3.x python versions 31 | - Restored compatibility with older PostgreSQL DB versions (9.5+ if you do not use openaddresses.io) 32 | - Switched to `pipenv` 33 | 34 | ### v2.0.1 35 | 36 | - Fix missing import for structured forward geocoding 37 | - Fix Copy and Paste error in forward geocoding SQL 38 | 39 | If you're coming from `2.0.0`, re-run the finalize step to update the SQL functions: 40 | 41 | ```bash 42 | $ pipenv run bin/finalize_geocoder.py --db postgresql://geocoder:password@localhost/osmgeocoder 43 | ``` 44 | 45 | ### v2.1.0 46 | 47 | - Add type hints to all interfaces 48 | - Add `_dict` variants for geocoding functions to get _raw_ data instead of formatted strings 49 | - Bugfix: Reading of custom opencage data file for address formatting was broken 50 | - Returned addresses now contain county and state if available 51 | 52 | ## TODO 53 | 54 | - Return Attribution in API and in webservices 55 | 56 | ## "Quick" and dirty how-to 57 | 58 | **Statistics outdated, will be updated shortly** 59 | 60 | Just for your information, this process takes a lot of time for a big import. 
Example figures on a machine with a Core i7-7700K at 4.2 GHz with a Samsung (SATA-)SSD and 32GB of RAM (and some tuned buffer sizes for Postgres): 61 | 62 | - Import of the Europe-Region of OpenStreetMap: 63 | - Import time: 3 hours 64 | - OSM Data file: 20 GB 65 | - Temporary space needed: 35 GB 66 | - Final size in DB: 58.7 GB 67 | - Summary of space requirement: 115 GB 68 | - Import of the two Openaddresses.io files for Europe: 69 | - Import time: 1 hour 70 | - Data files: 4 GB 71 | - Temporary space needed: 2 GB 72 | - Final size in DB: 18 GB 73 | - Summary of space requirement: 24 GB 74 | - Conversion of the OpenStreetMap data into geocoding format: 75 | - Conversion time: 5 hours 76 | - Final size in DB: 10.5GB 77 | 78 | So in summary you'll need 9 hours of time and 150 GB of disk space. 79 | After cleanup you'll need 28.5 GB of disk space for the Europe data set. A compressed DB export of the converted data sums up to 2.8 GB of RAW data and will expand on import to the aforementioned 28 GB. 80 | 81 | 1. Create a PostgreSQL Database (we use the name `osmgeocoder` for the DB name and `geocoder` for the DB user in the example) 82 | 2. 
Create the PostGIS, trigram and fuzzy string search extensions for the DB: 83 | ```sql 84 | CREATE SCHEMA gis; -- isolate postgis into its own schema for easier development 85 | ALTER SCHEMA gis OWNER TO geocoder; 86 | CREATE EXTENSION postgis WITH SCHEMA gis; -- put postgis into gis schema 87 | 88 | CREATE SCHEMA str; -- isolate string functions into its own schema for easier development 89 | ALTER SCHEMA str OWNER TO geocoder; 90 | CREATE EXTENSION pg_trgm WITH SCHEMA str; -- trigram search, used for forward geocoding 91 | CREATE EXTENSION fuzzystrmatch WITH SCHEMA str; -- metaphone search, used for text prediction 92 | 93 | CREATE SCHEMA crypto; -- isolate crypto functions into its own schema for easier development 94 | ALTER SCHEMA crypto OWNER TO geocoder; 95 | CREATE EXTENSION pgcrypto WITH SCHEMA crypto; -- used to generate uuids 96 | 97 | ALTER DATABASE osmgeocoder SET search_path TO public, gis, str, crypto; -- set search path to include the other schemas 98 | ``` 99 | 3. Fetch a copy of [imposm3](https://github.com/omniscale/imposm3) 100 | 4. Get an OpenStreetMap data file (for example from [Geofabrik](http://download.geofabrik.de/), start with a small region!) 101 | 5. Create a virtualenv and install packages: 102 | ```bash 103 | pipenv sync 104 | ``` 105 | 6. See below for importing openaddresses.io data if needed (this is completely optional) 106 | 7. Import some OpenStreetMap data into the DB (grab a coffee or two): 107 | ```bash 108 | $ bin/prepare_osm.py --db postgresql://geocoder:password@localhost/osmgeocoder --import-data osm.pbf --optimize 109 | ``` 110 | 8. Modify the configuration file to match your setup. The example config is in `osmgeocoder/data/config-example.json`. 111 | 9. Optionally install and start the postal machine learning address categorizer (see below) 112 | 10. Import the geocoding functions into the DB: 113 | ```bash 114 | $ bin/finalize_geocoder.py --db postgresql://geocoder:password@localhost/osmgeocoder 115 | ``` 116 | 11. 
Geocode: 117 | ```bash 118 | bin/address2coordinate.py --config config.json --center 48.3849 10.8631 Lauterl 119 | bin/coordinate2address.py --config config.json 48.3849 10.8631 120 | ``` 121 | 122 | For a full example see the ``example_setup.sh`` shell script. 123 | 124 | **NOTE:** you can also install this via pip: 125 | - the scripts from the `bin` directory will be copied to your environment. 126 | - An example config file will be placed in your virtualenv in `osmgeocoder/data/config-example.json` 127 | - The PIP installation will not install `flask` and `gunicorn` nor will it try to install `postal`, 128 | if you want to use those services you need to install those optional dependencies yourself (read on!) 129 | 130 | 131 | ## Optional import of openaddresses.io data 132 | 133 | For some countries there are not enough buildings tagged in the OSM data so we can use the [OpenAddresses.io](http://results.openaddresses.io) data to augment the OSM data. 134 | 135 | The import is relatively slow as the data is contained in a big bunch of zipped CSV files, we try to use more threads to import the data faster but it could take a while... 136 | 137 | ### Importing openaddresses.io data 138 | 139 | ```bash 140 | wget https://s3.amazonaws.com/data.openaddresses.io/openaddr-collected-europe.zip # download openaddress.io data 141 | pipenv run bin/import_openaddress_data.py \ # run an import 142 | --db postgresql://geocoder:password@host/osmgeocoder \ 143 | --threads 4 \ 144 | --optimize \ 145 | openaddr-collected-europe.zip 146 | ``` 147 | 148 | When you have imported the data it will create some tables in your DB, `license` which contains the licenses of the imported data (the API will return the license attribution string with the data), `oa_city` which is a foreign key target from `oa_street` which in turn is a fk target to `oa_house` which contains the imported data. 
149 | 150 | If you want to import more than one file, just do so, the tables will not be cleared between import runs, the indices will be dropped and rebuilt after the import though. Skip the `--optimize` flag for the imports and run an optimize only pass last to save some time. 151 | 152 | If you want to save even more time import with `--fast`, but be aware this leaves the DB without any indices or foreign key constraints, an optimize pass is required after importing with this flag! 153 | 154 | If you want to start over run the command with the `--clean-start` flag... Be careful, this destroys all openaddresses.io data in the tables. 155 | 156 | 157 | ## Optional support for libpostal 158 | 159 | ### Installation of libpostal 160 | 161 | Be aware that the make process will download some data-files (about 1GB in size). The installation of libpostal 162 | will need around 1 GB of disk space and about 2 GB of disk space while compiling. 163 | 164 | Currently there is no Ubuntu package for `libpostal`, so we have to install it by hand: 165 | 166 | ```bash 167 | git clone https://github.com/openvenues/libpostal 168 | cd libpostal 169 | ./bootstrap.sh 170 | ./configure --prefix=/opt/libpostal --datadir=/opt/libpostal/share 171 | make -j4 172 | sudo make install 173 | echo "/opt/libpostal/lib" | sudo tee /etc/ld.so.conf.d/libpostal.conf 174 | sudo ldconfig 175 | echo 'export PKG_CONFIG_PATH="$PKG_CONFIG_PATH:/opt/libpostal/lib/pkgconfig"' | sudo tee /etc/profile.d/libpostal.sh 176 | ``` 177 | 178 | Now log out and on again or run a new login shell (e.g. 
`bash -l`) and install the missing python modules: 179 | 180 | ```bash 181 | workon osmgeocoder 182 | CFLAGS="-L/opt/libpostal/lib -I/opt/libpostal/include" pip install postal 183 | pip install gunicorn 184 | pip install flask 185 | ``` 186 | 187 | ### Run the classifier service 188 | 189 | **Source checkout:** 190 | 191 | ```bash 192 | pipenv run bin/postal_service.py --config config/config.json 193 | ``` 194 | 195 | **PIP install:** 196 | 197 | ```bash 198 | /path/to/virtualenv/bin/postal_service.py --config config.json 199 | ``` 200 | 201 | Attention: Depending on the speed of your disk, startup of this service may take several seconds 202 | (this is why this is implemented as a service) and it will take about 2 GB of RAM, so be warned! 203 | 204 | 205 | If you want to run it in production mode just run it with `gunicorn` directly. 206 | See the [Gunicorn documentation](http://docs.gunicorn.org/en/latest/settings.html) for further information. 207 | A simple example follows (one worker, run as daemon, bind to 127.0.0.1:3200): 208 | 209 | ```bash 210 | pipenv run gunicorn postal_service:app \ 211 | --bind 127.0.0.1:3200 \ 212 | --workers 1 \ 213 | --pid /var/run/postal_service.pid \ 214 | --log-file /var/log/postal_service.log \ 215 | --daemon 216 | ``` 217 | 218 | **Attention**: Every worker takes that 2GB RAM toll! 219 | 220 | ## Running an HTTP geocoding service 221 | 222 | The file `geocoder_service.py` is a simple Flask app to present the geocoder as an HTTP service. 223 | 224 | ### Installation 225 | 226 | ```bash 227 | pipenv run pip install gunicorn 228 | pipenv run pip install flask 229 | ``` 230 | 231 | You will need a working config file too. 
232 | 233 | ### Run the service 234 | 235 | The service will search for a config file in the following places: 236 | - `~/.osmgeocoderrc` 237 | - `~/.config/osmgeocoder.json` 238 | - `/etc/osmgeocoder.json` 239 | - `osmgeocoder.json` 240 | 241 | You can override the path by setting the environment variable `GEOCODER_CONFIG`. 242 | 243 | Gunicorn example: 244 | 245 | ```bash 246 | pipenv run gunicorn geocoder_service:app \ 247 | --env 'GEOCODER_CONFIG=config/config.json' \ 248 | --bind 127.0.0.1:8080 \ 249 | --workers 4 \ 250 | --pid /var/run/osmgeocoder_service.pid \ 251 | --log-file /var/log/osmgeocoder_service.log \ 252 | --daemon 253 | ``` 254 | 255 | ### Defined API-Endpoints 256 | 257 | #### Forward geocoding 258 | 259 | Address string to coordinate. 260 | 261 | - Endpoint `/forward` 262 | - Method `POST` 263 | - Content-Type `application/json` 264 | - Body: 265 | - `address`: (required) User input / address to convert to coordinates 266 | - `center`: (optional) Array with center coordinate to sort matches 267 | - `country`: (optional) ISO Country code, use only if no center coordinate is available as it slows down the geocoder massively. 268 | - Response: Array of objects 269 | - `address`: Fully written address line, formatted by country standards 270 | - `lat`: Latitude 271 | - `lon`: Longitude 272 | - `license`: License attribution string 273 | 274 | #### Reverse geocoding 275 | 276 | Coordinate to address string. 277 | 278 | - Endpoint `/reverse` 279 | - Method `POST` 280 | - Content-Type `application/json` 281 | - Body: 282 | - `lat`: Latitude 283 | - `lon`: Longitude 284 | - Response: Object 285 | - `address`: Nearest address to the point (building search) or `null`, formatted by country standards 286 | - `license`: License attribution string 287 | 288 | #### Predictive text 289 | 290 | Intelligent text completion while typing. 
291 | 292 | - Endpoint `/predict` 293 | - Method `POST` 294 | - Content-Type `application/json` 295 | - Body: 296 | - `query`: User input 297 | - Response: Object 298 | - `predictions`: Up to 10 text predictions, sorted by equality and most common first 299 | 300 | 301 | ## Config file 302 | 303 | Example: 304 | 305 | ```json 306 | { 307 | "db": { 308 | "dbname": "osm", 309 | "user": "osm", 310 | "password": "password" 311 | }, 312 | "opencage_data_file": "data/worldwide.yml", 313 | "postal": { 314 | "service_url": "http://localhost:3200/", 315 | "port": 3200 316 | } 317 | } 318 | ``` 319 | 320 | Keys: 321 | 322 | - `db`: Database configuration; this will be built into a [Postgres connection string](https://www.postgresql.org/docs/current/static/libpq-connect.html#id-1.7.3.8.3.5) 323 | - `postal` -> `service_url`: (optional) URL where to find the libpostal service, if not supplied searching is reduced to street names only 324 | - `postal` -> `port`: (optional) only used when running the libpostal service directly without explicitly using gunicorn 325 | - `opencage_data_file`: (optional) Data file for the address formatter, defaults to the one included in the package 326 | 327 | ## API documentation 328 | 329 | The complete project actually contains only two classes: 330 | 331 | ### `Geocoder` 
332 | 333 | Publicly accessible method prototypes are: 334 | 335 | ```python 336 | def __init__(self, db=None, db_handle=None, address_formatter_config=None, postal=None): 337 | pass 338 | 339 | def forward(self, address, country=None, center=None): 340 | pass 341 | 342 | def forward_dict(self, address, country=None, center=None): 343 | pass 344 | 345 | def forward_structured(self, road=None, house_number=None, postcode=None, city=None, country=None, center=None): 346 | pass 347 | 348 | def forward_structured_dict(self, road=None, house_number=None, postcode=None, city=None, country=None, center=None): 349 | pass 350 | 351 | def reverse(self, lat, lon, radius=100, limit=10): 352 | pass 353 | 354 | def reverse_dict(self, lat, lon, radius=100, limit=10): 355 | pass 356 | 357 | def reverse_epsg3857(self, x, y, radius=100, limit=10): 358 | pass 359 | 360 | def reverse_epsg3857_dict(self, x, y, radius=100, limit=10): 361 | pass 362 | 363 | def predict_text(self, input): 364 | pass 365 | ``` 366 | 367 | #### `__init__` 368 | 369 | Initialize a geocoder, this will read all files to be used and set up the DB connection. 370 | - `db`: Dictionary with DB config, when used the geocoder will create a DB-connection on its own 371 | - `db_handle`: Postgres connection, use this if the connection is handled outside the scope of the geocoder (for example when you want to use the geocoder in Django) 372 | - `address_formatter_config`: Path to the `worldwide.yml` (optional) 373 | - `postal`: Dictionary with postal config (at least `service_url` key) 374 | 375 | See __Config File__ above for more info. 376 | 377 | #### `forward` and `forward_dict` 378 | 379 | Geocode an address to a lat, lon location. 
380 | - `address`: Address to code 381 | - `country`: (optional) Country code to restrict search and format address 382 | - `center`: (optional) Center coordinate to sort results for (will be used to determine country too, so you can skip the `country` flag) 383 | 384 | This function is a generator which `yield`s the obtained results. 385 | 386 | #### `forward_structured` and `forward_structured_dict` 387 | 388 | Geocode an address to a lat, lon location without using the address classifier, use this if your input is already structured. 389 | - `road`: (optional) Street/Road name 390 | - `house_number`: (optional) House number, this is a string because of things like `1a` 391 | - `postcode`: (optional) Post code, this is a string because not all countries use numbers only and because of zero prefixes 392 | - `city`: (optional) City 393 | - `country`: (optional) Country code to restrict search and format address 394 | - `center`: (optional) Center coordinate to sort results for (will be used to determine country too, so you can skip the `country` flag) 395 | 396 | Be sure that at least one of `road`, `postcode` or `city` is filled, results are not predictable if none is set. 397 | This function is a generator which `yield`s the obtained results. 398 | 399 | #### `reverse` and `reverse_dict` 400 | 401 | Geocode a lat, lon location into a readable address: 402 | - `lat`: Latitude to code 403 | - `lon`: Longitude to code 404 | - `radius`: (optional) Search radius in meters 405 | - `limit`: (optional) maximum number of results to return 406 | 407 | This function is a generator which `yield`s the obtained results. 
408 | 409 | #### `reverse_epsg3857` and `reverse_epsg3857_dict` 410 | 411 | Geocode an x, y location in EPSG 3857 projection (aka Web Mercator) into a readable address: 412 | - `x`: X coordinate 413 | - `y`: Y coordinate 414 | - `radius`: Search radius in meters 415 | - `limit`: (optional) maximum number of results to return 416 | 417 | Use this function if you're using Web Mercator in your application internally to avoid constant re-projection between lat, lon and x, y. 418 | This function is a generator which `yield`s the obtained results. 419 | 420 | #### `predict_text` 421 | 422 | Return possible text prediction results for the user input. This could be used while the user is typing their query to reduce the load on the database (by avoiding typos and running fewer requests against the geocoder because the user skips over typing long words one character at a time). 423 | - `input`: User input 424 | 425 | This function is a generator which `yield`s the obtained results. 426 | 427 | **ATTENTION**: Do not feed complete "sentences" into this function as it will not yield the expected result, tokenize into words on client side and only request predictions for the current word the user is editing. 428 | 429 | 430 | ### `AddressFormatter` 431 | 432 | Publicly accessible method prototypes are: 433 | 434 | ```python 435 | def __init__(self, config=None): 436 | pass 437 | 438 | def format(self, address, country=None): 439 | pass 440 | ``` 441 | 442 | #### `__init__` 443 | 444 | Initialize the address formatter. 445 | - `config`: (optional) override default config file to use for the address formatter, defaults to config file included in this package 446 | 447 | #### `format` 448 | 449 | Format an address in the default layout used in the specified country. Return value may contain line breaks. 
450 | - `address`: Dictionary that contains the address parts, see below for recognized keys 451 | - `country`: Country code of the formatting template to use 452 | 453 | Recognized keys in `address`: 454 | - `attention` 455 | - `house` 456 | - `road` 457 | - `house_number` 458 | - `postcode` 459 | - `city` 460 | - `town` 461 | - `village` 462 | - `county` 463 | - `state` 464 | - `country` 465 | - `suburb` 466 | - `city_district` 467 | - `state_district` 468 | - `state_code` 469 | - `neighbourhood` 470 | -------------------------------------------------------------------------------- /bin/address2coordinate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import json 5 | import sys 6 | import os 7 | 8 | try: 9 | from osmgeocoder import Geocoder 10 | except (ImportError, ModuleNotFoundError): 11 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 12 | from osmgeocoder import Geocoder 13 | 14 | 15 | parser = argparse.ArgumentParser(description='OSM Address search') 16 | parser.add_argument( 17 | '--config', 18 | type=str, 19 | nargs=1, 20 | dest='config', 21 | required=True, 22 | help='Config file to use' 23 | ) 24 | parser.add_argument( 25 | '--country', 26 | type=str, 27 | nargs=1, 28 | dest='country', 29 | help='Only search in this country' 30 | ) 31 | parser.add_argument( 32 | '--center', 33 | type=float, 34 | nargs=2, 35 | dest='center', 36 | help='Center coordinate to filter the results' 37 | ) 38 | parser.add_argument( 39 | 'address', 40 | type=str, 41 | help='Address to search' 42 | ) 43 | 44 | args = parser.parse_args() 45 | 46 | config = {} 47 | with open(args.config[0], "r") as fp: 48 | config = json.load(fp) 49 | 50 | geocoder = Geocoder(**config) 51 | 52 | kwargs = {} 53 | if args.center is not None: 54 | kwargs['center'] = (args.center[0], args.center[1]) 55 | if args.country is not None: 56 | kwargs['country'] = args.country[0] 57 | 
58 | results = geocoder.forward(args.address, **kwargs) 59 | 60 | print('Resolved "{}" to'.format(args.address)) 61 | for addr, lat, lon in results: 62 | addr = ', '.join(addr.split("\n")).strip() 63 | print(" - {} -> {}, {}".format(addr, lat, lon)) 64 | -------------------------------------------------------------------------------- /bin/coordinate2address.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import json 5 | import sys 6 | import os 7 | 8 | try: 9 | from osmgeocoder import Geocoder 10 | except (ImportError, ModuleNotFoundError): 11 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 12 | from osmgeocoder import Geocoder 13 | 14 | 15 | parser = argparse.ArgumentParser(description='OSM Coordinate search') 16 | parser.add_argument( 17 | '--config', 18 | type=str, 19 | nargs=1, 20 | dest='config', 21 | required=True, 22 | help='Config file to use' 23 | ) 24 | parser.add_argument( 25 | 'lat', 26 | type=float, 27 | help='Latitude to search' 28 | ) 29 | parser.add_argument( 30 | 'lon', 31 | type=float, 32 | help='Longitude to search' 33 | ) 34 | 35 | args = parser.parse_args() 36 | 37 | config = {} 38 | with open(args.config[0], "r") as fp: 39 | config = json.load(fp) 40 | 41 | geocoder = Geocoder(**config) 42 | address = next(geocoder.reverse(args.lat, args.lon)) 43 | address_line = ', '.join(address.split("\n")).strip() 44 | print('Resolved {}, {} to "{}"'.format( 45 | args.lat, args.lon, address_line 46 | )) 47 | -------------------------------------------------------------------------------- /bin/finalize_geocoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import io 5 | import os 6 | import math 7 | import random 8 | import argparse 9 | import subprocess 10 | import tempfile 11 | 12 | from time import time, sleep 13 | try: 14 | from urllib.parse 
import urlparse 15 | except (ImportError, ModuleNotFoundError): 16 | from urlparse import urlparse 17 | from pkg_resources import resource_exists, resource_listdir, resource_string 18 | 19 | import psycopg2 20 | from psycopg2.extras import DictCursor 21 | 22 | # 23 | # DB-Utility functions 24 | # 25 | 26 | def open_db(url, cursor_name=None): 27 | conn = psycopg2.connect(url, cursor_factory=DictCursor) 28 | if cursor_name is None: 29 | cursor = conn.cursor() 30 | else: 31 | cursor = conn.cursor(name=cursor_name) 32 | return cursor 33 | 34 | def load_sql(db, path): 35 | try: 36 | # assume we are in a virtualenv first 37 | if resource_exists('osmgeocoder', path): 38 | sql_files = list(resource_listdir('osmgeocoder', path)) 39 | sql_files.sort() 40 | for f in sql_files: 41 | print('Executing {}...'.format(f)) 42 | db.execute(resource_string('osmgeocoder', os.path.join(path, f))) 43 | except (ImportError, ModuleNotFoundError): 44 | # if not found, assume we have been started from a source checkout 45 | my_dir = os.path.dirname(os.path.abspath(__file__)) 46 | sql_path = os.path.abspath(os.path.join(my_dir, '../osmgeocoder/', path)) 47 | sql_files = [os.path.join(sql_path, f) for f in os.listdir(sql_path) if os.path.isfile(os.path.join(sql_path, f))] 48 | sql_files.sort() 49 | 50 | for f in sql_files: 51 | print('Executing {}...'.format(f)) 52 | with open(f, 'r') as fp: 53 | db.execute(fp.read()) 54 | 55 | 56 | def finish_db(db): 57 | load_sql(db, 'data/sql/geocoder') 58 | 59 | def close_db(db): 60 | conn = db.connection 61 | conn.commit() 62 | 63 | if db.name is None: 64 | db.close() 65 | conn.close() 66 | 67 | def dump(db_url, filename, threads): 68 | print('Dumping database into directory {}...'.format(filename)) 69 | parsed = urlparse(db_url) 70 | args = [ 71 | 'pg_dump', 72 | '-v', # verbose 73 | '-F', 'd', # directory type 74 | '-j', str(threads), # number of concurrent jobs 75 | '-Z', '9', # maximum compression 76 | '-O', # no owners 77 | '-x', # no privileges 78 | 
'-f', filename, # destination dir 79 | '-h', parsed.hostname, 80 | ] 81 | 82 | if parsed.port is not None: 83 | args.append('-p') 84 | args.append(str(parsed.port)) 85 | if parsed.username is not None: 86 | args.append('-U') 87 | args.append(parsed.username) 88 | args.append(parsed.path[1:]) 89 | print(" ".join(args)) 90 | subprocess.run(args) 91 | 92 | # 93 | # Cmdline interface 94 | # 95 | 96 | def parse_cmdline(): 97 | parser = argparse.ArgumentParser(description='Geocoder finishing script') 98 | parser.add_argument( 99 | '--db', 100 | type=str, 101 | dest='db_url', 102 | required=True, 103 | help='Postgis DB URL' 104 | ) 105 | parser.add_argument( 106 | '--dump', 107 | type=str, 108 | dest='dump_file', 109 | help='Dump the converted data into a pg_dump file to be imported on another server' 110 | ) 111 | 112 | return parser.parse_args() 113 | 114 | 115 | if __name__ == '__main__': 116 | args = parse_cmdline() 117 | db = open_db(args.db_url) 118 | finish_db(db) 119 | close_db(db) 120 | if args.dump_file: 121 | dump(args.db_url, args.dump_file, 4) 122 | print("Attention: When restoring the DB dump you will have to fix the search path") 123 | print("of the DB after importing!") 124 | print() 125 | print("ALTER DATABASE geocoder SET search_path TO public, gis, str, crypto;") 126 | 127 | -------------------------------------------------------------------------------- /bin/geocoder_service.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | try: 4 | from flask import Flask, jsonify, abort, request, Response 5 | from flask.json import dumps 6 | except (ImportError, ModuleNotFoundError): 7 | print("Error: Please install Flask, `pip install flask`") 8 | exit(1) 9 | 10 | import json 11 | import sys 12 | import os 13 | 14 | try: 15 | from osmgeocoder import Geocoder 16 | except (ImportError, ModuleNotFoundError): 17 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 18 | from 
osmgeocoder import Geocoder 19 | 20 | 21 | app = Flask(__name__) 22 | geocoder = None 23 | 24 | @app.before_first_request 25 | def init(): 26 | global geocoder 27 | 28 | # find config file 29 | config_file = os.environ.get('GEOCODER_CONFIG', None) 30 | if config_file is None: 31 | for location in ['~/.osmgeocoderrc', '~/.config/osmgeocoder.json', '/etc/osmgeocoder.json', 'osmgeocoder.json']: 32 | loc = os.path.expanduser(location) 33 | if os.path.exists(loc) and os.path.isfile(loc): 34 | config_file = loc 35 | break 36 | if config_file is None: 37 | raise RuntimeError("No config file found!") 38 | 39 | config = {} 40 | with open(config_file, "r") as fp: 41 | config = json.load(fp) 42 | 43 | geocoder = Geocoder(**config) 44 | 45 | 46 | @app.route('/forward', methods=['POST']) 47 | def forward(): 48 | if not request.is_json: 49 | abort(400) 50 | data = request.get_json() 51 | address = data.get('address', None) 52 | if address is None: 53 | abort(400) 54 | center = data.get('center', None) 55 | country = data.get('country', None) 56 | 57 | result = [] 58 | results = geocoder.forward(address, center=center, country=country) 59 | for addr, lat, lon in results: 60 | result.append({ 61 | "address": ', '.join(addr.split("\n")).strip(), 62 | "lat": lat, 63 | "lon": lon 64 | }) 65 | 66 | return jsonify(result) 67 | 68 | @app.route('/reverse', methods=['POST']) 69 | def reverse(): 70 | if not request.is_json: 71 | abort(400) 72 | data = request.get_json() 73 | lat = data.get('lat', None) 74 | lon = data.get('lon', None) 75 | if lat is None or lon is None: 76 | abort(400) 77 | 78 | try: 79 | address = next(geocoder.reverse(lat, lon)) 80 | except StopIteration: 81 | return Response(dumps({"error": { "code": 404, "message": "Not found" } }), status=404) 82 | 83 | return jsonify({ 84 | "address": ', '.join(address.split("\n")).strip() 85 | }) 86 | 87 | @app.route('/predict', methods=['POST']) 88 | def predict(): 89 | if not request.is_json: 90 | abort(400) 91 | data = 
request.get_json() 92 | query = data.get('query', None) 93 | if query is None: 94 | abort(400) 95 | 96 | predictions = list(geocoder.predict_text(query)) 97 | return jsonify({ 98 | "predictions": predictions 99 | }) 100 | 101 | # when running this script directly execute gunicorn to serve 102 | if __name__ == "__main__": 103 | os.execlp( 104 | "gunicorn", 105 | "gunicorn", 106 | "geocoder_service:app", 107 | "--bind", 108 | "127.0.0.1:8080" 109 | ) 110 | -------------------------------------------------------------------------------- /bin/import_openaddress_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import zipfile 4 | import csv 5 | import argparse 6 | import psycopg2 7 | import hashlib 8 | import random 9 | import uuid 10 | from time import time, sleep 11 | from psycopg2.extras import execute_batch 12 | import io 13 | import os 14 | from pprint import pprint 15 | from multiprocessing import Pool, Manager 16 | from itertools import zip_longest 17 | from sys import intern 18 | 19 | from tempfile import TemporaryFile 20 | 21 | import struct 22 | from binascii import hexlify 23 | from pyproj import Proj, transform 24 | import geohash 25 | 26 | PARTITION_SIZE = 360 27 | 28 | def grouper(n, iterable, fillvalue=None): 29 | "Collect data into fixed-length chunks or blocks" 30 | # grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx 31 | args = [iter(iterable)] * n 32 | return zip_longest(fillvalue=fillvalue, *args) 33 | 34 | class CountingTextIOWrapper(io.TextIOWrapper): 35 | """Wrapper for the TextIOWrapper to be able to count already consumed bytes""" 36 | 37 | def __init__(self, stream, encoding=None): 38 | super().__init__(stream, encoding=encoding) 39 | self.position = 0 40 | 41 | def read(self, *args, **kwargs): 42 | result = super().read(*args, **kwargs) 43 | self.position += len(result) 44 | return result 45 | 46 | def readline(self, *args, **kwargs): 47 | result = super().readline(*args, 
**kwargs) 48 | self.position += len(result) 49 | return result 50 | 51 | # 52 | # DB-Utility functions 53 | # 54 | 55 | def open_db(url, transaction=True): 56 | conn = psycopg2.connect(url) 57 | if not transaction: 58 | conn.autocommit = True 59 | cursor = conn.cursor() 60 | return cursor 61 | 62 | def clear_db(db): 63 | print('Cleaning up') 64 | db.execute(''' 65 | DROP VIEW IF EXISTS address_data; 66 | DROP TABLE IF EXISTS house; 67 | DROP TABLE IF EXISTS street; 68 | DROP TABLE IF EXISTS city; 69 | DROP TABLE IF EXISTS license; 70 | ''') 71 | 72 | def prepare_db(db): 73 | print('Creating tables...') 74 | db.execute(''' 75 | DO 76 | $$ 77 | BEGIN 78 | IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'coordinate_source') THEN 79 | CREATE TYPE coordinate_source AS ENUM ('openaddresses.io', 'openstreetmap'); 80 | END IF; 81 | END 82 | $$; 83 | 84 | CREATE TABLE IF NOT EXISTS public.oa_license ( 85 | id uuid PRIMARY KEY default gen_random_uuid(), 86 | website TEXT, 87 | license TEXT, 88 | attribution TEXT, 89 | "source" TEXT 90 | ); 91 | 92 | CREATE TABLE IF NOT EXISTS public.oa_city ( 93 | id uuid PRIMARY KEY, 94 | city TEXT, 95 | district TEXT, 96 | region TEXT, 97 | postcode TEXT, 98 | license_id uuid 99 | ); 100 | 101 | CREATE TABLE IF NOT EXISTS public.oa_street ( 102 | id uuid PRIMARY KEY, 103 | street TEXT, 104 | unit TEXT, 105 | city_id uuid 106 | ); 107 | 108 | CREATE TABLE IF NOT EXISTS public.oa_house ( 109 | id uuid, 110 | location gis.geometry(POINT, 3857), 111 | "name" TEXT, 112 | housenumber TEXT, 113 | geohash TEXT, 114 | street_id uuid, 115 | "source" coordinate_source 116 | ) PARTITION BY RANGE (ST_X(location)); 117 | 118 | -- 119 | -- Re-assembly of openaddresses.io data into one view 120 | -- 121 | CREATE OR REPLACE VIEW public.oa_address_data AS ( 122 | SELECT 123 | h.id, 124 | h."name", 125 | s.street, 126 | h.housenumber, 127 | c.postcode, 128 | c.city, 129 | location, 130 | h."source" 131 | FROM public.oa_house h 132 | JOIN 
public.oa_street s ON h.street_id = s.id 133 | JOIN public.oa_city c ON s.city_id = c.id 134 | ); 135 | ''') 136 | 137 | print('Creating shard tables...') 138 | min_val = -20026376.39 139 | max_val = 20026376.39 140 | val_inc = (max_val - min_val) / PARTITION_SIZE 141 | for i in range(0, PARTITION_SIZE): 142 | print(' {}" {} TO {}'.format( 143 | i, 144 | min_val + val_inc * i, 145 | min_val + val_inc * (i + 1) 146 | )) 147 | db.execute(''' 148 | CREATE TABLE IF NOT EXISTS public.oa_house_{} 149 | PARTITION OF public.oa_house FOR VALUES FROM ({}) TO ({}); 150 | '''.format( 151 | i, 152 | min_val + val_inc * i, 153 | min_val + val_inc * (i + 1) 154 | )) 155 | 156 | print('Dropping indices and constraints for speed improvement...') 157 | db.execute(''' 158 | ALTER TABLE public.oa_house DROP CONSTRAINT IF EXISTS house_street_id_fk; 159 | ALTER TABLE public.oa_city DROP CONSTRAINT IF EXISTS city_license_id_fk; 160 | ALTER TABLE public.oa_street DROP CONSTRAINT IF EXISTS street_city_id_fk; 161 | DROP INDEX IF EXISTS street_trgm_idx; 162 | DROP INDEX IF EXISTS city_trgm_idx; 163 | DROP INDEX IF EXISTS street_city_id_idx; 164 | 165 | DROP INDEX IF EXISTS house_street_id_idx; 166 | DROP INDEX IF EXISTS house_location_geohash_idx; 167 | DROP INDEX IF EXISTS house_trgm_idx; 168 | DROP INDEX IF EXISTS house_location_idx; 169 | DROP INDEX IF EXISTS house_id_idx; 170 | DROP INDEX IF EXISTS house_housenumber_idx; 171 | ''') 172 | 173 | def finalize_db(db, optimize=False): 174 | print('Finalizing import and cleaning up...') 175 | sql = [] 176 | 177 | if optimize is False: 178 | sql.extend([ 179 | ('house: Street ID index', 'CREATE INDEX IF NOT EXISTS house_street_id_idx ON public.oa_house USING BTREE(street_id);'), 180 | ('street: City ID index', 'CREATE INDEX IF NOT EXISTS street_city_id_idx ON public.oa_street USING BTREE(city_id);'), 181 | ('house: Update planner statistics', 'ANALYZE public.oa_house;'), 182 | ('city: Update planner statistics', 'ANALYZE public.oa_city;'), 183 
| ('street: Update planner statistics', 'ANALYZE public.oa_street;') 184 | ]) 185 | 186 | sql.extend([ 187 | ('house: FK constraint', 'ALTER TABLE public.oa_house ADD CONSTRAINT house_street_id_fk FOREIGN KEY (street_id) REFERENCES public.oa_street (id) ON DELETE CASCADE ON UPDATE CASCADE INITIALLY DEFERRED;'), 188 | ('city: FK constraint', 'ALTER TABLE public.oa_city ADD CONSTRAINT city_license_id_fk FOREIGN KEY (license_id) REFERENCES public.oa_license (id) ON DELETE CASCADE ON UPDATE CASCADE INITIALLY DEFERRED;'), 189 | ('street: FK constraint', 'ALTER TABLE public.oa_street ADD CONSTRAINT street_city_id_fk FOREIGN KEY (city_id) REFERENCES public.oa_city (id) ON DELETE CASCADE ON UPDATE CASCADE INITIALLY DEFERRED;') 190 | ]) 191 | 192 | for log, item in sql: 193 | print(' - {}'.format(log)) 194 | db.execute(item) 195 | 196 | 197 | def cluster_worker(id, db_url): 198 | # open all connections and inputs 199 | db = open_db(db_url) 200 | 201 | print('{i}, '.format(i=id), end='', flush=True) 202 | db.execute('CLUSTER public.oa_house_{i} USING oa_house_{i}_geohash_idx;'.format(i=id)) 203 | 204 | close_db(db) 205 | 206 | 207 | def optimize_db(db, threads, url): 208 | print('Adding indices on tables...') 209 | sql = [ 210 | ('house: Geohash', 'CREATE INDEX IF NOT EXISTS house_location_geohash_idx ON public.oa_house USING BTREE(geohash);'), 211 | #('Cluster house on geohash', 'CLUSTER house USING house_location_geohash_idx;'), 212 | ('house: House number trigram index', 'CREATE INDEX IF NOT EXISTS house_trgm_idx ON public.oa_house USING GIN (housenumber gin_trgm_ops);'), 213 | ('house: Spatial index on location', 'CREATE INDEX IF NOT EXISTS house_location_idx ON public.oa_house USING GIST(location);'), 214 | ('house: Btree index house number', 'CREATE INDEX IF NOT EXISTS house_housenumber_idx ON public.oa_house USING BTREE(housenumber);'), 215 | ('house: Btree index id', 'CREATE INDEX IF NOT EXISTS house_id_idx ON public.oa_house USING BTREE(id);'), 216 | ('house: Update 
planner statistics', 'ANALYZE public.oa_house;'), 217 | 218 | ('city: Trigram index name', 'CREATE INDEX IF NOT EXISTS city_trgm_idx ON public.oa_city USING GIN (city gin_trgm_ops);'), 219 | ('city: Btree Postcode', 'CREATE INDEX IF NOT EXISTS city_postcode_idx ON public.oa_city USING BTREE(postcode);'), 220 | ('city: Btree name', 'CREATE INDEX IF NOT EXISTS city_city_idx ON public.oa_city USING BTREE(city);'), 221 | ('city: Update planner statistics', 'ANALYZE public.oa_city;'), 222 | 223 | ('street: Trigram index name', 'CREATE INDEX IF NOT EXISTS street_trgm_idx ON public.oa_street USING GIN (street gin_trgm_ops);'), 224 | ('street: Update planner statistics', 'ANALYZE public.oa_street;'), 225 | ] 226 | 227 | for log, item in sql: 228 | print(' - {}'.format(log)) 229 | db.execute(item) 230 | 231 | print('Clustering house tables...') 232 | queue = [] 233 | for i in range(0, PARTITION_SIZE): 234 | queue.append((i, url)) 235 | 236 | # run and wait for all import threads to exit 237 | if threads == 1: 238 | for f in queue: 239 | cluster_worker(*f) 240 | else: 241 | with Pool(threads, maxtasksperchild=1) as p: 242 | p.starmap(cluster_worker, queue, 1) 243 | print('ok') 244 | 245 | finalize_db(db) 246 | 247 | def close_db(db): 248 | conn = db.connection 249 | if conn.autocommit is False: 250 | conn.commit() 251 | 252 | db.close() 253 | conn.close() 254 | 255 | # 256 | # Data importer 257 | # 258 | 259 | def save_license(record, db): 260 | sql = 'INSERT INTO public.oa_license (website, license, attribution, "source") VALUES (%s, %s, %s, %s) RETURNING id;' 261 | db.execute(sql, ( 262 | record['website'], 263 | record['license'], 264 | record['attribution'], 265 | record['file'] 266 | )) 267 | return db.fetchone()[0] 268 | 269 | 270 | def import_licenses(license_data, db): 271 | licenses = {} 272 | licenses['osm'] = save_license({ 273 | 'file': 'osm', 274 | 'license': 'Open Data Commons Open Database License (ODbL)', 275 | 'attribution': '© OpenStreetMap contributors', 
276 | 'website': 'https://www.openstreetmap.org/copyright' 277 | }, db) 278 | 279 | lines = license_data.split(b"\n")[2:] # skip header 280 | 281 | record = { 282 | 'file': None, 283 | 'website': None, 284 | 'license': None, 285 | 'attribution': None 286 | } 287 | for line in lines: 288 | if line.startswith(b'Website:'): 289 | record['website'] = line[8:].decode('utf-8').strip() 290 | elif line.startswith(b'License:'): 291 | record['license'] = line[8:].decode('utf-8').strip() 292 | elif line.startswith(b'Required attribution:'): 293 | a = line[21:].decode('utf-8').strip() 294 | if a != 'Yes': 295 | record['attribution'] = a 296 | elif len(line) == 0: 297 | # if record['license'] == 'Unknown': 298 | # continue 299 | fname = record['file'] + '.csv' 300 | licenses[fname] = save_license(record, db) 301 | print('Saved license for {}: {}'.format( 302 | fname, licenses[fname] 303 | )) 304 | 305 | record = { 306 | 'file': None, 307 | 'website': None, 308 | 'license': None, 309 | 'attribution': None 310 | } 311 | else: 312 | record['file'] = line.decode('utf-8').strip() 313 | 314 | return licenses 315 | 316 | 317 | def import_license_from_readme(readme_data, fname, db): 318 | licenses = {} 319 | 320 | lines = readme_data.split(b"\n")[2:] # skip header 321 | 322 | record = { 323 | 'file': None, 324 | 'website': None, 325 | 'license': None, 326 | 'attribution': None 327 | } 328 | for line in lines: 329 | if line.startswith(b'Website:'): 330 | record['website'] = line[8:].decode('utf-8').strip() 331 | elif line.startswith(b'License:'): 332 | record['license'] = line[8:].decode('utf-8').strip() 333 | elif line.startswith(b'Required attribution:'): 334 | a = line[21:].decode('utf-8').strip() 335 | if a != 'Yes': 336 | record['attribution'] = a 337 | 338 | licenses[fname] = save_license(record, db) 339 | print('Saved license for {}: {}'.format( 340 | fname, licenses[fname] 341 | )) 342 | return licenses 343 | 344 | 345 | def import_csv(csv_stream, size, license_id, name, db, 
line): 346 | # space optimization, reference these strings instead of copying them 347 | key_city = intern('city') 348 | key_streets = intern('streets') 349 | key_street = intern('street') 350 | key_houses = intern('houses') 351 | key_id = intern('id') 352 | 353 | print("\033[{line};0H\033[KPreparing data for {name}, 0%...".format(line=line, name=name)) 354 | 355 | # projection setup, we need WebMercator 356 | mercProj = Proj(init='epsg:3857') 357 | 358 | # Wrap the byte stream into a TextIOWrapper, we have subclassed it to count 359 | # the consumed bytes for progress display 360 | wrapped = CountingTextIOWrapper(csv_stream, encoding='utf8') 361 | reader = csv.reader(wrapped) 362 | 363 | # skip header 364 | reader.__next__() 365 | 366 | cities = {} 367 | timeout = time() # status update timeout 368 | for row in reader: 369 | row = [s.title() for s in row] 370 | 371 | # build a street hash 372 | strt = intern(hashlib.md5( 373 | (row[3] + 374 | row[4]).encode('utf8') 375 | ).hexdigest()) 376 | 377 | # build city hash 378 | cty = intern(hashlib.md5( 379 | (row[5] + 380 | row[6] + 381 | row[7] + 382 | row[8]).encode('utf8') 383 | ).hexdigest()) 384 | 385 | # add city if not already in the list 386 | if cty not in cities: 387 | cities[cty] = { 388 | key_id: str(uuid.uuid4()), 389 | key_city: ( 390 | row[5], 391 | row[6], 392 | row[7], 393 | row[8].upper() 394 | ), 395 | key_streets: {} 396 | } 397 | 398 | # add street if not already in the list 399 | if strt not in cities[cty][key_streets]: 400 | cities[cty][key_streets][strt] = { 401 | key_id: str(uuid.uuid4()), 402 | key_street: ( 403 | row[3], 404 | row[4], 405 | ), 406 | key_houses: {} 407 | } 408 | 409 | # add house to street 410 | cities[cty][key_streets][strt][key_houses][row[2]] = (row[0], row[1]) 411 | 412 | # status update 413 | if time() - timeout > 1.0: 414 | percentage = round(wrapped.position / size * 100.0, 2) 415 | print("\033[{line};0H\033[KPreparing data for {name}, {percentage} %...".format( 416 | 
line=line, name=name, percentage=percentage 417 | )) 418 | timeout = time() 419 | 420 | # force cleaning up to avoid memory bloat 421 | del reader 422 | del wrapped 423 | del csv_stream 424 | 425 | # create a new temporary file for the house data as we use the postgres COPY command with that 426 | # for speed reasons 427 | city_file = TemporaryFile(mode='w+', buffering=16*1024*1024) 428 | street_file = TemporaryFile(mode='w+', buffering=16*1024*1024) 429 | house_file = TemporaryFile(mode='w+', buffering=16*1024*1024) 430 | 431 | # start insertion cycle 432 | print("\033[{line};0H\033[KInserting data for {name}...".format(line=line, name=name)) 433 | 434 | city_count = 0 435 | row_count = 0 436 | timeout = time() 437 | start = timeout 438 | for key, item in cities.items(): 439 | city_count += 1 440 | 441 | # save city to temp file and fetch the id 442 | row_count += 1 443 | city_file.write(item[key_id]) 444 | for value in item[key_city]: 445 | city_file.write('\t') 446 | if value is not None and value != '': 447 | city_file.write(value.replace('\\', '\\x5c')) 448 | else: 449 | city_file.write(' ') 450 | city_file.write('\t') 451 | city_file.write(license_id) 452 | city_file.write('\n') 453 | 454 | city_id = item[key_id] 455 | 456 | # save street to temp file and fetch ids 457 | for street in item[key_streets].values(): 458 | row_count += 1 459 | 460 | # we need the id 461 | street_file.write(street[key_id]) 462 | for value in street[key_street]: 463 | street_file.write('\t') 464 | if value is not None and value != '': 465 | street_file.write(value.replace('\\', '\\x5c')) 466 | else: 467 | street_file.write(' ') 468 | street_file.write('\t') 469 | street_file.write(city_id) 470 | street_file.write('\n') 471 | 472 | street_id = street[key_id] 473 | 474 | # houses will not be inserted right away but saved to the temp file 475 | for nr, location in street[key_houses].items(): 476 | row_count += 1 477 | 478 | # project into 3857 (mercator) from 4326 (WGS84) 479 | x, y = 
mercProj(*location) 480 | 481 | # id 482 | house_file.write(str(uuid.uuid4())) 483 | house_file.write('\t') 484 | 485 | # create wkb representation, theoretically we could use shapely 486 | # but we try to not spam newly created objects here 487 | 488 | # ewkb header + srid 489 | house_file.write('0101000020110F0000') 490 | 491 | # coordinate 492 | house_file.write((hexlify(struct.pack(' 1.0: 518 | eta = round((len(cities) / city_count * (time() - start)) - (time() - start)) 519 | percentage = round((city_count / len(cities) * 100), 2) 520 | print("\033[{line};0H\033[K - {name:40}, {percentage:>6}%, {row_count:>6} rows/second, eta: {eta:>5} seconds".format( 521 | line=line, 522 | name=name, 523 | percentage=percentage, 524 | row_count=row_count, 525 | eta=eta 526 | )) 527 | row_count = 0 528 | timeout = time() 529 | 530 | del cities 531 | 532 | # now COPY the contents of the temp file into the DB 533 | print("\033[{line};0H\033[K -> Running copy from tempfile for city ({size} MB)...".format( 534 | line=line, 535 | size=round(city_file.tell() / 1024 / 1024, 2) 536 | )) 537 | city_file.seek(0) 538 | db.copy_from(city_file, 'public.oa_city', columns=('id', 'city', 'district', 'region', 'postcode', 'license_id')) 539 | 540 | print("\033[{line};0H\033[K -> Running copy from tempfile for street ({size} MB)...".format( 541 | line=line, 542 | size=round(street_file.tell() / 1024 / 1024, 2) 543 | )) 544 | street_file.seek(0) 545 | db.copy_from(street_file, 'public.oa_street', columns=('id', 'street', 'unit', 'city_id')) 546 | 547 | print("\033[{line};0H\033[K -> Running copy from tempfile for house ({size} MB)...".format( 548 | line=line, 549 | size=round(house_file.tell() / 1024 / 1024, 2) 550 | )) 551 | house_file.seek(0) 552 | db.copy_from(house_file, 'public.oa_house', columns=('id', 'location', 'housenumber', 'geohash', 'source', 'street_id')) 553 | 554 | # cleanup 555 | print("\033[{line};0H\033[K -> Inserting for {name} took {elapsed} seconds.".format( 556 | 
def import_data(filename, threads, db_url, optimize, fast):
    """Import all licensed CSV files of an OpenAddresses.io archive.

    Args:
        filename: path of the zipped data file.
        threads: number of worker processes; ``1`` runs the import inline.
        db_url: database URL used for every connection opened here and
            passed on to each worker.
        optimize: forwarded to ``finalize_db``.
        fast: when true, ``finalize_db`` is skipped.

    Raises:
        ValueError: if the archive has neither ``LICENSE.txt`` nor a
            ``README.txt`` accompanying exactly one CSV file.
    """
    # prepare database (drop indices and constraints for speed)
    # NOTE: use the db_url parameter, not the module-global `args`, so the
    # function also works when it is not called from the __main__ section
    db = open_db(db_url)
    prepare_db(db)

    # insert license data; the context manager closes the archive even when
    # the license lookup below raises
    with zipfile.ZipFile(filename) as z:
        names = z.namelist()
        files = sorted(
            f for f in names
            if not f.startswith('summary/') and f.endswith('.csv')
        )
        if 'LICENSE.txt' in names:
            licenses = import_licenses(z.read('LICENSE.txt'), db)
        elif 'README.txt' in names and len(files) == 1:
            licenses = import_license_from_readme(z.read('README.txt'), files[0], db)
        else:
            raise ValueError("Data file does not contain LICENSE.txt or README.txt which is required for licensing information")

    close_db(db)
    sleep(1)

    # prepare the work queue
    manager = Manager()
    status_object = manager.dict()

    import_queue = []
    for f in files:
        if f not in licenses:
            print('Skipping {}, no license data'.format(f))
            continue
        # -1 marks "no status line assigned yet"; worker() picks one
        status_object[f] = -1
        import_queue.append((filename, f, licenses[f], db_url, status_object))

    print("\033[2J")
    # worker() derives its terminal line from the non-negative values stored
    # in this dict; this entry makes sure line 0 always counts as taken
    status_object['__dummy__'] = 0

    # run and wait for all import threads to exit
    if threads == 1:
        for f in import_queue:
            worker(*f)
    else:
        with Pool(threads, maxtasksperchild=1) as p:
            p.starmap(worker, import_queue, 1)

    # clear screen, finalize db (re-create constraints and associated indices)
    print("\033[2J\033[1;0H\033[K")
    db = open_db(db_url)
    if not fast:
        finalize_db(db, optimize)
    close_db(db)
def parse_cmdline():
    """Build the importer's argument parser and parse ``sys.argv``.

    Returns the ``argparse.Namespace`` with the attributes ``db_url``,
    ``threads``, ``clean``, ``optimize``, ``finalize``, ``fast`` and
    ``datafile``.
    """
    parser = argparse.ArgumentParser(description='OpenAddresses.io data importer')

    parser.add_argument('--db', type=str, dest='db_url', required=True,
                        help='Postgis DB URL')
    parser.add_argument('--threads', type=int, dest='threads', default=1,
                        help='Number of import threads')

    # boolean switches, all off by default: (flag, destination, help text)
    switches = (
        ('--clean-start', 'clean',
         'Drop tables before importing'),
        ('--optimize', 'optimize',
         'Re-create indices and cluster the tables on the indices for speed, you can not import any more data after running optimize'),
        ('--finalize', 'finalize',
         'Finalize DB (automatically called when using optimize, use optimize instead)'),
        ('--fast', 'fast',
         'Skip finalizing the Database as this is a multi part import'),
    )
    for flag, destination, description in switches:
        parser.add_argument(flag, dest=destination, default=False,
                            action='store_true', help=description)

    # the data file is optional so that e.g. --optimize can run on its own
    parser.add_argument('datafile', type=str, nargs='?', default=None,
                        help='OpenAddresses.io data file (zipped)')

    return parser.parse_args()
@app.route('/split', methods=['POST'])
def split():
    """Expand an address query and parse every expansion into components.

    Expects a JSON object with the key ``query`` and the optional keys
    ``language`` and ``country``. Responds with a JSON list holding one
    object per expanded variant, mapping each label returned by
    ``parse_address`` to its value.

    Aborts with HTTP 400 when the request is not JSON, when the JSON body
    is not an object, or when ``query`` is missing.
    """
    if not request.is_json:
        abort(400)
    data = request.get_json()
    # a malformed payload is a client error, not an internal server error
    if not isinstance(data, dict) or 'query' not in data:
        abort(400)
    query = data['query']
    language = data.get('language')
    country = data.get('country')

    # expand address
    if language is not None:
        variants = expand_address(query, languages=[language])
    else:
        variants = expand_address(query)

    # then parse; parse_address yields (value, label) pairs and a label seen
    # more than once keeps its last value
    result = [
        {
            key: value
            for value, key in parse_address(variant, language=language, country=country)
        }
        for variant in variants
    ]

    return jsonify(result)
"gunicorn", 109 | "postal_service:app", 110 | "--bind", 111 | "127.0.0.1:{}".format(config['postal']['port']) 112 | ) 113 | -------------------------------------------------------------------------------- /bin/prepare_osm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import io 5 | import os 6 | import math 7 | import random 8 | import argparse 9 | import subprocess 10 | import tempfile 11 | 12 | from time import time, sleep 13 | try: 14 | from urllib.parse import urlparse 15 | except (ImportError, ModuleNotFoundError): 16 | from urlparse import urlparse 17 | 18 | from pkg_resources import resource_exists, resource_listdir, resource_string 19 | 20 | import psycopg2 21 | from psycopg2.extras import DictCursor 22 | 23 | # 24 | # DB-Utility functions 25 | # 26 | 27 | def open_db(url, cursor_name=None): 28 | conn = psycopg2.connect(url, cursor_factory=DictCursor) 29 | if cursor_name is None: 30 | cursor = conn.cursor() 31 | else: 32 | cursor = conn.cursor(name=cursor_name) 33 | return cursor 34 | 35 | def load_sql(db, path): 36 | try: 37 | # assume we are in a virtualenv first 38 | if resource_exists('osmgeocoder', path): 39 | sql_files = list(resource_listdir('osmgeocoder', path)) 40 | sql_files.sort() 41 | for f in sql_files: 42 | print('Executing {}... 
def get_mapping_file():
    """Locate the imposm mapping file.

    Returns a ``(path, temp)`` tuple. When the mapping is available as a
    package resource it is copied into a named temporary file: ``path`` is
    that file's name and ``temp`` the open file object, which the caller
    has to close. When looking up the resource raises ``ImportError`` the
    path inside the source checkout is returned and ``temp`` is ``None``.
    If the resource lookup succeeds but the resource does not exist, both
    values are ``None``.
    """
    package = 'osmgeocoder'
    resource = 'data/imposm_mapping.yml'

    path, handle = None, None
    try:
        # assume we are in a virtualenv first
        if resource_exists(package, resource):
            payload = resource_string(package, resource)
            handle = tempfile.NamedTemporaryFile()
            handle.write(payload)
            handle.seek(0)
            path = handle.name
    except (ImportError, ModuleNotFoundError):
        # if not found, assume we have been started from a source checkout
        script_dir = os.path.dirname(os.path.abspath(__file__))
        relative = os.path.join(script_dir, '../osmgeocoder/data/imposm_mapping.yml')
        path = os.path.abspath(relative)
    return path, handle
def imposm_write(db_url, tmp_dir, optimize):
    """Run ``imposm import -write`` to load the cached data into the DB.

    Args:
        db_url: database URL; a ``postgres://`` or ``postgresql://`` scheme
            is rewritten to ``postgis://`` before it is handed to imposm.
        tmp_dir: directory that holds the ``imposm3`` cache directory.
        optimize: when true, ``-optimize`` is added to the command line.
    """
    mapping_file, temp = get_mapping_file()

    try:
        # Only the URL scheme may be rewritten. A plain str.replace() would
        # also mangle user, host or database names that contain "postgres"
        # and would turn "postgresql://" into "postgisql://".
        scheme, separator, remainder = db_url.partition('://')
        if separator and scheme in ('postgres', 'postgresql'):
            connection = 'postgis://' + remainder
        else:
            connection = db_url

        args = [
            'imposm',
            'import',
            '-connection',
            connection,
            '-mapping',
            mapping_file,
            '-cachedir',
            os.path.join(tmp_dir, 'imposm3'),
            '-write',
        ]
        if optimize:
            args.append('-optimize')
        args.append('-deployproduction')
        print(args)
        subprocess.run(args)
    finally:
        # release the temporary mapping file even if imposm could not be run
        if temp is not None:
            temp.close()
__name__ == '__main__': 187 | args = parse_cmdline() 188 | db = open_db(args.db_url) 189 | prepare_db(db) 190 | if args.data_files: 191 | imposm_read(args.data_files, args.tmp) 192 | imposm_write(args.db_url, args.tmp, args.optimize) 193 | if args.optimize: 194 | optimize_db(db) 195 | close_db(db) -------------------------------------------------------------------------------- /example_setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # run this script as the `postgresql` user to be able to create the DB 3 | 4 | # set this to the desired db-password 5 | geocoder_password='***' 6 | 7 | # you'll need about 100GB space here: 8 | workdir='/var/tmp/osm' 9 | 10 | mkdir -p "$workdir" 11 | cd "$workdir" 12 | 13 | # openstreetmap 14 | wget http://download.geofabrik.de/europe-latest.osm.pbf 15 | 16 | # openaddresses.io europe 17 | wget https://s3.amazonaws.com/data.openaddresses.io/openaddr-collected-europe.zip 18 | wget https://s3.amazonaws.com/data.openaddresses.io/openaddr-collected-europe-sa.zip 19 | 20 | # openaddresses.io switzerland 21 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/212683/ch/aargau.zip 22 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/596898/ch/basel-land.zip 23 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/597245/ch/basel-stadt.zip 24 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/595882/ch/bern.zip 25 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/595777/ch/countrywide.zip 26 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/401113/ch/fribourg.zip 27 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/597073/ch/geneva.zip 28 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/597654/ch/glarus.zip 29 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/597399/ch/grisons.zip 30 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/288191/ch/luzern.zip 31 | wget 
https://s3.amazonaws.com/data.openaddresses.io/runs/597655/ch/schaffhausen.zip 32 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/597121/ch/solothurn.zip 33 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/597854/ch/uri.zip 34 | wget https://s3.amazonaws.com/data.openaddresses.io/runs/597074/ch/zurich.zip 35 | 36 | # imposm binary 37 | wget https://github.com/omniscale/imposm3/releases/download/v0.8.1/imposm-0.8.1-linux-x86-64.tar.gz 38 | tar xvzf imposm-0.8.1-linux-x86-64.tar.gz 39 | export PATH="$PATH:/var/tmp/osm/imposm-0.8.1-linux-x86-64" 40 | 41 | # install python stuff 42 | python3 -m venv geocoder-env 43 | . ./geocoder-env/bin/activate 44 | 45 | # fetch geocoder scripts 46 | git clone https://github.com/dunkelstern/osmgeocoder.git 47 | cd osmgeocoder 48 | git checkout develop 49 | pip install --upgrade pip 50 | pip install wheel 51 | pip install -r requirements.txt 52 | 53 | # create geocoding db 54 | psql < search_term) ASC 62 | LIMIT max_results; 63 | $$ LANGUAGE 'sql'; 64 | 65 | -- 66 | -- geocode by searching road-names only 67 | -- 68 | -- optionally only search in an area around `center` (with the `radius` specified) 69 | -- this function is used when a country search term is supplied (e.g. 
--
-- geocode by searching road-names only
--
-- optionally only search in an area around `center` (with the `radius` specified)
-- this function is used when a country search term is supplied (i.e. country is not NULL)
--
-- This query is a bit slower than just searching by center and radius as there will be a costly
-- intersection with the country polygon which can be rather large (5MB for germany for example)
--
-- The function is created schema-qualified in `public`, matching the DROP below and the
-- call in public.geocode_by_road_osm; an unqualified CREATE would place it in whatever
-- schema comes first in the current search_path.
--
DROP FUNCTION IF EXISTS public._geocode_by_road_with_country_osm(
    search_term TEXT, search_housenumber TEXT, max_results int,
    center gis.geometry(point), radius int, country TEXT);
CREATE OR REPLACE FUNCTION public._geocode_by_road_with_country_osm(
    search_term TEXT,
    search_housenumber TEXT,
    max_results int,
    center gis.geometry(point),
    radius int,
    country TEXT
)
RETURNS SETOF public.address_and_distance AS
$$
DECLARE
    country_poly gis.geometry;
BEGIN
    -- prefetch the country polygon to avoid doing a join in the query
    SELECT public._geocode_get_country_polygon(country) INTO country_poly;

    -- column order has to match public.address_and_distance
    RETURN QUERY SELECT
        NULL::text AS house,
        s.name::text as road,
        h.house_number::text,
        c.postcode::text,
        NULLIF(c.name, '')::text as city,
        NULLIF(a6.name, '')::text as county,
        NULLIF(a4.name, '')::text as "state",
        h.geometry::gis.geometry(point, 3857),
        gis.ST_Distance(h.geometry, center) as distance,
        '00000000-0000-0000-0000-000000000000'::uuid as license_id
    FROM
        public.osm_struct_streets s
        JOIN public.osm_struct_cities c ON s.city_id = c.id
        JOIN public.osm_struct_house h ON h.street_id = s.id
        LEFT JOIN public.osm_admin a4 ON gis.ST_Contains(a4.geometry, h.geometry::gis.geometry(point, 3857)) and a4.admin_level = 4
        LEFT JOIN public.osm_admin a6 ON gis.ST_Contains(a6.geometry, h.geometry::gis.geometry(point, 3857)) and a6.admin_level = 6
    WHERE
        (center IS NULL OR gis.ST_DWithin(h.geometry, center, radius)) -- only search around center if center is not null
        AND gis.ST_Within(gis.ST_Centroid(h.geometry), country_poly) -- intersect with country polygon
        AND s.name % search_term
        AND (search_housenumber IS NULL OR h.house_number % search_housenumber)
    ORDER BY
        distance ASC,
        (s.name <-> search_term) ASC
    LIMIT max_results; -- limit here to avoid performing the joins on all rows
END;
$$ LANGUAGE 'plpgsql';
public._geocode_by_city_without_country_osm( 169 | search_term TEXT, search_housenumber TEXT, search_city TEXT, 170 | max_results int, center gis.geometry(point), radius int 171 | ); 172 | CREATE OR REPLACE FUNCTION public._geocode_by_city_without_country_osm( 173 | search_term TEXT, 174 | search_housenumber TEXT, 175 | search_city TEXT, 176 | max_results int, 177 | center gis.geometry(point), 178 | radius int 179 | ) 180 | RETURNS SETOF public.address_and_distance AS 181 | $$ 182 | SELECT 183 | NULL::text AS house, 184 | s.name::text as road, 185 | h.house_number::text, 186 | c.postcode::text, 187 | NULLIF(c.name, '')::text as city, 188 | NULLIF(a6.name, '')::text as county, 189 | NULLIF(a4.name, '')::text as "state", 190 | h.geometry::gis.geometry(point, 3857), 191 | gis.ST_Distance(h.geometry, center) as distance, 192 | '00000000-0000-0000-0000-000000000000'::uuid as license_id 193 | FROM 194 | public.osm_struct_streets s 195 | JOIN public.osm_struct_cities c ON s.city_id = c.id 196 | JOIN public.osm_struct_house h ON h.street_id = s.id 197 | LEFT JOIN public.osm_admin a4 ON gis.ST_Contains(a4.geometry, h.geometry::gis.geometry(point, 3857)) and a4.admin_level = 4 198 | LEFT JOIN public.osm_admin a6 ON gis.ST_Contains(a6.geometry, h.geometry::gis.geometry(point, 3857)) and a6.admin_level = 6 199 | WHERE 200 | (center IS NULL OR gis.ST_DWithin(h.geometry, center, radius)) -- only search around center if center is not null 201 | AND c.name % search_city 202 | AND s.name % search_term 203 | AND (search_housenumber IS NULL OR h.house_number % search_housenumber) 204 | ORDER BY 205 | distance ASC, 206 | (s.name <-> search_term) ASC 207 | LIMIT max_results; 208 | $$ LANGUAGE 'sql'; 209 | 210 | 211 | -- 212 | -- geocode by searching road-names in combination with a city 213 | -- 214 | -- optionally only search in an area around `center` (with the `radius` specified) 215 | -- this function is used when a country search term is supplied (e.g. 
--
-- geocode by searching road-names in combination with a city
--
-- optionally only search in an area around `center` (with the `radius` specified)
-- this function is used when a country search term is supplied (i.e. country is not NULL)
--
-- This query is a bit slower than just searching by center and radius as there will be a costly
-- intersection with the country polygon which can be rather large (5MB for germany for example)
--
-- Returns at most `max_results` rows of public.address_and_distance, nearest to `center` first.
-- NOTE(review): with `center` NULL the distance column is presumably NULL as well - confirm.
--
DROP FUNCTION IF EXISTS public._geocode_by_city_with_country_osm(
    search_term TEXT, search_housenumber TEXT, search_city TEXT,
    max_results int, center gis.geometry(point), radius int, country TEXT
);
CREATE OR REPLACE FUNCTION public._geocode_by_city_with_country_osm(
    search_term TEXT,
    search_housenumber TEXT,
    search_city TEXT,
    max_results int,
    center gis.geometry(point),
    radius int,
    country TEXT
)
RETURNS SETOF public.address_and_distance AS
$$
DECLARE
    country_poly gis.geometry;
BEGIN
    -- prefetch the country polygon to avoid doing a join in the query
    SELECT public._geocode_get_country_polygon(country) INTO country_poly;

    -- column order has to match public.address_and_distance;
    -- admin_level 4 is reported as "state", admin_level 6 as county
    RETURN QUERY SELECT
        NULL::text AS house,
        s.name::text as road,
        h.house_number::text,
        c.postcode::text,
        NULLIF(c.name, '')::text as city,
        NULLIF(a6.name, '')::text as county,
        NULLIF(a4.name, '')::text as "state",
        h.geometry::gis.geometry(point, 3857),
        gis.ST_Distance(h.geometry, center) as distance,
        '00000000-0000-0000-0000-000000000000'::uuid as license_id
    FROM
        public.osm_struct_streets s
        JOIN public.osm_struct_cities c ON s.city_id = c.id
        JOIN public.osm_struct_house h ON h.street_id = s.id
        LEFT JOIN public.osm_admin a4 ON gis.ST_Contains(a4.geometry, h.geometry::gis.geometry(point, 3857)) and a4.admin_level = 4
        LEFT JOIN public.osm_admin a6 ON gis.ST_Contains(a6.geometry, h.geometry::gis.geometry(point, 3857)) and a6.admin_level = 6
    WHERE
        (center IS NULL OR gis.ST_DWithin(h.geometry, center, radius)) -- only search around center if center is not null
        AND gis.ST_Within(gis.ST_Centroid(h.geometry), country_poly) -- intersect with country polygon
        -- `%` / `<->` are presumably the pg_trgm similarity / distance operators - confirm
        AND c.name % search_city
        AND s.name % search_term
        AND (search_housenumber IS NULL OR h.house_number % search_housenumber)
    ORDER BY
        distance ASC,
        (s.name <-> search_term) ASC
    LIMIT max_results;
END
$$ LANGUAGE 'plpgsql';
--
-- geocode by searching road-names in combination with a postcode
--
-- optionally only search in an area around `center` (with the `radius` specified)
-- this function is used when no country search term is supplied
--
-- This query is quicker than matching against the country polygon additionally, but can be
-- imprecise when the address is near a country border
--
-- Returns at most `max_results` rows of public.address_and_distance, nearest to `center` first.
--
DROP FUNCTION IF EXISTS public._geocode_by_postcode_without_country_osm(
    search_term TEXT, search_housenumber TEXT, search_postcode TEXT,
    max_results int, center gis.geometry(point), radius int
);
CREATE OR REPLACE FUNCTION public._geocode_by_postcode_without_country_osm(
    search_term TEXT,
    search_housenumber TEXT,
    search_postcode TEXT,
    max_results int,
    center gis.geometry(point),
    radius int
)
RETURNS SETOF public.address_and_distance AS
$$
    -- column order has to match public.address_and_distance;
    -- admin_level 4 is reported as "state", admin_level 6 as county
    SELECT
        NULL::text AS house,
        s.name::text as road,
        h.house_number::text,
        c.postcode::text,
        NULLIF(c.name, '')::text as city,
        NULLIF(a6.name, '')::text as county,
        NULLIF(a4.name, '')::text as "state",
        h.geometry::gis.geometry(point, 3857),
        gis.ST_Distance(h.geometry, center) as distance,
        '00000000-0000-0000-0000-000000000000'::uuid as license_id
    FROM
        public.osm_struct_streets s
        JOIN public.osm_struct_cities c ON s.city_id = c.id
        JOIN public.osm_struct_house h ON h.street_id = s.id
        LEFT JOIN public.osm_admin a4 ON gis.ST_Contains(a4.geometry, h.geometry::gis.geometry(point, 3857)) and a4.admin_level = 4
        LEFT JOIN public.osm_admin a6 ON gis.ST_Contains(a6.geometry, h.geometry::gis.geometry(point, 3857)) and a6.admin_level = 6
    WHERE
        (center IS NULL OR gis.ST_DWithin(h.geometry, center, radius)) -- only search around center if center is not null
        -- `%` / `<->` are presumably the pg_trgm similarity / distance operators - confirm
        AND s.name % search_term
        AND c.postcode % search_postcode
        AND (search_housenumber IS NULL OR h.house_number % search_housenumber)
    ORDER BY
        distance ASC,
        (s.name <-> search_term) ASC
    LIMIT max_results;
$$ LANGUAGE 'sql';
specified) 360 | -- this function is used when a country search term is supplied 361 | -- 362 | -- This query is a bit slower than just searching by center and radius as there will be a costly 363 | -- intersection with the country polygon which can be rather large (5MB for germany for example) 364 | -- 365 | DROP FUNCTION IF EXISTS public._geocode_by_postcode_with_country_osm( 366 | search_term TEXT, search_housenumber TEXT, search_postcode TEXT, 367 | max_results int, center gis.geometry(point), radius int, country TEXT 368 | ); 369 | CREATE OR REPLACE FUNCTION public._geocode_by_postcode_with_country_osm( 370 | search_term TEXT, 371 | search_housenumber TEXT, 372 | search_postcode TEXT, 373 | max_results int, 374 | center gis.geometry(point), 375 | radius int, 376 | country TEXT 377 | ) 378 | RETURNS SETOF public.address_and_distance AS 379 | $$ 380 | DECLARE 381 | country_poly gis.geometry; 382 | BEGIN 383 | -- prefetch the country polygon to avoid doing a join in the query 384 | SELECT public._geocode_get_country_polygon(country) INTO country_poly; 385 | 386 | RETURN QUERY SELECT 387 | NULL::text AS house, 388 | s.name::text as road, 389 | h.house_number::text, 390 | c.postcode::text, 391 | NULLIF(c.name, '')::text as city, 392 | NULLIF(a6.name, '')::text as county, 393 | NULLIF(a4.name, '')::text as "state", 394 | h.geometry::gis.geometry(point, 3857), 395 | gis.ST_Distance(h.geometry, center) as distance, 396 | '00000000-0000-0000-0000-000000000000'::uuid as license_id 397 | FROM 398 | public.osm_struct_streets s 399 | JOIN public.osm_struct_cities c ON s.city_id = c.id 400 | JOIN public.osm_struct_house h ON h.street_id = s.id 401 | LEFT JOIN public.osm_admin a4 ON gis.ST_Contains(a4.geometry, h.geometry::gis.geometry(point, 3857)) and a4.admin_level = 4 402 | LEFT JOIN public.osm_admin a6 ON gis.ST_Contains(a6.geometry, h.geometry::gis.geometry(point, 3857)) and a6.admin_level = 6 403 | WHERE 404 | (center IS NULL OR gis.ST_DWithin(h.geometry, center, radius))
-- only search around center if center is not null 405 | AND gis.ST_Within(gis.ST_Centroid(h.geometry), country_poly) -- intersect with country polygon 406 | AND s.name % search_term 407 | AND c.postcode % search_postcode 408 | AND (search_housenumber IS NULL OR h.house_number % search_housenumber) 409 | ORDER BY 410 | distance ASC, 411 | (s.name <-> search_term) ASC 412 | LIMIT max_results; 413 | END; 414 | $$ LANGUAGE 'plpgsql'; 415 | 416 | 417 | -- 418 | -- Convenience switching function that calls the correct detail function 419 | -- 420 | -- This is the external interface to the forward geocoder 421 | -- 422 | DROP FUNCTION IF EXISTS public.geocode_by_postcode_osm( 423 | search_term TEXT, search_housenumber TEXT, search_postcode TEXT, 424 | max_results int, center gis.geometry(point), radius int, country TEXT 425 | ); 426 | CREATE OR REPLACE FUNCTION public.geocode_by_postcode_osm( 427 | search_term TEXT, 428 | search_housenumber TEXT, 429 | search_postcode TEXT, 430 | max_results int, 431 | center gis.geometry(point), 432 | radius int, 433 | country TEXT 434 | ) 435 | RETURNS SETOF public.address_and_distance AS 436 | $$ 437 | DECLARE 438 | BEGIN 439 | IF country IS NULL THEN 440 | -- no country, use the simplified and fast functions 441 | RETURN QUERY SELECT * FROM public._geocode_by_postcode_without_country_osm(search_term, search_housenumber, search_postcode, max_results, center, radius); 442 | ELSE 443 | -- have a country, use the more precise but slower functions 444 | RETURN QUERY SELECT * FROM public._geocode_by_postcode_with_country_osm(search_term, search_housenumber, search_postcode, max_results, center, radius, country); 445 | END IF; 446 | END; 447 | $$ LANGUAGE 'plpgsql'; 448 | 449 | 450 | -- 451 | -- Convenience switching function that calls the correct detail function 452 | -- 453 | -- This is the external interface to the forward geocoder 454 | -- 455 | DROP FUNCTION IF EXISTS public.geocode_osm( 456 | search_term TEXT, search_housenumber
TEXT, search_postcode TEXT, 457 | search_city TEXT, max_results int, center gis.geometry(point), 458 | radius int, country TEXT 459 | ); 460 | CREATE OR REPLACE FUNCTION public.geocode_osm( 461 | search_term TEXT, 462 | search_housenumber TEXT, 463 | search_postcode TEXT, 464 | search_city TEXT, 465 | max_results int, 466 | center gis.geometry(point), 467 | radius int, 468 | country TEXT 469 | ) 470 | RETURNS SETOF public.address_and_distance AS 471 | $$ 472 | DECLARE 473 | BEGIN 474 | IF search_postcode IS NOT NULL THEN 475 | RETURN QUERY SELECT * FROM public.geocode_by_postcode_osm( 476 | search_term, search_housenumber, search_postcode, max_results, 477 | center, radius, country 478 | ); 479 | RETURN; 480 | END IF; 481 | IF search_city IS NOT NULL THEN 482 | RETURN QUERY SELECT * FROM public.geocode_by_city_osm( 483 | search_term, search_housenumber, search_city, max_results, 484 | center, radius, country 485 | ); 486 | RETURN; 487 | END IF; 488 | 489 | RETURN QUERY SELECT * FROM public.geocode_by_road_osm( 490 | search_term, search_housenumber, max_results, center, radius, 491 | country 492 | ); 493 | END; 494 | $$ LANGUAGE 'plpgsql'; 495 | 496 | -- SELECT * FROM geocode_osm('Georgenstr', '34', NULL, 'Amberg', 10, NULL, NULL, NULL); -------------------------------------------------------------------------------- /osmgeocoder/data/sql/geocoder/004-reverse_geocoding.sql: -------------------------------------------------------------------------------- 1 | DO 2 | $$ 3 | DECLARE 4 | oa_exists boolean; 5 | osm_exists boolean; 6 | BEGIN 7 | SELECT EXISTS ( 8 | SELECT 1 9 | FROM information_schema.tables 10 | WHERE table_schema = 'public' 11 | AND table_name = 'oa_city' 12 | ) INTO oa_exists; 13 | 14 | SELECT EXISTS ( 15 | SELECT 1 16 | FROM information_schema.tables 17 | WHERE table_schema = 'public' 18 | AND table_name = 'osm_struct_cities' 19 | ) INTO osm_exists; 20 | 21 | IF oa_exists THEN 22 | -- 23 | -- Geocode a point to the nearest address 24 | -- This is the 
Openaddresses.io version for finding an address. 25 | -- 26 | DROP FUNCTION IF EXISTS public.point_to_address_oa(point gis.geometry(point), radius float); 27 | CREATE OR REPLACE FUNCTION public.point_to_address_oa(point gis.geometry(point), radius float) 28 | RETURNS SETOF public.address_and_distance AS 29 | $func$ 30 | SELECT 31 | h.name AS house, 32 | s.street as road, 33 | h.housenumber as house_number, 34 | c.postcode, 35 | c.city, 36 | NULL as county, 37 | NULL as "state", 38 | location, 39 | gis.ST_Distance(location, point) as distance, 40 | c.license_id 41 | FROM public.oa_house h 42 | JOIN public.oa_street s ON h.street_id = s.id 43 | JOIN public.oa_city c ON s.city_id = c.id 44 | WHERE 45 | gis.ST_X(location) >= gis.ST_X(point) - radius 46 | AND gis.ST_X(location) <= gis.ST_X(point) + radius 47 | AND gis.ST_DWithin(location, point, radius) -- only search within radius 48 | ORDER BY gis.ST_Distance(location, point) -- order by distance to point 49 | $func$ LANGUAGE 'sql'; 50 | ELSE 51 | DROP FUNCTION IF EXISTS public.point_to_address_oa(point gis.geometry(point), radius float); 52 | CREATE OR REPLACE FUNCTION public.point_to_address_oa(point gis.geometry(point), radius float) 53 | RETURNS SETOF public.address_and_distance AS 54 | $func$ 55 | SELECT NULL::public.address_and_distance LIMIT 0; -- return an empty set 56 | $func$ LANGUAGE 'sql'; 57 | END IF; 58 | 59 | IF osm_exists THEN 60 | -- 61 | -- Geocode a point to the nearest address 62 | -- This is the OpenStreetMap version for finding an address. 
63 | -- 64 | DROP FUNCTION IF EXISTS public.point_to_address_osm(point gis.geometry(point), radius float); 65 | CREATE OR REPLACE FUNCTION public.point_to_address_osm(point gis.geometry(point), radius float) 66 | RETURNS SETOF public.address_and_distance AS 67 | $func$ 68 | SELECT 69 | NULL::text AS house, 70 | s.name as road, 71 | h.house_number, 72 | c.postcode, 73 | c.name as city, 74 | NULLIF(a6.name, '')::text as county, 75 | NULLIF(a4.name, '')::text as "state", 76 | h.geometry as location, 77 | gis.ST_Distance(h.geometry, point) as distance, 78 | '00000000-0000-0000-0000-000000000000'::uuid as license_id 79 | FROM public.osm_struct_house h 80 | JOIN public.osm_struct_streets s ON h.street_id = s.id 81 | JOIN public.osm_struct_cities c ON s.city_id = c.id 82 | LEFT JOIN public.osm_admin a4 ON gis.ST_Contains(a4.geometry, h.geometry::gis.geometry(point, 3857)) and a4.admin_level = 4 83 | LEFT JOIN public.osm_admin a6 ON gis.ST_Contains(a6.geometry, h.geometry::gis.geometry(point, 3857)) and a6.admin_level = 6 84 | WHERE 85 | gis.ST_X(h.geometry) >= gis.ST_X(point) - radius 86 | AND gis.ST_X(h.geometry) <= gis.ST_X(point) + radius 87 | AND gis.ST_DWithin(h.geometry, point, radius) -- only search within radius 88 | ORDER BY gis.ST_Distance(h.geometry, point) -- order by distance to point 89 | $func$ LANGUAGE 'sql'; 90 | ELSE 91 | DROP FUNCTION IF EXISTS public.point_to_address_osm(point gis.geometry(point), radius float); 92 | CREATE OR REPLACE FUNCTION public.point_to_address_osm(point gis.geometry(point), radius float) 93 | RETURNS SETOF public.address_and_distance AS 94 | $func$ 95 | SELECT NULL::public.address_and_distance LIMIT 0; -- return an empty set 96 | $func$ LANGUAGE 'sql'; 97 | END IF; 98 | END; 99 | $$ LANGUAGE 'plpgsql'; 100 | 101 | -- SELECT * FROM point_to_address_osm(ST_Transform(ST_SetSRID(ST_MakePoint(9.738889, 47.550535), 4326), 3857), 250) LIMIT 10; 102 | -------------------------------------------------------------------------------- 
/osmgeocoder/data/sql/optimize/001-osm_house_number_to_struct_house.sql: -------------------------------------------------------------------------------- 1 | -- copy table 2 | DROP TABLE IF EXISTS public.osm_struct_house; 3 | SELECT crypto.gen_random_uuid() AS id, osm_id, city, postcode, street, house_number, geometry INTO public.osm_struct_house FROM public.osm_house_number; 4 | 5 | CREATE INDEX IF NOT EXISTS osm_buildings_house_number_idx ON public.osm_buildings USING BTREE(house_number); 6 | ANALYZE public.osm_buildings; -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/002-osm_buildings_to_struct_house.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX IF NOT EXISTS osm_buildings_empty_house_number_idx ON public.osm_buildings((house_number <> '')) WHERE house_number <> ''; 2 | ANALYZE public.osm_buildings; 3 | 4 | INSERT INTO public.osm_struct_house (id, osm_id, city, postcode, street, house_number, geometry) 5 | SELECT 6 | crypto.gen_random_uuid() AS id, 7 | b.osm_id, 8 | '' AS city, 9 | p.postcode, 10 | b.street, 11 | b.house_number, 12 | gis.ST_Centroid(b.geometry) AS geometry 13 | FROM public.osm_buildings b 14 | JOIN public.osm_postal_code p ON gis.ST_Within(gis.ST_Centroid(b.geometry), p.geometry) 15 | WHERE b.house_number <> ''; -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/003-struct_house_index.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX osm_struct_house_city_idx ON public.osm_struct_house USING BTREE(city); 2 | CREATE INDEX osm_struct_house_postcode_idx ON public.osm_struct_house USING BTREE(postcode); 3 | CREATE INDEX osm_struct_house_street_idx ON public.osm_struct_house USING BTREE(street); 4 | 5 | ANALYZE public.osm_struct_house; --------------------------------------------------------------------------------
/osmgeocoder/data/sql/optimize/004-fill_street_only_entries.sql: -------------------------------------------------------------------------------- 1 | -- update street only entries 2 | UPDATE public.osm_struct_house h SET postcode = p.postcode 3 | FROM public.osm_postal_code p 4 | WHERE 5 | h.city = '' 6 | AND h.postcode = '' 7 | AND gis.ST_Within(h.geometry, p.geometry); -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/005-fill_postcode_only_entries.sql: -------------------------------------------------------------------------------- 1 | -- update postcode only entries 2 | UPDATE public.osm_struct_house h SET city = a.name 3 | FROM public.osm_admin a 4 | WHERE 5 | h.city = '' 6 | AND h.postcode <> '' 7 | AND a.admin_level = 8 8 | AND gis.ST_Within(h.geometry, a.geometry); 9 | 10 | UPDATE public.osm_struct_house h SET city = a.name 11 | FROM public.osm_admin a 12 | WHERE 13 | h.city = '' 14 | AND h.postcode <> '' 15 | AND a.admin_level = 6 16 | AND gis.ST_Within(h.geometry, a.geometry); -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/006-extract_cities.sql: -------------------------------------------------------------------------------- 1 | -- drop calculated tables 2 | DROP TABLE IF EXISTS public.osm_struct_streets; 3 | DROP TABLE IF EXISTS public.osm_struct_cities; 4 | 5 | -- extract cities 6 | SELECT 7 | crypto.gen_random_uuid() AS id, 8 | city AS name, 9 | postcode, 10 | gis.ST_SetSRID(gis.ST_Extent(geometry), 3857) AS extent 11 | INTO public.osm_struct_cities 12 | FROM public.osm_struct_house 13 | WHERE city <> '' OR postcode <> '' 14 | GROUP BY city, postcode; -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/007-index_cities.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE public.osm_struct_cities ADD PRIMARY KEY (id); 2 
| 3 | CREATE INDEX osm_struct_cities_name_idx ON public.osm_struct_cities USING BTREE(name); 4 | CREATE INDEX osm_struct_cities_postcode_idx ON public.osm_struct_cities USING BTREE(postcode); 5 | CREATE INDEX osm_struct_cities_extent_idx ON public.osm_struct_cities USING GIST(extent); 6 | 7 | ANALYZE public.osm_struct_cities; 8 | 9 | -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/008-update_struct_house_with_city.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE public.osm_struct_house ADD COLUMN city_id uuid REFERENCES public.osm_struct_cities (id); 2 | 3 | UPDATE public.osm_struct_house h 4 | SET city_id = c.id 5 | FROM public.osm_struct_cities c 6 | WHERE 7 | h.city = c.name 8 | AND h.postcode = c.postcode; 9 | 10 | CREATE INDEX osm_struct_house_city_id_idx ON public.osm_struct_house USING BTREE(city_id); 11 | ANALYZE public.osm_struct_house; -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/009-extract_streets.sql: -------------------------------------------------------------------------------- 1 | -- extract streets 2 | SELECT 3 | crypto.gen_random_uuid() AS id, 4 | street AS name, 5 | city_id, 6 | gis.ST_SetSRID(gis.ST_Extent(geometry), 3857) AS extent 7 | INTO public.osm_struct_streets 8 | FROM public.osm_struct_house 9 | GROUP BY city_id, street; -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/010-index_streets.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE public.osm_struct_streets ADD PRIMARY KEY (id); 2 | 3 | CREATE INDEX osm_struct_streets_name_idx ON public.osm_struct_streets USING BTREE(name); 4 | CREATE INDEX osm_struct_streets_city_idx ON public.osm_struct_streets USING BTREE(city_id); 5 | CREATE INDEX osm_struct_streets_extent_idx ON 
public.osm_struct_streets USING GIST(extent); 6 | 7 | ALTER TABLE public.osm_struct_house ADD COLUMN street_id uuid REFERENCES public.osm_struct_streets (id); 8 | -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/011-update_struct_house_with_street.sql: -------------------------------------------------------------------------------- 1 | UPDATE public.osm_struct_house h 2 | SET street_id = s.id 3 | FROM public.osm_struct_streets s 4 | WHERE 5 | s.city_id = h.city_id 6 | AND s.name = h.street; 7 | 8 | CREATE INDEX osm_struct_house_street_id_idx ON public.osm_struct_house USING BTREE(street_id); -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/012-geometry_for_streets.sql: -------------------------------------------------------------------------------- 1 | -- fetch geometry for street from osm_roads 2 | ALTER TABLE public.osm_struct_streets ADD COLUMN geometry gis.geometry(linestring, 3857); 3 | 4 | UPDATE public.osm_struct_streets s SET geometry = r.geometry 5 | FROM public.osm_roads r 6 | WHERE 7 | r.street = s.name 8 | AND gis.ST_Intersects(s.extent, gis.ST_SetSRID(gis.Box2D(r.geometry), 3857)); 9 | -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/013-geometry_for_cities.sql: -------------------------------------------------------------------------------- 1 | -- fetch geometry for city from osm_postal_code, falling back to osm_admin (admin_level 8) 2 | ALTER TABLE public.osm_struct_cities ADD COLUMN geometry gis.geometry(geometry, 3857); 3 | 4 | UPDATE public.osm_struct_cities c SET geometry = p.geometry 5 | FROM public.osm_postal_code p 6 | WHERE 7 | c.geometry IS NULL 8 | AND p.postcode = c.postcode 9 | AND gis.ST_Intersects(c.extent, gis.ST_SetSRID(gis.Box2D(p.geometry), 3857)); 10 | 11 | UPDATE public.osm_struct_cities c SET geometry = a.geometry 12 | FROM public.osm_admin a 13 | WHERE 14 | c.geometry IS NULL 15 | AND a.name
= c.name 16 | AND a.admin_level = 8 17 | AND gis.ST_Intersects(c.extent, gis.ST_SetSRID(gis.Box2D(a.geometry), 3857)); -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/014-cleanup_struct_house_table.sql: -------------------------------------------------------------------------------- 1 | -- clean up 2 | ALTER TABLE public.osm_struct_house DROP COLUMN city, DROP COLUMN postcode, DROP COLUMN street, DROP COLUMN city_id; 3 | ANALYZE public.osm_struct_cities; 4 | ANALYZE public.osm_struct_house; 5 | ANALYZE public.osm_struct_streets; -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/015-cluster_struct_house_table.sql: -------------------------------------------------------------------------------- 1 | -- drop indices for performance while clustering 2 | DROP INDEX IF EXISTS osm_struct_house_postcode_idx; 3 | DROP INDEX IF EXISTS osm_struct_house_city_id_idx; 4 | DROP INDEX IF EXISTS osm_struct_house_street_id_idx; 5 | 6 | CREATE INDEX osm_struct_house_geohash_idx ON public.osm_struct_house USING BTREE(gis.ST_Geohash(gis.ST_Transform(geometry, 4326))); 7 | CLUSTER public.osm_struct_house USING osm_struct_house_geohash_idx; 8 | 9 | -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/016-add_indices_to_struct_tables.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX osm_struct_house_street_id_idx ON public.osm_struct_house USING BTREE(street_id); 2 | 3 | CREATE INDEX osm_struct_house_geometry ON public.osm_struct_house USING GIST(geometry); 4 | CREATE INDEX osm_struct_street_geometry ON public.osm_struct_streets USING GIST(geometry); 5 | CREATE INDEX osm_struct_city_geometry ON public.osm_struct_cities USING GIST(geometry); 6 | 7 | ANALYZE public.osm_struct_house; 8 | ANALYZE public.osm_struct_streets; 9 | ANALYZE public.osm_struct_cities;
-------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/017-update_city_names_from_osm.sql: -------------------------------------------------------------------------------- 1 | DO 2 | $$ 3 | DECLARE 4 | r record; 5 | oa_exists boolean; 6 | BEGIN 7 | SELECT EXISTS ( 8 | SELECT 1 9 | FROM information_schema.tables 10 | WHERE table_schema = 'public' 11 | AND table_name = 'oa_city' 12 | ) INTO oa_exists; 13 | 14 | IF oa_exists THEN 15 | FOR r IN 16 | SELECT x.id, a."name" AS city 17 | FROM ( 18 | SELECT 19 | c.id, 20 | c.city, 21 | gis.ST_Centroid(gis.ST_Collect(array_agg(h.location))) AS centroid 22 | FROM public.oa_city c 23 | JOIN public.oa_street s ON c.id = s.city_id 24 | JOIN public.oa_house h ON s.id = h.street_id 25 | WHERE c.city = '' 26 | GROUP BY c.id 27 | ) x 28 | JOIN public.osm_admin a ON (a.admin_level = 8 AND gis.ST_Contains(a.geometry, x.centroid)) 29 | LOOP 30 | UPDATE public.oa_city SET city = r.city WHERE id = r.id; 31 | END LOOP; 32 | END IF; 33 | END; 34 | $$ LANGUAGE 'plpgsql'; 35 | -------------------------------------------------------------------------------- /osmgeocoder/data/sql/optimize/018-manual_city_name_updates.sql: -------------------------------------------------------------------------------- 1 | DO 2 | $$ 3 | DECLARE 4 | oa_exists boolean; 5 | BEGIN 6 | SELECT EXISTS ( 7 | SELECT 1 8 | FROM information_schema.tables 9 | WHERE table_schema = 'public' 10 | AND table_name = 'oa_city' 11 | ) INTO oa_exists; 12 | 13 | IF oa_exists THEN 14 | UPDATE public.oa_city SET city = 'Bucharest' WHERE license_id = (SELECT id FROM public.oa_license WHERE source = 'ro/bucharest' LIMIT 1); 15 | UPDATE public.oa_city SET city = 'Wien' WHERE license_id = (SELECT id FROM public.oa_license WHERE source = 'at/city_of_vienna' LIMIT 1); 16 | UPDATE public.oa_city SET city = 'Köln' WHERE license_id = (SELECT id FROM public.oa_license WHERE source = 'de/nw/city_of_cologne' LIMIT 1); 17 | END IF; 18 | END; 
19 | $$ LANGUAGE 'plpgsql'; -------------------------------------------------------------------------------- /osmgeocoder/data/sql/prepare/001-custom_types.sql: -------------------------------------------------------------------------------- 1 | -- 2 | -- Custom address and distance type to avoid repetition 3 | -- 4 | 5 | DO 6 | $$ 7 | BEGIN 8 | IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'address_and_distance') THEN 9 | CREATE TYPE public.address_and_distance AS ( 10 | house text, 11 | road text, 12 | house_number text, 13 | postcode text, 14 | city text, 15 | county text, 16 | "state" text, 17 | location gis.geometry(point, 3857), 18 | distance float, 19 | license_id uuid 20 | ); 21 | END IF; 22 | END 23 | $$; 24 | 25 | -- 26 | -- Used for attribution messages 27 | -- 28 | DO 29 | $$ 30 | BEGIN 31 | IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'coordinate_source') THEN 32 | CREATE TYPE public.coordinate_source AS ENUM ('openaddresses.io', 'openstreetmap'); 33 | END IF; 34 | END 35 | $$; 36 | -------------------------------------------------------------------------------- /osmgeocoder/data/worldwide.yml: -------------------------------------------------------------------------------- 1 | # 2 | # This file is from https://github.com/OpenCageData/address-formatting 3 | # 4 | 5 | # 6 | # generic mappings, specific territories get mapped to these 7 | # 8 | # postcode before city 9 | generic1: &generic1 | 10 | {{{attention}}} 11 | {{{house}}} 12 | {{{road}}} {{{house_number}}} 13 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} {{/first}} 14 | {{{country}}} 15 | 16 | # postcode after city 17 | generic2: &generic2 | 18 | {{{attention}}} 19 | {{{house}}} 20 | {{{house_number}}} {{{road}}} 21 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{county}}} {{/first}} {{{postcode}}} 22 | {{#first}} {{{country}}} || {{{state}}} {{/first}} 23 | 24 | # postcode before city 25 | generic3: &generic3 | 26 | 
{{{attention}}} 27 | {{{house}}} 28 | {{{house_number}}} {{{road}}} 29 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} {{/first}} 30 | {{{country}}} 31 | 32 | # postcode after state 33 | generic4: &generic4 | 34 | {{{attention}}} 35 | {{{house}}} 36 | {{{house_number}}} {{{road}}} 37 | {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}}, {{#first}} {{{state_code}}} || {{{state}}} {{/first}} {{{postcode}}} 38 | {{{country}}} 39 | 40 | # no postcode 41 | generic5: &generic5 | 42 | {{{attention}}} 43 | {{{house}}} 44 | {{{house_number}}} {{{road}}} 45 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 46 | {{#first}} {{{state_district}}} || {{{state}}} {{/first}} 47 | {{{country}}} 48 | 49 | # no postcode, county 50 | generic6: &generic6 | 51 | {{{attention}}} 52 | {{{house}}} 53 | {{{house_number}}} {{{road}}} 54 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 55 | {{{county}}} 56 | {{{country}}} 57 | 58 | # city, postcode 59 | generic7: &generic7 | 60 | {{{attention}}} 61 | {{{house}}} 62 | {{{road}}} {{{house_number}}} 63 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}}, {{{postcode}}} 64 | {{{country}}} 65 | 66 | # postcode and county 67 | generic8: &generic8 | 68 | {{{attention}}} 69 | {{{house}}} 70 | {{{road}}}, {{{house_number}}} 71 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} {{#first}} {{{county_code}}} || {{{county}}} {{/first}} 72 | {{{country}}} 73 | 74 | generic9: &generic9 | 75 | {{{attention}}} 76 | {{{house}}} 77 | {{{road}}} {{{house_number}}} 78 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{state_district}}} {{/first}} 79 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} {{/first}} 80 | {{{country}}} 81 | 82 | generic10: &generic10 | 83 | {{{attention}}} 84 | {{{house}}} 85 | {{{road}}} {{{house_number}}} 86 | {{{suburb}}} 87 | {{#first}} {{{city}}} 
|| {{{town}}} || {{{village}}} {{/first}} 88 | {{{state}}} 89 | {{{country}}} 90 | {{{postcode}}} 91 | 92 | generic11: &generic11 | 93 | {{{country}}} 94 | {{{state}}} 95 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 96 | {{{suburb}}} 97 | {{{road}}}, {{{house_number}}} 98 | {{{house}}} 99 | {{{attention}}} 100 | 101 | # city - postcode 102 | generic12: &generic12 | 103 | {{{attention}}} 104 | {{{house}}} 105 | {{{house_number}}}, {{{road}}} 106 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{state_district}}} {{/first}} 107 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} - {{{postcode}}} 108 | {{{state}}} 109 | {{{country}}} 110 | 111 | generic13: &generic13 | 112 | {{{attention}}} 113 | {{{house}}} 114 | {{{house_number}}} {{{road}}} 115 | {{#first}} {{{suburb}}} || {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} || {{{region}}} {{/first}} {{#first}} {{{state_code}}} || {{{state}}} {{/first}} {{{postcode}}} 116 | {{{country}}} 117 | 118 | # postcode and state 119 | generic14: &generic14 | 120 | {{{attention}}} 121 | {{{house}}} 122 | {{{house_number}}} {{{road}}} 123 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state_district}}} {{/first}} 124 | {{{state}}} 125 | {{{country}}} 126 | 127 | # postcode and comma before house number 128 | generic15: &generic15 | 129 | {{{attention}}} 130 | {{{house}}} 131 | {{{road}}}, {{{house_number}}} 132 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} || {{{county}}} {{/first}} 133 | {{{country}}} 134 | 135 | # no postcode, no state, just city 136 | generic16: &generic16 | 137 | {{{attention}}} 138 | {{{house}}} 139 | {{{house_number}}} {{{road}}} 140 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{county}}} || {{{state_district}}} || {{{state}}} {{/first}} 141 | {{{country}}} 142 | 143 | # no postcode, no state, just city 144 | generic17: &generic17 | 145 | {{{attention}}} 
146 | {{{house}}} 147 | {{{road}}} {{{house_number}}} 148 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{county}}} || {{{state_district}}} || {{{state}}} {{/first}} 149 | {{{country}}} 150 | 151 | # no postcode, just city comma after house number 152 | generic18: &generic18 | 153 | {{{attention}}} 154 | {{{house}}} 155 | {{{house_number}}}, {{{road}}} 156 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} || {{{state}}} {{/first}} 157 | {{{country}}} 158 | 159 | # suburb and postcode after city 160 | generic19: &generic19 | 161 | {{{attention}}} 162 | {{{house}}} 163 | {{{road}}} {{{house_number}}} 164 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 165 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} {{{postcode}}} 166 | {{{country}}} 167 | 168 | # suburb and postcode after city 169 | generic20: &generic20 | 170 | {{{attention}}} 171 | {{{house}}} 172 | {{{house_number}}} {{{road}}} 173 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 174 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} {{{postcode}}} 175 | {{{country}}} 176 | 177 | # suburb and city, no postcode 178 | generic21: &generic21 | 179 | {{{attention}}} 180 | {{{house}}} 181 | {{{road}}} {{{house_number}}} 182 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 183 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} {{/first}} 184 | {{{country}}} 185 | 186 | # comma after housenumber, postcode before city 187 | generic22: &generic22 | 188 | {{{attention}}} 189 | {{{house}}} 190 | {{{house_number}}}, {{{road}}} 191 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} {{/first}} 192 | {{{country}}} 193 | 194 | fallback1: &fallback1 | 195 | {{{attention}}} 196 | {{{house}}} 197 | {{{road}}} {{{house_number}}} 198 | {{#first}} {{{suburb}}} || 
{{{city_district}}} || {{{neighbourhood}}} {{/first}} 199 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 200 | {{#first}} {{{county}}} || {{{state_district}}} || {{{state}}} {{/first}} 201 | {{{country}}} 202 | 203 | fallback2: &fallback2 | 204 | {{{attention}}} 205 | {{{house}}} 206 | {{{road}}} {{{house_number}}} 207 | {{{suburb}}} 208 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{county}}} || {{{island}}} {{/first}}, {{#first}} {{{state_code}}} || {{{state}}} {{/first}} 209 | {{{country}}} 210 | 211 | fallback3: &fallback3 | 212 | {{{attention}}} 213 | {{{house}}} 214 | {{{road}}} {{{house_number}}} 215 | {{{suburb}}} 216 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 217 | {{{county}}} 218 | {{{state}}} 219 | {{{country}}} 220 | 221 | fallback4: &fallback4 | 222 | {{{attention}}} 223 | {{{house}}} 224 | {{{road}}} {{{house_number}}} 225 | {{{suburb}}} 226 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 227 | {{#first}} {{{state}}} || {{{county}}} {{/first}} 228 | {{{country}}} 229 | 230 | default: 231 | address_template: *generic1 232 | fallback_template: *fallback1 233 | 234 | # country / territory specific mappings 235 | # please keep in alpha order by country code 236 | # 237 | 238 | 239 | # Andorra 240 | AD: 241 | address_template: *generic3 242 | 243 | # United Arab Emirates 244 | AE: 245 | address_template: *generic5 246 | 247 | # Afghanistan 248 | AF: 249 | address_template: *generic21 250 | 251 | # Antigua and Barbuda 252 | AG: 253 | address_template: *generic16 254 | 255 | # Anguilla 256 | AI: 257 | address_template: | 258 | {{{attention}}} 259 | {{{house}}} 260 | {{{road}}} {{{house_number}}} 261 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 262 | {{{postcode}}} {{{country}}} 263 | 264 | # Albania 265 | AL: 266 | address_template: | 267 | {{{attention}}} 268 | {{{house}}} 269 | {{{road}}} {{{house_number}}} 270 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} ||
{{{state_district}}} || {{{village}}} {{/first}} 271 | {{{country}}} 272 | postformat_replace: 273 | # fix the postcode to add - after numbers 274 | - ["\n(\\d{4}) ([^,]*)\n","\n$1-$2\n"] 275 | 276 | # Armenia 277 | AM: 278 | address_template: | 279 | {{{attention}}} 280 | {{{house}}} 281 | {{{house_number}}} {{{road}}} 282 | {{{postcode}}} 283 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 284 | {{#first}} {{{state_district}}} || {{{state}}} {{/first}} 285 | {{{country}}} 286 | 287 | # Angola 288 | AO: 289 | address_template: *generic7 290 | 291 | # Antarctica 292 | AQ: 293 | address_template: | 294 | {{{attention}}} 295 | {{{house}}} 296 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 297 | {{#first}} {{{country}}} || {{{continent}}} {{/first}} 298 | 299 | # Argentina 300 | AR: 301 | address_template: *generic9 302 | replace: 303 | - ["^Autonomous City of ",""] 304 | postformat_replace: 305 | # fix the postcode to make it \w\d\d\d\d \w\w\w 306 | - ["\n(\\w\\d{4})(\\w{3}) ","\n$1 $2 "] 307 | 308 | # American Samoa 309 | AS: 310 | use_country: US 311 | change_country: United States of America 312 | add_component: state=American Samoa 313 | 314 | # Austria 315 | AT: 316 | address_template: *generic1 317 | 318 | # Australia 319 | AU: 320 | address_template: *generic13 321 | 322 | # Aruba 323 | AW: 324 | address_template: *generic17 325 | 326 | # Åland Islands, part of Finland 327 | AX: 328 | use_country: FI 329 | change_country: Åland, Finland 330 | 331 | # Azerbaijan 332 | AZ: 333 | address_template: *generic3 334 | 335 | # Bosnia 336 | BA: 337 | address_template: *generic1 338 | 339 | # Barbados 340 | BB: 341 | address_template: *generic16 342 | 343 | # Bangladesh 344 | BD: 345 | address_template: | 346 | {{{attention}}} 347 | {{{house}}} 348 | {{{house_number}}} {{{road}}} 349 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{state_district}}} {{/first}} 350 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}}
{{/first}} - {{{postcode}}} 351 | {{{country}}} 352 | 353 | # Belgium 354 | BE: 355 | address_template: *generic1 356 | 357 | # Burkina Faso 358 | BF: 359 | address_template: *generic6 360 | 361 | # Bulgaria 362 | BG: 363 | address_template: *generic9 364 | 365 | # Bahrain 366 | BH: 367 | address_template: *generic2 368 | 369 | # Burundi 370 | BI: 371 | address_template: *generic17 372 | 373 | # Benin 374 | BJ: 375 | address_template: *generic18 376 | 377 | # Saint Barthélemy - same as FR 378 | BL: 379 | use_country: FR 380 | change_country: Saint-Barthélemy, France 381 | 382 | # Bermuda 383 | BM: 384 | address_template: *generic2 385 | 386 | # Brunei 387 | BN: 388 | address_template: | 389 | {{{attention}}} 390 | {{{house}}} 391 | {{{house_number}}}, {{{road}}} 392 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 393 | {{#first}} {{{county}}} || {{{state_district}}} || {{{state}}} {{/first}} {{{postcode}}} 394 | {{{country}}} 395 | 396 | 397 | # Bolivia 398 | BO: 399 | address_template: *generic17 400 | replace: 401 | - ["^Municipio Nuestra Senora de ",""] 402 | 403 | # Dutch Caribbean / Bonaire 404 | BQ: 405 | use_country: NL 406 | change_country: Caribbean Netherlands 407 | 408 | # Brazil 409 | BR: 410 | address_template: | 411 | {{{attention}}} 412 | {{{house}}} 413 | {{{road}}}, {{{house_number}}} 414 | {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}} - {{#first}} {{{state_code}}} || {{{state}}} {{/first}} 415 | {{{postcode}}} 416 | {{{country}}} 417 | postformat_replace: 418 | - ["\\b(\\d{5})(\\d{3})\\b","$1-$2"] 419 | 420 | # Bahamas 421 | BS: 422 | address_template: | 423 | {{{attention}}} 424 | {{{house}}} 425 | {{{road}}} {{{house_number}}} 426 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 427 | {{{county}}} 428 | {{{country}}} 429 | 430 | # Bhutan 431 | BT: 432 | address_template: | 433 | {{{attention}}} 434 | {{{house}}} 435 | {{{road}}} {{{house_number}}}, {{{house}}} 436 | 
{{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 437 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} {{/first}} {{{postcode}}} 438 | {{{country}}} 439 | 440 | # Bouvet Island 441 | BV: 442 | use_country: "NO" 443 | change_country: Bouvet Island, Norway 444 | 445 | # Botswana 446 | BW: 447 | address_template: | 448 | {{{attention}}} 449 | {{{house}}} 450 | {{{road}}} {{{house_number}}} 451 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 452 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 453 | {{{country}}} 454 | 455 | # Belarus 456 | BY: 457 | address_template: *generic11 458 | 459 | # Belize 460 | BZ: 461 | address_template: *generic16 462 | 463 | # Canada 464 | CA: 465 | address_template: | 466 | {{{attention}}} 467 | {{{house}}} 468 | {{#first}} {{{house_number}}} {{{road}}} || {{{suburb}}} {{/first}} 469 | {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}}, {{#first}} {{{state_code}}} || {{{state}}} {{/first}} {{{postcode}}} 470 | {{{country}}} 471 | fallback_template: | 472 | {{{attention}}} 473 | {{{house}}} 474 | {{#first}} {{{house_number}}} {{{road}}} || {{{suburb}}} {{/first}} 475 | {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}}, {{#first}} {{{state_code}}} || {{{state}}} {{/first}} {{{postcode}}} 476 | {{{country}}} 477 | postformat_replace: 478 | # fix the postcode to make it \w\w\w \w\w\w 479 | - [" (\\w{2}) (\\w{3})(\\w{3})\n"," $1 $2 $3\n"] 480 | 481 | 482 | # Cocos (Keeling) Islands 483 | CC: 484 | use_country: AU 485 | change_country: Australia 486 | 487 | # Democratic Republic of the Congo 488 | CD: 489 | address_template: *generic18 490 | 491 | # Central African Republic 492 | CF: 493 | address_template: *generic17 494 | 495 | # Republic of the Congo / Congo-Brazzaville 496 | CG: 497 | address_template: *generic18 498 | 499 | # Switzerland 500 | CH: 501 | 
address_template: *generic1 502 | 503 | # Côte d'Ivoire 504 | CI: 505 | address_template: *generic16 506 | 507 | # Cook Islands 508 | CK: 509 | address_template: *generic16 510 | 511 | # Chile 512 | CL: 513 | address_template: *generic1 514 | postformat_replace: 515 | # fix the postcode to make it \d\d\d \d\d\d\d 516 | - ["\n(\\d{3})(\\d{4}) ","\n$1 $2 "] 517 | 518 | # Cameroon 519 | CM: 520 | address_template: *generic17 521 | 522 | # China 523 | CN: 524 | address_template: | 525 | {{{attention}}} 526 | {{{house}}} 527 | {{{house_number}}} {{{road}}} 528 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 529 | {{{county}}} 530 | {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}} {{#first}} {{{state_code}}} || {{{state}}} {{/first}} {{{postcode}}} 531 | {{{country}}} 532 | 533 | # China - English 534 | CN_en: 535 | address_template: | 536 | {{{attention}}} 537 | {{{house}}} 538 | {{{house_number}}} {{{road}}} 539 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 540 | {{{county}}} 541 | {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}} {{#first}} {{{state_code}}} || {{{state}}} {{/first}} {{{postcode}}} 542 | {{{country}}} 543 | 544 | # China - Chinese 545 | CN_zh: 546 | address_template: | 547 | {{{country}}} 548 | {{{postcode}}} 549 | {{#first}} {{{state_code}}} || {{{state}}} {{/first}} 550 | {{#first}} {{{state_district}}} || {{{county}}} {{/first}} 551 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 552 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 553 | {{{road}}} 554 | {{{house_number}}} 555 | {{{house}}} 556 | {{{attention}}} 557 | 558 | # Colombia 559 | CO: 560 | address_template: | 561 | {{{attention}}} 562 | {{{house}}} 563 | {{{road}}} {{{house_number}}} 564 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 565 | {{{postcode}}} 
{{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}}, {{#first}} {{{state_code}}} || {{{state}}} {{/first}} 566 | {{{country}}} 567 | postformat_replace: 568 | - ["Bogota, Bogota","Bogota"] 569 | 570 | # Costa Rica 571 | CR: 572 | address_template: | 573 | {{{attention}}} 574 | {{{house}}} 575 | {{{road}}} {{{house_number}}} 576 | {{{state}}}, {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}}, {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 577 | {{{postcode}}} {{{country}}} 578 | 579 | # Cuba 580 | CU: 581 | address_template: *generic7 582 | 583 | # Cape Verde 584 | CV: 585 | address_template: *generic1 586 | postformat_replace: 587 | # fix the postcode to add - after numbers 588 | - ["\n(\\d{4}) ([^,]*)\n","\n$1-$2\n"] 589 | 590 | # Curaçao 591 | CW: 592 | address_template: *generic17 593 | 594 | # Christmas Island - same as Australia 595 | CX: 596 | use_country: AU 597 | add_component: state=Christmas Island 598 | change_country: Australia 599 | 600 | # Cyprus 601 | CY: 602 | address_template: *generic1 603 | 604 | # Czech Republic 605 | CZ: 606 | address_template: *generic1 607 | postformat_replace: 608 | # fix the postcode to make it \d\d\d \d\d 609 | - ["\n(\\d{3})(\\d{2}) ","\n$1 $2 "] 610 | 611 | # Germany 612 | DE: 613 | address_template: *generic1 614 | fallback_template: | 615 | {{{attention}}} 616 | {{{house}}} 617 | {{{road}}} {{{house_number}}} 618 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 619 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{county}}} {{/first}} 620 | {{#first}} {{{state}}} || {{{state_district}}} {{/first}} 621 | {{{country}}} 622 | 623 | replace: 624 | - ["^Stadtteil ",""] 625 | - ["^Stadtbezirk (\\d+)",""] 626 | - ["^Gemeinde ",""] 627 | - ["^Landkreis ",""] 628 | - ["^Kreis ",""] 629 | - ["^Grenze ",""] 630 | - ["^Free State of ",""] 631 | - ["^Freistaat ",""] 632 | - 
["^Regierungsbezirk ",""] 633 | - ["^Gemeindefreies Gebiet ",""] 634 | - ["city=Alt-Berlin","Berlin"] 635 | postformat_replace: 636 | - ["Berlin\nBerlin","Berlin"] 637 | - ["Bremen\nBremen","Bremen"] 638 | - ["Hamburg\nHamburg","Hamburg"] 639 | 640 | # Djibouti 641 | DJ: 642 | address_template: *generic16 643 | replace: 644 | - ["city=Djibouti","Djibouti-Ville"] 645 | 646 | # Denmark 647 | DK: 648 | address_template: *generic1 649 | 650 | # Dominica 651 | DM: 652 | address_template: *generic16 653 | 654 | # Dominican Republic 655 | DO: 656 | address_template: | 657 | {{{attention}}} 658 | {{{house}}} 659 | {{{road}}} {{{house_number}}} 660 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 661 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}}, {{{state}}} 662 | {{{postcode}}} 663 | {{{country}}} 664 | postformat_replace: 665 | - [", Distrito Nacional",", DN"] 666 | 667 | 668 | 669 | # Algeria 670 | DZ: 671 | address_template: *generic3 672 | 673 | # Ecuador 674 | EC: 675 | address_template: | 676 | {{{attention}}} 677 | {{{house}}} 678 | {{{road}}} {{{house_number}}} 679 | {{{postcode}}} 680 | {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}} 681 | {{{country}}} 682 | 683 | # Egypt 684 | EG: 685 | address_template: | 686 | {{{attention}}} 687 | {{{house}}} 688 | {{{house_number}}} {{{road}}} 689 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 690 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 691 | {{{postcode}}} 692 | {{{country}}} 693 | 694 | # Estonia 695 | EE: 696 | address_template: *generic1 697 | 698 | # Western Sahara 699 | EH: 700 | address_template: *generic17 701 | 702 | # Eritrea 703 | ER: 704 | address_template: *generic17 705 | 706 | # Spain 707 | ES: 708 | address_template: *generic15 709 | fallback_template: *fallback4 710 | 711 | # Ethiopia 712 | ET: 713 | address_template: *generic1 714 | 715 | # Finnland 716 
| FI: 717 | address_template: *generic1 718 | 719 | # Fiji 720 | FJ: 721 | address_template: *generic16 722 | 723 | # Falkland Islands 724 | FK: 725 | use_country: GB 726 | change_country: United Kingdom 727 | add_component: county=Falkland Islands 728 | 729 | # Federated States of Micronesia 730 | FM: 731 | use_country: US 732 | change_country: United States of America 733 | add_component: state=Micronesia 734 | 735 | # Faroe Islands 736 | FO: 737 | address_template: *generic1 738 | postformat_replace: 739 | - ["Territorial waters of Faroe Islands","Faroe Islands"] 740 | 741 | # France 742 | FR: 743 | address_template: *generic3 744 | replace: 745 | - ["Polynésie française, Îles du Vent \\(eaux territoriales\\)","Polynésie française"] 746 | - ["France, Mayotte \\(eaux territoriales\\)","Mayotte, France"] 747 | - ["France, La Réunion \\(eaux territoriales\\)","La Réunion, France"] 748 | - ["Grande Terre et récifs d'Entrecasteaux",""] 749 | - ["France, Nouvelle-Calédonie","Nouvelle-Calédonie, France"] 750 | - ["\\(eaux territoriales\\)",""] 751 | 752 | # Gabon 753 | GA: 754 | address_template: | 755 | {{{attention}}} 756 | {{{house}}} 757 | {{{house_number}}} {{{road}}} 758 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} || {{{village}}} {{/first}} 759 | {{#first}} {{{city}}} || {{{town}}} || {{{county}}} || {{{state_district}}} || {{{state}}} {{/first}} 760 | {{{country}}} 761 | 762 | GB: 763 | address_template: *generic2 764 | fallback_template: *fallback3 765 | replace: 766 | - ["^Borough of ",""] 767 | - ["^County( of)? 
",""] 768 | - ["^Parish of ",""] 769 | - ["^Central ",""] 770 | - ["^Greater London","London"] 771 | - ["^London Borough of .+","London"] 772 | - ["Royal Borough of ",""] 773 | postformat_replace: 774 | - ["London, London","London"] 775 | - ["London, Greater London","London"] 776 | - ["City of Westminster","London"] 777 | - [", United Kingdom$","\nUnited Kingdom"] 778 | - ["London\nEngland\nUnited Kingdom","London\nUnited Kingdom"] 779 | 780 | # Grenada 781 | GD: 782 | address_template: *generic17 783 | 784 | # Georgia 785 | GE: 786 | address_template: *generic1 787 | 788 | # French Guiana - same as FR 789 | GF: 790 | use_country: FR 791 | change_country: France 792 | 793 | # Guernsey - same format as UK, but not part of UK 794 | GG: 795 | use_country: GB 796 | change_country: Guernsey, Channel Islands 797 | 798 | # Ghana 799 | GH: 800 | address_template: *generic16 801 | 802 | # Gibraltar 803 | GI: 804 | address_template: *generic16 805 | 806 | # Greenland 807 | GL: 808 | address_template: *generic1 809 | 810 | # The Gambia 811 | GM: 812 | address_template: *generic16 813 | 814 | # Guinea 815 | GN: 816 | address_template: *generic14 817 | 818 | # Guadeloupe - same as FR 819 | GP: 820 | use_country: FR 821 | change_country: Guadeloupe, France 822 | 823 | # Equatorial Guinea 824 | GQ: 825 | address_template: *generic17 826 | 827 | # Greece 828 | GR: 829 | address_template: *generic1 830 | postformat_replace: 831 | # fix the postcode to make it \d\d\d \d\d 832 | - ["\n(\\d{3})(\\d{2}) ","\n$1 $2 "] 833 | 834 | # South Georgia and the South Sandwich Islands - same as UK 835 | GS: 836 | use_country: GB 837 | change_country: United Kingdom 838 | add_component: county=South Georgia 839 | 840 | # Guatemala 841 | GT: 842 | address_template: | 843 | {{{attention}}} 844 | {{{house}}} 845 | {{{road}}} {{{house_number}}} 846 | {{{postcode}}}-{{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} {{/first}} 847 | {{{country}}} 848 | postformat_replace: 849 | - 
["\n(\\d{5})- ","\n$1-"] 850 | - ["\n -","\n"] 851 | 852 | # Guam 853 | GU: 854 | use_country: US 855 | change_country: United States of America 856 | add_component: state=Guam 857 | 858 | # Guinea-Bissau 859 | GW: 860 | address_template: *generic1 861 | 862 | # Guyana 863 | GY: 864 | address_template: *generic16 865 | 866 | # Hong Kong 867 | HK: 868 | address_template: | 869 | {{{attention}}} 870 | {{{house}}} 871 | {{{house_number}}} {{{road}}} 872 | {{{state_district}}} 873 | {{{state}}} 874 | 875 | # Hong Kong - English 876 | HK_en: 877 | address_template: | 878 | {{{attention}}} 879 | {{{house}}} 880 | {{{house_number}}} {{{road}}} 881 | {{{state_district}}} 882 | {{{state}}} 883 | {{{country}}} 884 | 885 | # Hong Kong - Chinese 886 | HK_zh: 887 | address_template: | 888 | {{{country}}} 889 | {{{state}}} 890 | {{{state_district}}} 891 | {{{road}}} 892 | {{{house_number}}} 893 | {{{house}}} 894 | {{{attention}}} 895 | 896 | 897 | # Heard Island and McDonald Islands - same as Australia 898 | HM: 899 | use_country: AU 900 | change_country: Australia 901 | add_component: state=Heard Island and McDonald Islands 902 | 903 | # Honduras 904 | HN: 905 | address_template: *generic1 906 | 907 | # Croatia 908 | HR: 909 | address_template: *generic1 910 | 911 | # Haiti 912 | HT: 913 | address_template: *generic1 914 | postformat_replace: 915 | - [" Commune de"," "] 916 | 917 | # Hungary 918 | HU: 919 | address_template: | 920 | {{{attention}}} 921 | {{{house}}} 922 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 923 | {{{road}}} {{{house_number}}} 924 | {{{postcode}}} 925 | {{{country}}} 926 | 927 | # Indonesia 928 | # https://en.wikipedia.org/wiki/Address_%28geography%29#Indonesia 929 | ID: 930 | address_template: | 931 | {{{attention}}} 932 | {{{house}}} 933 | {{{road}}} {{{house_number}}} 934 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 935 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 
{{{postcode}}} 936 | {{{state}}} 937 | {{{country}}} 938 | 939 | # Ireland 940 | # https://en.wikipedia.org/wiki/Postal_addresses_in_the_Republic_of_Ireland 941 | IE: 942 | address_template: | 943 | {{{attention}}} 944 | {{{house}}} 945 | {{{house_number}}} {{{road}}} 946 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 947 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 948 | {{{county}}} 949 | {{{postcode}}} 950 | {{{country}}} 951 | replace: 952 | - [" City$",""] 953 | 954 | # Israel 955 | IL: 956 | address_template: *generic1 957 | 958 | # Isle of Man 959 | IM: 960 | address_template: *generic2 961 | 962 | # India 963 | # http://en.wikipedia.org/wiki/Address_%28geography%29#India 964 | IN: 965 | address_template: *generic12 966 | 967 | # British Indian Ocean Territory - same as UK 968 | IO: 969 | use_country: GB 970 | change_country: British Indian Ocean Territory, United Kingdom 971 | 972 | # Iraq 973 | IQ: 974 | address_template: | 975 | {{{attention}}} 976 | {{{house}}} 977 | {{{city_district}}} 978 | {{{house_number}}} {{{road}}} 979 | {{#first}} {{{city}}} || {{{town}}} || {{{state}}} || {{{village}}} {{/first}} 980 | {{{postcode}}} 981 | {{{country}}} 982 | 983 | # Iran 984 | IR: 985 | address_template: | 986 | {{{attention}}} 987 | {{{house}}} 988 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 989 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 990 | {{{road}}} 991 | {{{house_number}}} 992 | {{#first}} {{{province}}} || {{{state}}} {{/first}} 993 | {{{postcode}}} 994 | {{{country}}} 995 | 996 | IR_en: 997 | address_template: | 998 | {{{attention}}} 999 | {{{house}}} 1000 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1001 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1002 | {{{road}}} 1003 | {{{house_number}}} 1004 | {{#first}} {{{province}}} || {{{state}}} {{/first}} 1005 | {{{postcode}}} 1006 
| {{{country}}} 1007 | 1008 | IR_fa: 1009 | address_template: | 1010 | {{{country}}} 1011 | {{#first}} {{{state}}} || {{{province}}} {{/first}} 1012 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1013 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1014 | {{{road}}} 1015 | {{{house_number}}} 1016 | {{{house}}} 1017 | {{{attention}}} 1018 | {{{postcode}}} 1019 | 1020 | # Iceland 1021 | IS: 1022 | address_template: *generic1 1023 | 1024 | # Italy 1025 | IT: 1026 | address_template: *generic8 1027 | 1028 | # Jersey - same format as UK, but not part of UK 1029 | JE: 1030 | use_country: GB 1031 | change_country: Jersey, Channel Islands 1032 | 1033 | # Jamaica 1034 | JM: 1035 | address_template: *generic20 1036 | 1037 | # Jordan 1038 | JO: 1039 | address_template: *generic1 1040 | 1041 | # Japan 1042 | JP: 1043 | address_template: | 1044 | {{{attention}}} 1045 | {{{house}}} 1046 | {{{house_number}}} {{{road}}} 1047 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1048 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}}, {{#first}} {{{state}}} || {{{state_district}}} {{/first}} {{{postcode}}} 1049 | {{{country}}} 1050 | postformat_replace: 1051 | # fix the postcode to make it \d\d\d-\d\d\d\d 1052 | - [" (\\d{3})(\\d{4})\n"," $1-$2\n"] 1053 | 1054 | 1055 | 1056 | # Japan - English 1057 | JP_en: 1058 | address_template: | 1059 | {{{attention}}} 1060 | {{{house}}} 1061 | {{{house_number}}} {{{road}}} 1062 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1063 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}}, {{#first}} {{{state}}} || {{{state_district}}} {{/first}} {{{postcode}}} 1064 | {{{country}}} 1065 | postformat_replace: 1066 | # fix the postcode to make it \d\d\d-\d\d\d\d 1067 | - [" (\\d{3})(\\d{4})\n"," $1-$2\n"] 1068 | 1069 | 1070 | # Japan - Japanese 1071 | JP_ja: 1072 | address_template: | 1073 | {{{country}}} 
1074 | {{{postcode}}} 1075 | {{#first}} {{{state}}} || {{{state_district}}} {{/first}} 1076 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1077 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1078 | {{{road}}} 1079 | {{{house_number}}} 1080 | {{{house}}} 1081 | {{{attention}}} 1082 | postformat_replace: 1083 | # fix the postcode to make it \d\d\d-\d\d\d\d 1084 | - [" (\\d{3})(\\d{4})\n"," $1-$2\n"] 1085 | 1086 | # Kenya 1087 | KE: 1088 | address_template: | 1089 | {{{attention}}} 1090 | {{{house}}} 1091 | {{{house_number}}} {{{road}}} 1092 | {{#first}} {{{city}}} || {{{town}}} || {{{state}}} || {{{village}}} {{/first}} 1093 | {{{postcode}}} 1094 | {{{country}}} 1095 | 1096 | # Kyrgyzstan 1097 | KG: 1098 | address_template: *generic11 1099 | 1100 | # Cambodia 1101 | KH: 1102 | address_template: | 1103 | {{{attention}}} 1104 | {{{house}}} 1105 | {{{house_number}}} {{{road}}} 1106 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1107 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} {{{postcode}}} 1108 | {{{country}}} 1109 | 1110 | # Kiribati 1111 | KI: 1112 | address_template: *generic17 1113 | 1114 | # Comoros 1115 | KM: 1116 | address_template: | 1117 | {{{attention}}} 1118 | {{{house}}} 1119 | {{{road}}} {{{house_number}}} 1120 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1121 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1122 | {{{country}}} 1123 | 1124 | # Saint Kitts and Nevis 1125 | KN: 1126 | address_template: | 1127 | {{{attention}}} 1128 | {{{house}}} 1129 | {{{house_number}}} {{{road}}} 1130 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}}, {{#first}} {{{state}}} || {{{island}}} {{/first}} 1131 | {{{country}}} 1132 | 1133 | # Democratic People's Republic of Korea / North Korea 1134 | KP: 1135 | address_template: *generic21 1136 | 1137 | # Republic of Korea / South Korea 1138 | KR: 
1139 | address_template: | 1140 | {{{attention}}} 1141 | {{{house}}} 1142 | {{{house_number}}} {{{road}}} 1143 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}}, {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}}, {{#first}} {{{state}}} {{/first}} {{{postcode}}} 1144 | {{{country}}} 1145 | 1146 | # South Korea - English 1147 | KR_en: 1148 | address_template: | 1149 | {{{attention}}} 1150 | {{{house}}} 1151 | {{{house_number}}} {{{road}}} 1152 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}}, {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}}, {{#first}} {{{state}}} {{/first}} {{{postcode}}} 1153 | {{{country}}} 1154 | 1155 | # South Korea - Korean 1156 | KR_ko: 1157 | address_template: | 1158 | {{{country}}} 1159 | {{#first}} {{{state}}} {{/first}} 1160 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1161 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1162 | {{{road}}} 1163 | {{{house_number}}} 1164 | {{{house}}} 1165 | {{{attention}}} 1166 | {{{postcode}}} 1167 | 1168 | # Kuwait 1169 | KW: 1170 | address_template: | 1171 | {{{attention}}} 1172 | {{{house}}} 1173 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1174 | 1175 | {{{road}}} 1176 | {{{house_number}}} {{{house}}} 1177 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1178 | {{{country}}} 1179 | 1180 | # Cayman Islands 1181 | KY: 1182 | address_template: *generic2 1183 | 1184 | # Kazakhstan 1185 | KZ: 1186 | address_template: *generic11 1187 | 1188 | # Laos 1189 | LA: 1190 | address_template: *generic22 1191 | 1192 | # Lebanon 1193 | LB: 1194 | address_template: *generic2 1195 | postformat_replace: 1196 | # fix the postcode to make it nonbreaking space 1197 | - [" (\\d{4}) (\\d{4})\n"," $1 $2\n"] 1198 | # - ["\n(\\d{4}) (\\d{4}) ","\n$1 $2 "] 1199 | 1200 | 1201 | # Saint Lucia 1202 | LC: 1203 | 
address_template: *generic17 1204 | 1205 | # Liechtenstein, same as Switzerland 1206 | LI: 1207 | use_country: CH 1208 | 1209 | # Sri Lanka 1210 | LK: 1211 | address_template: *generic20 1212 | 1213 | # Liberia 1214 | LR: 1215 | address_template: *generic1 1216 | 1217 | # Lesotho 1218 | LS: 1219 | address_template: *generic2 1220 | 1221 | # Lithuania 1222 | LT: 1223 | address_template: *generic1 1224 | 1225 | # Luxembourg 1226 | LU: 1227 | address_template: *generic3 1228 | 1229 | # Latvia 1230 | LV: 1231 | address_template: *generic7 1232 | 1233 | # Libya 1234 | LY: 1235 | address_template: *generic17 1236 | 1237 | # Morocco 1238 | MA: 1239 | address_template: *generic3 1240 | 1241 | # Monaco 1242 | MC: 1243 | address_template: *generic3 1244 | 1245 | # Moldova 1246 | MD: 1247 | address_template: | 1248 | {{{attention}}} 1249 | {{{house}}} 1250 | {{{road}}}, {{{house_number}}} 1251 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} {{/first}} 1252 | {{{country}}} 1253 | 1254 | # Montenegro 1255 | ME: 1256 | address_template: *generic1 1257 | 1258 | # Collectivité de Saint-Martin 1259 | MF: 1260 | use_country: FR 1261 | change_country: France 1262 | 1263 | # Marshall Islands 1264 | MH: 1265 | use_country: US 1266 | add_component: state=Marsall Islands 1267 | 1268 | # Madagascar 1269 | MG: 1270 | address_template: | 1271 | {{{attention}}} 1272 | {{{house}}} 1273 | {{{house_number}}} {{{road}}} 1274 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1275 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1276 | {{{country}}} 1277 | 1278 | # Macedonia 1279 | MK: 1280 | address_template: *generic1 1281 | 1282 | # Mali 1283 | ML: 1284 | address_template: *generic17 1285 | 1286 | # Myanmar (Burma) 1287 | MM: 1288 | address_template: | 1289 | {{{attention}}} 1290 | {{{house}}} 1291 | {{{house_number}}} {{{road}}} 1292 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || 
{{{state}}} {{/first}}, {{{postcode}}} 1293 | {{{country}}} 1294 | 1295 | 1296 | # Mongolia 1297 | MN: 1298 | address_template: | 1299 | {{{attention}}} 1300 | {{{house}}} 1301 | {{{city_district}}} 1302 | {{#first}} {{{suburb}}} || {{{neighbourhood}}} {{/first}} 1303 | {{{road}}} 1304 | {{{house_number}}} 1305 | {{{postcode}}} 1306 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1307 | {{{country}}} 1308 | 1309 | # Macau 1310 | MO: 1311 | address_template: | 1312 | {{{attention}}} 1313 | {{{house}}} 1314 | {{{road}}} {{{house_number}}} 1315 | {{#first}} {{{suburb}}} || {{{village}}} || {{{state_district}}} {{/first}} 1316 | {{{country}}} 1317 | 1318 | # Macao - Portuguese 1319 | MO_pt: 1320 | address_template: | 1321 | {{{attention}}} 1322 | {{{house}}} 1323 | {{{road}}} {{{house_number}}} 1324 | {{#first}} {{{suburb}}} || {{{village}}} || {{{state_district}}} {{/first}} 1325 | {{{country}}} 1326 | 1327 | # Macao - Chinese 1328 | MO_zh: 1329 | address_template: | 1330 | {{{country}}} 1331 | {{#first}} {{{suburb}}} || {{{village}}} || {{{state_district}}} {{/first}} 1332 | {{{road}}} 1333 | {{{house_number}}} 1334 | {{{house}}} 1335 | {{{attention}}} 1336 | 1337 | # Northern Mariana Islands 1338 | MP: 1339 | use_country: US 1340 | change_country: United States of America 1341 | add_component: state=Northern Mariana Islands 1342 | 1343 | # Montserrat 1344 | MS: 1345 | address_template: *generic16 1346 | 1347 | # Malta 1348 | MT: 1349 | address_template: | 1350 | {{{attention}}} 1351 | {{{house}}} 1352 | {{{house_number}}} {{{road}}} 1353 | {{#first}} {{{city}}} || {{{town}}} || {{{suburb}}} || {{{village}}} {{/first}} 1354 | {{{postcode}}} 1355 | {{{country}}} 1356 | 1357 | # Martinique - overseas territory of France (FR) 1358 | MQ: 1359 | use_country: FR 1360 | change_country: Martinique, France 1361 | 1362 | # Mauritania 1363 | MR: 1364 | address_template: *generic18 1365 | 1366 | # Mauritius 1367 | MU: 1368 | address_template: *generic18 1369 | 
1370 | # Maldives 1371 | MV: 1372 | address_template: *generic2 1373 | 1374 | # Malawi 1375 | MW: 1376 | address_template: *generic16 1377 | 1378 | # Mexico 1379 | MX: 1380 | address_template: | 1381 | {{{attention}}} 1382 | {{{house}}} 1383 | {{{road}}} {{{house_number}}} 1384 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1385 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}}, {{#first}} {{{state_code}}} || {{{state}}} {{/first}} 1386 | {{{country}}} 1387 | 1388 | # Malaysia 1389 | MY: 1390 | address_template: | 1391 | {{{attention}}} 1392 | {{{house}}} 1393 | {{{house_number}}} {{{road}}} 1394 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1395 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1396 | {{{state}}} 1397 | {{{country}}} 1398 | 1399 | # Mozambique 1400 | MZ: 1401 | address_template: *generic15 1402 | fallback_template: *fallback4 1403 | 1404 | # Namibia 1405 | NA: 1406 | address_template: *generic2 1407 | 1408 | # New Caledonia, special collectivity of France 1409 | NC: 1410 | use_country: FR 1411 | change_country: Nouvelle-Calédonie, France 1412 | 1413 | # Niger 1414 | NE: 1415 | address_template: | 1416 | {{{attention}}} 1417 | {{{house}}} 1418 | {{{house_number}}} 1419 | {{{road}}} 1420 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1421 | {{{country}}} 1422 | 1423 | # Norfolk Island - same as Australia 1424 | NF: 1425 | use_country: AU 1426 | add_component: state=Norfolk Island 1427 | change_country: Australia 1428 | 1429 | # Nigeria 1430 | NG: 1431 | address_template: | 1432 | {{{attention}}} 1433 | {{{house}}} 1434 | {{{house_number}}} {{{road}}} 1435 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} {{{postcode}}} 1436 | {{{state}}} 1437 | {{{country}}} 1438 | 1439 | # Nicaragua 1440 | NI: 1441 | address_template: *generic21 1442 | 1443 | # Netherlands 
1444 | NL: 1445 | address_template: *generic1 1446 | postformat_replace: 1447 | # fix the postcode to make it \d\d\d\d \w\w 1448 | - ["\n(\\d{4})(\\w{2}) ","\n$1 $2 "] 1449 | - ["\nKoninkrijk der Nederlanden$","\nNederland"] 1450 | 1451 | 1452 | # Norway 1453 | # quoted since python interprets it as a boolean. Silly python! 1454 | "NO": 1455 | address_template: *generic1 1456 | 1457 | # Nepal 1458 | NP: 1459 | address_template: | 1460 | {{{attention}}} 1461 | {{{house}}} 1462 | {{{road}}} {{{house_number}}} 1463 | {{#first}} {{{suburb}}} || {{{neighbourhood}}} || {{{city}}} {{/first}} 1464 | {{#first}} {{{county}}} || {{{state_district}}} || {{{state}}} {{/first}} {{{postcode}}} 1465 | {{{country}}} 1466 | 1467 | # Nauru 1468 | NR: 1469 | address_template: *generic16 1470 | 1471 | # Niue 1472 | NU: 1473 | address_template: *generic16 1474 | 1475 | # New Zealand 1476 | NZ: 1477 | address_template: *generic20 1478 | 1479 | # Oman 1480 | OM: 1481 | address_template: | 1482 | {{{attention}}} 1483 | {{{house}}} 1484 | {{{house_number}}} {{{road}}} 1485 | {{{postcode}}} 1486 | {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}} 1487 | {{{state}}} 1488 | {{{country}}} 1489 | 1490 | # Panama 1491 | PA: 1492 | address_template: | 1493 | {{{attention}}} 1494 | {{{house}}} 1495 | {{{road}}} {{{house_number}}} 1496 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1497 | {{{postcode}}} 1498 | {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}} 1499 | {{{state}}} 1500 | {{{country}}} 1501 | replace: 1502 | - ["city=Panama","Panama City"] 1503 | - ["city=Panamá","Ciudad de Panamá"] 1504 | 1505 | # Peru 1506 | PE: 1507 | address_template: *generic19 1508 | 1509 | # French Polynesia - same as FR 1510 | PF: 1511 | use_country: FR 1512 | change_country: Polynésie française, France 1513 | replace: 1514 | - ["Polynésie française, Îles du Vent \\(eaux territoriales\\)","Polynésie 
française"] 1515 | 1516 | 1517 | # Papua New Guinea 1518 | PG: 1519 | address_template: | 1520 | {{{attention}}} 1521 | {{{house}}} 1522 | {{{house_number}}} {{{road}}} 1523 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} {{{postcode}}} {{{state}}} 1524 | {{{country}}} 1525 | 1526 | # Philippines 1527 | PH: 1528 | address_template: | 1529 | {{{attention}}} 1530 | {{{house}}} 1531 | {{{house_number}}} {{{road}}} 1532 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{suburb}}} || {{{state_district}}} {{/first}} 1533 | {{{postcode}}} {{{state}}} 1534 | {{{country}}} 1535 | 1536 | # Pakistan 1537 | PK: 1538 | address_template: | 1539 | {{{attention}}} 1540 | {{{house}}} 1541 | {{{house_number}}} {{{road}}} 1542 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1543 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} {{/first}} {{{postcode}}} 1544 | {{{country}}} 1545 | 1546 | # Poland 1547 | PL: 1548 | address_template: *generic1 1549 | postformat_replace: 1550 | # fix the postcode to make it \d\d-\d\d\d 1551 | - ["\n(\\d{2})(\\w{3}) ","\n$1-$2 "] 1552 | 1553 | 1554 | # Saint Pierre and Miquelon - same as FR 1555 | PM: 1556 | use_country: FR 1557 | change_country: Saint-Pierre-et-Miquelon, France 1558 | 1559 | # Pitcairn Islands 1560 | PN: 1561 | address_template: | 1562 | {{{attention}}} 1563 | {{{house}}} 1564 | {{#first}} {{{city}}} || {{{town}}} || {{{island}}} {{/first}} 1565 | {{{country}}} 1566 | 1567 | # Puerto Rico, same as USA 1568 | PR: 1569 | use_country: US 1570 | change_country: United States of America 1571 | add_component: state=Puerto Rico 1572 | 1573 | # Palestine 1574 | PS: 1575 | use_country: IL 1576 | 1577 | # Portugal 1578 | PT: 1579 | address_template: *generic1 1580 | 1581 | # Palau 1582 | PW: 1583 | address_template: *generic1 1584 | 1585 | # Paraguay 1586 | PY: 1587 | address_template: *generic1 1588 | 1589 | # Qatar 1590 | QA: 1591 | address_template: *generic17 
1592 | 1593 | # Réunion - same as FR 1594 | RE: 1595 | use_country: FR 1596 | change_country: La Réunion, France 1597 | 1598 | 1599 | # Romania 1600 | RO: 1601 | address_template: *generic1 1602 | 1603 | # Serbia 1604 | RS: 1605 | address_template: *generic1 1606 | 1607 | # Russia 1608 | RU: 1609 | address_template: *generic10 1610 | 1611 | # Rwanda 1612 | RW: 1613 | address_template: *generic16 1614 | 1615 | # Saudi Arabia 1616 | SA: 1617 | address_template: | 1618 | {{{attention}}} 1619 | {{{house}}} 1620 | {{{house_number}}} {{{road}}}, {{#first}} {{{village}}} || {{{city_district}}} || {{{suburb}}} || {{{neighbourhood}}} {{/first}} 1621 | {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} {{/first}} {{{postcode}}} 1622 | {{{country}}} 1623 | 1624 | # Solomon Islands 1625 | SB: 1626 | address_template: *generic17 1627 | 1628 | # Seychelles 1629 | SC: 1630 | address_template: | 1631 | {{{attention}}} 1632 | {{{house}}} 1633 | {{{house_number}}} {{{road}}} 1634 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{island}}} {{/first}} 1635 | {{{island}}} 1636 | {{{country}}} 1637 | 1638 | # Sudan 1639 | SD: 1640 | address_template: *generic1 1641 | 1642 | # Sweden 1643 | SE: 1644 | address_template: *generic1 1645 | postformat_replace: 1646 | # fix the postcode to make it \d\d\d \d\d 1647 | - ["\n(\\d{3})(\\d{2}) ","\n$1 $2 "] 1648 | 1649 | # Singapore 1650 | SG: 1651 | address_template: *generic2 1652 | 1653 | # Saint Helena, Ascension and Tristan da Cunha - same as UK 1654 | SH: 1655 | use_country: GB 1656 | change_country: $state, United Kingdom 1657 | 1658 | # Slovenia 1659 | SI: 1660 | address_template: *generic1 1661 | 1662 | # Svalbard and Jan Mayen - same as Norway 1663 | SJ: 1664 | use_country: "NO" 1665 | change_country: Norway 1666 | 1667 | # Slovakia 1668 | SK: 1669 | address_template: *generic1 1670 | replace: 1671 | - ["^District of ",""] 1672 | 1673 | # Sierra Leone 1674 | SL: 1675 | address_template: *generic16 1676 | 1677 | # San 
Marino - same as IT 1678 | SM: 1679 | use_country: IT 1680 | 1681 | # Senegal 1682 | SN: 1683 | address_template: *generic3 1684 | 1685 | # Somalia 1686 | SO: 1687 | address_template: *generic21 1688 | 1689 | # Suriname 1690 | SR: 1691 | address_template: *generic21 1692 | 1693 | # South Sudan 1694 | SS: 1695 | address_template: *generic17 1696 | 1697 | # São Tomé and Príncipe 1698 | ST: 1699 | address_template: *generic17 1700 | 1701 | # El Salvador 1702 | SV: 1703 | address_template: | 1704 | {{{attention}}} 1705 | {{{house}}} 1706 | {{{road}}} {{{house_number}}} 1707 | {{{postcode}}} - {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1708 | {{{state}}} 1709 | {{{country}}} 1710 | postformat_replace: 1711 | - ["\n- ","\n "] 1712 | 1713 | # Sint Maarten 1714 | SX: 1715 | address_template: *generic17 1716 | 1717 | # Syria 1718 | SY: 1719 | address_template: | 1720 | {{{attention}}} 1721 | {{{house}}} 1722 | {{{road}}}, {{{house_number}}} 1723 | {{#first}} {{{village}}} || {{{city_district}}} || {{{neighbourhood}}} || {{{suburb}}} {{/first}} 1724 | {{{postcode}}} {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{state}}} {{/first}} 1725 | 1726 | {{{country}}} 1727 | 1728 | 1729 | # Swaziland 1730 | SZ: 1731 | address_template: | 1732 | {{{attention}}} 1733 | {{{house}}} 1734 | {{{road}}} {{{house_number}}} 1735 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} {{/first}} 1736 | {{{postcode}}} 1737 | {{{country}}} 1738 | 1739 | # Turks and Caicos Islands - same as UK 1740 | TC: 1741 | use_country: GB 1742 | 1743 | # Chad 1744 | TD: 1745 | address_template: *generic21 1746 | 1747 | # French Southern and Antarctic Lands 1748 | TF: 1749 | use_country: FR 1750 | change_country: Terres australes et antarctiques françaises, France 1751 | 1752 | # Togo 1753 | TG: 1754 | address_template: *generic18 1755 | 1756 | # Thailand 1757 | TH: 1758 | address_template: | 1759 | {{{attention}}} 1760 | {{{house}}} 1761 | 
{{{house_number}}} {{{village}}} 1762 | {{{road}}} 1763 | {{#first}} {{{neighbourhood}}} || {{{city}}} || {{{town}}} {{/first}}, {{#first}} {{{suburb}}} || {{{city_district}}} || {{{state_district}}} {{/first}} 1764 | {{{state}}} {{{postcode}}} 1765 | {{{country}}} 1766 | 1767 | # Tajikistan 1768 | TJ: 1769 | address_template: *generic1 1770 | 1771 | # Tokelau, territory of New Zealand 1772 | TK: 1773 | use_country: NZ 1774 | change_country: Tokelau, New Zealand 1775 | 1776 | # Timor-Leste/East Timor 1777 | TL: 1778 | address_template: *generic17 1779 | 1780 | # Turkmenistan 1781 | TM: 1782 | address_template: *generic22 1783 | 1784 | # Tunisia 1785 | TN: 1786 | address_template: *generic3 1787 | 1788 | # Tonga 1789 | TO: 1790 | address_template: *generic16 1791 | 1792 | # Turkey 1793 | TR: 1794 | address_template: *generic1 1795 | 1796 | # Trinidad and Tobago 1797 | TT: 1798 | address_template: | 1799 | {{{attention}}} 1800 | {{{house}}} 1801 | {{{house_number}}} {{{road}}} 1802 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{state_district}}} {{/first}} 1803 | {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}}, {{{postcode}}} 1804 | {{{country}}} 1805 | 1806 | # Tuvalu 1807 | TV: 1808 | address_template: | 1809 | {{{attention}}} 1810 | {{{house}}} 1811 | {{{house_number}}} {{{road}}} 1812 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1813 | {{#first}} {{{county}}} || {{{state_district}}} || {{{state}}} || {{{island}}} {{/first}} 1814 | {{{country}}} 1815 | 1816 | # Taiwan 1817 | TW: 1818 | address_template: *generic20 1819 | 1820 | TW_en: 1821 | address_template: *generic20 1822 | 1823 | TW_zh: 1824 | address_template: | 1825 | {{{country}}} 1826 | {{{postcode}}} 1827 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1828 | {{{city_district}}} 1829 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}} 1830 | {{{road}}} 1831 | {{{house_number}}} 1832 | 
{{{house}}} 1833 | {{{attention}}} 1834 | 1835 | # Tanzania 1836 | TZ: 1837 | address_template: *generic14 1838 | fallback_template: *generic14 1839 | postformat_replace: 1840 | - ["Dar es Salaam\nDar es Salaam","Dar es Salaam"] 1841 | 1842 | # Ukraine 1843 | UA: 1844 | address_template: | 1845 | {{{attention}}} 1846 | {{{house}}} 1847 | {{{road}}}, {{{house_number}}} 1848 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{state_district}}} {{/first}} 1849 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} {{/first}} 1850 | {{{postcode}}} 1851 | {{{country}}} 1852 | 1853 | # Uganda 1854 | UG: 1855 | address_template: *generic16 1856 | 1857 | # US Minor Outlying Islands, same as USA 1858 | UM: 1859 | fallback_template: *fallback3 1860 | use_country: US 1861 | change_country: United States of America 1862 | add_component: state=US Minor Outlying Islands 1863 | 1864 | # USA 1865 | US: 1866 | address_template: *generic4 1867 | fallback_template: *fallback2 1868 | replace: 1869 | - ["state=United States Virgin Islands","US Virgin Islands"] 1870 | - ["state=USVI","US Virgin Islands"] 1871 | postformat_replace: 1872 | - ["\nUS$","\nUnited States of America"] 1873 | - ["\nUSA$","\nUnited States of America"] 1874 | - ["\nUnited States$","\nUnited States of America"] 1875 | 1876 | # Uzbekistan 1877 | UZ: 1878 | address_template: | 1879 | {{{attention}}} 1880 | {{{house}}} 1881 | {{{road}}} {{{house_number}}} 1882 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}} 1883 | {{#first}} {{{state}}} || {{{state_district}}} {{/first}} 1884 | {{{country}}} 1885 | {{{postcode}}} 1886 | 1887 | # Uruguay 1888 | UY: 1889 | address_template: *generic1 1890 | 1891 | # Vatican City - same as IT 1892 | VA: 1893 | use_country: IT 1894 | 1895 | # Saint Vincent and the Grenadines 1896 | VC: 1897 | address_template: *generic17 1898 | 1899 | # Venezuela 1900 | VE: 1901 | address_template: | 1902 | {{{attention}}} 1903 | {{{house}}} 1904 | {{{road}}} 
{{{house_number}}} 1905 | {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}} {{{postcode}}}, {{#first}} {{{state_code}}} || {{{state}}} {{/first}} 1906 | {{{country}}} 1907 | 1908 | # British Virgin Islands 1909 | VG: 1910 | address_template: | 1911 | {{{attention}}} 1912 | {{{house}}} 1913 | {{{house_number}}} {{{road}}} 1914 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} {{/first}}, {{{island}}} 1915 | {{{country}}}, {{{postcode}}} 1916 | 1917 | # US Virgin Islands, same as USA 1918 | VI: 1919 | use_country: US 1920 | change_country: United States of America 1921 | add_component: state=US Virgin Islands 1922 | 1923 | # Vietnam 1924 | VN: 1925 | address_template: | 1926 | {{{attention}}} 1927 | {{{house}}} 1928 | {{{house_number}}}, {{{road}}} 1929 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{neighbourhood}}} {{/first}}, {{#first}} {{{city}}} || {{{town}}} || {{{state_district}}} || {{{village}}} {{/first}} 1930 | {{{state}}} {{{postcode}}} 1931 | {{{country}}} 1932 | 1933 | # Vanuatu 1934 | VU: 1935 | address_template: *generic17 1936 | 1937 | # Wallis and Futuna, same as France 1938 | WF: 1939 | use_country: FR 1940 | change_country: Wallis-et-Futuna, France 1941 | 1942 | # Samoa 1943 | WS: 1944 | address_template: *generic17 1945 | 1946 | # Yemen 1947 | YE: 1948 | address_template: *generic18 1949 | 1950 | # Mayotte - same as FR 1951 | YT: 1952 | use_country: FR 1953 | change_country: Mayotte, France 1954 | 1955 | # South Africa 1956 | ZA: 1957 | address_template: | 1958 | {{{attention}}} 1959 | {{{house}}} 1960 | {{{house_number}}} {{{road}}} 1961 | {{#first}} {{{suburb}}} || {{{city_district}}} || {{{state_district}}} {{/first}} 1962 | {{#first}} {{{city}}} || {{{town}}} || {{{village}}} || {{{state}}} {{/first}} 1963 | {{{postcode}}} 1964 | {{{country}}} 1965 | 1966 | # Zambia 1967 | ZM: 1968 | address_template: *generic3 1969 | 1970 | # Zimbabwe 1971 | ZW: 1972 | address_template: *generic16 1973 | 
def first(address):
    """
    Build the ``first`` helper that the address templates use as a lambda section.

    The returned callable receives the raw text between ``{{#first}}`` and
    ``{{/first}}``, splits it at ``||`` and renders the alternatives one after
    another against ``address``.

    :param address: context used to render the alternatives
    :returns: callable returning the first alternative whose rendering is not
              blank, or an empty string if all of them are blank
    """
    def _first(content):
        for candidate in content.split('||'):
            rendered = pystache.render(candidate.strip(), address)
            # whitespace-only output counts as "nothing rendered"
            if rendered.strip():
                return rendered
        return ''

    return _first
def fetch_coordinate(
    geocoder,
    search_term: str,
    center: Optional[Tuple[float, float]] = None,
    country: Optional[str] = None,
    radius=20000,
    limit=20
) -> Generator[Dict[str, Any], None, None]:
    """
    Fetch probable coordinates from openstreetmap or openaddresses.io data using
    the search term.

    If the postal service is configured (``geocoder.postal_service`` is not ``None``)
    the search term is sent to its ``/split`` endpoint and the first parsed address
    of the answer is used. In every other case (no service configured, service not
    reachable, non-200 status, empty or non-JSON answer) the complete search term
    is treated as a road name.

    The parsed address is then handed to ``fetch_coordinate_struct`` and its
    results are passed through unchanged.

    This is a generator that returns an iterator of dict instances with the following
    keys: house, road, house_number, postcode, city, country, location, trgm_dist, dist

    Not all keys have to be filled at all times.

    :param geocoder: geocoder instance
    :param search_term: user input
    :param center: center coordinate used for distance sorting
    :param country: if set the query will be limited to this country
    :param radius: max search radius around the center coordinate
    :param limit: maximum number of results to return
    """

    # Fallback for every case in which the postal service cannot help us. It has
    # to be assigned up front, otherwise the name would be unbound when no
    # postal service is configured at all.
    parsed_address = {'road': search_term}

    if geocoder.postal_service is not None:
        try:
            response = post(
                geocoder.postal_service['service_url'] + '/split',
                json={"query": search_term}
            )
            candidates = response.json() if response.status_code == 200 else None
        except (ConnectionError, ValueError):
            # ConnectionError: service is not reachable
            # ValueError: the response body was not valid JSON
            candidates = None

        # an empty answer keeps the road-only fallback instead of raising IndexError
        if candidates:
            parsed_address = candidates[0]

    yield from fetch_coordinate_struct(
        geocoder,
        # the classifier may label a plain street name as "house"
        road=parsed_address.get('road', parsed_address.get('house', None)),
        house_number=parsed_address.get('house_number', None),
        postcode=parsed_address.get('postcode', None),
        city=parsed_address.get('city', None),
        country=country,
        center=center,
        radius=radius,
        limit=limit
    )
class Geocoder():
    """
    Python front end for the geocoding functions stored in the database.

    Forward geocoding is delegated to ``fetch_coordinate`` and
    ``fetch_coordinate_struct``, reverse geocoding to ``fetch_address``. Methods
    that return text render the result dictionaries with an ``AddressFormatter``.
    """

    def __init__(
        self,
        db: Optional[Dict[str, Any]] = None,
        db_handle=None,
        address_formatter_config: Optional[str] = None,
        postal: Optional[Dict[str, Any]] = None
    ):
        """
        Initialize a new geocoder

        :param db: DB connection parameters as dictionary, passed to ``psycopg2.connect``
                   (mutually exclusive with ``db_handle``)
        :param db_handle: Already opened DB Connection, useful if this connection
                          is handled by a web framework like django. Wins over ``db``
                          if both are given.
        :param address_formatter_config: Custom configuration for the address formatter,
                                         by default uses the datafile included in the bundle
        :param postal: postal service information, dict with at least ``service_url``
        """
        self.postal_service = postal

        # Always define the attribute, so that a geocoder built without any DB
        # information does not fail later with a missing attribute.
        self.db = None
        if db_handle is not None:
            # do not open a connection of our own that would be thrown away again
            self.db = db_handle
        elif db is not None:
            self.db = self._init_db(db)

        self.formatter = AddressFormatter(config=address_formatter_config)

    def _init_db(self, db_config: Dict[str, Any]):
        """
        Open a new DB connection.

        The parameters are passed as keyword arguments, so quoting of values that
        contain spaces or quotes (passwords for example) is left to psycopg2
        instead of joining an unquoted connection string by hand.

        :param db_config: connection parameters (``dbname``, ``user``, ``password``, ...)
        :returns: the opened connection
        """
        return psycopg2.connect(**db_config)

    @staticmethod
    def _projections():
        """
        Create the projection pair used to convert DB coordinates back to lat/lon.

        :returns: tuple of (EPSG 3857 projection, EPSG 4326 projection)
        """
        # NOTE(review): the ``init=`` syntax and ``transform`` are reported as
        # deprecated by newer pyproj releases, kept because setup.py still allows
        # pyproj 1.9 - confirm before switching to ``pyproj.Transformer``
        return Proj(init='epsg:3857'), Proj(init='epsg:4326')

    @staticmethod
    def _lat_lon(location, merc_proj, latlon_proj) -> Tuple[float, float]:
        """
        Convert a hex encoded WKB point in EPSG 3857 to a (lat, lon) tuple.

        :param location: ``location`` value of a forward geocoding result
        :param merc_proj: source projection (EPSG 3857)
        :param latlon_proj: target projection (EPSG 4326)
        :returns: tuple of latitude, longitude
        """
        point = loads(location, hex=True)
        lon, lat = transform(merc_proj, latlon_proj, point.x, point.y)
        return lat, lon

    def forward(
        self,
        address: str,
        country: Optional[str] = None,
        center: Optional[Tuple[float, float]] = None
    ) -> List[Tuple[str, float, float]]:
        """
        Forward geocode address (string -> coordinate tuple) from search string and return formatted address

        :param address: Address to fetch a point for, if you're not running the postal classifier the search
                        will be limited to a street name
        :param country: optional, country name to search in (native language, e.g. "Deutschland" or "France")
        :param center: optional, center coordinate (EPSG 4326/WGS84 (lat, lon) tuple) to sort result by distance
        :returns: List of Tuples of Name, Latitude, Longitude
        """
        merc_proj, latlon_proj = self._projections()

        results = []
        for coordinate in fetch_coordinate(self, address, country=country, center=center):
            # no country is handed to the formatter, so its default template is used
            name = self.formatter.format(coordinate)
            lat, lon = self._lat_lon(coordinate['location'], merc_proj, latlon_proj)
            results.append((name, lat, lon))

        return results

    def forward_structured_dict(
        self,
        road: Optional[str] = None,
        house_number: Optional[str] = None,
        postcode: Optional[str] = None,
        city: Optional[str] = None,
        country: Optional[str] = None,
        center: Optional[Tuple[float, float]] = None
    ) -> List[Dict[str, Any]]:
        """
        Forward geocode address (strings -> coordinate tuple) from structured data and return dictionary

        :param road: Street or road name if known
        :param house_number: House number (string!) if known
        :param postcode: Postcode (string!) if known
        :param city: City name if known
        :param country: optional, country name to search in (native language, e.g. "Deutschland" or "France")
        :param center: optional, center coordinate (EPSG 4326/WGS84 (lat, lon) tuple) to sort result by distance
        :returns: List of Dictionaries with at least 'lat' and 'lon' members
        """
        merc_proj, latlon_proj = self._projections()

        results = []
        for coordinate in fetch_coordinate_struct(
            self, road=road, house_number=house_number,
            postcode=postcode, city=city, country=country,
            center=center
        ):
            # the result row itself is extended with the projected coordinate
            lat, lon = self._lat_lon(coordinate['location'], merc_proj, latlon_proj)
            coordinate['lat'] = lat
            coordinate['lon'] = lon
            results.append(coordinate)

        return results

    def forward_structured(
        self,
        road: Optional[str] = None,
        house_number: Optional[str] = None,
        postcode: Optional[str] = None,
        city: Optional[str] = None,
        country: Optional[str] = None,
        center: Optional[Tuple[float, float]] = None
    ) -> List[Tuple[str, float, float]]:
        """
        Forward geocode address (strings -> coordinate tuple) from structured data and return formatted address

        :param road: Street or road name if known
        :param house_number: House number (string!) if known
        :param postcode: Postcode (string!) if known
        :param city: City name if known
        :param country: optional, country name to search in (native language, e.g. "Deutschland" or "France")
        :param center: optional, center coordinate (EPSG 4326/WGS84 (lat, lon) tuple) to sort result by distance
        :returns: List of Tuples of Name, Latitude, Longitude
        """
        data = self.forward_structured_dict(
            road=road,
            house_number=house_number,
            postcode=postcode,
            city=city,
            country=country,
            center=center
        )

        return [
            (self.formatter.format(coordinate), coordinate['lat'], coordinate['lon'])
            for coordinate in data
        ]

    def reverse_dict(
        self,
        lat: float,
        lon: float,
        radius=100,
        limit=10
    ) -> Generator[Dict[str, Any], None, None]:
        """
        Reverse geocode coordinate to address dictionary

        :param lat: Latitude (EPSG 4326/WGS 84)
        :param lon: Longitude (EPSG 4326/WGS 84)
        :param radius: Search radius
        :param limit: Maximum number of matches to return, defaults to 10
        :returns: generator for address dictionaries, ``None`` entries are skipped
        """
        items = fetch_address(self, (lat, lon), radius, projection='epsg:4326', limit=limit)
        for item in items:
            if item is not None:
                yield item

    def reverse(
        self,
        lat: float,
        lon: float,
        radius=100,
        limit=10
    ) -> Generator[str, None, None]:
        """
        Reverse geocode coordinate to address string

        :param lat: Latitude (EPSG 4326/WGS 84)
        :param lon: Longitude (EPSG 4326/WGS 84)
        :param radius: Search radius
        :param limit: Maximum number of matches to return, defaults to 10
        :returns: generator for addresses rendered by the address formatter (may contain linebreaks)
        """
        for item in self.reverse_dict(lat, lon, radius=radius, limit=limit):
            yield self.formatter.format(item)

    def reverse_epsg3857_dict(
        self,
        x: float,
        y: float,
        radius=100,
        limit=10
    ) -> Generator[Dict[str, Any], None, None]:
        """
        Reverse geocode coordinate to address dictionary
        this one uses the EPSG 3857 aka. Web Mercator projection which is the format
        that is used in the DB already, so by using this function we avoid re-projecting
        from and into this projection all the time if we're working with web mercator
        internally.

        :param x: X (EPSG 3857/Web Mercator)
        :param y: Y (EPSG 3857/Web Mercator)
        :param radius: Search radius
        :param limit: Maximum number of matches to return, defaults to 10
        :returns: generator for address dictionaries, ``None`` entries are skipped
        """
        items = fetch_address(self, (x, y), radius, projection='epsg:3857', limit=limit)
        for item in items:
            if item is not None:
                yield item

    def reverse_epsg3857(
        self,
        x: float,
        y: float,
        radius=100,
        limit=10
    ) -> Generator[str, None, None]:
        """
        Reverse geocode coordinate to address string
        this one uses the EPSG 3857 aka. Web Mercator projection which is the format
        that is used in the DB already, so by using this function we avoid re-projecting
        from and into this projection all the time if we're working with web mercator
        internally.

        :param x: X (EPSG 3857/Web Mercator)
        :param y: Y (EPSG 3857/Web Mercator)
        :param radius: Search radius
        :param limit: Maximum number of matches to return, defaults to 10
        :returns: generator for addresses rendered by the address formatter (may contain linebreaks)
        """
        for item in self.reverse_epsg3857_dict(x, y, radius=radius, limit=limit):
            yield self.formatter.format(item)

    def predict_text(self, input: str) -> Generator[str, None, None]:
        """
        Predict word the user is typing currently

        :param input: user input
        :returns: generator for the ``word`` column of the rows returned by the
                  ``predict_text`` DB function, in the order the DB returns them
        """
        query = 'SELECT word FROM predict_text(%s)'

        cursor = self.db.cursor(cursor_factory=RealDictCursor)
        try:
            cursor.execute(query, [input])
            for result in cursor:
                yield result['word']
        finally:
            # also runs when the caller stops iterating early
            cursor.close()
22 | 23 | :param geocoder: the geocoder class instance 24 | :param center: center coordinate for which to fetch the address 25 | :param radius: query radius 26 | :param projection: projection type of the coordinate, currently supported: ``epsg:4326`` and ``epsg:3857`` 27 | :param limit: maximum number of results to return 28 | """ 29 | 30 | if projection == 'epsg:4326': 31 | mercProj = Proj(init='epsg:3857') 32 | x, y = mercProj(center[1], center[0]) 33 | elif projection == 'epsg:3857': 34 | x = center[0] 35 | y = center[1] 36 | else: 37 | raise ValueError('Unsupported projection {}'.format(projection)) 38 | 39 | query = ''' 40 | SELECT * FROM point_to_address_osm( 41 | ST_SetSRID( 42 | ST_MakePoint(%(x)s, %(y)s), 43 | 3857 44 | ), 45 | %(radius)s 46 | ) LIMIT {limit}; 47 | '''.format(limit=int(limit)) 48 | 49 | cursor = geocoder.db.cursor(cursor_factory=RealDictCursor) 50 | cursor.execute(query, { 'x': x, 'y': y, 'radius': radius }) 51 | 52 | if cursor.rowcount == 0: 53 | # try openaddresses.io 54 | query = ''' 55 | SELECT * FROM point_to_address_oa( 56 | ST_SetSRID( 57 | ST_MakePoint(%(x)s, %(y)s), 58 | 3857 59 | ), 60 | %(radius)s 61 | ) LIMIT {limit}; 62 | '''.format(limit=int(limit)) 63 | cursor.execute(query, { 'x': x, 'y': y, 'radius': radius }) 64 | 65 | for result in cursor: 66 | yield result 67 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | psycopg2-binary 2 | pyproj 3 | shapely[vectorized] 4 | requests 5 | pyyaml 6 | pystache 7 | python-geohash 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: 
import os
from setuptools import setup

# allow setup.py to be run from any path
os.chdir(os.path.dirname(os.path.abspath(__file__)))

setup(
    name='osmgeocoder',
    version='2.1.0',
    description='OpenStreetMap and OpenAddresses.io based geocoder',
    long_description='''
Python implementation for a OSM / Openaddresses.io Geocoder.

This geocoder is implemented in PostgreSQL DB functions as much as possible, there is a simple API and an example flask app included.

You will need PostgreSQL 9.5+ (or 11.0+ for Openaddresses.io) with PostGIS installed as well as some disk space and data-files from OpenStreetMap and (optionally) OpenAddresses.io.

Data import will be done via [Omniscale's imposm3](https://github.com/omniscale/imposm3) and a supplied python script to import the openaddresses.io data.

Optionally you can use the [libpostal machine learning address classifier](https://github.com/openvenues/libpostal) to parse addresses supplied as input to the forward geocoder.

For formatting the addresses from the reverse geocoder the `worldwide.yml` from [OpenCageData address-formatting repository](https://github.com/OpenCageData/address-formatting) is used to format the address according to customs in the country that is being encoded.

See `README.md` in the [repository](https://github.com/dunkelstern/osmgeocoder) for more information.
''',
    long_description_content_type='text/markdown',
    url='https://github.com/dunkelstern/osmgeocoder',
    author='Johannes Schriewer',
    author_email='hallo@dunkelstern.de',
    # the field takes the name of the license, not the path of the license file
    license='BSD-3-Clause',
    include_package_data=True,
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: BSD License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Operating System :: OS Independent'
    ],
    keywords='osm openstreetmap geocoding geocoder openaddresses.io',
    packages=['osmgeocoder'],
    scripts=[
        'bin/address2coordinate.py',
        'bin/coordinate2address.py',
        'bin/geocoder_service.py',
        'bin/postal_service.py',
        'bin/import_openaddress_data.py',
        'bin/prepare_osm.py',
        'bin/finalize_geocoder.py'
    ],
    install_requires=[
        'psycopg2 >= 2.8',
        'pyproj >= 1.9',
        'Shapely >= 1.6',
        'requests >= 2.18',
        'PyYAML >= 5.0',
        'pystache >= 0.5',
        'python-geohash >= 0.8.5'
    ]
)