├── .gitignore ├── .hgignore ├── .hgtags ├── CHANGES ├── LICENSE ├── MANIFEST.in ├── README.rst ├── doc ├── Makefile ├── make.bat └── source │ ├── concepts.rst │ ├── conf.py │ ├── index.rst │ ├── install.rst │ └── parsing.rst ├── imposm ├── __init__.py └── parser │ ├── __init__.py │ ├── pbf │ ├── __init__.py │ ├── multiproc.py │ ├── osm.cc │ └── parser.py │ ├── simple.py │ ├── test │ ├── __init__.py │ ├── test.osm │ ├── test.osm.bz2 │ ├── test.pbf │ └── test_simple_parser.py │ ├── util.py │ └── xml │ ├── __init__.py │ ├── multiproc.py │ ├── parser.py │ └── util.py ├── osm.proto ├── release.py ├── setup.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | .pyc 2 | .egg-info 3 | doc/build 4 | dist/ 5 | .tox 6 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | a8a6bd0d5234c805f7e979939a2b0a5f9372ea2b v1.0.1 2 | c7e1cd122f00f1aeddfaf4c213ce85ec70fd42f4 v1.0.0 3 | 5307e5693e0c0869b934c418e9c98a9280bfa99f v1.0.2 4 | 11640b8eea21493f00c7a1b77e2fcebfb14b30f7 v1.0.3 5 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | Changelog 2 | --------- 3 | 4 | 1.0.7 2015-01-10 5 | ~~~~~~~~~~~~~~~~ 6 | 7 | - updated protobuf parser 8 | 9 | 1.0.6 2014-10-31 10 | ~~~~~~~~~~~~~~~~ 11 | 12 | - fixed bug in protobuf parser that could cause segfaults 13 | 14 | 1.0.5 2013-09-13 15 | ~~~~~~~~~~~~~~~~ 16 | 17 | - support PBF without granularity value 18 | - improved support for non-pretty-printed XML 19 | 20 | 1.0.4 2012-12-10 21 | ~~~~~~~~~~~~~~~~ 22 | 23 | - improved support for non-pretty-printed XML 24 | - fixed dependency check for multiprocessing 25 | 26 | 1.0.3 2011-07-21 27 | ~~~~~~~~~~~~~~~~ 28 | 29 | - support for uncompressed PBF 30 | - bug fix for PBF without dense nodes 31 | 32 | 1.0.2 2011-03-10 33 | ~~~~~~~~~~~~~~~~ 34 | 35 | - improved regexp based XML coord parser 36 | - prevent mmap overflow in XMLChunker without coord_callback 37 | - successfully parsed whole planet.osm 38 | 39 | 1.0.0 2011-02-22 40 | ~~~~~~~~~~~~~~~~ 41 | 42 | - first release 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include README.rst 3 | include CHANGES 4 | include LICENSE 5 | include setup.py 6 | include osm.proto 7 | 8 | recursive-include imposm/parser/test *.osm *.pbf *.osm.bz2 9 | exclude imposm/parser/pbf/osm.pb.cc 10 | exclude imposm/parser/pbf/osm.pb.h 11 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | imposm.parser - OpenStreetMap XML/PBF parser for Python 2 | ======================================================= 3 | 4 | ``imposm.parser`` is a Python library that parses OpenStreetMap data in `XML `_ and `PBF `_ format. 5 | 6 | It has a simple API and it is fast and easy to use. It also works across multiple CPU/cores for extra speed. 7 | 8 | .. note:: 9 | **Imposm-parser is in maintenance mode and it's unlikely that we will provide any further releases.** 10 | 11 | 12 | It is developed and supported by `Omniscale `_ and released under the `Apache Software License 2.0 `_. 13 | 14 | Example 15 | ------- 16 | 17 | Here is an example that parses an OSM file and counts all ways that are tagged as a highway. 18 | :: 19 | 20 | from imposm.parser import OSMParser 21 | 22 | # simple class that handles the parsed OSM data. 23 | class HighwayCounter(object): 24 | highways = 0 25 | 26 | def ways(self, ways): 27 | # callback method for ways 28 | for osmid, tags, refs in ways: 29 | if 'highway' in tags: 30 | self.highways += 1 31 | 32 | # instantiate counter and parser and start parsing 33 | counter = HighwayCounter() 34 | p = OSMParser(concurrency=4, ways_callback=counter.ways) 35 | p.parse('germany.osm.pbf') 36 | 37 | # done 38 | print counter.highways 39 | 40 | 41 | Source and issue tracker 42 | ------------------------ 43 | 44 | Source code and issue tracker are available at ``_. 45 | 46 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 
11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | 15 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest 16 | 17 | help: 18 | @echo "Please use \`make ' where is one of" 19 | @echo " html to make standalone HTML files" 20 | @echo " dirhtml to make HTML files named index.html in directories" 21 | @echo " singlehtml to make a single large HTML file" 22 | @echo " pickle to make pickle files" 23 | @echo " json to make JSON files" 24 | @echo " htmlhelp to make HTML files and a HTML help project" 25 | @echo " qthelp to make HTML files and a qthelp project" 26 | @echo " devhelp to make HTML files and a Devhelp project" 27 | @echo " epub to make an epub" 28 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 29 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 30 | @echo " text to make text files" 31 | @echo " man to make manual pages" 32 | @echo " changes to make an overview of all changed/added/deprecated items" 33 | @echo " linkcheck to check all external links for integrity" 34 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 35 | 36 | clean: 37 | -rm -rf $(BUILDDIR)/* 38 | 39 | html: 40 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 41 | @echo 42 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 43 | 44 | dirhtml: 45 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 48 | 49 | singlehtml: 50 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 51 | @echo 52 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 53 | 54 | pickle: 55 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 56 | @echo 57 | @echo "Build finished; now you can process the pickle files." 58 | 59 | json: 60 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 61 | @echo 62 | @echo "Build finished; now you can process the JSON files." 63 | 64 | htmlhelp: 65 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 66 | @echo 67 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 68 | ".hhp project file in $(BUILDDIR)/htmlhelp." 69 | 70 | qthelp: 71 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 72 | @echo 73 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 74 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 75 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/imposmparser.qhcp" 76 | @echo "To view the help file:" 77 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/imposmparser.qhc" 78 | 79 | devhelp: 80 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 81 | @echo 82 | @echo "Build finished." 83 | @echo "To view the help file:" 84 | @echo "# mkdir -p $$HOME/.local/share/devhelp/imposmparser" 85 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/imposmparser" 86 | @echo "# devhelp" 87 | 88 | epub: 89 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 90 | @echo 91 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 92 | 93 | latex: 94 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 95 | @echo 96 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 
97 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 98 | "(use \`make latexpdf' here to do that automatically)." 99 | 100 | latexpdf: 101 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 102 | @echo "Running LaTeX files through pdflatex..." 103 | make -C $(BUILDDIR)/latex all-pdf 104 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 105 | 106 | text: 107 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 108 | @echo 109 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 110 | 111 | man: 112 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 113 | @echo 114 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 115 | 116 | changes: 117 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 118 | @echo 119 | @echo "The overview file is in $(BUILDDIR)/changes." 120 | 121 | linkcheck: 122 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 123 | @echo 124 | @echo "Link check complete; look for any errors in the above output " \ 125 | "or in $(BUILDDIR)/linkcheck/output.txt." 126 | 127 | doctest: 128 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 129 | @echo "Testing of doctests in the sources finished, look at the " \ 130 | "results in $(BUILDDIR)/doctest/output.txt." 131 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | if NOT "%PAPER%" == "" ( 11 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 12 | ) 13 | 14 | if "%1" == "" goto help 15 | 16 | if "%1" == "help" ( 17 | :help 18 | echo.Please use `make ^` where ^ is one of 19 | echo. html to make standalone HTML files 20 | echo. dirhtml to make HTML files named index.html in directories 21 | echo. singlehtml to make a single large HTML file 22 | echo. pickle to make pickle files 23 | echo. json to make JSON files 24 | echo. htmlhelp to make HTML files and a HTML help project 25 | echo. qthelp to make HTML files and a qthelp project 26 | echo. devhelp to make HTML files and a Devhelp project 27 | echo. epub to make an epub 28 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 29 | echo. text to make text files 30 | echo. man to make manual pages 31 | echo. changes to make an overview over all changed/added/deprecated items 32 | echo. linkcheck to check all external links for integrity 33 | echo. doctest to run all doctests embedded in the documentation if enabled 34 | goto end 35 | ) 36 | 37 | if "%1" == "clean" ( 38 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 39 | del /q /s %BUILDDIR%\* 40 | goto end 41 | ) 42 | 43 | if "%1" == "html" ( 44 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 45 | if errorlevel 1 exit /b 1 46 | echo. 47 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 48 | goto end 49 | ) 50 | 51 | if "%1" == "dirhtml" ( 52 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 53 | if errorlevel 1 exit /b 1 54 | echo. 55 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 56 | goto end 57 | ) 58 | 59 | if "%1" == "singlehtml" ( 60 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 61 | if errorlevel 1 exit /b 1 62 | echo. 
63 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 64 | goto end 65 | ) 66 | 67 | if "%1" == "pickle" ( 68 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 69 | if errorlevel 1 exit /b 1 70 | echo. 71 | echo.Build finished; now you can process the pickle files. 72 | goto end 73 | ) 74 | 75 | if "%1" == "json" ( 76 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished; now you can process the JSON files. 80 | goto end 81 | ) 82 | 83 | if "%1" == "htmlhelp" ( 84 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished; now you can run HTML Help Workshop with the ^ 88 | .hhp project file in %BUILDDIR%/htmlhelp. 89 | goto end 90 | ) 91 | 92 | if "%1" == "qthelp" ( 93 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 94 | if errorlevel 1 exit /b 1 95 | echo. 96 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 97 | .qhcp project file in %BUILDDIR%/qthelp, like this: 98 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\imposmparser.qhcp 99 | echo.To view the help file: 100 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\imposmparser.ghc 101 | goto end 102 | ) 103 | 104 | if "%1" == "devhelp" ( 105 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 106 | if errorlevel 1 exit /b 1 107 | echo. 108 | echo.Build finished. 109 | goto end 110 | ) 111 | 112 | if "%1" == "epub" ( 113 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 117 | goto end 118 | ) 119 | 120 | if "%1" == "latex" ( 121 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 122 | if errorlevel 1 exit /b 1 123 | echo. 124 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 125 | goto end 126 | ) 127 | 128 | if "%1" == "text" ( 129 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 130 | if errorlevel 1 exit /b 1 131 | echo. 132 | echo.Build finished. The text files are in %BUILDDIR%/text. 133 | goto end 134 | ) 135 | 136 | if "%1" == "man" ( 137 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 141 | goto end 142 | ) 143 | 144 | if "%1" == "changes" ( 145 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.The overview file is in %BUILDDIR%/changes. 149 | goto end 150 | ) 151 | 152 | if "%1" == "linkcheck" ( 153 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Link check complete; look for any errors in the above output ^ 157 | or in %BUILDDIR%/linkcheck/output.txt. 158 | goto end 159 | ) 160 | 161 | if "%1" == "doctest" ( 162 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 163 | if errorlevel 1 exit /b 1 164 | echo. 165 | echo.Testing of doctests in the sources finished, look at the ^ 166 | results in %BUILDDIR%/doctest/output.txt. 
167 | goto end 168 | ) 169 | 170 | :end 171 | -------------------------------------------------------------------------------- /doc/source/concepts.rst: -------------------------------------------------------------------------------- 1 | Concepts 2 | ======== 3 | 4 | To use ``imposm.parser`` you need to understand three basic concepts: Types, Callbacks and Filters 5 | 6 | Types 7 | ----- 8 | 9 | .. note:: In this document Node, Way, Relation with a capital letter refer to the OSM types and `node`, `way`, `relation` refer to the Imposm types. 10 | 11 | OSM has three fundamental element types: Nodes, Ways and Relations. ``imposm.parser`` distinguishes between `coords` and `nodes` for OSM Nodes. 12 | 13 | `coords` only store coordinates and there are `coords` for *every* OSM Node. `nodes` also store tags and there are *only* `nodes` for OSM Nodes *with* tags. 14 | 15 | 16 | coords 17 | ~~~~~~ 18 | 19 | A tuple with the OSM ID, the longitude and latitude of that node. 20 | 21 | :: 22 | 23 | (4234432, 175.2, -32.1) 24 | 25 | ``imposm.parser`` will return a `coord` for each OSM Node, even if this OSM Node is also a `node` (i.e. it has tags). 26 | 27 | nodes 28 | ~~~~~ 29 | 30 | A tuple with the OSM ID, a tags dictionary and a nested tuple with the longitude and latitude of that node. 31 | 32 | :: 33 | 34 | (982347, {'name': 'Somewhere', 'place': 'village'}, (-120.2, 23.21)) 35 | 36 | 37 | ways 38 | ~~~~ 39 | 40 | A tuple with the OSM ID, a tags dictionary and a list of references. 41 | 42 | :: 43 | 44 | (87644, {'name': 'my way', 'highway': 'path'}, [123, 345, 567]) 45 | 46 | relations 47 | ~~~~~~~~~ 48 | 49 | A tuple with the OSM ID, a tags dictionary and a list of member tuples. 50 | Each member tuple contains the reference, the type (one of `'node'`, `'way'`, `'relation'`) and the role. 51 | 52 | :: 53 | 54 | (87644, {'type': 'multipolygon'}, [(123, 'way', 'outer'), (234, 'way', 'inner')]) 55 | 56 | .. _concepts_callbacks: 57 | 58 | Callbacks 59 | --------- 60 | 61 | The parser takes one callback function for each of the four data types (`coords`, `nodes`, `ways` and `relations`). The callbacks are optional, i.e. you don't need to pass a relations callback if you are not interested in relations. 62 | 63 | The functions should expect a list with zero or more items of the corresponding type. 64 | 65 | Here is an example callback that prints the coordinates of all Nodes. 66 | 67 | :: 68 | 69 | def coords_callback(coords): 70 | for osm_id, lon, lat in coords: 71 | print '%s %.4f %.4f' % (osm_id, lon, lat) 72 | 73 | 74 | .. _concepts_tag_filters: 75 | 76 | Tag filters 77 | ----------- 78 | 79 | Tag filters are functions that manipulate tag dictionaries. The functions should modify the dictionary in-place; the return value is ignored. 80 | 81 | Elements will be handled differently if you remove all tags from the dictionary. `nodes` and `relations` with empty tags will not be returned, but `ways` will be, since they might be needed for building relations. 82 | 83 | Here is an example filter that filters the tags with a whitelist.
84 | :: 85 | 86 | whitelist = set(('name', 'place', 'amenity')) 87 | 88 | def tag_filter(tags): 89 | for key in tags.keys(): 90 | if key not in whitelist: 91 | del tags[key] 92 | if 'name' in tags and len(tags) == 1: 93 | # tags with only a name have no information 94 | # how to handle this element 95 | del tags['name'] 96 | 97 | 98 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # imposm.parser documentation build configuration file, created by 4 | # sphinx-quickstart on Thu Feb 17 15:23:25 2011. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = ['sphinx.ext.autodoc'] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | project = u'imposm.parser' 44 | copyright = u'2011, Oliver Tonnhofer' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = '1.0' 52 | # The full version, including alpha/beta/rc tags. 53 | release = '1.0.8a' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = [] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 
77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | html_theme = 'sphinxdoc' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | #html_title = None 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_domain_indices = True 142 | 143 | # If false, no index is generated. 144 | #html_use_index = True 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | #html_show_sourcelink = True 151 | 152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 153 | #html_show_sphinx = True 154 | 155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 156 | #html_show_copyright = True 157 | 158 | # If true, an OpenSearch description file will be output, and all pages will 159 | # contain a tag referring to it. The value of this option must be the 160 | # base URL from which the finished HTML is served. 161 | #html_use_opensearch = '' 162 | 163 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 164 | #html_file_suffix = None 165 | 166 | # Output file base name for HTML help builder. 
167 | htmlhelp_basename = 'imposmparserdoc' 168 | 169 | 170 | # -- Options for LaTeX output -------------------------------------------------- 171 | 172 | # The paper size ('letter' or 'a4'). 173 | #latex_paper_size = 'letter' 174 | 175 | # The font size ('10pt', '11pt' or '12pt'). 176 | #latex_font_size = '10pt' 177 | 178 | # Grouping the document tree into LaTeX files. List of tuples 179 | # (source start file, target name, title, author, documentclass [howto/manual]). 180 | latex_documents = [ 181 | ('index', 'imposmparser.tex', u'imposm.parser Documentation', 182 | u'Oliver Tonnhofer', 'manual'), 183 | ] 184 | 185 | # The name of an image file (relative to this directory) to place at the top of 186 | # the title page. 187 | #latex_logo = None 188 | 189 | # For "manual" documents, if this is true, then toplevel headings are parts, 190 | # not chapters. 191 | #latex_use_parts = False 192 | 193 | # If true, show page references after internal links. 194 | #latex_show_pagerefs = False 195 | 196 | # If true, show URL addresses after external links. 197 | #latex_show_urls = False 198 | 199 | # Additional stuff for the LaTeX preamble. 200 | #latex_preamble = '' 201 | 202 | # Documents to append as an appendix to all manuals. 203 | #latex_appendices = [] 204 | 205 | # If false, no module index is generated. 206 | #latex_domain_indices = True 207 | 208 | 209 | # -- Options for manual page output -------------------------------------------- 210 | 211 | # One entry per manual page. List of tuples 212 | # (source start file, name, description, authors, manual section). 213 | man_pages = [ 214 | ('index', 'imposmparser', u'imposm.parser Documentation', 215 | [u'Oliver Tonnhofer'], 1) 216 | ] 217 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. include:: ../../README.rst 3 | 4 | .. Contents: 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | install 10 | concepts 11 | parsing 12 | 13 | .. Indices and tables 14 | .. ================== 15 | .. 16 | .. * :ref:`genindex` 17 | .. * :ref:`modindex` 18 | .. * :ref:`search` 19 | 20 | -------------------------------------------------------------------------------- /doc/source/install.rst: -------------------------------------------------------------------------------- 1 | .. Installation 2 | .. ============ 3 | 4 | Requirements 5 | ------------ 6 | 7 | ``imposm.parser`` runs with Python 2.5, 2.6 and 2.7 and is tested on Linux and Mac OS X. 8 | 9 | The PBF parser is written as a C extension, so you need a C/C++ compiler, the Python development headers and Google Protobuf. 10 | 11 | On Ubuntu:: 12 | 13 | sudo aptitude install build-essential python-dev protobuf-compiler libprotobuf-dev 14 | 15 | Installation 16 | ------------ 17 | 18 | You can install ``imposm.parser`` with ``pip`` or ``easy_install``. 19 | 20 | :: 21 | 22 | pip install imposm.parser 23 | 24 | :: 25 | 26 | easy_install imposm.parser 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /doc/source/parsing.rst: -------------------------------------------------------------------------------- 1 | Parsing API 2 | =========== 3 | 4 | ``imposm.parser`` comes with a single ``OSMParser`` class that implements a simple-to-use, callback-based parser for OSM files. 5 | 6 | It supports `XML `_ and `PBF `_ files. It also supports BZip2 compressed XML files.
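Here is a minimal usage sketch that collects all ways of a file; the filename is only a placeholder and the callback name is arbitrary (see the README for a complete highway-counting example)::

    from imposm.parser import OSMParser

    all_ways = []

    def ways_callback(ways):
        # ways is a list of (osmid, tags, refs) tuples
        all_ways.extend(ways)

    p = OSMParser(concurrency=2, ways_callback=ways_callback)
    p.parse('some_file.osm.pbf')
    print len(all_ways)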
7 | 8 | Concurrency 9 | ~~~~~~~~~~~ 10 | 11 | The parser uses multiprocessing to distribute the parsing across multiple CPUs. This works with both PBF and XML files. 12 | 13 | You can pass ``concurrency`` as an argument to ``OSMParser``; it defaults to the number of CPUs/cores of the host system. ``concurrency`` defines the number of parser processes. The callbacks are handled in the main process and the decompression (if you have a ``.osm.bz2`` file) runs in an additional process, so you might get better results if you reduce this number on systems with more than two cores. 14 | 15 | You can double the number on systems with hyper-threading CPUs. 16 | 17 | 18 | API 19 | ~~~ 20 | 21 | .. module:: imposm.parser 22 | 23 | .. autoclass:: OSMParser 24 | :members: 25 | -------------------------------------------------------------------------------- /imposm/__init__.py: -------------------------------------------------------------------------------- 1 | __import__('pkg_resources').declare_namespace(__name__) -------------------------------------------------------------------------------- /imposm/parser/__init__.py: -------------------------------------------------------------------------------- 1 | from imposm.parser.simple import OSMParser 2 | 3 | __all__ = ['OSMParser'] -------------------------------------------------------------------------------- /imposm/parser/pbf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omniscale/imposm-parser/c1045e989af3d7d31086c2662dc632add5a45ed1/imposm/parser/pbf/__init__.py -------------------------------------------------------------------------------- /imposm/parser/pbf/multiproc.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Omniscale GmbH & Co. KG 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
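# PBFMultiProcParser reads the blob offsets of a PBF file and distributes them
# to a pool of PBFParserProcess workers via a JoinableQueue. Each worker parses
# its primitive block with PBFParser and forwards the decoded elements through
# the nodes/ways/relations/coords queues; a None entry on the position queue
# tells a worker to shut down.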
14 | 15 | import multiprocessing 16 | 17 | from imposm.parser.pbf.parser import PBFFile, PBFParser 18 | from imposm.parser.util import setproctitle 19 | 20 | class PBFParserProcess(PBFParser, multiprocessing.Process): 21 | def __init__(self, pos_queue, *args, **kw): 22 | multiprocessing.Process.__init__(self) 23 | PBFParser.__init__(self, *args, **kw) 24 | self.daemon = True 25 | self.pos_queue = pos_queue 26 | 27 | def run(self): 28 | setproctitle('imposm pbf parser') 29 | while True: 30 | pos = self.pos_queue.get() 31 | if pos is None: 32 | self.pos_queue.task_done() 33 | break 34 | 35 | self.parse(pos['filename'], offset=pos['blob_pos'], 36 | size=pos['blob_size']) 37 | self.pos_queue.task_done() 38 | 39 | class PBFMultiProcParser(object): 40 | nodes_tag_filter = None 41 | ways_tag_filter = None 42 | relations_tag_filter = None 43 | 44 | def __init__(self, pool_size, nodes_queue=None, ways_queue=None, 45 | relations_queue=None, coords_queue=None, marshal_elem_data=False): 46 | self.pool_size = pool_size 47 | self.nodes_callback = nodes_queue.put if nodes_queue else None 48 | self.ways_callback = ways_queue.put if ways_queue else None 49 | self.relations_callback = relations_queue.put if relations_queue else None 50 | self.coords_callback = coords_queue.put if coords_queue else None 51 | self.marshal = marshal_elem_data 52 | def parse(self, filename): 53 | pos_queue = multiprocessing.JoinableQueue(32) 54 | pool = [] 55 | for _ in xrange(self.pool_size): 56 | proc = PBFParserProcess(pos_queue, nodes_callback=self.nodes_callback, 57 | coords_callback=self.coords_callback, ways_callback=self.ways_callback, 58 | relations_callback=self.relations_callback, 59 | nodes_tag_filter=self.nodes_tag_filter, 60 | ways_tag_filter=self.ways_tag_filter, 61 | relations_tag_filter=self.relations_tag_filter, 62 | marshal=self.marshal 63 | ) 64 | pool.append(proc) 65 | proc.start() 66 | 67 | reader = PBFFile(filename) 68 | 69 | for pos in reader.blob_offsets(): 70 | pos_queue.put(pos) 71 | 72 | pos_queue.join() 73 | 74 | for proc in pool: 75 | pos_queue.put(None) 76 | for proc in pool: 77 | proc.join() 78 | 79 | if __name__ == '__main__': 80 | import sys 81 | 82 | def count_proc(type, queue): 83 | def count(): 84 | count = 0 85 | while True: 86 | nodes = queue.get() 87 | if nodes is None: 88 | queue.task_done() 89 | break 90 | count += len(nodes) 91 | queue.task_done() 92 | print type, count 93 | return count 94 | 95 | 96 | nodes_queue = multiprocessing.JoinableQueue(128) 97 | ways_queue = multiprocessing.JoinableQueue(128) 98 | relations_queue = multiprocessing.JoinableQueue(128) 99 | 100 | procs = [ 101 | multiprocessing.Process(target=count_proc('nodes', nodes_queue)), 102 | multiprocessing.Process(target=count_proc('ways', ways_queue)), 103 | multiprocessing.Process(target=count_proc('relations', relations_queue)) 104 | ] 105 | for proc in procs: 106 | proc.start() 107 | 108 | parser = PBFMultiProcParser(2, nodes_queue=nodes_queue, 109 | ways_queue=ways_queue, relations_queue=relations_queue) 110 | parser.parse(sys.argv[1]) 111 | 112 | nodes_queue.put(None) 113 | ways_queue.put(None) 114 | relations_queue.put(None) 115 | 116 | for proc in procs: 117 | proc.join() -------------------------------------------------------------------------------- /imposm/parser/pbf/parser.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Omniscale GmbH & Co. 
KG 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import with_statement 16 | 17 | import struct 18 | import sys 19 | import zlib 20 | 21 | from marshal import dumps 22 | 23 | from imposm.parser.pbf import OSMPBF 24 | 25 | SUPPORTED_FEATURES = set(['OsmSchema-V0.6', 'DenseNodes']) 26 | 27 | 28 | _MEMBERTYPE = {0 : 'node', 29 | 1 : 'way', 30 | 2 : 'relation'} 31 | 32 | 33 | 34 | class PBFParser(object): 35 | """ 36 | OSM PBF parser. 37 | 38 | :param xxx_callback: 39 | callback functions for coords, nodes, ways and relations. 40 | Each callback function gets called with a list of multiple elements. 41 | 42 | :param xxx_filter: 43 | functions that can manipulate the tag dictionary. 44 | Nodes and relations without tags will not passed to the callback. 45 | 46 | :param marshal: 47 | return the data as a marshaled string 48 | """ 49 | def __init__(self, nodes_callback=None, ways_callback=None, 50 | relations_callback=None, coords_callback=None, nodes_tag_filter=None, 51 | ways_tag_filter=None, relations_tag_filter=None, marshal=False): 52 | self.nodes_callback = nodes_callback 53 | self.ways_callback = ways_callback 54 | self.relations_callback = relations_callback 55 | self.coords_callback = coords_callback 56 | self.nodes_tag_filter = nodes_tag_filter 57 | self.ways_tag_filter = ways_tag_filter 58 | self.relations_tag_filter = relations_tag_filter 59 | self.marshal = marshal 60 | 61 | def parse(self, filename, offset, size): 62 | """ 63 | Parse primitive block from `filename`. 
64 | 65 | :param filename: path to PBF file 66 | :param offset: byte offset of the primitive block to parse 67 | :param size: size in bytes of the primitive block to parse 68 | """ 69 | reader = PrimitiveBlockParser(filename, offset, size) 70 | 71 | if self.nodes_callback or self.coords_callback: 72 | self.handle_nodes(reader) 73 | if self.ways_callback: 74 | self.handle_ways(reader) 75 | if self.relations_callback: 76 | self.handle_relations(reader) 77 | 78 | def handle_nodes(self, reader): 79 | nodes = [] 80 | coords = [] 81 | nodes_callback = self.nodes_callback 82 | coords_callback = self.coords_callback 83 | for node in reader.nodes(): 84 | if nodes_callback: 85 | if self.nodes_tag_filter: 86 | self.nodes_tag_filter(node[1]) 87 | if node[1]: 88 | if self.marshal: 89 | nodes.append((node[0], dumps((node[1], node[2]), 2))) 90 | else: 91 | nodes.append((node[0], node[1], node[2])) 92 | if len(nodes) >= 256: 93 | nodes_callback(nodes) 94 | nodes = [] 95 | if coords_callback: 96 | coords.append((node[0], node[2][0], node[2][1])) 97 | if len(coords) >= 512: 98 | coords_callback(coords) 99 | coords = [] 100 | if nodes_callback: 101 | nodes_callback(nodes) 102 | if coords_callback: 103 | coords_callback(coords) 104 | 105 | def handle_ways(self, reader): 106 | ways = [] 107 | for way in reader.ways(): 108 | if self.ways_tag_filter: 109 | self.ways_tag_filter(way[1]) 110 | # always return ways, might be needed for relations 111 | if self.marshal: 112 | ways.append((way[0], dumps((way[1], way[2]), 2))) 113 | else: 114 | ways.append((way[0], way[1], way[2])) 115 | if len(ways) >= 256: 116 | self.ways_callback(ways) 117 | ways = [] 118 | self.ways_callback(ways) 119 | 120 | def handle_relations(self, reader): 121 | relations = [] 122 | for relation in reader.relations(): 123 | if self.relations_tag_filter: 124 | self.relations_tag_filter(relation[1]) 125 | if not relation[1]: 126 | continue 127 | if self.marshal: 128 | relations.append((relation[0], dumps((relation[1], relation[2]), 2))) 129 | else: 130 | relations.append((relation[0], relation[1], relation[2])) 131 | if len(relations) >= 256: 132 | self.relations_callback(relations) 133 | relations = [] 134 | self.relations_callback(relations) 135 | 136 | def decoded_stringtable(stringtable): 137 | result = [] 138 | for s in stringtable: 139 | result.append(s.decode('utf-8')) 140 | return result 141 | 142 | class PrimitiveBlockParser(object): 143 | """ 144 | Low level PBF primitive block parser. 145 | 146 | Parses a single primitive block and handles OSM PBF internals like 147 | dense nodes, delta encoding, stringtables, etc. 
148 | 149 | :param filename: path to PBF file 150 | :param offset: byte offset of the primitive block to parse 151 | :param size: size in bytes of the primitive block to parse 152 | 153 | """ 154 | def __init__(self, filename, blob_pos, blob_size): 155 | self.pos = filename, blob_pos, blob_size 156 | data = read_blob_data(filename, blob_pos, blob_size) 157 | self.primitive_block = OSMPBF.PrimitiveBlock() 158 | self.primitive_block.ParseFromString(data) 159 | self.primitivegroup = self.primitive_block.primitivegroup 160 | self.stringtable = decoded_stringtable(self.primitive_block.stringtable.s) 161 | 162 | def __repr__(self): 163 | return '' % (self.pos, ) 164 | 165 | def _get_tags(self, element, pos): 166 | tags = {} 167 | key = None 168 | value = None 169 | keyflag = False 170 | if pos >= len(element): 171 | return {}, pos 172 | while True: 173 | key_val = element[pos] 174 | pos += 1 175 | if key_val == 0: 176 | break 177 | if not keyflag: 178 | key = key_val 179 | keyflag = True 180 | else: 181 | value = key_val 182 | tags[self.stringtable[key]] = self.stringtable[value] 183 | keyflag = False 184 | return tags, pos 185 | 186 | def nodes(self): 187 | """ 188 | Return an iterator for all *nodes* in this primitive block. 189 | 190 | :rtype: iterator of ``(osm_id, tags, (lon, lat))`` tuples 191 | """ 192 | for group in self.primitivegroup: 193 | dense = group.dense 194 | if dense: 195 | granularity = self.primitive_block.granularity or 100 196 | lat_offset = self.primitive_block.lat_offset or 0 197 | lon_offset = self.primitive_block.lon_offset or 0 198 | coord_scale = 0.000000001 199 | get_tags = self._get_tags 200 | ids = dense.id 201 | lats = dense.lat 202 | lons = dense.lon 203 | keys_vals = dense.keys_vals 204 | last_id = last_lat = last_lon = last_keysvals_pos = 0 205 | for i in xrange(len(ids)): 206 | last_id += ids[i] 207 | last_lat += coord_scale * (lat_offset + (granularity * lats[i])) 208 | last_lon += coord_scale * (lon_offset + (granularity * lons[i])) 209 | tags, last_keysvals_pos = get_tags(keys_vals, last_keysvals_pos) 210 | yield (last_id, tags, (last_lon, last_lat)) 211 | nodes = group.nodes 212 | if nodes: 213 | for node in nodes: 214 | keys, vals = node.keys, node.vals 215 | tags = [] 216 | for i in xrange(len(keys)): 217 | tags.append((self.stringtable[keys[i]], self.stringtable[vals[i]])) 218 | yield (node.id, tags, (node.lon, node.lat)) 219 | 220 | def ways(self): 221 | """ 222 | Return an iterator for all *ways* in this primitive block. 223 | 224 | :rtype: iterator of ``(osm_id, tags, [ref1, ref2, ...])`` tuples 225 | """ 226 | for group in self.primitivegroup: 227 | ways = group.ways 228 | if ways: 229 | for way in ways: 230 | keys = way.keys 231 | vals = way.vals 232 | delta_refs = way.refs 233 | 234 | tags = {} 235 | for i in xrange(len(keys)): 236 | tags[self.stringtable[keys[i]]] = self.stringtable[vals[i]] 237 | refs = [] 238 | ref = 0 239 | for delta in delta_refs: 240 | ref += delta 241 | refs.append(ref) 242 | yield (way.id, tags, refs) 243 | 244 | def relations(self): 245 | """ 246 | Return an iterator for all *relations* in this primitive block. 
247 | 248 | :rtype: iterator of ``(osm_id, tags, [(ref1, type, role), ...])`` tuples 249 | 250 | """ 251 | for group in self.primitivegroup: 252 | relations = group.relations 253 | if relations: 254 | for relation in relations: 255 | members = [] 256 | memids = relation.memids 257 | rel_types = relation.types 258 | roles_sids = relation.roles_sid 259 | keys = relation.keys 260 | vals = relation.vals 261 | memid = 0 262 | for i in xrange(len(rel_types)): 263 | memid += memids[i] 264 | members.append((memid, _MEMBERTYPE[rel_types[i]], self.stringtable[roles_sids[i]])) 265 | tags = {} 266 | for i in xrange(len(keys)): 267 | tags[self.stringtable[keys[i]]] = self.stringtable[vals[i]] 268 | yield (relation.id, tags, members) 269 | 270 | class PBFHeader(object): 271 | def __init__(self, filename, blob_pos, blob_size): 272 | data = read_blob_data(filename, blob_pos, blob_size) 273 | self.header_block = OSMPBF.HeaderBlock() 274 | self.header_block.ParseFromString(data) 275 | 276 | def required_features(self): 277 | return set(self.header_block.required_features) 278 | 279 | 280 | def read_blob_data(filename, blob_pos, blob_size): 281 | """ 282 | Returns the (unzipped) blob data from filename and position. 283 | """ 284 | with open(filename, 'rb') as f: 285 | f.seek(blob_pos) 286 | blob_data = f.read(blob_size) 287 | 288 | blob = OSMPBF.Blob() 289 | blob.ParseFromString(blob_data) 290 | raw_data = blob.raw 291 | if raw_data: 292 | return raw_data 293 | return zlib.decompress(blob.zlib_data) 294 | 295 | import time 296 | 297 | class PBFFile(object): 298 | """ 299 | OSM PBF file reader. 300 | 301 | Parses the low-level file structure with header sizes, 302 | offsets and blob headers. 303 | 304 | :param filename: path to the PBF file 305 | """ 306 | def __init__(self, filename): 307 | self.filename = filename 308 | self.file = open(filename, 'rb') 309 | self.next_blob_pos = self.prev_blob_pos = 0 310 | header_offsets = self._skip_header() 311 | self.header = PBFHeader(self.filename, header_offsets['blob_pos'], header_offsets['blob_size']) 312 | self.check_features() 313 | 314 | def check_features(self): 315 | missing_features = self.header.required_features().difference(SUPPORTED_FEATURES) 316 | if missing_features: 317 | raise NotImplementedError( 318 | '%s requires features not implemented by this parser: %s' % 319 | (self.filename, ', '.join(missing_features)) 320 | ) 321 | 322 | def _skip_header(self): 323 | return self.blob_offsets().next() 324 | 325 | def seek(self, pos): 326 | self.next_blob_pos = pos 327 | 328 | def rewind(self): 329 | self.next_blob_pos = self.prev_blob_pos 330 | 331 | def blob_offsets(self): 332 | """ 333 | Returns an iterator of the blob offsets in this file. 334 | 335 | Each offsets is stored in a dictionary with: 336 | 337 | - `filename` the path of this PBF file. 
338 | - `blob_pos` the byte offset 339 | - `blob_size` the size of this blob in bytes 340 | """ 341 | while True: 342 | self.file.seek(self.next_blob_pos) 343 | 344 | blob_header_size = self._blob_header_size() 345 | if not blob_header_size: break 346 | 347 | blob_size = self._blob_size(self.file.read(blob_header_size)) 348 | blob_pos = self.next_blob_pos + 4 + blob_header_size 349 | blob_header_pos=self.next_blob_pos, 350 | prev_blob_header_pos = self.prev_blob_pos 351 | self.prev_blob_pos = self.next_blob_pos 352 | self.next_blob_pos = blob_pos + blob_size 353 | yield dict(blob_pos=blob_pos, blob_size=blob_size, 354 | blob_header_pos=blob_header_pos, 355 | prev_blob_header_pos=prev_blob_header_pos, 356 | filename=self.filename) 357 | 358 | def primitive_block_parsers(self): 359 | """ 360 | Returns an iterator of PrimitiveBlockParser. 361 | """ 362 | for pos in self.blob_offsets(): 363 | yield PrimitiveBlockParser(self.filename, pos['blob_pos'], pos['blob_size']) 364 | 365 | def _blob_size(self, data): 366 | blob_header = OSMPBF.BlobHeader() 367 | blob_header.ParseFromString(data) 368 | return blob_header.datasize 369 | 370 | def _blob_header_size(self): 371 | bytes = self.file.read(4) 372 | if bytes: 373 | return struct.unpack('!i', bytes)[0] 374 | return None 375 | 376 | 377 | def read_pbf(filename): 378 | pbf = PBFFile(filename) 379 | for block in pbf.primitive_block_parsers(): 380 | for node in block.nodes(): 381 | pass 382 | for way in block.ways(): 383 | pass 384 | for relation in block.relations(): 385 | pass 386 | 387 | if __name__ == '__main__': 388 | from timeit import Timer 389 | n = 1 390 | r = 1 391 | print "reading %s, number of calls: %d, repeat: %d" %(sys.argv[1],n,r) 392 | t = Timer("read_pbf(sys.argv[1])", "from __main__ import read_pbf") 393 | times = t.repeat(r,n) 394 | avrg_times = [] 395 | for time in times: 396 | avrg_times.append(time/n) 397 | print "avrg time/call: %f" %(min(avrg_times)) 398 | -------------------------------------------------------------------------------- /imposm/parser/simple.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Omniscale GmbH & Co. KG 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import with_statement 16 | 17 | import multiprocessing 18 | import sys 19 | import time 20 | 21 | from Queue import Empty 22 | 23 | from imposm.parser.util import default_concurrency, fileinput, setproctitle 24 | 25 | class OSMParser(object): 26 | """ 27 | High-level OSM parser. 28 | 29 | :param concurrency: 30 | number of parser processes to start. Defaults to the number of CPUs. 31 | :param xxx_callback: 32 | callback functions for coords, nodes, ways and relations. 33 | Each callback function gets called with a list of multiple elements. 34 | See :ref:`callback concepts `. 35 | 36 | :param xxx_filter: 37 | functions that can manipulate the tag dictionary. 38 | Nodes and relations without tags will not passed to the callback. 
39 | See :ref:`tag filter concepts `. 40 | 41 | """ 42 | def __init__(self, concurrency=None, nodes_callback=None, ways_callback=None, 43 | relations_callback=None, coords_callback=None, nodes_tag_filter=None, 44 | ways_tag_filter=None, relations_tag_filter=None, marshal_elem_data=False): 45 | self.concurrency = concurrency or default_concurrency() 46 | assert self.concurrency >= 1 47 | self.nodes_callback = nodes_callback 48 | self.ways_callback = ways_callback 49 | self.relations_callback = relations_callback 50 | self.coords_callback = coords_callback 51 | self.nodes_tag_filter = nodes_tag_filter 52 | self.ways_tag_filter = ways_tag_filter 53 | self.relations_tag_filter = relations_tag_filter 54 | self.marshal_elem_data = marshal_elem_data 55 | 56 | def parse(self, filename): 57 | """ 58 | Parse the given file. Detects the filetype based on the file suffix. 59 | Supports ``.pbf``, ``.osm`` and ``.osm.bz2``. 60 | """ 61 | if filename.endswith('.pbf'): 62 | return self.parse_pbf_file(filename) 63 | elif filename.endswith(('.osm', '.osm.bz2')): 64 | return self.parse_xml_file(filename) 65 | else: 66 | raise NotImplementedError('unknown file extension') 67 | 68 | def parse_pbf_file(self, filename): 69 | """ 70 | Parse a PBF file. 71 | """ 72 | from imposm.parser.pbf.multiproc import PBFMultiProcParser 73 | return self._parse(filename, PBFMultiProcParser) 74 | 75 | def parse_xml_file(self, filename): 76 | """ 77 | Parse a XML file. 78 | Supports BZip2 compressed files if the filename ends with ``.bz2``. 79 | """ 80 | from imposm.parser.xml.multiproc import XMLMultiProcParser 81 | with fileinput(filename) as input: 82 | return self._parse(input, XMLMultiProcParser) 83 | 84 | def _parse(self, input, parser_class): 85 | queues_callbacks = {} 86 | if self.coords_callback: 87 | queues_callbacks['coords'] = (multiprocessing.JoinableQueue(512), 88 | self.coords_callback) 89 | if self.nodes_callback: 90 | queues_callbacks['nodes'] = (multiprocessing.JoinableQueue(128), 91 | self.nodes_callback) 92 | if self.ways_callback: 93 | queues_callbacks['ways'] = (multiprocessing.JoinableQueue(128), 94 | self.ways_callback) 95 | if self.relations_callback: 96 | queues_callbacks['relations'] = (multiprocessing.JoinableQueue(128), 97 | self.relations_callback) 98 | 99 | def parse_it(): 100 | setproctitle('imposm parser') 101 | queues = dict([(type, q) for type, (q, c) in queues_callbacks.items()]) 102 | 103 | parser = parser_class(self.concurrency, 104 | ways_queue=queues.get('ways'), 105 | coords_queue=queues.get('coords'), 106 | nodes_queue=queues.get('nodes'), 107 | relations_queue=queues.get('relations'), 108 | marshal_elem_data=self.marshal_elem_data 109 | ) 110 | parser.nodes_tag_filter = self.nodes_tag_filter 111 | parser.ways_tag_filter = self.ways_tag_filter 112 | parser.relations_tag_filter = self.relations_tag_filter 113 | parser.parse(input) 114 | for q in queues.values(): 115 | q.put(None) 116 | 117 | proc = multiprocessing.Process(target=parse_it) 118 | proc.start() 119 | 120 | while queues_callbacks: 121 | processed = False 122 | for items_type, (queue, callback) in queues_callbacks.items(): 123 | try: 124 | items = None 125 | while True: 126 | items = queue.get_nowait() 127 | if items is None: 128 | queue.task_done() 129 | del queues_callbacks[items_type] 130 | break 131 | else: 132 | callback(items) 133 | if items: 134 | processed = True 135 | except Empty: 136 | pass 137 | if not processed: 138 | # wait a ms if all queues were empty 139 | # to give the parser a chance to fill them up 140 | 
time.sleep(0.001) 141 | proc.join() 142 | -------------------------------------------------------------------------------- /imposm/parser/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omniscale/imposm-parser/c1045e989af3d7d31086c2662dc632add5a45ed1/imposm/parser/test/__init__.py -------------------------------------------------------------------------------- /imposm/parser/test/test.osm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /imposm/parser/test/test.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omniscale/imposm-parser/c1045e989af3d7d31086c2662dc632add5a45ed1/imposm/parser/test/test.osm.bz2 -------------------------------------------------------------------------------- /imposm/parser/test/test.pbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omniscale/imposm-parser/c1045e989af3d7d31086c2662dc632add5a45ed1/imposm/parser/test/test.pbf -------------------------------------------------------------------------------- /imposm/parser/test/test_simple_parser.py: -------------------------------------------------------------------------------- 1 | # -:- encoding: utf8 -:- 2 | # Copyright 2011 Omniscale GmbH & Co. KG 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
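# The callback methods defined below receive batches (lists) of parsed
# elements. As asserted by the tests in this module, the element tuples are:
#   coords:    (osm_id, lon, lat)
#   nodes:     (osm_id, tags, (lon, lat))
#   ways:      (osm_id, tags, [ref, ...])
#   relations: (osm_id, tags, [(member_id, member_type, role), ...])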
15 | 16 | import os 17 | from imposm.parser import OSMParser 18 | from nose.tools import eq_ 19 | 20 | class ParserTestBase(object): 21 | osm_filename = None 22 | ways_filter = None 23 | nodes_filter = None 24 | relations_filter = None 25 | def __init__(self): 26 | self.nodes = [] 27 | self.coords = [] 28 | self.ways = [] 29 | self.relations = [] 30 | 31 | def parse_nodes(self, nodes): 32 | self.nodes.extend(nodes) 33 | def parse_coords(self, coords): 34 | self.coords.extend(coords) 35 | def parse_ways(self, ways): 36 | self.ways.extend(ways) 37 | def parse_relations(self, relations): 38 | self.relations.extend(relations) 39 | 40 | def parse(self): 41 | parser = OSMParser(2, 42 | nodes_callback=self.parse_nodes, 43 | coords_callback=self.parse_coords, 44 | ways_callback=self.parse_ways, 45 | relations_callback=self.parse_relations, 46 | nodes_tag_filter=self.nodes_filter, 47 | ways_tag_filter=self.ways_filter, 48 | relations_tag_filter=self.relations_filter, 49 | ) 50 | osm_filename = os.path.join(os.path.dirname(__file__), self.osm_filename) 51 | parser.parse(osm_filename) 52 | 53 | def test_parse_result(self): 54 | self.parse() 55 | eq_(len(self.nodes), 1) 56 | eq_(self.nodes[0], 57 | (2, {'name': 'test', 'created_by': 'hand'}, (10.0, 51.0))) 58 | 59 | eq_(len(self.coords), 2) 60 | eq_(self.coords[0], (1, 10.0, 50.0)) 61 | eq_(self.coords[1], (2, 10.0, 51.0)) 62 | 63 | eq_(len(self.ways), 1) 64 | eq_(self.ways[0], 65 | (3, {'highway': 'primary'}, [1, 2])) 66 | 67 | eq_(len(self.relations), 1) 68 | eq_(self.relations[0], 69 | (4, {'name': u'ܵlåû†é'}, [(123, 'way', 'outer'), (124, 'way', 'inner')])) 70 | 71 | class ParserTestBaseWithFilter(ParserTestBase): 72 | def nodes_filter(self, tags): 73 | for tag in tags.keys(): 74 | if tag != 'name': 75 | del tags[tag] 76 | 77 | ways_filter = nodes_filter 78 | def relations_filter(self, tags): 79 | tags.clear() 80 | 81 | def test_parse_result(self): 82 | self.parse() 83 | eq_(len(self.nodes), 1) 84 | eq_(self.nodes[0], 85 | (2, {'name': 'test'}, (10.0, 51.0))) 86 | 87 | eq_(len(self.coords), 2) 88 | eq_(self.coords[0], (1, 10.0, 50.0)) 89 | eq_(self.coords[1], (2, 10.0, 51.0)) 90 | 91 | eq_(len(self.ways), 1) 92 | eq_(self.ways[0], 93 | (3, {}, [1, 2])) 94 | 95 | eq_(len(self.relations), 0) 96 | 97 | class TestXML(ParserTestBase): 98 | osm_filename = 'test.osm' 99 | 100 | class TestBZIP2(ParserTestBase): 101 | osm_filename = 'test.osm.bz2' 102 | 103 | class TestPBF(ParserTestBase): 104 | osm_filename = 'test.pbf' 105 | 106 | class TestXMLWithFilter(ParserTestBaseWithFilter): 107 | osm_filename = 'test.osm' 108 | 109 | class TestPBFWithFilter(ParserTestBaseWithFilter): 110 | osm_filename = 'test.pbf' 111 | -------------------------------------------------------------------------------- /imposm/parser/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Omniscale GmbH & Co. KG 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | import contextlib
16 | import multiprocessing
17 | import os
18 | import subprocess
19 | 
20 | try:
21 |     from setproctitle import setproctitle
22 |     setproctitle
23 | except ImportError:
24 |     setproctitle = lambda x: None
25 | 
26 | def default_concurrency():
27 |     return multiprocessing.cpu_count()
28 | 
29 | def bzip_reader(filename):
30 |     p = subprocess.Popen(['bunzip2', '-c', filename], bufsize=-1, stdout=subprocess.PIPE)
31 |     return p.stdout
32 | 
33 | @contextlib.contextmanager
34 | def fileinput(filename):
35 |     if filename.endswith('bz2'):
36 |         yield bzip_reader(filename)
37 |     else:
38 |         fh = open(filename, 'rb')
39 |         yield fh
40 |         fh.close()
41 | 
42 | def estimate_records(files):
43 |     records = 0
44 |     for f in files:
45 |         fsize = os.path.getsize(f)
46 |         if f.endswith('.bz2'):
47 |             fsize *= 11 # observed bzip2 compression factor on osm data
48 |         if f.endswith('.pbf'):
49 |             fsize *= 15 # observed pbf compression factor on osm data
50 |         records += fsize/200
51 | 
52 |     return int(records)
--------------------------------------------------------------------------------
/imposm/parser/xml/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omniscale/imposm-parser/c1045e989af3d7d31086c2662dc632add5a45ed1/imposm/parser/xml/__init__.py
--------------------------------------------------------------------------------
/imposm/parser/xml/multiproc.py:
--------------------------------------------------------------------------------
1 | # Copyright 2011 Omniscale GmbH & Co. KG
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 15 | import mmap 16 | import multiprocessing 17 | import re 18 | 19 | from Queue import Empty 20 | 21 | from imposm.parser.xml.parser import XMLParser 22 | from imposm.parser.util import setproctitle 23 | 24 | KiB = 1024 25 | MiB = 1024*KiB 26 | 27 | READ_SIZE = 512*KiB 28 | 29 | 30 | class MMapReader(object): 31 | def __init__(self, m, size): 32 | self.m = m 33 | self.m.seek(0) 34 | self.size = size 35 | 36 | def read(self, size=None): 37 | if size is None: 38 | size = self.size - self.m.tell() 39 | else: 40 | size = min(self.size - self.m.tell(), size) 41 | return self.m.read(size) 42 | 43 | def readline(self): 44 | cur_pos = self.m.tell() 45 | if cur_pos >= self.size: 46 | return 47 | nl_pos = self.m.find('\n') 48 | self.m.seek(cur_pos) 49 | return self.m.read(nl_pos-cur_pos) 50 | 51 | def seek(self, n): 52 | self.m.seek(n) 53 | 54 | class XMLParserProcess(XMLParser, multiprocessing.Process): 55 | def __init__(self, mmap_pool, mmap_queue, *args, **kw): 56 | multiprocessing.Process.__init__(self) 57 | XMLParser.__init__(self, *args, **kw) 58 | self.daemon = True 59 | self.mmap_pool = mmap_pool 60 | self.mmap_queue = mmap_queue 61 | 62 | def run(self): 63 | setproctitle('imposm xml parser') 64 | while True: 65 | mmap_idx, size = self.mmap_queue.get() 66 | if mmap_idx is None: 67 | self.mmap_queue.task_done() 68 | break 69 | xml = MMapReader(self.mmap_pool.get(mmap_idx), size) 70 | self.parse(xml) 71 | self.mmap_queue.task_done() 72 | self.mmap_pool.free(mmap_idx) 73 | 74 | 75 | 76 | class XMLMultiProcParser(object): 77 | nodes_tag_filter = None 78 | ways_tag_filter = None 79 | relations_tag_filter = None 80 | 81 | def __init__(self, pool_size, nodes_queue=None, ways_queue=None, 82 | relations_queue=None, coords_queue=None, marshal_elem_data=False): 83 | self.pool_size = pool_size 84 | self.pool = [] 85 | self.nodes_callback = nodes_queue.put if nodes_queue else None 86 | self.ways_callback = ways_queue.put if ways_queue else None 87 | self.relations_callback = relations_queue.put if relations_queue else None 88 | self.coords_callback = coords_queue.put if coords_queue else None 89 | xml_chunk_size=READ_SIZE 90 | self.mmap_pool = MMapPool(pool_size*8, xml_chunk_size*8) 91 | self.mmap_queue = multiprocessing.JoinableQueue(8) 92 | self.marshal_elem_data = marshal_elem_data 93 | 94 | def parse(self, stream): 95 | assert not self.pool 96 | 97 | for _ in xrange(self.pool_size): 98 | proc = XMLParserProcess(self.mmap_pool, self.mmap_queue, nodes_callback=self.nodes_callback, 99 | coords_callback=self.coords_callback, ways_callback=self.ways_callback, 100 | relations_callback=self.relations_callback, 101 | nodes_tag_filter=self.nodes_tag_filter, 102 | ways_tag_filter=self.ways_tag_filter, 103 | relations_tag_filter=self.relations_tag_filter, 104 | marshal_elem_data=self.marshal_elem_data, 105 | ) 106 | self.pool.append(proc) 107 | proc.start() 108 | 109 | chunker = XMLChunker(stream, self.mmap_pool, xml_chunk_size=READ_SIZE) 110 | chunker.read(self.mmap_queue, coords_callback=self.coords_callback) 111 | 112 | self.mmap_queue.join() 113 | for proc in self.pool: 114 | self.mmap_queue.put((None, None)) 115 | for proc in self.pool: 116 | proc.join() 117 | 118 | 119 | class MMapPool(object): 120 | """ 121 | Manages multiple mmap files. 122 | The mmap files can be read and written in different processes. 
123 | """ 124 | def __init__(self, n, mmap_size): 125 | self.n = n 126 | self.mmap_size = mmap_size 127 | self.pool = [mmap.mmap(-1, mmap_size) for _ in range(n)] 128 | self.free_mmaps = set(range(n)) 129 | self.free_queue = multiprocessing.JoinableQueue() 130 | 131 | def new(self): 132 | """ 133 | Return a free mmap file. 134 | 135 | :returns: index, mmap file 136 | """ 137 | if not self.free_mmaps: 138 | self.free_mmaps.add(self.free_queue.get()) 139 | self.free_queue.task_done() 140 | while True: 141 | # fetch unless free_queue is empty 142 | try: 143 | self.free_mmaps.add(self.free_queue.get_nowait()) 144 | self.free_queue.task_done() 145 | except Empty: 146 | break 147 | mmap_idx = self.free_mmaps.pop() 148 | return mmap_idx, self.pool[mmap_idx] 149 | 150 | def join(self): 151 | while len(self.free_mmaps) < self.n: 152 | self.free_mmaps.add(self.free_queue.get()) 153 | self.free_queue.task_done() 154 | 155 | def get(self, idx): 156 | """ 157 | Return mmap file with `idx`. 158 | """ 159 | return self.pool[idx] 160 | 161 | def free(self, idx): 162 | """ 163 | Mark mmap file with `idx` as free. 164 | """ 165 | self.free_queue.put(idx) 166 | 167 | class XMLChunker(object): 168 | """ 169 | Reads and chunks OSM XML file. 170 | 171 | Reads OSM XML from `stream` and writes chunks of it into mmap files from 172 | the `mmap_pool`. 173 | 174 | :params xml_chunk_size: chunk XML after this many bytes 175 | """ 176 | def __init__(self, stream, mmap_pool, xml_chunk_size): 177 | self.stream = stream 178 | self.size = xml_chunk_size 179 | self._last_line = None 180 | self.mmap_pool = mmap_pool 181 | self.current_mmap_idx = 0 182 | self._skip_header() 183 | 184 | def _skip_header(self): 185 | for line in self.stream: 186 | if line.lstrip().startswith('") 194 | return stream 195 | 196 | def _finished_xml_outstream(self, last_line, stream): 197 | if '\n') 199 | return self.current_mmap_idx, stream.tell() 200 | 201 | def read(self, mmaps_queue, coords_callback=None): 202 | """ 203 | Read and chunk all 204 | """ 205 | coord_node_match = None 206 | xml_nodes = self._new_xml_outstream() 207 | coords = [] 208 | coord_node_re_match = re.compile(r'^\s*').match 210 | node_re_match = re.compile(r'^\s*').match 211 | xml_nodes.write(self._last_line) 212 | split = False 213 | line = '' 214 | for line in self.stream: 215 | if coords_callback: 216 | coord_node_match = coord_node_re_match(line) 217 | if coord_node_match: 218 | osm_id, lat, lon = coord_node_match.groups() 219 | coords.append((int(osm_id), float(lon), float(lat))) 220 | if len(coords) >= 512: 221 | coords_callback(coords) 222 | coords = [] 223 | else: 224 | xml_nodes.write(line) 225 | else: 226 | xml_nodes.write(line) 227 | if split: 228 | if (line.rstrip().endswith(('', '', '')) 229 | or (coords_callback and coord_node_match) 230 | or (not coords_callback and node_re_match(line))): 231 | mmaps_queue.put(self._finished_xml_outstream(line, xml_nodes)) 232 | xml_nodes = self._new_xml_outstream() 233 | split = False 234 | elif xml_nodes.tell() > self.size: 235 | split = True 236 | if coords_callback: 237 | coords_callback(coords) 238 | 239 | # we are at the end of the stream and assume we wrote the end tag 240 | # to xml_nodes. 
we set line to closing tag here to avoid additional 241 | # end tag in case the last line(s) is blank 242 | line = '= 512: 90 | self.coords_callback(coords) 91 | coords = [] 92 | if len(nodes) >= 128: 93 | self.nodes_callback(nodes) 94 | nodes = [] 95 | if len(relations) >= 128: 96 | self.relations_callback(relations) 97 | relations = [] 98 | if len(ways) >= 128: 99 | self.ways_callback(ways) 100 | ways = [] 101 | 102 | root.clear() 103 | 104 | if self.coords_callback: 105 | self.coords_callback(coords) 106 | if self.nodes_callback: 107 | self.nodes_callback(nodes) 108 | if self.ways_callback: 109 | self.ways_callback(ways) 110 | if self.relations_callback: 111 | self.relations_callback(relations) -------------------------------------------------------------------------------- /imposm/parser/xml/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Omniscale GmbH & Co. KG 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import with_statement 16 | from xml.etree import cElementTree as ET 17 | from contextlib import contextmanager 18 | 19 | def iterparse(fileobj): 20 | """ 21 | Return root object and iterparser for given ``fileobj``. 22 | """ 23 | context = ET.iterparse(fileobj, events=("start", "end")) 24 | context = iter(context) 25 | _event, root = context.next() 26 | return root, context 27 | 28 | @contextmanager 29 | def log_file_on_exception(xml): 30 | try: 31 | yield 32 | except SyntaxError, ex: 33 | import tempfile 34 | fd_, filename = tempfile.mkstemp('.osm') 35 | xml.seek(0) 36 | with open(filename, 'w') as f: 37 | f.write(xml.read()) 38 | print 'SyntaxError in xml: %s, (stored dump %s)' % (ex, filename) -------------------------------------------------------------------------------- /osm.proto: -------------------------------------------------------------------------------- 1 | /** Copyright (c) 2010 Scott A. Crosby. 2 | 3 | This program is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU Lesser General Public License as 5 | published by the Free Software Foundation, either version 3 of the 6 | License, or (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program. If not, see . 15 | 16 | */ 17 | 18 | option java_package = "crosby.binary"; 19 | package OSMPBF; 20 | 21 | //protoc --java_out=../.. fileformat.proto 22 | 23 | 24 | // 25 | // STORAGE LAYER: Storing primitives. 26 | // 27 | 28 | message Blob { 29 | optional bytes raw = 1; // No compression 30 | optional int32 raw_size = 2; // When compressed, the uncompressed size 31 | 32 | // Possible compressed versions of the data. 
33 | optional bytes zlib_data = 3; 34 | 35 | // PROPOSED feature for LZMA compressed data. SUPPORT IS NOT REQUIRED. 36 | optional bytes lzma_data = 4; 37 | 38 | // Formerly used for bzip2 compressed data. Depreciated in 2010. 39 | optional bytes OBSOLETE_bzip2_data = 5 [deprecated=true]; // Don't reuse this tag number. 40 | } 41 | 42 | /* A file contains an sequence of fileblock headers, each prefixed by 43 | their length in network byte order, followed by a data block 44 | containing the actual data. types staring with a "_" are reserved. 45 | */ 46 | 47 | message BlobHeader { 48 | required string type = 1; 49 | optional bytes indexdata = 2; 50 | required int32 datasize = 3; 51 | } 52 | 53 | 54 | /** Copyright (c) 2010 Scott A. Crosby. 55 | 56 | This program is free software: you can redistribute it and/or modify 57 | it under the terms of the GNU Lesser General Public License as 58 | published by the Free Software Foundation, either version 3 of the 59 | License, or (at your option) any later version. 60 | 61 | This program is distributed in the hope that it will be useful, 62 | but WITHOUT ANY WARRANTY; without even the implied warranty of 63 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 64 | GNU General Public License for more details. 65 | 66 | You should have received a copy of the GNU General Public License 67 | along with this program. If not, see . 68 | 69 | */ 70 | 71 | /* OSM Binary file format 72 | 73 | This is the master schema file of the OSM binary file format. This 74 | file is designed to support limited random-access and future 75 | extendability. 76 | 77 | A binary OSM file consists of a sequence of FileBlocks (please see 78 | fileformat.proto). The first fileblock contains a serialized instance 79 | of HeaderBlock, followed by a sequence of PrimitiveBlock blocks that 80 | contain the primitives. 81 | 82 | Each primitiveblock is designed to be independently parsable. It 83 | contains a string table storing all strings in that block (keys and 84 | values in tags, roles in relations, usernames, etc.) as well as 85 | metadata containing the precision of coordinates or timestamps in that 86 | block. 87 | 88 | A primitiveblock contains a sequence of primitive groups, each 89 | containing primitives of the same type (nodes, densenodes, ways, 90 | relations). Coordinates are stored in signed 64-bit integers. Lat&lon 91 | are measured in units nanodegrees. The default of 92 | granularity of 100 nanodegrees corresponds to about 1cm on the ground, 93 | and a full lat or lon fits into 32 bits. 94 | 95 | Converting an integer to a lattitude or longitude uses the formula: 96 | $OUT = IN * granularity / 10**9$. Many encoding schemes use delta 97 | coding when representing nodes and relations. 98 | 99 | */ 100 | 101 | ////////////////////////////////////////////////////////////////////////// 102 | ////////////////////////////////////////////////////////////////////////// 103 | 104 | /* Contains the file header. */ 105 | 106 | message HeaderBlock { 107 | optional HeaderBBox bbox = 1; 108 | /* Additional tags to aid in parsing this dataset */ 109 | repeated string required_features = 4; 110 | repeated string optional_features = 5; 111 | 112 | optional string writingprogram = 16; 113 | optional string source = 17; // From the bbox field. 114 | } 115 | 116 | 117 | /** The bounding box field in the OSM header. BBOX, as used in the OSM 118 | header. Units are always in nanodegrees -- they do not obey 119 | granularity rules. 
*/ 120 | 121 | message HeaderBBox { 122 | required sint64 left = 1; 123 | required sint64 right = 2; 124 | required sint64 top = 3; 125 | required sint64 bottom = 4; 126 | } 127 | 128 | 129 | /////////////////////////////////////////////////////////////////////// 130 | /////////////////////////////////////////////////////////////////////// 131 | 132 | 133 | message PrimitiveBlock { 134 | required StringTable stringtable = 1; 135 | repeated PrimitiveGroup primitivegroup = 2; 136 | 137 | // Granularity, units of nanodegrees, used to store coordinates in this block 138 | optional int32 granularity = 17 [default=100]; 139 | // Offset value between the output coordinates coordinates and the granularity grid in unites of nanodegrees. 140 | optional int64 lat_offset = 19 [default=0]; 141 | optional int64 lon_offset = 20 [default=0]; 142 | 143 | // Granularity of dates, normally represented in units of milliseconds since the 1970 epoch. 144 | optional int32 date_granularity = 18 [default=1000]; 145 | 146 | 147 | // Proposed extension: 148 | //optional BBox bbox = XX; 149 | } 150 | 151 | // Group of OSMPrimitives. All primitives in a group must be the same type. 152 | message PrimitiveGroup { 153 | repeated Node nodes = 1; 154 | optional DenseNodes dense = 2; 155 | repeated Way ways = 3; 156 | repeated Relation relations = 4; 157 | repeated ChangeSet changesets = 5; 158 | } 159 | 160 | 161 | /** String table, contains the common strings in each block. 162 | 163 | Note that we reserve index '0' as a delimiter, so the entry at that 164 | index in the table is ALWAYS blank and unused. 165 | 166 | */ 167 | message StringTable { 168 | repeated bytes s = 1; 169 | } 170 | 171 | /* Optional metadata that may be included into each primitive. */ 172 | message Info { 173 | optional int32 version = 1 [default = -1]; 174 | optional int64 timestamp = 2; 175 | optional int64 changeset = 3; 176 | optional int32 uid = 4; 177 | optional uint32 user_sid = 5; // String IDs 178 | } 179 | 180 | /** Optional metadata that may be included into each primitive. Special dense format used in DenseNodes. */ 181 | message DenseInfo { 182 | repeated int32 version = 1 [packed = true]; 183 | repeated sint64 timestamp = 2 [packed = true]; // DELTA coded 184 | repeated sint64 changeset = 3 [packed = true]; // DELTA coded 185 | repeated sint32 uid = 4 [packed = true]; // DELTA coded 186 | repeated sint32 user_sid = 5 [packed = true]; // String IDs for usernames. DELTA coded 187 | } 188 | 189 | 190 | // THIS IS STUB DESIGN FOR CHANGESETS. NOT USED RIGHT NOW. 191 | // TODO: REMOVE THIS? 192 | message ChangeSet { 193 | required int64 id = 1; 194 | // 195 | // // Parallel arrays. 196 | // repeated uint32 keys = 2 [packed = true]; // String IDs. 197 | // repeated uint32 vals = 3 [packed = true]; // String IDs. 198 | // 199 | // optional Info info = 4; 200 | 201 | // optional int64 created_at = 8; 202 | // optional int64 closetime_delta = 9; 203 | // optional bool open = 10; 204 | // optional HeaderBBox bbox = 11; 205 | } 206 | 207 | 208 | message Node { 209 | required sint64 id = 1; 210 | // Parallel arrays. 211 | repeated uint32 keys = 2 [packed = true]; // String IDs. 212 | repeated uint32 vals = 3 [packed = true]; // String IDs. 213 | 214 | optional Info info = 4; // May be omitted in omitmeta 215 | 216 | required sint64 lat = 8; 217 | required sint64 lon = 9; 218 | } 219 | 220 | /* Used to densly represent a sequence of nodes that do not have any tags. 
221 | 222 | We represent these nodes columnwise as five columns: ID's, lats, and 223 | lons, all delta coded. When metadata is not omitted, 224 | 225 | We encode keys & vals for all nodes as a single array of integers 226 | containing key-stringid and val-stringid, using a stringid of 0 as a 227 | delimiter between nodes. 228 | 229 | ( ( )* '0' )* 230 | */ 231 | 232 | message DenseNodes { 233 | repeated sint64 id = 1 [packed = true]; // DELTA coded 234 | 235 | //repeated Info info = 4; 236 | optional DenseInfo denseinfo = 5; 237 | 238 | repeated sint64 lat = 8 [packed = true]; // DELTA coded 239 | repeated sint64 lon = 9 [packed = true]; // DELTA coded 240 | 241 | // Special packing of keys and vals into one array. May be empty if all nodes in this block are tagless. 242 | repeated int32 keys_vals = 10 [packed = true]; 243 | } 244 | 245 | 246 | message Way { 247 | required int64 id = 1; 248 | // Parallel arrays. 249 | repeated uint32 keys = 2 [packed = true]; 250 | repeated uint32 vals = 3 [packed = true]; 251 | 252 | optional Info info = 4; 253 | 254 | repeated sint64 refs = 8 [packed = true]; // DELTA coded 255 | } 256 | 257 | message Relation { 258 | enum MemberType { 259 | NODE = 0; 260 | WAY = 1; 261 | RELATION = 2; 262 | } 263 | required int64 id = 1; 264 | 265 | // Parallel arrays. 266 | repeated uint32 keys = 2 [packed = true]; 267 | repeated uint32 vals = 3 [packed = true]; 268 | 269 | optional Info info = 4; 270 | 271 | // Parallel arrays 272 | repeated int32 roles_sid = 8 [packed = true]; 273 | repeated sint64 memids = 9 [packed = true]; // DELTA encoded 274 | repeated MemberType types = 10 [packed = true]; 275 | } 276 | 277 | -------------------------------------------------------------------------------- /release.py: -------------------------------------------------------------------------------- 1 | import scriptine 2 | from scriptine import path 3 | from scriptine.shell import backtick_, sh 4 | 5 | PACKAGE_NAME = 'imposm.parser' 6 | REMOTE_DOC_LOCATION = 'omniscale.de:domains/imposm.org/docs/imposm.parser' 7 | 8 | VERSION_FILES = [ 9 | ('setup.py', 'version="###"'), 10 | ('doc/source/conf.py', "version = '##'"), 11 | ('doc/source/conf.py', "release = '###'"), 12 | ] 13 | 14 | def version_command(): 15 | print version() 16 | 17 | def prepare_command(tag=""): 18 | sh('python setup.py egg_info -D -b "%s"' % tag) 19 | 20 | def version(): 21 | package_name = PACKAGE_NAME 22 | version = backtick_('grep Version: %(package_name)s.egg-info/PKG-INFO' % locals()) 23 | version = version.split(':')[-1].strip() 24 | return version 25 | 26 | def clean_all_command(): 27 | path('build/').rmtree(ignore_errors=True) 28 | for pyc in path.cwd().walkfiles('*.pyc'): 29 | pyc.remove() 30 | 31 | def bump_version_command(version): 32 | short_version = '.'.join(version.split('.')[:2]) 33 | for filename, replace in VERSION_FILES: 34 | if '###' in replace: 35 | search_for = replace.replace('###', '[^\'"]+') 36 | replace_with = replace.replace('###', version) 37 | else: 38 | search_for = replace.replace('##', '[^\'"]+') 39 | replace_with = replace.replace('##', short_version) 40 | 41 | search_for = search_for.replace('"', '\\"') 42 | replace_with = replace_with.replace('"', '\\"') 43 | sh('''perl -p -i -e "s/%(search_for)s/%(replace_with)s/" %(filename)s ''' % locals()) 44 | 45 | prepare_command() 46 | 47 | def build_docs_command(): 48 | sh('python setup.py build_sphinx') 49 | ver = version() 50 | package_name = PACKAGE_NAME 51 | sh("tar -c -v -z -C build/sphinx/ -f 
dist/%(package_name)s-docs-%(ver)s.tar.gz -s " 52 | "'/^html/%(package_name)s-docs-%(ver)s/' html" 53 | % locals()) 54 | 55 | def upload_docs_command(): 56 | ver = version() 57 | remote_doc_location = REMOTE_DOC_LOCATION 58 | sh('rsync -a -v -P -z build/sphinx/html/ %(remote_doc_location)s/%(ver)s' % locals()) 59 | 60 | def build_sdist_command(): 61 | sh('python setup.py egg_info -b "" -D sdist') 62 | 63 | def upload_sdist_command(): 64 | sh('python setup.py egg_info -b "" -D sdist') 65 | ver = version() 66 | remote_rel_location = REMOTE_REL_LOCATION 67 | sh('scp dist/imposm.parser-%(ver)s.* %(remote_rel_location)s' % locals()) 68 | 69 | def upload_final_sdist_command(): 70 | sh('python setup.py egg_info -b "" -D sdist upload') 71 | 72 | def link_latest_command(ver=None): 73 | if ver is None: 74 | ver = version() 75 | host, path = REMOTE_DOC_LOCATION.split(':') 76 | sh('ssh %(host)s "cd %(path)s && rm latest && ln -s %(ver)s latest"' % locals()) 77 | 78 | if __name__ == '__main__': 79 | scriptine.run() 80 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import platform 3 | from setuptools import setup, Extension, find_packages 4 | from setuptools.command.build_ext import build_ext 5 | from distutils.errors import DistutilsPlatformError 6 | 7 | import subprocess 8 | 9 | class build_ext_with_protpbuf(build_ext): 10 | def run(self): 11 | try: 12 | proc = subprocess.Popen( 13 | ['protoc', '--cpp_out', 'imposm/parser/pbf/', 'osm.proto'], 14 | stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 15 | except OSError, ex: 16 | if ex.errno == errno.ENOENT: 17 | print ("Could not find protoc command. Make sure protobuf is " 18 | "installed and your PATH environment is set.") 19 | raise DistutilsPlatformError("Failed to generate protbuf " 20 | "CPP files with protoc.") 21 | else: 22 | raise 23 | out = proc.communicate()[0] 24 | result = proc.wait() 25 | if result != 0: 26 | print "Error during protbuf files generation with protoc:" 27 | print out 28 | raise DistutilsPlatformError("Failed to generate protbuf " 29 | "CPP files with protoc.") 30 | build_ext.run(self) 31 | 32 | 33 | install_requires = [] 34 | if tuple(map(str, platform.python_version_tuple())) < ('2', '6'): 35 | install_requires.append('multiprocessing>=2.6') 36 | 37 | setup( 38 | name='imposm.parser', 39 | version="1.0.8a", 40 | description='Fast and easy OpenStreetMap XML/PBF parser.', 41 | long_description=open('README.rst').read() + open('CHANGES').read(), 42 | author='Oliver Tonnhofer', 43 | author_email='olt@omniscale.de', 44 | url='http://imposm.org/docs/imposm.parser/latest/', 45 | license='Apache Software License 2.0', 46 | packages=find_packages(), 47 | namespace_packages = ['imposm'], 48 | include_package_data=True, 49 | package_data = {'': ['*.xml', '*.osm', '*.osm.bz2']}, 50 | install_requires=install_requires, 51 | classifiers=[ 52 | "Development Status :: 4 - Beta", 53 | "License :: OSI Approved :: Apache Software License", 54 | "Operating System :: OS Independent", 55 | "Programming Language :: C", 56 | "Programming Language :: C++", 57 | "Programming Language :: Python :: 2.5", 58 | "Programming Language :: Python :: 2.6", 59 | "Programming Language :: Python :: 2.7", 60 | "Topic :: Software Development :: Libraries", 61 | "Topic :: Scientific/Engineering :: GIS", 62 | ], 63 | ext_modules=[ 64 | Extension("imposm.parser.pbf.OSMPBF", 65 | ["imposm/parser/pbf/osm.cc", 
"imposm/parser/pbf/osm.pb.cc"], libraries=['protobuf']), 66 | ], 67 | cmdclass={'build_ext':build_ext_with_protpbuf}, 68 | ) 69 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py25,py26,py27 3 | 4 | [testenv] 5 | changedir = {toxworkdir} 6 | commands = nosetests imposm --with-xunit --xunit-file={toxinidir}/nosetests-{envname}.xml 7 | deps = nose>=0.10.4 8 | --------------------------------------------------------------------------------
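For reference, a minimal usage sketch of the ``OSMParser`` API from ``imposm/parser/simple.py``, as exercised by the test suite above. It is a sketch only: the ``count_highways`` callback, the ``highway_counts`` dict, and the ``region.osm.pbf`` filename are illustrative and not part of the package.

from imposm.parser import OSMParser

highway_counts = {}

def count_highways(ways):
    # each call receives a batch of (osm_id, tags, refs) tuples
    for osmid, tags, refs in ways:
        if 'highway' in tags:
            highway_counts[tags['highway']] = highway_counts.get(tags['highway'], 0) + 1

parser = OSMParser(concurrency=4, ways_callback=count_highways)
parser.parse('region.osm.pbf')  # .osm and .osm.bz2 files are detected by suffix as well
print highway_counts

The callbacks run in the calling process, so accumulating results in plain module-level data structures works; only the parsing itself is distributed over the worker processes.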