├── .gitignore ├── .hgignore ├── .hgtags ├── CHANGES ├── LICENSE ├── MANIFEST.in ├── README.rst ├── doc ├── Makefile ├── make.bat └── source │ ├── concepts.rst │ ├── conf.py │ ├── index.rst │ ├── install.rst │ └── parsing.rst ├── imposm ├── __init__.py └── parser │ ├── __init__.py │ ├── pbf │ ├── __init__.py │ ├── multiproc.py │ ├── osm.cc │ └── parser.py │ ├── simple.py │ ├── test │ ├── __init__.py │ ├── test.osm │ ├── test.osm.bz2 │ ├── test.pbf │ └── test_simple_parser.py │ ├── util.py │ └── xml │ ├── __init__.py │ ├── multiproc.py │ ├── parser.py │ └── util.py ├── osm.proto ├── release.py ├── setup.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | .pyc 2 | .egg-info 3 | doc/build 4 | dist/ 5 | .tox 6 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | a8a6bd0d5234c805f7e979939a2b0a5f9372ea2b v1.0.1 2 | c7e1cd122f00f1aeddfaf4c213ce85ec70fd42f4 v1.0.0 3 | 5307e5693e0c0869b934c418e9c98a9280bfa99f v1.0.2 4 | 11640b8eea21493f00c7a1b77e2fcebfb14b30f7 v1.0.3 5 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | Changelog 2 | --------- 3 | 4 | 1.0.7 2015-01-10 5 | ~~~~~~~~~~~~~~~~ 6 | 7 | - updated protobuf parser 8 | 9 | 1.0.6 2014-10-31 10 | ~~~~~~~~~~~~~~~~ 11 | 12 | - fixed bug in protobuf parser that could cause segfaults 13 | 14 | 1.0.5 2013-09-13 15 | ~~~~~~~~~~~~~~~~ 16 | 17 | - support PBF without granularity value 18 | - improved support for non-pretty-printed XML 19 | 20 | 1.0.4 2012-12-10 21 | ~~~~~~~~~~~~~~~~ 22 | 23 | - improved support for non-pretty-printed XML 24 | - fixed dependency check for multiprocessing 25 | 26 | 1.0.3 2011-07-21 27 | ~~~~~~~~~~~~~~~~ 28 | 29 | - support for uncompressed PBF 30 | - bug fix for PBF without dense nodes 31 | 32 | 1.0.2 2011-03-10 33 | ~~~~~~~~~~~~~~~~ 34 | 35 | - improved regexp based XML coord parser 36 | - prevent mmap overflow in XMLChunker without coord_callback 37 | - successfully parsed whole planet.osm 38 | 39 | 1.0.0 2011-02-22 40 | ~~~~~~~~~~~~~~~~ 41 | 42 | - first release 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include README.rst 3 | include CHANGES 4 | include LICENSE 5 | include setup.py 6 | include osm.proto 7 | 8 | recursive-include imposm/parser/test *.osm *.pbf *.osm.bz2 9 | exclude imposm/parser/pbf/osm.pb.cc 10 | exclude imposm/parser/pbf/osm.pb.h 11 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | imposm.parser - OpenStreetMap XML/PBF parser for Python 2 | ======================================================= 3 | 4 | ``imposm.parser`` is a Python library that parses OpenStreetMap data in `XML `_ and `PBF `_ format. 5 | 6 | It has a simple API and it is fast and easy to use. It also works across multiple CPU/cores for extra speed. 7 | 8 | .. note:: 9 | **Imposm-parser is in maintenance mode and it's unlikely that we will provide any further releases.** 10 | 11 | 12 | It is developed and supported by `Omniscale `_ and released under the `Apache Software License 2.0 `_. 13 | 14 | Example 15 | ------- 16 | 17 | Here is an example that parses an OSM file and counts all ways that are tagged as a highway. 18 | :: 19 | 20 | from imposm.parser import OSMParser 21 | 22 | # simple class that handles the parsed OSM data. 23 | class HighwayCounter(object): 24 | highways = 0 25 | 26 | def ways(self, ways): 27 | # callback method for ways 28 | for osmid, tags, refs in ways: 29 | if 'highway' in tags: 30 | self.highways += 1 31 | 32 | # instantiate counter and parser and start parsing 33 | counter = HighwayCounter() 34 | p = OSMParser(concurrency=4, ways_callback=counter.ways) 35 | p.parse('germany.osm.pbf') 36 | 37 | # done 38 | print counter.highways 39 | 40 | 41 | Source and issue tracker 42 | ------------------------ 43 | 44 | Source code and issue tracker are available at ``_. 45 | 46 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 
11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | 15 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest 16 | 17 | help: 18 | @echo "Please use \`make ' where is one of" 19 | @echo " html to make standalone HTML files" 20 | @echo " dirhtml to make HTML files named index.html in directories" 21 | @echo " singlehtml to make a single large HTML file" 22 | @echo " pickle to make pickle files" 23 | @echo " json to make JSON files" 24 | @echo " htmlhelp to make HTML files and a HTML help project" 25 | @echo " qthelp to make HTML files and a qthelp project" 26 | @echo " devhelp to make HTML files and a Devhelp project" 27 | @echo " epub to make an epub" 28 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 29 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 30 | @echo " text to make text files" 31 | @echo " man to make manual pages" 32 | @echo " changes to make an overview of all changed/added/deprecated items" 33 | @echo " linkcheck to check all external links for integrity" 34 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 35 | 36 | clean: 37 | -rm -rf $(BUILDDIR)/* 38 | 39 | html: 40 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 41 | @echo 42 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 43 | 44 | dirhtml: 45 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 48 | 49 | singlehtml: 50 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 51 | @echo 52 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 53 | 54 | pickle: 55 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 56 | @echo 57 | @echo "Build finished; now you can process the pickle files." 58 | 59 | json: 60 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 61 | @echo 62 | @echo "Build finished; now you can process the JSON files." 63 | 64 | htmlhelp: 65 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 66 | @echo 67 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 68 | ".hhp project file in $(BUILDDIR)/htmlhelp." 69 | 70 | qthelp: 71 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 72 | @echo 73 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 74 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 75 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/imposmparser.qhcp" 76 | @echo "To view the help file:" 77 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/imposmparser.qhc" 78 | 79 | devhelp: 80 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 81 | @echo 82 | @echo "Build finished." 83 | @echo "To view the help file:" 84 | @echo "# mkdir -p $$HOME/.local/share/devhelp/imposmparser" 85 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/imposmparser" 86 | @echo "# devhelp" 87 | 88 | epub: 89 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 90 | @echo 91 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 92 | 93 | latex: 94 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 95 | @echo 96 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 
97 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 98 | "(use \`make latexpdf' here to do that automatically)." 99 | 100 | latexpdf: 101 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 102 | @echo "Running LaTeX files through pdflatex..." 103 | make -C $(BUILDDIR)/latex all-pdf 104 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 105 | 106 | text: 107 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 108 | @echo 109 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 110 | 111 | man: 112 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 113 | @echo 114 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 115 | 116 | changes: 117 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 118 | @echo 119 | @echo "The overview file is in $(BUILDDIR)/changes." 120 | 121 | linkcheck: 122 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 123 | @echo 124 | @echo "Link check complete; look for any errors in the above output " \ 125 | "or in $(BUILDDIR)/linkcheck/output.txt." 126 | 127 | doctest: 128 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 129 | @echo "Testing of doctests in the sources finished, look at the " \ 130 | "results in $(BUILDDIR)/doctest/output.txt." 131 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | if NOT "%PAPER%" == "" ( 11 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 12 | ) 13 | 14 | if "%1" == "" goto help 15 | 16 | if "%1" == "help" ( 17 | :help 18 | echo.Please use `make ^` where ^ is one of 19 | echo. html to make standalone HTML files 20 | echo. dirhtml to make HTML files named index.html in directories 21 | echo. singlehtml to make a single large HTML file 22 | echo. pickle to make pickle files 23 | echo. json to make JSON files 24 | echo. htmlhelp to make HTML files and a HTML help project 25 | echo. qthelp to make HTML files and a qthelp project 26 | echo. devhelp to make HTML files and a Devhelp project 27 | echo. epub to make an epub 28 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 29 | echo. text to make text files 30 | echo. man to make manual pages 31 | echo. changes to make an overview over all changed/added/deprecated items 32 | echo. linkcheck to check all external links for integrity 33 | echo. doctest to run all doctests embedded in the documentation if enabled 34 | goto end 35 | ) 36 | 37 | if "%1" == "clean" ( 38 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 39 | del /q /s %BUILDDIR%\* 40 | goto end 41 | ) 42 | 43 | if "%1" == "html" ( 44 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 45 | if errorlevel 1 exit /b 1 46 | echo. 47 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 48 | goto end 49 | ) 50 | 51 | if "%1" == "dirhtml" ( 52 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 53 | if errorlevel 1 exit /b 1 54 | echo. 55 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 56 | goto end 57 | ) 58 | 59 | if "%1" == "singlehtml" ( 60 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 61 | if errorlevel 1 exit /b 1 62 | echo. 
63 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 64 | goto end 65 | ) 66 | 67 | if "%1" == "pickle" ( 68 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 69 | if errorlevel 1 exit /b 1 70 | echo. 71 | echo.Build finished; now you can process the pickle files. 72 | goto end 73 | ) 74 | 75 | if "%1" == "json" ( 76 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished; now you can process the JSON files. 80 | goto end 81 | ) 82 | 83 | if "%1" == "htmlhelp" ( 84 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished; now you can run HTML Help Workshop with the ^ 88 | .hhp project file in %BUILDDIR%/htmlhelp. 89 | goto end 90 | ) 91 | 92 | if "%1" == "qthelp" ( 93 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 94 | if errorlevel 1 exit /b 1 95 | echo. 96 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 97 | .qhcp project file in %BUILDDIR%/qthelp, like this: 98 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\imposmparser.qhcp 99 | echo.To view the help file: 100 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\imposmparser.ghc 101 | goto end 102 | ) 103 | 104 | if "%1" == "devhelp" ( 105 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 106 | if errorlevel 1 exit /b 1 107 | echo. 108 | echo.Build finished. 109 | goto end 110 | ) 111 | 112 | if "%1" == "epub" ( 113 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 117 | goto end 118 | ) 119 | 120 | if "%1" == "latex" ( 121 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 122 | if errorlevel 1 exit /b 1 123 | echo. 124 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 125 | goto end 126 | ) 127 | 128 | if "%1" == "text" ( 129 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 130 | if errorlevel 1 exit /b 1 131 | echo. 132 | echo.Build finished. The text files are in %BUILDDIR%/text. 133 | goto end 134 | ) 135 | 136 | if "%1" == "man" ( 137 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 141 | goto end 142 | ) 143 | 144 | if "%1" == "changes" ( 145 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.The overview file is in %BUILDDIR%/changes. 149 | goto end 150 | ) 151 | 152 | if "%1" == "linkcheck" ( 153 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Link check complete; look for any errors in the above output ^ 157 | or in %BUILDDIR%/linkcheck/output.txt. 158 | goto end 159 | ) 160 | 161 | if "%1" == "doctest" ( 162 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 163 | if errorlevel 1 exit /b 1 164 | echo. 165 | echo.Testing of doctests in the sources finished, look at the ^ 166 | results in %BUILDDIR%/doctest/output.txt. 
167 | goto end 168 | ) 169 | 170 | :end 171 | -------------------------------------------------------------------------------- /doc/source/concepts.rst: -------------------------------------------------------------------------------- 1 | Concepts 2 | ======== 3 | 4 | To use ``imposm.parser`` you need to understand three basic concepts: Types, Callbacks and Filters 5 | 6 | Types 7 | ----- 8 | 9 | .. note:: In this document Node, Way, Relation with a capital letter refer to the OSM types and `node`, `way`, `relation` refer to the Imposm types. 10 | 11 | OSM has three fundamental element types: Nodes, Ways and Relations. ``imposm.parser`` distinguishes between `coords` and `nodes` for OSM Nodes. 12 | 13 | `coords` only store coordinates and there are `coords` for *every* OSM Node. `nodes` also store tags and there are *only* `nodes` for OSM Nodes *with* tags. 14 | 15 | 16 | coords 17 | ~~~~~~ 18 | 19 | A tuple with the OSM ID, the longitude and latitude of that node. 20 | 21 | :: 22 | 23 | (4234432, 175.2, -32.1) 24 | 25 | ``imposm.parser`` will return a `coord` for each OSM Node, even if this OSM Node is also a `node` (i.e. it has tags). 26 | 27 | nodes 28 | ~~~~~ 29 | 30 | A tuple with the OSM ID, a tags dictionary and a nested tuple with the longitude and latitude of that node. 31 | 32 | :: 33 | 34 | (982347, {'name': 'Somewhere', 'place': 'village'}, (-120.2, 23.21)) 35 | 36 | 37 | ways 38 | ~~~~ 39 | 40 | A tuple with the OSM ID, a tags dictionary and a list of references. 41 | 42 | :: 43 | 44 | (87644, {'name': 'my way', 'highway': 'path'}, [123, 345, 567]) 45 | 46 | relations 47 | ~~~~~~~~~ 48 | 49 | A tuple with the OSM ID, a tags dictionary and a list of member tuples. 50 | Each member tuple contains the reference, the type (one of `'node'`, `'way'`, `'relation'`) and the role. 51 | 52 | :: 53 | 54 | (87644, {'type': 'multipolygon'}, [(123, 'way', 'outer'), (234, 'way', 'inner')]) 55 | 56 | .. _concepts_callbacks: 57 | 58 | Callbacks 59 | --------- 60 | 61 | The parser takes one callback function for each of the four data types (`coords`, `nodes`, `ways` and `relations`). The callbacks are optional, i.e. you don't need to pass a relations callback if you are not interested in relations. 62 | 63 | The functions should expect a list with zero or more items of the corresponding type. 64 | 65 | Here is an example callback that prints the coordinates of all Nodes. 66 | 67 | :: 68 | 69 | def coords_callback(coords): 70 | for osm_id, lon, lat in coords: 71 | print '%s %.4f %.4f' % (osm_id, lon, lat) 72 | 73 | 74 | .. _concepts_tag_filters: 75 | 76 | Tag filters 77 | ----------- 78 | 79 | Tag filters are functions that manipulate tag dictionaries. The functions should modify the dictionary in-place; the return value is ignored. 80 | 81 | Elements will be handled differently if you remove all tags from the dictionary. `nodes` and `relations` with empty tags will not be returned, but `ways` will be, since they might be needed for building relations. 82 | 83 | Here is an example filter that filters the tags with a whitelist.
84 | :: 85 | 86 | whitelist = set(('name', 'place', 'amenity')) 87 | 88 | def tag_filter(tags): 89 | for key in tags.keys(): 90 | if key not in whitelist: 91 | del tags[key] 92 | if 'name' in tags and len(tags) == 1: 93 | # tags with only a name have no information 94 | # how to handle this element 95 | del tags['name'] 96 | 97 | 98 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # imposm.parser documentation build configuration file, created by 4 | # sphinx-quickstart on Thu Feb 17 15:23:25 2011. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = ['sphinx.ext.autodoc'] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | project = u'imposm.parser' 44 | copyright = u'2011, Oliver Tonnhofer' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = '1.0' 52 | # The full version, including alpha/beta/rc tags. 53 | release = '1.0.8a' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = [] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 
77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | html_theme = 'sphinxdoc' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | #html_title = None 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_domain_indices = True 142 | 143 | # If false, no index is generated. 144 | #html_use_index = True 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | #html_show_sourcelink = True 151 | 152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 153 | #html_show_sphinx = True 154 | 155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 156 | #html_show_copyright = True 157 | 158 | # If true, an OpenSearch description file will be output, and all pages will 159 | # contain a tag referring to it. The value of this option must be the 160 | # base URL from which the finished HTML is served. 161 | #html_use_opensearch = '' 162 | 163 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 164 | #html_file_suffix = None 165 | 166 | # Output file base name for HTML help builder. 
167 | htmlhelp_basename = 'imposmparserdoc' 168 | 169 | 170 | # -- Options for LaTeX output -------------------------------------------------- 171 | 172 | # The paper size ('letter' or 'a4'). 173 | #latex_paper_size = 'letter' 174 | 175 | # The font size ('10pt', '11pt' or '12pt'). 176 | #latex_font_size = '10pt' 177 | 178 | # Grouping the document tree into LaTeX files. List of tuples 179 | # (source start file, target name, title, author, documentclass [howto/manual]). 180 | latex_documents = [ 181 | ('index', 'imposmparser.tex', u'imposm.parser Documentation', 182 | u'Oliver Tonnhofer', 'manual'), 183 | ] 184 | 185 | # The name of an image file (relative to this directory) to place at the top of 186 | # the title page. 187 | #latex_logo = None 188 | 189 | # For "manual" documents, if this is true, then toplevel headings are parts, 190 | # not chapters. 191 | #latex_use_parts = False 192 | 193 | # If true, show page references after internal links. 194 | #latex_show_pagerefs = False 195 | 196 | # If true, show URL addresses after external links. 197 | #latex_show_urls = False 198 | 199 | # Additional stuff for the LaTeX preamble. 200 | #latex_preamble = '' 201 | 202 | # Documents to append as an appendix to all manuals. 203 | #latex_appendices = [] 204 | 205 | # If false, no module index is generated. 206 | #latex_domain_indices = True 207 | 208 | 209 | # -- Options for manual page output -------------------------------------------- 210 | 211 | # One entry per manual page. List of tuples 212 | # (source start file, name, description, authors, manual section). 213 | man_pages = [ 214 | ('index', 'imposmparser', u'imposm.parser Documentation', 215 | [u'Oliver Tonnhofer'], 1) 216 | ] 217 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. include:: ../../README.rst 3 | 4 | .. Contents: 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | install 10 | concepts 11 | parsing 12 | 13 | .. Indices and tables 14 | .. ================== 15 | .. 16 | .. * :ref:`genindex` 17 | .. * :ref:`modindex` 18 | .. * :ref:`search` 19 | 20 | -------------------------------------------------------------------------------- /doc/source/install.rst: -------------------------------------------------------------------------------- 1 | .. Installation 2 | .. ============ 3 | 4 | Requirements 5 | ------------ 6 | 7 | ``imposm.parser`` runs with Python 2.5, 2.6 and 2.7 and is tested on Linux and Mac OS X. 8 | 9 | The PBF parser is written as a C extension, so you need a C/C++ compiler, the Python development headers and Google Protobuf. 10 | 11 | On Ubuntu:: 12 | 13 | sudo aptitude install build-essential python-dev protobuf-compiler libprotobuf-dev 14 | 15 | Installation 16 | ------------ 17 | 18 | You can install ``imposm.parser`` with ``pip`` or ``easy_install``. 19 | 20 | :: 21 | 22 | pip install imposm.parser 23 | 24 | :: 25 | 26 | easy_install imposm.parser 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /doc/source/parsing.rst: -------------------------------------------------------------------------------- 1 | Parsing API 2 | =========== 3 | 4 | ``imposm.parser`` comes with a single ``OSMParser`` class that implements a simple-to-use, callback-based parser for OSM files. 5 | 6 | It supports `XML `_ and `PBF `_ files. It also supports BZip2 compressed XML files.
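Here is a minimal usage sketch that collects all ways of a file; the filename is only a placeholder and the callback name is arbitrary (see the README for a complete highway-counting example)::

    from imposm.parser import OSMParser

    all_ways = []

    def ways_callback(ways):
        # ways is a list of (osmid, tags, refs) tuples
        all_ways.extend(ways)

    p = OSMParser(concurrency=2, ways_callback=ways_callback)
    p.parse('some_file.osm.pbf')
    print len(all_ways)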
7 | 8 | Concurrency 9 | ~~~~~~~~~~~ 10 | 11 | The parser uses multiprocessing to distribute the parsing across multiple CPUs. This works with both PBF and XML files. 12 | 13 | You can pass ``concurrency`` as an argument to ``OSMParser``; it defaults to the number of CPUs/cores of the host system. ``concurrency`` defines the number of parser processes. The callbacks are handled in the main process and the decompression (if you have a ``.osm.bz2`` file) runs in an additional process, so you might get better results if you reduce this number on systems with more than two cores. 14 | 15 | You can double the number on systems with hyper-threading CPUs. 16 | 17 | 18 | API 19 | ~~~ 20 | 21 | .. module:: imposm.parser 22 | 23 | .. autoclass:: OSMParser 24 | :members: 25 | -------------------------------------------------------------------------------- /imposm/__init__.py: -------------------------------------------------------------------------------- 1 | __import__('pkg_resources').declare_namespace(__name__) -------------------------------------------------------------------------------- /imposm/parser/__init__.py: -------------------------------------------------------------------------------- 1 | from imposm.parser.simple import OSMParser 2 | 3 | __all__ = ['OSMParser'] -------------------------------------------------------------------------------- /imposm/parser/pbf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omniscale/imposm-parser/c1045e989af3d7d31086c2662dc632add5a45ed1/imposm/parser/pbf/__init__.py -------------------------------------------------------------------------------- /imposm/parser/pbf/multiproc.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Omniscale GmbH & Co. KG 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
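# PBFMultiProcParser reads the blob offsets of a PBF file and distributes them
# to a pool of PBFParserProcess workers via a JoinableQueue. Each worker parses
# its primitive block with PBFParser and forwards the decoded elements through
# the nodes/ways/relations/coords queues; a None entry on the position queue
# tells a worker to shut down.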
14 | 15 | import multiprocessing 16 | 17 | from imposm.parser.pbf.parser import PBFFile, PBFParser 18 | from imposm.parser.util import setproctitle 19 | 20 | class PBFParserProcess(PBFParser, multiprocessing.Process): 21 | def __init__(self, pos_queue, *args, **kw): 22 | multiprocessing.Process.__init__(self) 23 | PBFParser.__init__(self, *args, **kw) 24 | self.daemon = True 25 | self.pos_queue = pos_queue 26 | 27 | def run(self): 28 | setproctitle('imposm pbf parser') 29 | while True: 30 | pos = self.pos_queue.get() 31 | if pos is None: 32 | self.pos_queue.task_done() 33 | break 34 | 35 | self.parse(pos['filename'], offset=pos['blob_pos'], 36 | size=pos['blob_size']) 37 | self.pos_queue.task_done() 38 | 39 | class PBFMultiProcParser(object): 40 | nodes_tag_filter = None 41 | ways_tag_filter = None 42 | relations_tag_filter = None 43 | 44 | def __init__(self, pool_size, nodes_queue=None, ways_queue=None, 45 | relations_queue=None, coords_queue=None, marshal_elem_data=False): 46 | self.pool_size = pool_size 47 | self.nodes_callback = nodes_queue.put if nodes_queue else None 48 | self.ways_callback = ways_queue.put if ways_queue else None 49 | self.relations_callback = relations_queue.put if relations_queue else None 50 | self.coords_callback = coords_queue.put if coords_queue else None 51 | self.marshal = marshal_elem_data 52 | def parse(self, filename): 53 | pos_queue = multiprocessing.JoinableQueue(32) 54 | pool = [] 55 | for _ in xrange(self.pool_size): 56 | proc = PBFParserProcess(pos_queue, nodes_callback=self.nodes_callback, 57 | coords_callback=self.coords_callback, ways_callback=self.ways_callback, 58 | relations_callback=self.relations_callback, 59 | nodes_tag_filter=self.nodes_tag_filter, 60 | ways_tag_filter=self.ways_tag_filter, 61 | relations_tag_filter=self.relations_tag_filter, 62 | marshal=self.marshal 63 | ) 64 | pool.append(proc) 65 | proc.start() 66 | 67 | reader = PBFFile(filename) 68 | 69 | for pos in reader.blob_offsets(): 70 | pos_queue.put(pos) 71 | 72 | pos_queue.join() 73 | 74 | for proc in pool: 75 | pos_queue.put(None) 76 | for proc in pool: 77 | proc.join() 78 | 79 | if __name__ == '__main__': 80 | import sys 81 | 82 | def count_proc(type, queue): 83 | def count(): 84 | count = 0 85 | while True: 86 | nodes = queue.get() 87 | if nodes is None: 88 | queue.task_done() 89 | break 90 | count += len(nodes) 91 | queue.task_done() 92 | print type, count 93 | return count 94 | 95 | 96 | nodes_queue = multiprocessing.JoinableQueue(128) 97 | ways_queue = multiprocessing.JoinableQueue(128) 98 | relations_queue = multiprocessing.JoinableQueue(128) 99 | 100 | procs = [ 101 | multiprocessing.Process(target=count_proc('nodes', nodes_queue)), 102 | multiprocessing.Process(target=count_proc('ways', ways_queue)), 103 | multiprocessing.Process(target=count_proc('relations', relations_queue)) 104 | ] 105 | for proc in procs: 106 | proc.start() 107 | 108 | parser = PBFMultiProcParser(2, nodes_queue=nodes_queue, 109 | ways_queue=ways_queue, relations_queue=relations_queue) 110 | parser.parse(sys.argv[1]) 111 | 112 | nodes_queue.put(None) 113 | ways_queue.put(None) 114 | relations_queue.put(None) 115 | 116 | for proc in procs: 117 | proc.join() -------------------------------------------------------------------------------- /imposm/parser/pbf/parser.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Omniscale GmbH & Co. 
KG 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import with_statement 16 | 17 | import struct 18 | import sys 19 | import zlib 20 | 21 | from marshal import dumps 22 | 23 | from imposm.parser.pbf import OSMPBF 24 | 25 | SUPPORTED_FEATURES = set(['OsmSchema-V0.6', 'DenseNodes']) 26 | 27 | 28 | _MEMBERTYPE = {0 : 'node', 29 | 1 : 'way', 30 | 2 : 'relation'} 31 | 32 | 33 | 34 | class PBFParser(object): 35 | """ 36 | OSM PBF parser. 37 | 38 | :param xxx_callback: 39 | callback functions for coords, nodes, ways and relations. 40 | Each callback function gets called with a list of multiple elements. 41 | 42 | :param xxx_filter: 43 | functions that can manipulate the tag dictionary. 44 | Nodes and relations without tags will not passed to the callback. 45 | 46 | :param marshal: 47 | return the data as a marshaled string 48 | """ 49 | def __init__(self, nodes_callback=None, ways_callback=None, 50 | relations_callback=None, coords_callback=None, nodes_tag_filter=None, 51 | ways_tag_filter=None, relations_tag_filter=None, marshal=False): 52 | self.nodes_callback = nodes_callback 53 | self.ways_callback = ways_callback 54 | self.relations_callback = relations_callback 55 | self.coords_callback = coords_callback 56 | self.nodes_tag_filter = nodes_tag_filter 57 | self.ways_tag_filter = ways_tag_filter 58 | self.relations_tag_filter = relations_tag_filter 59 | self.marshal = marshal 60 | 61 | def parse(self, filename, offset, size): 62 | """ 63 | Parse primitive block from `filename`. 
64 | 65 | :param filename: path to PBF file 66 | :param offset: byte offset of the primitive block to parse 67 | :param size: size in bytes of the primitive block to parse 68 | """ 69 | reader = PrimitiveBlockParser(filename, offset, size) 70 | 71 | if self.nodes_callback or self.coords_callback: 72 | self.handle_nodes(reader) 73 | if self.ways_callback: 74 | self.handle_ways(reader) 75 | if self.relations_callback: 76 | self.handle_relations(reader) 77 | 78 | def handle_nodes(self, reader): 79 | nodes = [] 80 | coords = [] 81 | nodes_callback = self.nodes_callback 82 | coords_callback = self.coords_callback 83 | for node in reader.nodes(): 84 | if nodes_callback: 85 | if self.nodes_tag_filter: 86 | self.nodes_tag_filter(node[1]) 87 | if node[1]: 88 | if self.marshal: 89 | nodes.append((node[0], dumps((node[1], node[2]), 2))) 90 | else: 91 | nodes.append((node[0], node[1], node[2])) 92 | if len(nodes) >= 256: 93 | nodes_callback(nodes) 94 | nodes = [] 95 | if coords_callback: 96 | coords.append((node[0], node[2][0], node[2][1])) 97 | if len(coords) >= 512: 98 | coords_callback(coords) 99 | coords = [] 100 | if nodes_callback: 101 | nodes_callback(nodes) 102 | if coords_callback: 103 | coords_callback(coords) 104 | 105 | def handle_ways(self, reader): 106 | ways = [] 107 | for way in reader.ways(): 108 | if self.ways_tag_filter: 109 | self.ways_tag_filter(way[1]) 110 | # always return ways, might be needed for relations 111 | if self.marshal: 112 | ways.append((way[0], dumps((way[1], way[2]), 2))) 113 | else: 114 | ways.append((way[0], way[1], way[2])) 115 | if len(ways) >= 256: 116 | self.ways_callback(ways) 117 | ways = [] 118 | self.ways_callback(ways) 119 | 120 | def handle_relations(self, reader): 121 | relations = [] 122 | for relation in reader.relations(): 123 | if self.relations_tag_filter: 124 | self.relations_tag_filter(relation[1]) 125 | if not relation[1]: 126 | continue 127 | if self.marshal: 128 | relations.append((relation[0], dumps((relation[1], relation[2]), 2))) 129 | else: 130 | relations.append((relation[0], relation[1], relation[2])) 131 | if len(relations) >= 256: 132 | self.relations_callback(relations) 133 | relations = [] 134 | self.relations_callback(relations) 135 | 136 | def decoded_stringtable(stringtable): 137 | result = [] 138 | for s in stringtable: 139 | result.append(s.decode('utf-8')) 140 | return result 141 | 142 | class PrimitiveBlockParser(object): 143 | """ 144 | Low level PBF primitive block parser. 145 | 146 | Parses a single primitive block and handles OSM PBF internals like 147 | dense nodes, delta encoding, stringtables, etc. 
148 | 149 | :param filename: path to PBF file 150 | :param offset: byte offset of the primitive block to parse 151 | :param size: size in bytes of the primitive block to parse 152 | 153 | """ 154 | def __init__(self, filename, blob_pos, blob_size): 155 | self.pos = filename, blob_pos, blob_size 156 | data = read_blob_data(filename, blob_pos, blob_size) 157 | self.primitive_block = OSMPBF.PrimitiveBlock() 158 | self.primitive_block.ParseFromString(data) 159 | self.primitivegroup = self.primitive_block.primitivegroup 160 | self.stringtable = decoded_stringtable(self.primitive_block.stringtable.s) 161 | 162 | def __repr__(self): 163 | return '' % (self.pos, ) 164 | 165 | def _get_tags(self, element, pos): 166 | tags = {} 167 | key = None 168 | value = None 169 | keyflag = False 170 | if pos >= len(element): 171 | return {}, pos 172 | while True: 173 | key_val = element[pos] 174 | pos += 1 175 | if key_val == 0: 176 | break 177 | if not keyflag: 178 | key = key_val 179 | keyflag = True 180 | else: 181 | value = key_val 182 | tags[self.stringtable[key]] = self.stringtable[value] 183 | keyflag = False 184 | return tags, pos 185 | 186 | def nodes(self): 187 | """ 188 | Return an iterator for all *nodes* in this primitive block. 189 | 190 | :rtype: iterator of ``(osm_id, tags, (lon, lat))`` tuples 191 | """ 192 | for group in self.primitivegroup: 193 | dense = group.dense 194 | if dense: 195 | granularity = self.primitive_block.granularity or 100 196 | lat_offset = self.primitive_block.lat_offset or 0 197 | lon_offset = self.primitive_block.lon_offset or 0 198 | coord_scale = 0.000000001 199 | get_tags = self._get_tags 200 | ids = dense.id 201 | lats = dense.lat 202 | lons = dense.lon 203 | keys_vals = dense.keys_vals 204 | last_id = last_lat = last_lon = last_keysvals_pos = 0 205 | for i in xrange(len(ids)): 206 | last_id += ids[i] 207 | last_lat += coord_scale * (lat_offset + (granularity * lats[i])) 208 | last_lon += coord_scale * (lon_offset + (granularity * lons[i])) 209 | tags, last_keysvals_pos = get_tags(keys_vals, last_keysvals_pos) 210 | yield (last_id, tags, (last_lon, last_lat)) 211 | nodes = group.nodes 212 | if nodes: 213 | for node in nodes: 214 | keys, vals = node.keys, node.vals 215 | tags = [] 216 | for i in xrange(len(keys)): 217 | tags.append((self.stringtable[keys[i]], self.stringtable[vals[i]])) 218 | yield (node.id, tags, (node.lon, node.lat)) 219 | 220 | def ways(self): 221 | """ 222 | Return an iterator for all *ways* in this primitive block. 223 | 224 | :rtype: iterator of ``(osm_id, tags, [ref1, ref2, ...])`` tuples 225 | """ 226 | for group in self.primitivegroup: 227 | ways = group.ways 228 | if ways: 229 | for way in ways: 230 | keys = way.keys 231 | vals = way.vals 232 | delta_refs = way.refs 233 | 234 | tags = {} 235 | for i in xrange(len(keys)): 236 | tags[self.stringtable[keys[i]]] = self.stringtable[vals[i]] 237 | refs = [] 238 | ref = 0 239 | for delta in delta_refs: 240 | ref += delta 241 | refs.append(ref) 242 | yield (way.id, tags, refs) 243 | 244 | def relations(self): 245 | """ 246 | Return an iterator for all *relations* in this primitive block. 
247 | 248 | :rtype: iterator of ``(osm_id, tags, [(ref1, type, role), ...])`` tuples 249 | 250 | """ 251 | for group in self.primitivegroup: 252 | relations = group.relations 253 | if relations: 254 | for relation in relations: 255 | members = [] 256 | memids = relation.memids 257 | rel_types = relation.types 258 | roles_sids = relation.roles_sid 259 | keys = relation.keys 260 | vals = relation.vals 261 | memid = 0 262 | for i in xrange(len(rel_types)): 263 | memid += memids[i] 264 | members.append((memid, _MEMBERTYPE[rel_types[i]], self.stringtable[roles_sids[i]])) 265 | tags = {} 266 | for i in xrange(len(keys)): 267 | tags[self.stringtable[keys[i]]] = self.stringtable[vals[i]] 268 | yield (relation.id, tags, members) 269 | 270 | class PBFHeader(object): 271 | def __init__(self, filename, blob_pos, blob_size): 272 | data = read_blob_data(filename, blob_pos, blob_size) 273 | self.header_block = OSMPBF.HeaderBlock() 274 | self.header_block.ParseFromString(data) 275 | 276 | def required_features(self): 277 | return set(self.header_block.required_features) 278 | 279 | 280 | def read_blob_data(filename, blob_pos, blob_size): 281 | """ 282 | Returns the (unzipped) blob data from filename and position. 283 | """ 284 | with open(filename, 'rb') as f: 285 | f.seek(blob_pos) 286 | blob_data = f.read(blob_size) 287 | 288 | blob = OSMPBF.Blob() 289 | blob.ParseFromString(blob_data) 290 | raw_data = blob.raw 291 | if raw_data: 292 | return raw_data 293 | return zlib.decompress(blob.zlib_data) 294 | 295 | import time 296 | 297 | class PBFFile(object): 298 | """ 299 | OSM PBF file reader. 300 | 301 | Parses the low-level file structure with header sizes, 302 | offsets and blob headers. 303 | 304 | :param filename: path to the PBF file 305 | """ 306 | def __init__(self, filename): 307 | self.filename = filename 308 | self.file = open(filename, 'rb') 309 | self.next_blob_pos = self.prev_blob_pos = 0 310 | header_offsets = self._skip_header() 311 | self.header = PBFHeader(self.filename, header_offsets['blob_pos'], header_offsets['blob_size']) 312 | self.check_features() 313 | 314 | def check_features(self): 315 | missing_features = self.header.required_features().difference(SUPPORTED_FEATURES) 316 | if missing_features: 317 | raise NotImplementedError( 318 | '%s requires features not implemented by this parser: %s' % 319 | (self.filename, ', '.join(missing_features)) 320 | ) 321 | 322 | def _skip_header(self): 323 | return self.blob_offsets().next() 324 | 325 | def seek(self, pos): 326 | self.next_blob_pos = pos 327 | 328 | def rewind(self): 329 | self.next_blob_pos = self.prev_blob_pos 330 | 331 | def blob_offsets(self): 332 | """ 333 | Returns an iterator of the blob offsets in this file. 334 | 335 | Each offsets is stored in a dictionary with: 336 | 337 | - `filename` the path of this PBF file. 
338 | - `blob_pos` the byte offset 339 | - `blob_size` the size of this blob in bytes 340 | """ 341 | while True: 342 | self.file.seek(self.next_blob_pos) 343 | 344 | blob_header_size = self._blob_header_size() 345 | if not blob_header_size: break 346 | 347 | blob_size = self._blob_size(self.file.read(blob_header_size)) 348 | blob_pos = self.next_blob_pos + 4 + blob_header_size 349 | blob_header_pos=self.next_blob_pos, 350 | prev_blob_header_pos = self.prev_blob_pos 351 | self.prev_blob_pos = self.next_blob_pos 352 | self.next_blob_pos = blob_pos + blob_size 353 | yield dict(blob_pos=blob_pos, blob_size=blob_size, 354 | blob_header_pos=blob_header_pos, 355 | prev_blob_header_pos=prev_blob_header_pos, 356 | filename=self.filename) 357 | 358 | def primitive_block_parsers(self): 359 | """ 360 | Returns an iterator of PrimitiveBlockParser. 361 | """ 362 | for pos in self.blob_offsets(): 363 | yield PrimitiveBlockParser(self.filename, pos['blob_pos'], pos['blob_size']) 364 | 365 | def _blob_size(self, data): 366 | blob_header = OSMPBF.BlobHeader() 367 | blob_header.ParseFromString(data) 368 | return blob_header.datasize 369 | 370 | def _blob_header_size(self): 371 | bytes = self.file.read(4) 372 | if bytes: 373 | return struct.unpack('!i', bytes)[0] 374 | return None 375 | 376 | 377 | def read_pbf(filename): 378 | pbf = PBFFile(filename) 379 | for block in pbf.primitive_block_parsers(): 380 | for node in block.nodes(): 381 | pass 382 | for way in block.ways(): 383 | pass 384 | for relation in block.relations(): 385 | pass 386 | 387 | if __name__ == '__main__': 388 | from timeit import Timer 389 | n = 1 390 | r = 1 391 | print "reading %s, number of calls: %d, repeat: %d" %(sys.argv[1],n,r) 392 | t = Timer("read_pbf(sys.argv[1])", "from __main__ import read_pbf") 393 | times = t.repeat(r,n) 394 | avrg_times = [] 395 | for time in times: 396 | avrg_times.append(time/n) 397 | print "avrg time/call: %f" %(min(avrg_times)) 398 | -------------------------------------------------------------------------------- /imposm/parser/simple.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Omniscale GmbH & Co. KG 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import with_statement 16 | 17 | import multiprocessing 18 | import sys 19 | import time 20 | 21 | from Queue import Empty 22 | 23 | from imposm.parser.util import default_concurrency, fileinput, setproctitle 24 | 25 | class OSMParser(object): 26 | """ 27 | High-level OSM parser. 28 | 29 | :param concurrency: 30 | number of parser processes to start. Defaults to the number of CPUs. 31 | :param xxx_callback: 32 | callback functions for coords, nodes, ways and relations. 33 | Each callback function gets called with a list of multiple elements. 34 | See :ref:`callback concepts `. 35 | 36 | :param xxx_filter: 37 | functions that can manipulate the tag dictionary. 38 | Nodes and relations without tags will not passed to the callback. 
39 | See :ref:`tag filter concepts `. 40 | 41 | """ 42 | def __init__(self, concurrency=None, nodes_callback=None, ways_callback=None, 43 | relations_callback=None, coords_callback=None, nodes_tag_filter=None, 44 | ways_tag_filter=None, relations_tag_filter=None, marshal_elem_data=False): 45 | self.concurrency = concurrency or default_concurrency() 46 | assert self.concurrency >= 1 47 | self.nodes_callback = nodes_callback 48 | self.ways_callback = ways_callback 49 | self.relations_callback = relations_callback 50 | self.coords_callback = coords_callback 51 | self.nodes_tag_filter = nodes_tag_filter 52 | self.ways_tag_filter = ways_tag_filter 53 | self.relations_tag_filter = relations_tag_filter 54 | self.marshal_elem_data = marshal_elem_data 55 | 56 | def parse(self, filename): 57 | """ 58 | Parse the given file. Detects the filetype based on the file suffix. 59 | Supports ``.pbf``, ``.osm`` and ``.osm.bz2``. 60 | """ 61 | if filename.endswith('.pbf'): 62 | return self.parse_pbf_file(filename) 63 | elif filename.endswith(('.osm', '.osm.bz2')): 64 | return self.parse_xml_file(filename) 65 | else: 66 | raise NotImplementedError('unknown file extension') 67 | 68 | def parse_pbf_file(self, filename): 69 | """ 70 | Parse a PBF file. 71 | """ 72 | from imposm.parser.pbf.multiproc import PBFMultiProcParser 73 | return self._parse(filename, PBFMultiProcParser) 74 | 75 | def parse_xml_file(self, filename): 76 | """ 77 | Parse a XML file. 78 | Supports BZip2 compressed files if the filename ends with ``.bz2``. 79 | """ 80 | from imposm.parser.xml.multiproc import XMLMultiProcParser 81 | with fileinput(filename) as input: 82 | return self._parse(input, XMLMultiProcParser) 83 | 84 | def _parse(self, input, parser_class): 85 | queues_callbacks = {} 86 | if self.coords_callback: 87 | queues_callbacks['coords'] = (multiprocessing.JoinableQueue(512), 88 | self.coords_callback) 89 | if self.nodes_callback: 90 | queues_callbacks['nodes'] = (multiprocessing.JoinableQueue(128), 91 | self.nodes_callback) 92 | if self.ways_callback: 93 | queues_callbacks['ways'] = (multiprocessing.JoinableQueue(128), 94 | self.ways_callback) 95 | if self.relations_callback: 96 | queues_callbacks['relations'] = (multiprocessing.JoinableQueue(128), 97 | self.relations_callback) 98 | 99 | def parse_it(): 100 | setproctitle('imposm parser') 101 | queues = dict([(type, q) for type, (q, c) in queues_callbacks.items()]) 102 | 103 | parser = parser_class(self.concurrency, 104 | ways_queue=queues.get('ways'), 105 | coords_queue=queues.get('coords'), 106 | nodes_queue=queues.get('nodes'), 107 | relations_queue=queues.get('relations'), 108 | marshal_elem_data=self.marshal_elem_data 109 | ) 110 | parser.nodes_tag_filter = self.nodes_tag_filter 111 | parser.ways_tag_filter = self.ways_tag_filter 112 | parser.relations_tag_filter = self.relations_tag_filter 113 | parser.parse(input) 114 | for q in queues.values(): 115 | q.put(None) 116 | 117 | proc = multiprocessing.Process(target=parse_it) 118 | proc.start() 119 | 120 | while queues_callbacks: 121 | processed = False 122 | for items_type, (queue, callback) in queues_callbacks.items(): 123 | try: 124 | items = None 125 | while True: 126 | items = queue.get_nowait() 127 | if items is None: 128 | queue.task_done() 129 | del queues_callbacks[items_type] 130 | break 131 | else: 132 | callback(items) 133 | if items: 134 | processed = True 135 | except Empty: 136 | pass 137 | if not processed: 138 | # wait a ms if all queues were empty 139 | # to give the parser a chance to fill them up 140 | 
time.sleep(0.001) 141 | proc.join() 142 | -------------------------------------------------------------------------------- /imposm/parser/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omniscale/imposm-parser/c1045e989af3d7d31086c2662dc632add5a45ed1/imposm/parser/test/__init__.py -------------------------------------------------------------------------------- /imposm/parser/test/test.osm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /imposm/parser/test/test.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omniscale/imposm-parser/c1045e989af3d7d31086c2662dc632add5a45ed1/imposm/parser/test/test.osm.bz2 -------------------------------------------------------------------------------- /imposm/parser/test/test.pbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omniscale/imposm-parser/c1045e989af3d7d31086c2662dc632add5a45ed1/imposm/parser/test/test.pbf -------------------------------------------------------------------------------- /imposm/parser/test/test_simple_parser.py: -------------------------------------------------------------------------------- 1 | # -:- encoding: utf8 -:- 2 | # Copyright 2011 Omniscale GmbH & Co. KG 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
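# The callback methods defined below receive batches (lists) of parsed
# elements. As asserted by the tests in this module, the element tuples are:
#   coords:    (osm_id, lon, lat)
#   nodes:     (osm_id, tags, (lon, lat))
#   ways:      (osm_id, tags, [ref, ...])
#   relations: (osm_id, tags, [(member_id, member_type, role), ...])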
15 | 16 | import os 17 | from imposm.parser import OSMParser 18 | from nose.tools import eq_ 19 | 20 | class ParserTestBase(object): 21 | osm_filename = None 22 | ways_filter = None 23 | nodes_filter = None 24 | relations_filter = None 25 | def __init__(self): 26 | self.nodes = [] 27 | self.coords = [] 28 | self.ways = [] 29 | self.relations = [] 30 | 31 | def parse_nodes(self, nodes): 32 | self.nodes.extend(nodes) 33 | def parse_coords(self, coords): 34 | self.coords.extend(coords) 35 | def parse_ways(self, ways): 36 | self.ways.extend(ways) 37 | def parse_relations(self, relations): 38 | self.relations.extend(relations) 39 | 40 | def parse(self): 41 | parser = OSMParser(2, 42 | nodes_callback=self.parse_nodes, 43 | coords_callback=self.parse_coords, 44 | ways_callback=self.parse_ways, 45 | relations_callback=self.parse_relations, 46 | nodes_tag_filter=self.nodes_filter, 47 | ways_tag_filter=self.ways_filter, 48 | relations_tag_filter=self.relations_filter, 49 | ) 50 | osm_filename = os.path.join(os.path.dirname(__file__), self.osm_filename) 51 | parser.parse(osm_filename) 52 | 53 | def test_parse_result(self): 54 | self.parse() 55 | eq_(len(self.nodes), 1) 56 | eq_(self.nodes[0], 57 | (2, {'name': 'test', 'created_by': 'hand'}, (10.0, 51.0))) 58 | 59 | eq_(len(self.coords), 2) 60 | eq_(self.coords[0], (1, 10.0, 50.0)) 61 | eq_(self.coords[1], (2, 10.0, 51.0)) 62 | 63 | eq_(len(self.ways), 1) 64 | eq_(self.ways[0], 65 | (3, {'highway': 'primary'}, [1, 2])) 66 | 67 | eq_(len(self.relations), 1) 68 | eq_(self.relations[0], 69 | (4, {'name': u'ܵlåû†é'}, [(123, 'way', 'outer'), (124, 'way', 'inner')])) 70 | 71 | class ParserTestBaseWithFilter(ParserTestBase): 72 | def nodes_filter(self, tags): 73 | for tag in tags.keys(): 74 | if tag != 'name': 75 | del tags[tag] 76 | 77 | ways_filter = nodes_filter 78 | def relations_filter(self, tags): 79 | tags.clear() 80 | 81 | def test_parse_result(self): 82 | self.parse() 83 | eq_(len(self.nodes), 1) 84 | eq_(self.nodes[0], 85 | (2, {'name': 'test'}, (10.0, 51.0))) 86 | 87 | eq_(len(self.coords), 2) 88 | eq_(self.coords[0], (1, 10.0, 50.0)) 89 | eq_(self.coords[1], (2, 10.0, 51.0)) 90 | 91 | eq_(len(self.ways), 1) 92 | eq_(self.ways[0], 93 | (3, {}, [1, 2])) 94 | 95 | eq_(len(self.relations), 0) 96 | 97 | class TestXML(ParserTestBase): 98 | osm_filename = 'test.osm' 99 | 100 | class TestBZIP2(ParserTestBase): 101 | osm_filename = 'test.osm.bz2' 102 | 103 | class TestPBF(ParserTestBase): 104 | osm_filename = 'test.pbf' 105 | 106 | class TestXMLWithFilter(ParserTestBaseWithFilter): 107 | osm_filename = 'test.osm' 108 | 109 | class TestPBFWithFilter(ParserTestBaseWithFilter): 110 | osm_filename = 'test.pbf' 111 | -------------------------------------------------------------------------------- /imposm/parser/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Omniscale GmbH & Co. KG 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | import contextlib
16 | import multiprocessing
17 | import os
18 | import subprocess
19 | 
20 | try:
21 |     from setproctitle import setproctitle
22 |     setproctitle
23 | except ImportError:
24 |     setproctitle = lambda x: None
25 | 
26 | def default_concurrency():
27 |     return multiprocessing.cpu_count()
28 | 
29 | def bzip_reader(filename):
30 |     p = subprocess.Popen(['bunzip2', '-c', filename], bufsize=-1, stdout=subprocess.PIPE)
31 |     return p.stdout
32 | 
33 | @contextlib.contextmanager
34 | def fileinput(filename):
35 |     if filename.endswith('bz2'):
36 |         yield bzip_reader(filename)
37 |     else:
38 |         fh = open(filename, 'rb')
39 |         yield fh
40 |         fh.close()
41 | 
42 | def estimate_records(files):
43 |     records = 0
44 |     for f in files:
45 |         fsize = os.path.getsize(f)
46 |         if f.endswith('.bz2'):
47 |             fsize *= 11 # observed bzip2 compression factor on osm data
48 |         if f.endswith('.pbf'):
49 |             fsize *= 15 # observed pbf compression factor on osm data
50 |         records += fsize/200
51 | 
52 |     return int(records)
--------------------------------------------------------------------------------
/imposm/parser/xml/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omniscale/imposm-parser/c1045e989af3d7d31086c2662dc632add5a45ed1/imposm/parser/xml/__init__.py
--------------------------------------------------------------------------------
/imposm/parser/xml/multiproc.py:
--------------------------------------------------------------------------------
1 | # Copyright 2011 Omniscale GmbH & Co. KG
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 15 | import mmap 16 | import multiprocessing 17 | import re 18 | 19 | from Queue import Empty 20 | 21 | from imposm.parser.xml.parser import XMLParser 22 | from imposm.parser.util import setproctitle 23 | 24 | KiB = 1024 25 | MiB = 1024*KiB 26 | 27 | READ_SIZE = 512*KiB 28 | 29 | 30 | class MMapReader(object): 31 | def __init__(self, m, size): 32 | self.m = m 33 | self.m.seek(0) 34 | self.size = size 35 | 36 | def read(self, size=None): 37 | if size is None: 38 | size = self.size - self.m.tell() 39 | else: 40 | size = min(self.size - self.m.tell(), size) 41 | return self.m.read(size) 42 | 43 | def readline(self): 44 | cur_pos = self.m.tell() 45 | if cur_pos >= self.size: 46 | return 47 | nl_pos = self.m.find('\n') 48 | self.m.seek(cur_pos) 49 | return self.m.read(nl_pos-cur_pos) 50 | 51 | def seek(self, n): 52 | self.m.seek(n) 53 | 54 | class XMLParserProcess(XMLParser, multiprocessing.Process): 55 | def __init__(self, mmap_pool, mmap_queue, *args, **kw): 56 | multiprocessing.Process.__init__(self) 57 | XMLParser.__init__(self, *args, **kw) 58 | self.daemon = True 59 | self.mmap_pool = mmap_pool 60 | self.mmap_queue = mmap_queue 61 | 62 | def run(self): 63 | setproctitle('imposm xml parser') 64 | while True: 65 | mmap_idx, size = self.mmap_queue.get() 66 | if mmap_idx is None: 67 | self.mmap_queue.task_done() 68 | break 69 | xml = MMapReader(self.mmap_pool.get(mmap_idx), size) 70 | self.parse(xml) 71 | self.mmap_queue.task_done() 72 | self.mmap_pool.free(mmap_idx) 73 | 74 | 75 | 76 | class XMLMultiProcParser(object): 77 | nodes_tag_filter = None 78 | ways_tag_filter = None 79 | relations_tag_filter = None 80 | 81 | def __init__(self, pool_size, nodes_queue=None, ways_queue=None, 82 | relations_queue=None, coords_queue=None, marshal_elem_data=False): 83 | self.pool_size = pool_size 84 | self.pool = [] 85 | self.nodes_callback = nodes_queue.put if nodes_queue else None 86 | self.ways_callback = ways_queue.put if ways_queue else None 87 | self.relations_callback = relations_queue.put if relations_queue else None 88 | self.coords_callback = coords_queue.put if coords_queue else None 89 | xml_chunk_size=READ_SIZE 90 | self.mmap_pool = MMapPool(pool_size*8, xml_chunk_size*8) 91 | self.mmap_queue = multiprocessing.JoinableQueue(8) 92 | self.marshal_elem_data = marshal_elem_data 93 | 94 | def parse(self, stream): 95 | assert not self.pool 96 | 97 | for _ in xrange(self.pool_size): 98 | proc = XMLParserProcess(self.mmap_pool, self.mmap_queue, nodes_callback=self.nodes_callback, 99 | coords_callback=self.coords_callback, ways_callback=self.ways_callback, 100 | relations_callback=self.relations_callback, 101 | nodes_tag_filter=self.nodes_tag_filter, 102 | ways_tag_filter=self.ways_tag_filter, 103 | relations_tag_filter=self.relations_tag_filter, 104 | marshal_elem_data=self.marshal_elem_data, 105 | ) 106 | self.pool.append(proc) 107 | proc.start() 108 | 109 | chunker = XMLChunker(stream, self.mmap_pool, xml_chunk_size=READ_SIZE) 110 | chunker.read(self.mmap_queue, coords_callback=self.coords_callback) 111 | 112 | self.mmap_queue.join() 113 | for proc in self.pool: 114 | self.mmap_queue.put((None, None)) 115 | for proc in self.pool: 116 | proc.join() 117 | 118 | 119 | class MMapPool(object): 120 | """ 121 | Manages multiple mmap files. 122 | The mmap files can be read and written in different processes. 
123 | """ 124 | def __init__(self, n, mmap_size): 125 | self.n = n 126 | self.mmap_size = mmap_size 127 | self.pool = [mmap.mmap(-1, mmap_size) for _ in range(n)] 128 | self.free_mmaps = set(range(n)) 129 | self.free_queue = multiprocessing.JoinableQueue() 130 | 131 | def new(self): 132 | """ 133 | Return a free mmap file. 134 | 135 | :returns: index, mmap file 136 | """ 137 | if not self.free_mmaps: 138 | self.free_mmaps.add(self.free_queue.get()) 139 | self.free_queue.task_done() 140 | while True: 141 | # fetch unless free_queue is empty 142 | try: 143 | self.free_mmaps.add(self.free_queue.get_nowait()) 144 | self.free_queue.task_done() 145 | except Empty: 146 | break 147 | mmap_idx = self.free_mmaps.pop() 148 | return mmap_idx, self.pool[mmap_idx] 149 | 150 | def join(self): 151 | while len(self.free_mmaps) < self.n: 152 | self.free_mmaps.add(self.free_queue.get()) 153 | self.free_queue.task_done() 154 | 155 | def get(self, idx): 156 | """ 157 | Return mmap file with `idx`. 158 | """ 159 | return self.pool[idx] 160 | 161 | def free(self, idx): 162 | """ 163 | Mark mmap file with `idx` as free. 164 | """ 165 | self.free_queue.put(idx) 166 | 167 | class XMLChunker(object): 168 | """ 169 | Reads and chunks OSM XML file. 170 | 171 | Reads OSM XML from `stream` and writes chunks of it into mmap files from 172 | the `mmap_pool`. 173 | 174 | :params xml_chunk_size: chunk XML after this many bytes 175 | """ 176 | def __init__(self, stream, mmap_pool, xml_chunk_size): 177 | self.stream = stream 178 | self.size = xml_chunk_size 179 | self._last_line = None 180 | self.mmap_pool = mmap_pool 181 | self.current_mmap_idx = 0 182 | self._skip_header() 183 | 184 | def _skip_header(self): 185 | for line in self.stream: 186 | if line.lstrip().startswith('") 194 | return stream 195 | 196 | def _finished_xml_outstream(self, last_line, stream): 197 | if '\n') 199 | return self.current_mmap_idx, stream.tell() 200 | 201 | def read(self, mmaps_queue, coords_callback=None): 202 | """ 203 | Read and chunk all 204 | """ 205 | coord_node_match = None 206 | xml_nodes = self._new_xml_outstream() 207 | coords = [] 208 | coord_node_re_match = re.compile(r'^\s*').match 210 | node_re_match = re.compile(r'^\s*').match 211 | xml_nodes.write(self._last_line) 212 | split = False 213 | line = '' 214 | for line in self.stream: 215 | if coords_callback: 216 | coord_node_match = coord_node_re_match(line) 217 | if coord_node_match: 218 | osm_id, lat, lon = coord_node_match.groups() 219 | coords.append((int(osm_id), float(lon), float(lat))) 220 | if len(coords) >= 512: 221 | coords_callback(coords) 222 | coords = [] 223 | else: 224 | xml_nodes.write(line) 225 | else: 226 | xml_nodes.write(line) 227 | if split: 228 | if (line.rstrip().endswith(('', '', '')) 229 | or (coords_callback and coord_node_match) 230 | or (not coords_callback and node_re_match(line))): 231 | mmaps_queue.put(self._finished_xml_outstream(line, xml_nodes)) 232 | xml_nodes = self._new_xml_outstream() 233 | split = False 234 | elif xml_nodes.tell() > self.size: 235 | split = True 236 | if coords_callback: 237 | coords_callback(coords) 238 | 239 | # we are at the end of the stream and assume we wrote the end tag 240 | # to xml_nodes. 
we set line to closing tag here to avoid additional 241 | # end tag in case the last line(s) is blank 242 | line = '= 512: 90 | self.coords_callback(coords) 91 | coords = [] 92 | if len(nodes) >= 128: 93 | self.nodes_callback(nodes) 94 | nodes = [] 95 | if len(relations) >= 128: 96 | self.relations_callback(relations) 97 | relations = [] 98 | if len(ways) >= 128: 99 | self.ways_callback(ways) 100 | ways = [] 101 | 102 | root.clear() 103 | 104 | if self.coords_callback: 105 | self.coords_callback(coords) 106 | if self.nodes_callback: 107 | self.nodes_callback(nodes) 108 | if self.ways_callback: 109 | self.ways_callback(ways) 110 | if self.relations_callback: 111 | self.relations_callback(relations) -------------------------------------------------------------------------------- /imposm/parser/xml/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Omniscale GmbH & Co. KG 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import with_statement 16 | from xml.etree import cElementTree as ET 17 | from contextlib import contextmanager 18 | 19 | def iterparse(fileobj): 20 | """ 21 | Return root object and iterparser for given ``fileobj``. 22 | """ 23 | context = ET.iterparse(fileobj, events=("start", "end")) 24 | context = iter(context) 25 | _event, root = context.next() 26 | return root, context 27 | 28 | @contextmanager 29 | def log_file_on_exception(xml): 30 | try: 31 | yield 32 | except SyntaxError, ex: 33 | import tempfile 34 | fd_, filename = tempfile.mkstemp('.osm') 35 | xml.seek(0) 36 | with open(filename, 'w') as f: 37 | f.write(xml.read()) 38 | print 'SyntaxError in xml: %s, (stored dump %s)' % (ex, filename) -------------------------------------------------------------------------------- /osm.proto: -------------------------------------------------------------------------------- 1 | /** Copyright (c) 2010 Scott A. Crosby. 2 | 3 | This program is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU Lesser General Public License as 5 | published by the Free Software Foundation, either version 3 of the 6 | License, or (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program. If not, see . 15 | 16 | */ 17 | 18 | option java_package = "crosby.binary"; 19 | package OSMPBF; 20 | 21 | //protoc --java_out=../.. fileformat.proto 22 | 23 | 24 | // 25 | // STORAGE LAYER: Storing primitives. 26 | // 27 | 28 | message Blob { 29 | optional bytes raw = 1; // No compression 30 | optional int32 raw_size = 2; // When compressed, the uncompressed size 31 | 32 | // Possible compressed versions of the data. 
33 | optional bytes zlib_data = 3; 34 | 35 | // PROPOSED feature for LZMA compressed data. SUPPORT IS NOT REQUIRED. 36 | optional bytes lzma_data = 4; 37 | 38 | // Formerly used for bzip2 compressed data. Depreciated in 2010. 39 | optional bytes OBSOLETE_bzip2_data = 5 [deprecated=true]; // Don't reuse this tag number. 40 | } 41 | 42 | /* A file contains an sequence of fileblock headers, each prefixed by 43 | their length in network byte order, followed by a data block 44 | containing the actual data. types staring with a "_" are reserved. 45 | */ 46 | 47 | message BlobHeader { 48 | required string type = 1; 49 | optional bytes indexdata = 2; 50 | required int32 datasize = 3; 51 | } 52 | 53 | 54 | /** Copyright (c) 2010 Scott A. Crosby. 55 | 56 | This program is free software: you can redistribute it and/or modify 57 | it under the terms of the GNU Lesser General Public License as 58 | published by the Free Software Foundation, either version 3 of the 59 | License, or (at your option) any later version. 60 | 61 | This program is distributed in the hope that it will be useful, 62 | but WITHOUT ANY WARRANTY; without even the implied warranty of 63 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 64 | GNU General Public License for more details. 65 | 66 | You should have received a copy of the GNU General Public License 67 | along with this program. If not, see . 68 | 69 | */ 70 | 71 | /* OSM Binary file format 72 | 73 | This is the master schema file of the OSM binary file format. This 74 | file is designed to support limited random-access and future 75 | extendability. 76 | 77 | A binary OSM file consists of a sequence of FileBlocks (please see 78 | fileformat.proto). The first fileblock contains a serialized instance 79 | of HeaderBlock, followed by a sequence of PrimitiveBlock blocks that 80 | contain the primitives. 81 | 82 | Each primitiveblock is designed to be independently parsable. It 83 | contains a string table storing all strings in that block (keys and 84 | values in tags, roles in relations, usernames, etc.) as well as 85 | metadata containing the precision of coordinates or timestamps in that 86 | block. 87 | 88 | A primitiveblock contains a sequence of primitive groups, each 89 | containing primitives of the same type (nodes, densenodes, ways, 90 | relations). Coordinates are stored in signed 64-bit integers. Lat&lon 91 | are measured in units nanodegrees. The default of 92 | granularity of 100 nanodegrees corresponds to about 1cm on the ground, 93 | and a full lat or lon fits into 32 bits. 94 | 95 | Converting an integer to a lattitude or longitude uses the formula: 96 | $OUT = IN * granularity / 10**9$. Many encoding schemes use delta 97 | coding when representing nodes and relations. 98 | 99 | */ 100 | 101 | ////////////////////////////////////////////////////////////////////////// 102 | ////////////////////////////////////////////////////////////////////////// 103 | 104 | /* Contains the file header. */ 105 | 106 | message HeaderBlock { 107 | optional HeaderBBox bbox = 1; 108 | /* Additional tags to aid in parsing this dataset */ 109 | repeated string required_features = 4; 110 | repeated string optional_features = 5; 111 | 112 | optional string writingprogram = 16; 113 | optional string source = 17; // From the bbox field. 114 | } 115 | 116 | 117 | /** The bounding box field in the OSM header. BBOX, as used in the OSM 118 | header. Units are always in nanodegrees -- they do not obey 119 | granularity rules. 
*/ 120 | 121 | message HeaderBBox { 122 | required sint64 left = 1; 123 | required sint64 right = 2; 124 | required sint64 top = 3; 125 | required sint64 bottom = 4; 126 | } 127 | 128 | 129 | /////////////////////////////////////////////////////////////////////// 130 | /////////////////////////////////////////////////////////////////////// 131 | 132 | 133 | message PrimitiveBlock { 134 | required StringTable stringtable = 1; 135 | repeated PrimitiveGroup primitivegroup = 2; 136 | 137 | // Granularity, units of nanodegrees, used to store coordinates in this block 138 | optional int32 granularity = 17 [default=100]; 139 | // Offset value between the output coordinates coordinates and the granularity grid in unites of nanodegrees. 140 | optional int64 lat_offset = 19 [default=0]; 141 | optional int64 lon_offset = 20 [default=0]; 142 | 143 | // Granularity of dates, normally represented in units of milliseconds since the 1970 epoch. 144 | optional int32 date_granularity = 18 [default=1000]; 145 | 146 | 147 | // Proposed extension: 148 | //optional BBox bbox = XX; 149 | } 150 | 151 | // Group of OSMPrimitives. All primitives in a group must be the same type. 152 | message PrimitiveGroup { 153 | repeated Node nodes = 1; 154 | optional DenseNodes dense = 2; 155 | repeated Way ways = 3; 156 | repeated Relation relations = 4; 157 | repeated ChangeSet changesets = 5; 158 | } 159 | 160 | 161 | /** String table, contains the common strings in each block. 162 | 163 | Note that we reserve index '0' as a delimiter, so the entry at that 164 | index in the table is ALWAYS blank and unused. 165 | 166 | */ 167 | message StringTable { 168 | repeated bytes s = 1; 169 | } 170 | 171 | /* Optional metadata that may be included into each primitive. */ 172 | message Info { 173 | optional int32 version = 1 [default = -1]; 174 | optional int64 timestamp = 2; 175 | optional int64 changeset = 3; 176 | optional int32 uid = 4; 177 | optional uint32 user_sid = 5; // String IDs 178 | } 179 | 180 | /** Optional metadata that may be included into each primitive. Special dense format used in DenseNodes. */ 181 | message DenseInfo { 182 | repeated int32 version = 1 [packed = true]; 183 | repeated sint64 timestamp = 2 [packed = true]; // DELTA coded 184 | repeated sint64 changeset = 3 [packed = true]; // DELTA coded 185 | repeated sint32 uid = 4 [packed = true]; // DELTA coded 186 | repeated sint32 user_sid = 5 [packed = true]; // String IDs for usernames. DELTA coded 187 | } 188 | 189 | 190 | // THIS IS STUB DESIGN FOR CHANGESETS. NOT USED RIGHT NOW. 191 | // TODO: REMOVE THIS? 192 | message ChangeSet { 193 | required int64 id = 1; 194 | // 195 | // // Parallel arrays. 196 | // repeated uint32 keys = 2 [packed = true]; // String IDs. 197 | // repeated uint32 vals = 3 [packed = true]; // String IDs. 198 | // 199 | // optional Info info = 4; 200 | 201 | // optional int64 created_at = 8; 202 | // optional int64 closetime_delta = 9; 203 | // optional bool open = 10; 204 | // optional HeaderBBox bbox = 11; 205 | } 206 | 207 | 208 | message Node { 209 | required sint64 id = 1; 210 | // Parallel arrays. 211 | repeated uint32 keys = 2 [packed = true]; // String IDs. 212 | repeated uint32 vals = 3 [packed = true]; // String IDs. 213 | 214 | optional Info info = 4; // May be omitted in omitmeta 215 | 216 | required sint64 lat = 8; 217 | required sint64 lon = 9; 218 | } 219 | 220 | /* Used to densly represent a sequence of nodes that do not have any tags. 
221 | 222 | We represent these nodes columnwise as five columns: ID's, lats, and 223 | lons, all delta coded. When metadata is not omitted, 224 | 225 | We encode keys & vals for all nodes as a single array of integers 226 | containing key-stringid and val-stringid, using a stringid of 0 as a 227 | delimiter between nodes. 228 | 229 | ( ( )* '0' )* 230 | */ 231 | 232 | message DenseNodes { 233 | repeated sint64 id = 1 [packed = true]; // DELTA coded 234 | 235 | //repeated Info info = 4; 236 | optional DenseInfo denseinfo = 5; 237 | 238 | repeated sint64 lat = 8 [packed = true]; // DELTA coded 239 | repeated sint64 lon = 9 [packed = true]; // DELTA coded 240 | 241 | // Special packing of keys and vals into one array. May be empty if all nodes in this block are tagless. 242 | repeated int32 keys_vals = 10 [packed = true]; 243 | } 244 | 245 | 246 | message Way { 247 | required int64 id = 1; 248 | // Parallel arrays. 249 | repeated uint32 keys = 2 [packed = true]; 250 | repeated uint32 vals = 3 [packed = true]; 251 | 252 | optional Info info = 4; 253 | 254 | repeated sint64 refs = 8 [packed = true]; // DELTA coded 255 | } 256 | 257 | message Relation { 258 | enum MemberType { 259 | NODE = 0; 260 | WAY = 1; 261 | RELATION = 2; 262 | } 263 | required int64 id = 1; 264 | 265 | // Parallel arrays. 266 | repeated uint32 keys = 2 [packed = true]; 267 | repeated uint32 vals = 3 [packed = true]; 268 | 269 | optional Info info = 4; 270 | 271 | // Parallel arrays 272 | repeated int32 roles_sid = 8 [packed = true]; 273 | repeated sint64 memids = 9 [packed = true]; // DELTA encoded 274 | repeated MemberType types = 10 [packed = true]; 275 | } 276 | 277 | -------------------------------------------------------------------------------- /release.py: -------------------------------------------------------------------------------- 1 | import scriptine 2 | from scriptine import path 3 | from scriptine.shell import backtick_, sh 4 | 5 | PACKAGE_NAME = 'imposm.parser' 6 | REMOTE_DOC_LOCATION = 'omniscale.de:domains/imposm.org/docs/imposm.parser' 7 | 8 | VERSION_FILES = [ 9 | ('setup.py', 'version="###"'), 10 | ('doc/source/conf.py', "version = '##'"), 11 | ('doc/source/conf.py', "release = '###'"), 12 | ] 13 | 14 | def version_command(): 15 | print version() 16 | 17 | def prepare_command(tag=""): 18 | sh('python setup.py egg_info -D -b "%s"' % tag) 19 | 20 | def version(): 21 | package_name = PACKAGE_NAME 22 | version = backtick_('grep Version: %(package_name)s.egg-info/PKG-INFO' % locals()) 23 | version = version.split(':')[-1].strip() 24 | return version 25 | 26 | def clean_all_command(): 27 | path('build/').rmtree(ignore_errors=True) 28 | for pyc in path.cwd().walkfiles('*.pyc'): 29 | pyc.remove() 30 | 31 | def bump_version_command(version): 32 | short_version = '.'.join(version.split('.')[:2]) 33 | for filename, replace in VERSION_FILES: 34 | if '###' in replace: 35 | search_for = replace.replace('###', '[^\'"]+') 36 | replace_with = replace.replace('###', version) 37 | else: 38 | search_for = replace.replace('##', '[^\'"]+') 39 | replace_with = replace.replace('##', short_version) 40 | 41 | search_for = search_for.replace('"', '\\"') 42 | replace_with = replace_with.replace('"', '\\"') 43 | sh('''perl -p -i -e "s/%(search_for)s/%(replace_with)s/" %(filename)s ''' % locals()) 44 | 45 | prepare_command() 46 | 47 | def build_docs_command(): 48 | sh('python setup.py build_sphinx') 49 | ver = version() 50 | package_name = PACKAGE_NAME 51 | sh("tar -c -v -z -C build/sphinx/ -f 
dist/%(package_name)s-docs-%(ver)s.tar.gz -s " 52 | "'/^html/%(package_name)s-docs-%(ver)s/' html" 53 | % locals()) 54 | 55 | def upload_docs_command(): 56 | ver = version() 57 | remote_doc_location = REMOTE_DOC_LOCATION 58 | sh('rsync -a -v -P -z build/sphinx/html/ %(remote_doc_location)s/%(ver)s' % locals()) 59 | 60 | def build_sdist_command(): 61 | sh('python setup.py egg_info -b "" -D sdist') 62 | 63 | def upload_sdist_command(): 64 | sh('python setup.py egg_info -b "" -D sdist') 65 | ver = version() 66 | remote_rel_location = REMOTE_REL_LOCATION 67 | sh('scp dist/imposm.parser-%(ver)s.* %(remote_rel_location)s' % locals()) 68 | 69 | def upload_final_sdist_command(): 70 | sh('python setup.py egg_info -b "" -D sdist upload') 71 | 72 | def link_latest_command(ver=None): 73 | if ver is None: 74 | ver = version() 75 | host, path = REMOTE_DOC_LOCATION.split(':') 76 | sh('ssh %(host)s "cd %(path)s && rm latest && ln -s %(ver)s latest"' % locals()) 77 | 78 | if __name__ == '__main__': 79 | scriptine.run() 80 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import platform 3 | from setuptools import setup, Extension, find_packages 4 | from setuptools.command.build_ext import build_ext 5 | from distutils.errors import DistutilsPlatformError 6 | 7 | import subprocess 8 | 9 | class build_ext_with_protpbuf(build_ext): 10 | def run(self): 11 | try: 12 | proc = subprocess.Popen( 13 | ['protoc', '--cpp_out', 'imposm/parser/pbf/', 'osm.proto'], 14 | stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 15 | except OSError, ex: 16 | if ex.errno == errno.ENOENT: 17 | print ("Could not find protoc command. Make sure protobuf is " 18 | "installed and your PATH environment is set.") 19 | raise DistutilsPlatformError("Failed to generate protbuf " 20 | "CPP files with protoc.") 21 | else: 22 | raise 23 | out = proc.communicate()[0] 24 | result = proc.wait() 25 | if result != 0: 26 | print "Error during protbuf files generation with protoc:" 27 | print out 28 | raise DistutilsPlatformError("Failed to generate protbuf " 29 | "CPP files with protoc.") 30 | build_ext.run(self) 31 | 32 | 33 | install_requires = [] 34 | if tuple(map(str, platform.python_version_tuple())) < ('2', '6'): 35 | install_requires.append('multiprocessing>=2.6') 36 | 37 | setup( 38 | name='imposm.parser', 39 | version="1.0.8a", 40 | description='Fast and easy OpenStreetMap XML/PBF parser.', 41 | long_description=open('README.rst').read() + open('CHANGES').read(), 42 | author='Oliver Tonnhofer', 43 | author_email='olt@omniscale.de', 44 | url='http://imposm.org/docs/imposm.parser/latest/', 45 | license='Apache Software License 2.0', 46 | packages=find_packages(), 47 | namespace_packages = ['imposm'], 48 | include_package_data=True, 49 | package_data = {'': ['*.xml', '*.osm', '*.osm.bz2']}, 50 | install_requires=install_requires, 51 | classifiers=[ 52 | "Development Status :: 4 - Beta", 53 | "License :: OSI Approved :: Apache Software License", 54 | "Operating System :: OS Independent", 55 | "Programming Language :: C", 56 | "Programming Language :: C++", 57 | "Programming Language :: Python :: 2.5", 58 | "Programming Language :: Python :: 2.6", 59 | "Programming Language :: Python :: 2.7", 60 | "Topic :: Software Development :: Libraries", 61 | "Topic :: Scientific/Engineering :: GIS", 62 | ], 63 | ext_modules=[ 64 | Extension("imposm.parser.pbf.OSMPBF", 65 | ["imposm/parser/pbf/osm.cc", 
"imposm/parser/pbf/osm.pb.cc"], libraries=['protobuf']), 66 | ], 67 | cmdclass={'build_ext':build_ext_with_protpbuf}, 68 | ) 69 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py25,py26,py27 3 | 4 | [testenv] 5 | changedir = {toxworkdir} 6 | commands = nosetests imposm --with-xunit --xunit-file={toxinidir}/nosetests-{envname}.xml 7 | deps = nose>=0.10.4 8 | --------------------------------------------------------------------------------
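For reference, a minimal usage sketch of the ``OSMParser`` API from ``imposm/parser/simple.py``, as exercised by the test suite above. It is a sketch only: the ``count_highways`` callback, the ``highway_counts`` dict, and the ``region.osm.pbf`` filename are illustrative and not part of the package.

from imposm.parser import OSMParser

highway_counts = {}

def count_highways(ways):
    # each call receives a batch of (osm_id, tags, refs) tuples
    for osmid, tags, refs in ways:
        if 'highway' in tags:
            highway_counts[tags['highway']] = highway_counts.get(tags['highway'], 0) + 1

parser = OSMParser(concurrency=4, ways_callback=count_highways)
parser.parse('region.osm.pbf')  # .osm and .osm.bz2 files are detected by suffix as well
print highway_counts

The callbacks run in the calling process, so accumulating results in plain module-level data structures works; only the parsing itself is distributed over the worker processes.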