├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── flatdata └── osm.flatdata ├── osmflat ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── examples │ ├── README.md │ ├── berlin-features.png │ ├── berlin-features.svg │ ├── berlin-roads.png │ ├── cities.rs │ ├── count.rs │ ├── debug.rs │ ├── pub-names.rs │ ├── read.rs │ ├── render-features.rs │ ├── render-roads.rs │ └── road-length.rs └── src │ ├── lib.rs │ ├── osmflat_generated.rs │ └── tags.rs ├── osmflatc ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── build.rs └── src │ ├── args.rs │ ├── ids.rs │ ├── main.rs │ ├── osmpbf.rs │ ├── parallel.rs │ ├── proto │ ├── fileformat.proto │ └── osmformat.proto │ ├── stats.rs │ └── strings.rs └── rustfmt.toml /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | pull_request: 4 | push: 5 | branches: 6 | - master 7 | schedule: 8 | - cron: '00 01 * * *' 9 | 10 | env: 11 | CARGO_INCREMENTAL: 0 12 | RUST_BACKTRACE: short 13 | RUSTFLAGS: "-D warnings -W rust-2021-compatibility" 14 | RUSTUP_MAX_RETRIES: 10 15 | 16 | jobs: 17 | rust: 18 | name: Rust 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | matrix: 22 | build: [stable, macos, win-msvc, win-gnu] 23 | include: 24 | - build: stable 25 | os: ubuntu-latest 26 | rust: stable 27 | - build: macos 28 | os: macOS-latest 29 | rust: stable 30 | - build: win-msvc 31 | os: windows-2019 32 | rust: stable 33 | - build: win-gnu 34 | os: windows-2019 35 | rust: stable-x86_64-gnu 36 | env: 37 | RUSTFLAGS: -D warnings 38 | CARGO_INCREMENTAL: 0 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v1 42 | with: 43 | fetch-depth: 1 44 | - name: Install Protoc 45 | uses: arduino/setup-protoc@v1 46 | with: 47 | repo-token: ${{ secrets.GITHUB_TOKEN }} 48 | - name: Install Rust 49 | uses: hecrj/setup-rust-action@v1 50 | with: 51 | rust-version: 
${{ matrix.rust }} 52 | - run: cargo build --all-targets 53 | - run: cargo test 54 | - run: cargo doc 55 | 56 | rustfmt: 57 | name: rustfmt 58 | runs-on: ubuntu-latest 59 | steps: 60 | - name: Checkout repository 61 | uses: actions/checkout@v1 62 | with: 63 | fetch-depth: 1 64 | - name: Install rustfmt 65 | run: rustup component add rustfmt 66 | - name: Check formatting 67 | run: cargo fmt -- --check 68 | 69 | clippy: 70 | name: clippy 71 | runs-on: ubuntu-latest 72 | steps: 73 | - name: Checkout repository 74 | uses: actions/checkout@v1 75 | with: 76 | fetch-depth: 1 77 | - name: Install Protoc 78 | uses: arduino/setup-protoc@v1 79 | - name: Install clippy 80 | run: rustup component add clippy 81 | - name: Clippy 82 | run: cargo clippy --all-targets 83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Cargo.lock 2 | /target 3 | **/*.rs.bk 4 | /data 5 | **/.DS_Store 6 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "osmflat", 4 | "osmflatc", 5 | ] 6 | resolver = "2" 7 | 8 | [patch.crates-io] 9 | osmflat = { path = "osmflat" } 10 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 The Rust Project Developers 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # osmflat ![][ci] 2 | 3 | ![berlin-features] 4 | 5 | Flat OpenStreetMap (OSM) data format providing an efficient *random* data 6 | access through [memory mapped files]. 7 | 8 | The data format is described and implemented in [flatdata]. The [schema] 9 | describes the fundamental OSM data structures: nodes, ways, relations and 10 | tags as simple non-nested data structures. The relations between these are 11 | expressed through indexes. 12 | 13 | ## Compiler 14 | 15 | Besides the library for working with osmflat archives, the crate `osmflatc` 16 | contains an OSM [pbf format][PBF format] to osmflat data compiler. 17 | 18 | To compile OSM data from pbf to osmflat use: 19 | 20 | ```shell 21 | cargo run --release -- input.osm.pbf output.osm.flatdata 22 | ``` 23 | 24 | The output is a flatdata which is a directory consisting of several 25 | files. The schema is also part of the archive. It is checked every time the 26 | archive is opened. This guarantees that the compiler which was used to produce 27 | the archive fits to the schema used for reading it. The archive data is not 28 | compressed. 29 | 30 | ## Using data 31 | 32 | You can use any [flatdata] supported language for reading an osmflat archive. 33 | For reading the data in Rust, we provide the `osmflat` crate. 
34 | 35 | First, add this to your Cargo.toml: 36 | 37 | ```toml 38 | [dependencies] 39 | osmflat = "0.3.0" 40 | ``` 41 | 42 | Now, you can open an osmflat archive as any other flatdata archive and read its 43 | data: 44 | 45 | ```rust 46 | use osmflat::{FileResourceStorage, Osm}; 47 | 48 | fn main() { 49 | let storage = FileResourceStorage::new("path/to/archive.osm.flatdata"); 50 | let archive = Osm::open(storage).unwrap(); 51 | 52 | for node in archive.nodes().iter() { 53 | println!("{:?}", node); 54 | } 55 | } 56 | ``` 57 | 58 | ## Examples 59 | 60 | Check the [osmflat/examples] directory. Feel free to add another example, if 61 | you have an idea what to do with the amazing OSM data in a few lines of code. 😁 62 | 63 | The above map was rendered by `osmflat/examples/render-roads.rs` in ~ 170 loc from 64 | the osmflat archive based on the [latest][latest-berlin-map] Berlin OSM data. 65 | 66 | ## License 67 | 68 | * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or 69 | http://www.apache.org/licenses/LICENSE-2.0) 70 | * MIT License ([LICENSE-MIT](LICENSE-MIT) or 71 | http://opensource.org/licenses/MIT) 72 | 73 | The files [src/proto/fileformat.proto](src/proto/fileformat.proto) and 74 | [src/proto/osmformat.proto](src/proto/osmformat.proto) are copies from the 75 | [OSM-binary] project and are under the LGPLv3 license. 76 | 77 | ### Contribution 78 | 79 | Unless you explicitly state otherwise, any contribution intentionally submitted 80 | for inclusion in this document by you, as defined in the Apache-2.0 license, 81 | shall be dual licensed as above, without any additional terms or conditions. 
82 | 83 | [flatdata]: https://github.com/heremaps/flatdata 84 | [schema]: flatdata/osm.flatdata 85 | [memory mapped files]: https://en.wikipedia.org/wiki/Memory-mapped_file 86 | [PBF format]: https://wiki.openstreetmap.org/wiki/PBF_Format 87 | [osmflat/examples]: osmflat/examples 88 | [latest-berlin-map]: http://download.geofabrik.de/europe/germany/berlin.html 89 | [OSM-binary]: https://github.com/scrosby/OSM-binary 90 | [ci]: https://github.com/boxdot/osmflat-rs/workflows/ci/badge.svg 91 | [berlin-features]: https://github.com/boxdot/osmflat-rs/blob/master/osmflat/examples/berlin-features.png 92 | -------------------------------------------------------------------------------- /flatdata/osm.flatdata: -------------------------------------------------------------------------------- 1 | /// OSM data types and archive 2 | 3 | namespace osm { 4 | 5 | // Max 40 bits value used to indicate null references. 6 | /** 7 | * Special value which represents an invalid index. 8 | */ 9 | const u64 INVALID_IDX = 0xFFFFFFFFFF; 10 | 11 | /** 12 | * Metadata attached to the archive. 13 | */ 14 | struct Header { 15 | /** 16 | * All coordinates in this archive are scaled by this constant 17 | * To get the original degree-based coordinate back compute (latitude/coord_scale,longitude/coord_scale) 18 | */ 19 | coord_scale: i32; 20 | 21 | /// Bounding box (min longitude scaled with `header.coord_scale`) 22 | bbox_left: i32 : 32; 23 | /// Bounding box (max longitude scaled with `header.coord_scale`) 24 | bbox_right: i32 : 32; 25 | /// Bounding box (max latitude scaled with `header.coord_scale`) 26 | bbox_top: i32 : 32; 27 | /// Bounding box (min latitude scaled with `header.coord_scale`) 28 | bbox_bottom: i32 : 32; 29 | 30 | /// Writing program used to write the data (reference to `stringtable`). 31 | writingprogram_idx: u64 : 40; 32 | /// The origin (source) of the data. 33 | source_idx: u64 : 40; 34 | 35 | /** 36 | * Replication timestamp, expressed in seconds since the epoch. 
37 | * See [`state.txt`]. 38 | * 39 | * [`state.txt`]: https://wiki.openstreetmap.org/wiki/Planet.osm/diffs#Minute.2C_Hour.2C_and_Day_Files_Organisation 40 | */ 41 | replication_timestamp: i64 : 64; 42 | /** 43 | * Replication sequence number (`sequenceNumber` from [`state.txt`]). 44 | * 45 | * [`state.txt`]: https://wiki.openstreetmap.org/wiki/Planet.osm/diffs#Minute.2C_Hour.2C_and_Day_Files_Organisation 46 | */ 47 | replication_sequence_number: i64 : 64; 48 | /** 49 | * Replication base URL (reference to `stringtable`). 50 | */ 51 | replication_base_url_idx: u64 : 40; 52 | } 53 | 54 | /** 55 | * A `(key, value)` attached to a `Node`, `Way`, or `Relation`. 56 | */ 57 | struct Tag { 58 | /// Key index in `stringtable` 59 | key_idx: u64 : 40; 60 | /// Value index in `stringtable` 61 | value_idx: u64 : 40; 62 | } 63 | 64 | /** 65 | * A node is one of the core elements in the OpenStreetMap data model. 66 | * 67 | * It consists of a single point in space defined by its latitude, longitude and node id. 68 | * 69 | * See <https://wiki.openstreetmap.org/wiki/Node>. 70 | */ 71 | struct Node { 72 | /// Latitude (scaled with `header.coord_scale`). 73 | lat: i32 : 32; 74 | /// Longitude (scaled with `header.coord_scale`). 75 | lon: i32 : 32; 76 | /** 77 | * Range of tags attached to this node. 78 | * 79 | * The values of the range are indexes in the `tags_index` vector. 80 | */ 81 | @range(tags) 82 | tag_first_idx: u64 : 40; 83 | } 84 | 85 | /** 86 | * Index of a node. 87 | */ 88 | struct NodeIndex { 89 | /// Index in the `nodes` vector. 90 | @optional(INVALID_IDX) 91 | value: u64 : 40; 92 | } 93 | 94 | /** 95 | * A way is an ordered list of nodes. 96 | * 97 | * See <https://wiki.openstreetmap.org/wiki/Way>. 98 | */ 99 | struct Way { 100 | /** 101 | * Range of tags attached to this way. 102 | * 103 | * The values of the range are indexes in the `tags_index` vector. 104 | */ 105 | @range(tags) 106 | tag_first_idx: u64 : 40; 107 | /** 108 | * Range of nodes this way consists of. 109 | * 110 | * The values of the range are indexes in the `nodes_index` vector. 
111 | */ 112 | @range(refs) 113 | ref_first_idx: u64 : 40; 114 | } 115 | 116 | /** 117 | * Index of a tag. 118 | */ 119 | struct TagIndex { 120 | /// Index in the `tags` vector. 121 | value: u64 : 40; 122 | } 123 | 124 | /// Node member of a relation. 125 | struct NodeMember { 126 | /// Index of the node in the `nodes` vector. 127 | @optional(INVALID_IDX) 128 | node_idx: u64 : 40; 129 | /** 130 | * Optional textual field describing the function of the node in the relation. 131 | * 132 | * Index in `stringtable`. 133 | */ 134 | role_idx: u64 : 40; 135 | } 136 | 137 | /// Way member of a relation. 138 | struct WayMember { 139 | /// Index of the way in the `ways` vector. 140 | @optional(INVALID_IDX) 141 | way_idx: u64 : 40; 142 | /** 143 | * Optional textual field describing the function of the way in the relation. 144 | * 145 | * Index in `stringtable`. 146 | */ 147 | role_idx: u64 : 40; 148 | } 149 | 150 | /// Relation member of a relation. 151 | struct RelationMember { 152 | /// Index of the relation in the `relations` vector. 153 | @optional(INVALID_IDX) 154 | relation_idx: u64 : 40; 155 | /** 156 | * Optional textual field describing the function of the relation in the parent relation. 157 | * 158 | * Index in `stringtable`. 159 | */ 160 | role_idx: u64 : 40; 161 | } 162 | 163 | /** 164 | * A relation is an ordered list of one or more nodes, ways and/or relations as members. 165 | * 166 | * See <https://wiki.openstreetmap.org/wiki/Relation>. 167 | */ 168 | struct Relation { 169 | /** 170 | * Range of tags attached to this relation. 171 | * 172 | * The values of the range are indexes in the `tags_index` vector. 
173 | */ 174 | @range(tags) 175 | tag_first_idx: u64 : 40; 176 | } 177 | 178 | struct Id { 179 | value: u64 : 40; 180 | } 181 | 182 | /** 183 | * An optional sub-archive storing the original OSM ids of nodes, ways, and relations 184 | */ 185 | archive Ids { 186 | /** 187 | * List of OSM ids of all nodes in the parent archive 188 | * nodes[i] has its id stored in ids.nodes[i] 189 | */ 190 | nodes: vector< Id >; 191 | 192 | /** 193 | * List of OSM ids of all ways in the parent archive 194 | * ways[i] has its id stored in ids.ways[i] 195 | */ 196 | ways: vector< Id >; 197 | 198 | /** 199 | * List of OSM ids of all relations in the parent archive 200 | * relations[i] has its id stored in ids.relations[i] 201 | */ 202 | relations: vector< Id >; 203 | } 204 | 205 | /** 206 | * OSM data archive 207 | * 208 | * Relations and relation members are indexed with the same index, i.e. 209 | * a relation at index `i` in the vector `relations` has the members 210 | * at index `i` in the multivector `relation_members`. 211 | * 212 | * All 1:n relationships are modeled in-place by using an additional index. This is a 213 | * common pattern in flatdata. For example, a node might have multiple tags attached 214 | * to it. To model this, a node in `nodes` references the first tag attached to it 215 | * by storing an index in the `tags_index` vector. The next node in `nodes` again 216 | * references its first tag, which is the last tag (exclusive) of the previous node. 217 | * 218 | * ```text 219 | * nodes: [ ..., n_1, n_2, ... ] 220 | * | | 221 | * | +-------+ 222 | * v v 223 | * tags_index: [ ..., t_11, t_12, ..., t_1n, t_21, ... t_2m, ... ] 224 | * ``` 225 | */ 226 | @bound_implicitly(Relations: relations, relation_members) 227 | archive Osm { 228 | /** 229 | * Header which contains the metadata attached to the archive. 
230 | */ 231 | @explicit_reference( Header.writingprogram_idx, stringtable ) 232 | @explicit_reference( Header.source_idx, stringtable ) 233 | @explicit_reference( Header.replication_base_url_idx, stringtable ) 234 | header: Header; 235 | 236 | /** 237 | * List of nodes. 238 | * 239 | * A node references a range of tags in the `tags_index` vector. 240 | */ 241 | @explicit_reference( Node.tag_first_idx, tags_index ) 242 | nodes: vector; 243 | 244 | /** 245 | * List of ways. 246 | * 247 | * A way references 248 | * 249 | * * a range of tags in the `tags_index` vector, and 250 | * * a range of nodes in the `nodes_index` vector. 251 | */ 252 | @explicit_reference( Way.tag_first_idx, tags_index ) 253 | @explicit_reference( Way.ref_first_idx, nodes_index ) 254 | ways: vector; 255 | 256 | /** 257 | * List of relations. 258 | * 259 | * A relation references a range of tags in `tags_index` vectors. 260 | * Members are attached to a relation implicitly: members that belong to a 261 | * relation at index `i` are at index `i` in the `relation_members` multivector. 262 | */ 263 | @explicit_reference( Relation.tag_first_idx, tags_index ) 264 | relations: vector; 265 | 266 | /** 267 | * Members attached to relations. 268 | * 269 | * An index in this multivector corresponds to an index in the `relations` vector. 270 | * 271 | * A member has a variadic type: `NodeMember`, `WayMember` or `RelationMember`. 272 | * Each type references its role in the `stringtable` raw data. Additionally, 273 | * 274 | * * a node member references a node in the `nodes` vector, 275 | * * a way member references a way in the `ways` vector, 276 | * * a relation member references a relation in the `relations` vector. 
277 | */ 278 | @explicit_reference( NodeMember.node_idx, nodes ) 279 | @explicit_reference( NodeMember.role_idx, stringtable ) 280 | @explicit_reference( WayMember.way_idx, ways ) 281 | @explicit_reference( WayMember.role_idx, stringtable ) 282 | @explicit_reference( RelationMember.relation_idx, relations ) 283 | @explicit_reference( RelationMember.role_idx, stringtable ) 284 | relation_members: multivector<40, NodeMember, WayMember, RelationMember>; 285 | 286 | /** 287 | * List of tags. 288 | * 289 | * A tag references its key and value in the `stringtable` raw data. 290 | */ 291 | @explicit_reference( Tag.key_idx, stringtable ) 292 | @explicit_reference( Tag.value_idx, stringtable ) 293 | tags: vector; 294 | 295 | /** 296 | * Auxiliary index of tags to model 1:n relationships between nodes, ways, relations 297 | * and tags. 298 | */ 299 | @explicit_reference( TagIndex.value, tags ) 300 | tags_index: vector; 301 | 302 | /** 303 | * Auxiliary index of nodes to model 1:n relationship between ways and nodes. 304 | */ 305 | @explicit_reference( NodeIndex.value, nodes ) 306 | nodes_index: vector; 307 | 308 | /** 309 | * List of strings separated by `\0`. 310 | */ 311 | stringtable: raw_data; 312 | 313 | @optional 314 | ids: archive Ids; 315 | } 316 | } // namespace osm 317 | -------------------------------------------------------------------------------- /osmflat/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "osmflat" 3 | version = "0.3.1" 4 | authors = [ 5 | "boxdot ", 6 | "Christian Vetter ", 7 | "Gabriel Féron " 8 | ] 9 | license = "MIT/Apache-2.0" 10 | description = "OpenStreetMap (OSM) data format providing an efficient random data access through memory mapped files." 
11 | repository = "https://github.com/boxdot/osmflat-rs" 12 | keywords = ["serialization", "osm", "openstreetmap", "flatdata"] 13 | categories = ["encoding"] 14 | readme = "README.md" 15 | edition = "2021" 16 | 17 | [dependencies] 18 | flatdata = "0.5.3" 19 | 20 | [dev-dependencies] 21 | clap = { version = "4.1.4", features = ["derive"] } 22 | itertools = "0.13.0" 23 | png = "0.17.7" 24 | serde = { version = "1.0.152", features = ["derive"] } 25 | serde_json = "1.0.91" 26 | smallvec = "1.10.0" 27 | svg = "0.17.0" 28 | 29 | [features] 30 | default = [] 31 | tar = ["flatdata/tar"] 32 | -------------------------------------------------------------------------------- /osmflat/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /osmflat/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /osmflat/README.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /osmflat/examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This is a collection of examples showing how to use the `osmflat` library. 4 | Some of the examples were ported from the `libosmium`'s 5 | [examples directory]. 6 | 7 | The source code in this directory is under public domain, and can be freely 8 | copied and modified. 9 | 10 | ## Getting started 11 | 12 | * `read` - reads the contents of the input archive. 13 | * `count` - counts the number of nodes, ways, and relations in the input archive. 14 | * `debug` - dumps the contents of the input archive in a debug format. 
15 | 16 | ## Simple 17 | 18 | * `pub-names` - shows the names and addresses of all pubs. 19 | * `road-length` - calculates the length of the road network in the input archive. 20 | 21 | ## Rendering 22 | 23 | * `render-roads` - renders all roads by using a simple Bresenham line algorithm as PNG. 24 |

25 | Berlin Roads 26 |

27 | * `render-features` - renders selected features from the input archive as SVG. 28 |

29 | Berlin Features 30 |

31 | 32 | [examples directory]: https://github.com/osmcode/libosmium/tree/master/examples 33 | -------------------------------------------------------------------------------- /osmflat/examples/berlin-features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boxdot/osmflat-rs/53439c6d891d04033408f8adba376a5e82d7e82f/osmflat/examples/berlin-features.png -------------------------------------------------------------------------------- /osmflat/examples/berlin-roads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boxdot/osmflat-rs/53439c6d891d04033408f8adba376a5e82d7e82f/osmflat/examples/berlin-roads.png -------------------------------------------------------------------------------- /osmflat/examples/cities.rs: -------------------------------------------------------------------------------- 1 | //! Scans all OSM nodes and extracts list of cities with name and 2 | //! population in JSON format. 3 | //! 4 | //! LICENSE 5 | //! 6 | //! The code in this example file is released into the Public Domain. 
7 | 8 | use osmflat::{find_tag, has_tag, Osm}; 9 | use serde::Serialize; 10 | use std::str; 11 | 12 | #[derive(Debug, Default, Serialize)] 13 | struct City<'a> { 14 | name: &'a str, 15 | population: usize, 16 | } 17 | 18 | fn main() -> Result<(), Box> { 19 | let archive_dir = std::env::args() 20 | .nth(1) 21 | .ok_or("USAGE: cities ")?; 22 | let archive = Osm::open(osmflat::FileResourceStorage::new(archive_dir))?; 23 | 24 | // Iterate through all nodes 25 | let cities: Vec = archive 26 | .nodes() 27 | .iter() 28 | // filter nodes that does not have a place=city tag 29 | .filter(|node| has_tag(&archive, node.tags(), b"place", b"city")) 30 | .filter_map(|node| { 31 | // try to collect population and country 32 | Some(City { 33 | name: str::from_utf8(find_tag(&archive, node.tags(), b"name")?).ok()?, 34 | population: str::from_utf8(find_tag(&archive, node.tags(), b"population")?) 35 | .ok()? 36 | .parse() 37 | .ok()?, 38 | }) 39 | }) 40 | .collect(); 41 | 42 | let stdout = std::io::stdout(); 43 | serde_json::to_writer(stdout.lock(), &cities)?; 44 | 45 | Ok(()) 46 | } 47 | -------------------------------------------------------------------------------- /osmflat/examples/count.rs: -------------------------------------------------------------------------------- 1 | //! Counts the number of nodes, ways, and relations in the input archive. 2 | //! 3 | //! LICENSE 4 | //! 5 | //! The code in this example file is released into the Public Domain. 
6 | 7 | use osmflat::{FileResourceStorage, Osm}; 8 | 9 | fn main() -> Result<(), Box> { 10 | let archive_dir = std::env::args() 11 | .nth(1) 12 | .ok_or("USAGE: count ")?; 13 | let archive = Osm::open(FileResourceStorage::new(archive_dir))?; 14 | 15 | println!("Nodes: {}", archive.nodes().len()); 16 | println!("Ways: {}", archive.ways().len()); 17 | println!("Relations: {}", archive.relations().len()); 18 | 19 | Ok(()) 20 | } 21 | -------------------------------------------------------------------------------- /osmflat/examples/debug.rs: -------------------------------------------------------------------------------- 1 | //! Dumps the contents of the input archive in a debug format. 2 | //! 3 | //! Demonstrates 4 | //! 5 | //! * iteration through all fundamental types 6 | //! * accessing of fields and following of references 7 | //! 8 | //! LICENSE 9 | //! 10 | //! The code in this example file is released into the Public Domain. 11 | 12 | use clap::Parser; 13 | use osmflat::{iter_tags, FileResourceStorage, Osm, RelationMembersRef}; 14 | 15 | use std::path::PathBuf; 16 | use std::str::{self, Utf8Error}; 17 | 18 | #[derive(Debug)] 19 | struct Header<'ar> { 20 | #[allow(unused)] 21 | bbox: (f64, f64, f64, f64), 22 | #[allow(unused)] 23 | writingprogram: &'ar str, 24 | #[allow(unused)] 25 | source: &'ar str, 26 | #[allow(unused)] 27 | replication_timestamp: i64, 28 | #[allow(unused)] 29 | replication_sequence_number: i64, 30 | #[allow(unused)] 31 | replication_base_url: &'ar str, 32 | } 33 | 34 | #[derive(Debug)] 35 | struct Node<'ar> { 36 | #[allow(unused)] 37 | id: Option, 38 | #[allow(unused)] 39 | lat: f64, 40 | #[allow(unused)] 41 | lon: f64, 42 | #[allow(unused)] 43 | tags: Vec<(&'ar str, &'ar str)>, 44 | } 45 | 46 | #[derive(Debug)] 47 | struct Way<'ar> { 48 | #[allow(unused)] 49 | id: Option, 50 | #[allow(unused)] 51 | tags: Vec<(&'ar str, &'ar str)>, 52 | #[allow(unused)] 53 | nodes: Vec>, 54 | } 55 | 56 | #[derive(Debug)] 57 | struct Relation<'ar> { 58 |
#[allow(unused)] 59 | id: Option, 60 | #[allow(unused)] 61 | tags: Vec<(&'ar str, &'ar str)>, 62 | #[allow(unused)] 63 | members: Vec>, 64 | } 65 | 66 | #[derive(Debug)] 67 | struct Member<'ar> { 68 | #[allow(unused)] 69 | r#type: Type, 70 | #[allow(unused)] 71 | idx: Option, 72 | #[allow(unused)] 73 | role: &'ar str, 74 | } 75 | 76 | #[derive(Debug)] 77 | enum Type { 78 | Node, 79 | Way, 80 | Relation, 81 | } 82 | 83 | impl<'ar> Member<'ar> { 84 | fn new_slice( 85 | archive: &'ar Osm, 86 | relation_idx: usize, 87 | ) -> impl Iterator, Utf8Error>> { 88 | let strings = archive.stringtable(); 89 | archive 90 | .relation_members() 91 | .at(relation_idx) 92 | .map(move |member| { 93 | let res = match member { 94 | RelationMembersRef::NodeMember(m) => Member { 95 | r#type: Type::Node, 96 | idx: m.node_idx(), 97 | role: strings.substring(m.role_idx() as usize)?, 98 | }, 99 | RelationMembersRef::WayMember(m) => Member { 100 | r#type: Type::Way, 101 | idx: m.way_idx(), 102 | role: strings.substring(m.role_idx() as usize)?, 103 | }, 104 | RelationMembersRef::RelationMember(m) => Member { 105 | r#type: Type::Relation, 106 | idx: m.relation_idx(), 107 | role: strings.substring(m.role_idx() as usize)?, 108 | }, 109 | }; 110 | Ok(res) 111 | }) 112 | } 113 | } 114 | 115 | /// output osmflatdata: nodes, ways, and/or relations 116 | #[derive(Debug, Parser)] 117 | struct Args { 118 | /// input osmflat archive 119 | input: PathBuf, 120 | /// which types to print: (n)odes, (w)ays, or (r)elations 121 | #[arg(long, default_value = "nwr")] 122 | types: String, 123 | /// amount of entities to print 124 | #[arg(long)] 125 | num: Option, 126 | } 127 | 128 | fn main() -> Result<(), Box> { 129 | let args = Args::parse(); 130 | let archive = Osm::open(FileResourceStorage::new(args.input))?; 131 | 132 | let header = archive.header(); 133 | let strings = archive.stringtable(); 134 | 135 | let scale_coord = |x| x as f64 / header.coord_scale() as f64; 136 | 137 | // print header 138 | let header 
= Header { 139 | bbox: ( 140 | scale_coord(header.bbox_left()), 141 | scale_coord(header.bbox_right()), 142 | scale_coord(header.bbox_top()), 143 | scale_coord(header.bbox_bottom()), 144 | ), 145 | writingprogram: strings.substring(header.writingprogram_idx() as usize)?, 146 | source: strings.substring(header.source_idx() as usize)?, 147 | replication_timestamp: header.replication_timestamp(), 148 | replication_sequence_number: header.replication_sequence_number(), 149 | replication_base_url: strings.substring(header.replication_base_url_idx() as usize)?, 150 | }; 151 | println!("{header:#?}"); 152 | 153 | let collect_utf8_tags = |tags| -> Vec<(&str, &str)> { 154 | iter_tags(&archive, tags) 155 | .filter_map(|(k, v)| match (str::from_utf8(k), str::from_utf8(v)) { 156 | (Ok(k), Ok(v)) => Some((k, v)), 157 | _ => None, 158 | }) 159 | .collect() 160 | }; 161 | 162 | // print nodes 163 | let mut node_ids = archive.ids().map(|x| x.nodes()).into_iter().flatten(); 164 | if args.types.contains('n') { 165 | for node in archive.nodes().iter().take(args.num.unwrap_or(usize::MAX)) { 166 | let node = Node { 167 | id: node_ids.next().map(|x| x.value()), 168 | lat: scale_coord(node.lat()), 169 | lon: scale_coord(node.lon()), 170 | tags: collect_utf8_tags(node.tags()), 171 | }; 172 | 173 | println!("{node:#?}"); 174 | } 175 | } 176 | 177 | // print ways 178 | let nodes_index = archive.nodes_index(); 179 | let mut way_ids = archive.ids().map(|x| x.ways()).into_iter().flatten(); 180 | if args.types.contains('w') { 181 | for way in archive.ways().iter().take(args.num.unwrap_or(usize::MAX)) { 182 | let way = Way { 183 | id: way_ids.next().map(|x| x.value()), 184 | tags: collect_utf8_tags(way.tags()), 185 | nodes: way 186 | .refs() 187 | .map(|idx| nodes_index[idx as usize].value()) 188 | .collect(), 189 | }; 190 | 191 | println!("{way:#?}"); 192 | } 193 | } 194 | 195 | // print relations 196 | let mut relation_ids = archive.ids().map(|x| x.ways()).into_iter().flatten(); 197 | if 
args.types.contains('r') { 198 | for (relation_idx, relation) in archive.relations()[..3] 199 | .iter() 200 | .take(args.num.unwrap_or(usize::MAX)) 201 | .enumerate() 202 | { 203 | let members: Result, _> = Member::new_slice(&archive, relation_idx).collect(); 204 | let relation = Relation { 205 | id: relation_ids.next().map(|x| x.value()), 206 | tags: collect_utf8_tags(relation.tags()), 207 | members: members?, 208 | }; 209 | 210 | println!("{relation:#?}"); 211 | } 212 | } 213 | 214 | Ok(()) 215 | } 216 | -------------------------------------------------------------------------------- /osmflat/examples/pub-names.rs: -------------------------------------------------------------------------------- 1 | //! Shows the names and addresses of all pubs. 2 | //! 3 | //! Demonstrates 4 | //! 5 | //! * iteration through tags belonging to a node and a way 6 | //! * accessing of tags by key 7 | //! * filtering of tags 8 | //! 9 | //! LICENSE 10 | //! 11 | //! The code in this example file is released into the Public Domain. 
12 | 13 | use osmflat::{find_tag, has_tag, iter_tags, FileResourceStorage, Osm}; 14 | use std::str; 15 | 16 | fn main() -> Result<(), Box> { 17 | let archive_dir = std::env::args() 18 | .nth(1) 19 | .ok_or("USAGE: pub_names ")?; 20 | let archive = Osm::open(FileResourceStorage::new(archive_dir))?; 21 | 22 | let nodes_tags = archive.nodes().iter().map(|node| node.tags()); 23 | let ways_tags = archive.ways().iter().map(|way| way.tags()); 24 | 25 | for tag_range in nodes_tags.chain(ways_tags) { 26 | if has_tag(&archive, tag_range.clone(), b"amenity", b"pub") { 27 | let name = find_tag(&archive, tag_range.clone(), b"name"); 28 | let name = name.map(|s| str::from_utf8(s).unwrap_or("broken pub name")); 29 | println!("{}", name.unwrap_or("unknown pub name")); 30 | 31 | let addrs = iter_tags(&archive, tag_range).filter(|(k, _)| k.starts_with(b"addr:")); 32 | for (k, v) in addrs { 33 | if let (Ok(addr_type), Ok(addr)) = (str::from_utf8(k), str::from_utf8(v)) { 34 | println!(" {addr_type}: {addr}"); 35 | } 36 | } 37 | } 38 | } 39 | 40 | Ok(()) 41 | } 42 | -------------------------------------------------------------------------------- /osmflat/examples/read.rs: -------------------------------------------------------------------------------- 1 | //! Reads the contents of the input archive. 2 | //! 3 | //! LICENSE 4 | //! 5 | //! The code in this example file is released into the Public Domain. 
6 | 7 | use osmflat::{FileResourceStorage, Osm}; 8 | 9 | fn main() -> Result<(), Box> { 10 | let archive_dir = std::env::args() 11 | .nth(1) 12 | .ok_or("USAGE: read ")?; 13 | let archive = Osm::open(FileResourceStorage::new(archive_dir))?; 14 | 15 | for _node in archive.nodes() { 16 | // do nothing 17 | } 18 | 19 | for _way in archive.ways() { 20 | // do nothing 21 | } 22 | 23 | for _relation in archive.relations() { 24 | // do nothing 25 | } 26 | 27 | Ok(()) 28 | } 29 | -------------------------------------------------------------------------------- /osmflat/examples/render-features.rs: -------------------------------------------------------------------------------- 1 | //! Renders selected features from the input archive as svg. 2 | //! 3 | //! For supported features check `Category` enum and `classify` function. 4 | //! 5 | //! For each feature, we retrieve the coordinates lazily from osm nodes, and 6 | //! then produce polylines styled based on the category, cf. `render_svg` 7 | //! function. The coordinates are in lon, lat. 8 | //! 9 | //! Inside of svg we just use the coordinates as is (except for swapped x/y 10 | //! axes), plus we apply a transformation to adjust the coordinates to the 11 | //! viewport. Obviously, it is slower to render such an svg on the screen. 12 | //! However, the final svg already contains so many polylines that having already 13 | //! transformed coordinates does not change much. If you need speed when showing 14 | //! the svg, feel free to apply simplifications in this program. 15 | //! 16 | //! LICENSE 17 | //! 18 | //! The code in this example file is released into the Public Domain.
19 | 20 | use clap::Parser; 21 | use osmflat::{iter_tags, FileResourceStorage, Node, Osm, Relation, RelationMembersRef, Way}; 22 | use smallvec::{smallvec, SmallVec}; 23 | use svg::{node::element, Document}; 24 | 25 | use std::f64; 26 | use std::fmt::Write; 27 | use std::io; 28 | use std::ops::Range; 29 | use std::path::PathBuf; 30 | use std::str; 31 | 32 | /// Geographic coordinates represented by (latitude, longitude). 33 | #[derive(Debug, Clone, Copy, Default, PartialEq, PartialOrd)] 34 | struct GeoCoord { 35 | lat: f64, 36 | lon: f64, 37 | } 38 | 39 | impl GeoCoord { 40 | fn min(self, other: Self) -> Self { 41 | Self { 42 | lat: self.lat.min(other.lat), 43 | lon: self.lon.min(other.lon), 44 | } 45 | } 46 | 47 | fn max(self, other: Self) -> Self { 48 | Self { 49 | lat: self.lat.max(other.lat), 50 | lon: self.lon.max(other.lon), 51 | } 52 | } 53 | } 54 | 55 | /// Convert osmflat Node into GeoCoord. 56 | impl GeoCoord { 57 | fn from_node(node: &Node, coord_scale: i32) -> Self { 58 | Self { 59 | lat: node.lat() as f64 / coord_scale as f64, 60 | lon: node.lon() as f64 / coord_scale as f64, 61 | } 62 | } 63 | } 64 | 65 | /// Polyline which can be transformed into an iterator over `GeoCoord`'s. 
66 | struct Polyline { 67 | inner: SmallVec<[Range; 4]>, 68 | } 69 | 70 | impl From> for Polyline { 71 | fn from(range: Range) -> Self { 72 | Self { 73 | inner: smallvec![range], 74 | } 75 | } 76 | } 77 | 78 | impl Polyline { 79 | #[allow(clippy::iter_overeager_cloned)] 80 | fn into_iter(self, archive: &Osm) -> Option + '_> { 81 | let nodes_index = archive.nodes_index(); 82 | let nodes = archive.nodes(); 83 | let mut indices = self.inner.iter().cloned().flatten(); 84 | let scale = archive.header().coord_scale(); 85 | if indices.any(|idx| nodes_index[idx as usize].value().is_none()) { 86 | None 87 | } else { 88 | let indices = self.inner.into_iter().flatten(); 89 | Some(indices.map(move |idx| { 90 | GeoCoord::from_node( 91 | &nodes[nodes_index[idx as usize].value().unwrap() as usize], 92 | scale, 93 | ) 94 | })) 95 | } 96 | } 97 | } 98 | 99 | /// Categories of features we support in this renderer. 100 | #[derive(Debug, Clone, Copy)] 101 | enum Category { 102 | Road, 103 | Park, 104 | River(u32), // River with width 105 | Water, 106 | } 107 | 108 | /// Feature in osmflat. 109 | /// 110 | /// Idx points either into ways or relations, depending on the `Category`. 
111 | struct Feature { 112 | idx: usize, 113 | cat: Category, 114 | } 115 | 116 | impl Feature { 117 | fn into_polyline(self, archive: &Osm) -> Option { 118 | match self.cat { 119 | Category::Road | Category::River(_) => { 120 | Some(way_into_polyline(&archive.ways()[self.idx])) 121 | } 122 | Category::Park | Category::Water => multipolygon_into_polyline(archive, self.idx), 123 | } 124 | } 125 | } 126 | 127 | fn way_into_polyline(way: &Way) -> Polyline { 128 | Polyline { 129 | inner: smallvec![way.refs()], 130 | } 131 | } 132 | 133 | fn multipolygon_into_polyline(archive: &Osm, idx: usize) -> Option { 134 | let members = archive.relation_members().at(idx); 135 | let strings = archive.stringtable(); 136 | let ways = archive.ways(); 137 | 138 | let inner: Option; 4]>> = members 139 | .filter_map(|m| match m { 140 | RelationMembersRef::WayMember(way_member) 141 | if strings.substring(way_member.role_idx() as usize) == Ok("outer") => 142 | { 143 | Some(way_member.way_idx().map(|idx| ways[idx as usize].refs())) 144 | } 145 | _ => None, 146 | }) 147 | .collect(); 148 | inner.map(|inner| Polyline { inner }) 149 | } 150 | 151 | /// Classifies all features from osmflat we want to render. 152 | fn classify(archive: &Osm) -> impl Iterator + '_ { 153 | let ways = archive.ways().iter().enumerate(); 154 | let ways = ways 155 | .filter_map(move |(idx, way)| classify_way(archive, way).map(|cat| Feature { idx, cat })); 156 | let rels = archive.relations().iter().enumerate(); 157 | let rels = rels.filter_map(move |(idx, rel)| { 158 | classify_relation(archive, rel).map(|cat| Feature { idx, cat }) 159 | }); 160 | ways.chain(rels) 161 | } 162 | 163 | fn classify_way(archive: &Osm, way: &Way) -> Option { 164 | // Filter all ways that have less than 2 nodes. 
165 | // A way with fewer than 2 nodes cannot form a line segment; the range is half-open, so "less than 2 nodes" is end < start + 2. 166 | if way.refs().end <= way.refs().start + 1 { 167 | return None; 168 | } 169 | 170 | const UNWANTED_HIGHWAY_TYPES: [&[u8]; 9] = [ 171 | b"pedestrian", 172 | b"steps", 173 | b"footway", 174 | b"construction", 175 | b"bic", 176 | b"cycleway", 177 | b"layby", 178 | b"bridleway", 179 | b"path", 180 | ]; 181 | 182 | // Filter all ways that do not have a highway tag. Also check for specific 183 | // values. 184 | for (key, val) in iter_tags(archive, way.tags()) { 185 | if key == b"highway" { 186 | if UNWANTED_HIGHWAY_TYPES.contains(&val) { 187 | return None; 188 | } 189 | return Some(Category::Road); 190 | } else if key == b"waterway" { 191 | for (key, val) in iter_tags(archive, way.tags()) { 192 | if key == b"width" || key == b"maxwidth" { 193 | let width: u32 = str::from_utf8(val).ok()?.parse().ok()?; 194 | return Some(Category::River(width)); 195 | } 196 | } 197 | return Some(Category::River(1)); 198 | } 199 | } 200 | None 201 | } 202 | 203 | fn classify_relation(archive: &Osm, relation: &Relation) -> Option { 204 | let mut is_multipolygon = false; 205 | let mut is_park = false; 206 | let mut is_lake = false; 207 | 208 | for (key, val) in iter_tags(archive, relation.tags()) { 209 | if key == b"type" && val == b"multipolygon" { 210 | if is_park { 211 | return Some(Category::Park); 212 | } 213 | if is_lake { 214 | return Some(Category::Water); 215 | } 216 | is_multipolygon = true; 217 | } 218 | if (key == b"leisure" && val == b"park") 219 | || (key == b"landuse" && (val == b"recreation_ground" || val == b"forest")) 220 | { 221 | if is_multipolygon { 222 | return Some(Category::Park); 223 | } 224 | is_park = true; 225 | } 226 | if key == b"water" && val == b"lake" { 227 | if is_multipolygon { 228 | return Some(Category::Water); 229 | } 230 | is_lake = true; 231 | } 232 | } 233 | None 234 | } 235 | 236 | /// Renders svg from classified polylines. 237 | fn render_svg

( 237 | archive: &Osm, 238 | classified_polylines: P, 239 | output: PathBuf, 240 | width: u32, 241 | height: u32, 242 | ) -> Result<(), io::Error> 243 | where 244 | P: Iterator, 245 | { 246 | let mut document = Document::new().set("viewBox", (0, 0, width, height)); 247 | let mut road_group = element::Group::new() 248 | .set("stroke", "#001F3F") 249 | .set("stroke-width", "0.3") 250 | .set("fill", "none"); 251 | let mut park_group = element::Group::new() 252 | .set("stroke", "#3D9970") 253 | .set("fill", "#3D9970") 254 | .set("fill-opacity", 0.3); 255 | let mut river_group = element::Group::new() 256 | .set("stroke", "#0074D9") 257 | .set("fill", "none") 258 | .set("stroke-opacity", 0.8); 259 | let mut lake_group = element::Group::new() 260 | .set("stroke", "#0074D9") 261 | .set("fill", "#0074D9") 262 | .set("fill-opacity", 0.3); 263 | 264 | let mut min_coord = GeoCoord { 265 | lat: f64::MAX, 266 | lon: f64::MAX, 267 | }; 268 | let mut max_coord = GeoCoord { 269 | lat: f64::MIN, 270 | lon: f64::MIN, 271 | }; 272 | 273 | let mut points = String::new(); // reuse string buffer inside the for-loop 274 | for (poly, cat) in classified_polylines { 275 | points.clear(); 276 | let poly_iter = match poly.into_iter(archive) { 277 | Some(x) => x, 278 | None => continue, 279 | }; 280 | for coord in poly_iter { 281 | // collect extent 282 | min_coord = min_coord.min(coord); 283 | max_coord = max_coord.max(coord); 284 | // accumulate polyline points 285 | write!(&mut points, "{:.5},{:.5} ", coord.lon, coord.lat) 286 | .expect("failed to write coordinates"); 287 | } 288 | 289 | let polyline = element::Polyline::new().set("points", &points[..]); 290 | 291 | match cat { 292 | Category::Road => { 293 | road_group = road_group.add(polyline); 294 | } 295 | Category::River(width) => { 296 | river_group = river_group.add(polyline).set("stroke-width", width); 297 | } 298 | Category::Park => { 299 | park_group = park_group.add(polyline); 300 | } 301 | Category::Water => { 302 | lake_group = 
lake_group.add(polyline); 303 | } 304 | } 305 | } 306 | 307 | let mut transform = element::Group::new().set( 308 | "transform", 309 | format!( 310 | "scale({:.5} {:.5}) translate({:.5} {:.5})", /* Note: svg transformations are 311 | * applied from right to left */ 312 | f64::from(width) / (max_coord.lon - min_coord.lon), 313 | f64::from(height) / (min_coord.lat - max_coord.lat), // invert y-axis 314 | -min_coord.lon, 315 | -max_coord.lat, 316 | ), 317 | ); 318 | 319 | transform = transform 320 | .add(road_group) 321 | .add(river_group) 322 | .add(lake_group) 323 | .add(park_group); 324 | 325 | let style = element::Style::new( 326 | r#" 327 | text { 328 | font-family: arial; 329 | font-size: 8px; 330 | color: #001F3F; 331 | opacity: 0.3; 332 | } 333 | 334 | polyline { 335 | vector-effect: non-scaling-stroke; 336 | } 337 | "#, 338 | ); 339 | 340 | let notice = element::Text::new("© OpenStreetMap Contributors") 341 | .set("x", width.saturating_sub(10)) 342 | .set("y", height.saturating_sub(10)) 343 | .set("text-anchor", "end"); 344 | 345 | document = document.add(style).add(transform).add(notice); 346 | svg::save(output, &document) 347 | } 348 | 349 | /// render map features as a SVG 350 | #[derive(Debug, Parser)] 351 | #[clap(name = "render-features")] 352 | struct Args { 353 | /// osmflat archive 354 | osmflat_archive: PathBuf, 355 | 356 | /// SVG filename to output 357 | #[clap(long, short = 'o')] 358 | output: PathBuf, 359 | 360 | /// width of the image 361 | #[clap(long, default_value = "800")] 362 | width: u32, 363 | 364 | /// height of the image 365 | #[clap(long, default_value = "600")] 366 | height: u32, 367 | } 368 | 369 | fn main() -> Result<(), Box> { 370 | let args = Args::parse(); 371 | 372 | let storage = FileResourceStorage::new(args.osmflat_archive); 373 | let archive = Osm::open(storage)?; 374 | 375 | let features = classify(&archive); 376 | let archive_inner = archive.clone(); 377 | let classified_polylines = features.filter_map(move |f| { 378 | let 
cat = f.cat; 379 | f.into_polyline(&archive_inner).map(|p| (p, cat)) 380 | }); 381 | render_svg( 382 | &archive, 383 | classified_polylines, 384 | args.output, 385 | args.width, 386 | args.height, 387 | )?; 388 | Ok(()) 389 | } 390 | -------------------------------------------------------------------------------- /osmflat/examples/render-roads.rs: -------------------------------------------------------------------------------- 1 | //! Renders all roads by using a simple Bresenham line algorithm. 2 | //! 3 | //! LICENSE 4 | //! 5 | //! The code in this example file is released into the Public Domain. 6 | 7 | use osmflat::{find_tag_by, FileResourceStorage, Node, Osm, Way}; 8 | 9 | use clap::Parser; 10 | use itertools::Itertools; 11 | 12 | use std::f64::consts::PI; 13 | use std::fs::File; 14 | use std::io::BufWriter; 15 | use std::path::PathBuf; 16 | 17 | /// Geographic coordinates represented by (latitude, longitude). 18 | #[derive(Debug, Clone, Copy, Default, PartialEq, PartialOrd)] 19 | struct GeoCoord { 20 | lat: f64, 21 | lon: f64, 22 | } 23 | 24 | /// Convert osmflat Node into GeoCoord. 
25 | impl GeoCoord { 26 | fn from_node(node: &Node, coord_scale: i32) -> Self { 27 | Self { 28 | lat: node.lat() as f64 / coord_scale as f64, 29 | lon: node.lon() as f64 / coord_scale as f64, 30 | } 31 | } 32 | } 33 | 34 | #[derive(Debug)] 35 | struct Image { 36 | w: u32, 37 | h: u32, 38 | data: Vec, 39 | } 40 | 41 | impl Image { 42 | fn new(w: u32, h: u32) -> Self { 43 | Self { 44 | w, 45 | h, 46 | data: vec![255; (w * h) as usize], 47 | } 48 | } 49 | 50 | fn set_black(&mut self, x: u32, y: u32) { 51 | self.data[(y * self.w + x) as usize] = 0; 52 | } 53 | } 54 | 55 | fn compute_bounds(mut iter: impl Iterator) -> (GeoCoord, GeoCoord) { 56 | let first_coord = iter.next().unwrap_or_default(); 57 | iter.fold((first_coord, first_coord), |(min, max), coord| { 58 | ( 59 | GeoCoord { 60 | lat: min.lat.min(coord.lat), 61 | lon: min.lon.min(coord.lon), 62 | }, 63 | GeoCoord { 64 | lat: max.lat.max(coord.lat), 65 | lon: max.lon.max(coord.lon), 66 | }, 67 | ) 68 | }) 69 | } 70 | 71 | fn map_transform( 72 | (width, height): (u32, u32), 73 | (min, max): (GeoCoord, GeoCoord), 74 | ) -> impl FnMut(GeoCoord) -> (i32, i32) + Copy { 75 | move |coord: GeoCoord| { 76 | ( 77 | ((coord.lon - min.lon) * f64::from(width) / (max.lon - min.lon)) as i32, 78 | ((max.lat - coord.lat) * f64::from(height) / (max.lat - min.lat)) as i32, 79 | ) 80 | } 81 | } 82 | 83 | fn way_coords<'a>(archive: &'a Osm, way: &Way) -> Option + 'a> { 84 | let nodes = archive.nodes(); 85 | let nodes_index = archive.nodes_index(); 86 | let path = way.refs().map(move |i| &nodes_index[i as usize]); 87 | let scale = archive.header().coord_scale(); 88 | if path.clone().any(|node| node.value().is_none()) { 89 | None 90 | } else { 91 | Some( 92 | path.map(move |node| { 93 | GeoCoord::from_node(&nodes[node.value().unwrap() as usize], scale) 94 | }), 95 | ) 96 | } 97 | } 98 | 99 | fn way_filter(way: &Way, archive: &Osm) -> bool { 100 | const UNWANTED_HIGHWAY_TYPES: [&[u8]; 9] = [ 101 | b"pedestrian\0", 102 | b"steps\0", 103 | 
b"footway\0", 104 | b"construction\0", 105 | b"bic\0", 106 | b"cycleway\0", 107 | b"layby\0", 108 | b"bridleway\0", 109 | b"path\0", 110 | ]; 111 | 112 | // Filter all ways that do not have desirable highway tag. 113 | find_tag_by(archive, way.tags(), |key_block, val_block| { 114 | key_block.starts_with(b"highway\0") 115 | && !UNWANTED_HIGHWAY_TYPES 116 | .iter() 117 | .any(|t| val_block.starts_with(t)) 118 | }) 119 | .is_some() 120 | } 121 | 122 | fn roads(archive: &Osm) -> impl Iterator { 123 | archive 124 | .ways() 125 | .iter() 126 | .filter(move |&way| way_filter(way, archive)) 127 | } 128 | 129 | /// Bresenham's line algorithm 130 | /// 131 | /// https://en.wikipedia.org/wiki/Bresenham%27s_line_algorithm 132 | fn bresenham(mut x0: i32, mut y0: i32, x1: i32, y1: i32) -> impl Iterator { 133 | let dx = (x1 - x0).abs(); 134 | let sx = if x0 < x1 { 1 } else { -1 }; 135 | let dy = -(y1 - y0).abs(); 136 | let sy = if y0 < y1 { 1 } else { -1 }; 137 | let mut err = dx + dy; 138 | 139 | std::iter::from_fn(move || { 140 | if x0 == x1 && y0 == y1 { 141 | return None; 142 | } 143 | let res = (x0, y0); 144 | let e2 = 2 * err; 145 | if e2 >= dy { 146 | err += dy; 147 | x0 += sx; 148 | } 149 | if e2 <= dx { 150 | err += dx; 151 | y0 += sy; 152 | } 153 | Some(res) 154 | }) 155 | } 156 | 157 | fn render(archive: &Osm, width: u32) -> Image { 158 | // compute extent 159 | let coords = roads(archive) 160 | .filter_map(|way| way_coords(archive, way)) 161 | .flatten(); 162 | let (min, max) = compute_bounds(coords); 163 | 164 | // compute ratio and height 165 | let ratio = (max.lat - min.lat) / (max.lon - min.lon) / (max.lat / 180. 
* PI).cos(); 166 | let height = (f64::from(width) * ratio) as u32; 167 | 168 | // create world -> raster transformation 169 | let t = map_transform((width - 1, height - 1), (min, max)); 170 | 171 | // draw 172 | let mut image = Image::new(width, height); 173 | 174 | let line_segments = roads(archive) 175 | .filter_map(|way| Some(way_coords(archive, way)?.map(t).tuple_windows())) 176 | .flatten(); 177 | 178 | for ((x0, y0), (x1, y1)) in line_segments { 179 | for (x, y) in bresenham(x0, y0, x1, y1) { 180 | image.set_black(x as u32, y as u32); 181 | } 182 | } 183 | 184 | image 185 | } 186 | 187 | /// Renders roads as a PNG 188 | #[derive(Debug, Parser)] 189 | struct Args { 190 | /// input osmflat archive 191 | input: PathBuf, 192 | /// output PNG filename 193 | #[clap(long, short = 'o')] 194 | output: PathBuf, 195 | /// width of the image (height is derived from ratio) 196 | #[clap(long, short = 'w', default_value = "4320")] 197 | width: u32, 198 | } 199 | 200 | fn main() -> Result<(), Box> { 201 | let args = Args::parse(); 202 | 203 | let archive = Osm::open(FileResourceStorage::new(args.input))?; 204 | 205 | let image = render(&archive, args.width); 206 | 207 | let buf = BufWriter::new(File::create(&args.output)?); 208 | let mut encoder = png::Encoder::new(buf, image.w, image.h); 209 | encoder.set_color(png::ColorType::Grayscale); 210 | encoder.set_depth(png::BitDepth::Eight); 211 | let mut writer = encoder.write_header()?; 212 | writer.write_image_data(&image.data[..])?; 213 | 214 | Ok(()) 215 | } 216 | -------------------------------------------------------------------------------- /osmflat/examples/road-length.rs: -------------------------------------------------------------------------------- 1 | //! Calculates the length of the road network (everything tagged `highway=*`) 2 | //! in the input archive. 3 | //! 4 | //! Demonstrates 5 | //! 6 | //! * iteration through ways 7 | //! * accessing of tags belonging to a way 8 | //! 
* accessing of nodes belonging to a way 9 | //! * length calculation on the Earth using the haversine function 10 | //! 11 | //! LICENSE 12 | //! 13 | //! The code in this example file is released into the Public Domain. 14 | 15 | use itertools::Itertools; 16 | use osmflat::{FileResourceStorage, Node, Osm}; 17 | 18 | struct Coords { 19 | lat: f64, 20 | lon: f64, 21 | } 22 | 23 | impl Coords { 24 | fn from_node(node: &Node, coord_scale: i32) -> Self { 25 | Self { 26 | lat: node.lat() as f64 / coord_scale as f64, 27 | lon: node.lon() as f64 / coord_scale as f64, 28 | } 29 | } 30 | } 31 | 32 | fn haversine_distance(c1: Coords, c2: Coords) -> f64 { 33 | /// Earth's radius for WGS84 in meters 34 | const EARTH_RADIUS_IN_METERS: f64 = 6_372_797.560_856; 35 | 36 | let mut lonh = ((c1.lon - c2.lon).to_radians() * 0.5).sin(); 37 | lonh *= lonh; 38 | let mut lath = ((c1.lat - c2.lat).to_radians() * 0.5).sin(); 39 | lath *= lath; 40 | let tmp = c1.lat.to_radians().cos() * c2.lat.to_radians().cos(); 41 | 2.0 * EARTH_RADIUS_IN_METERS * (lath + tmp * lonh).sqrt().asin() 42 | } 43 | 44 | fn main() -> Result<(), Box> { 45 | let archive_dir = std::env::args() 46 | .nth(1) 47 | .ok_or("USAGE: road_length ")?; 48 | let archive = Osm::open(FileResourceStorage::new(archive_dir))?; 49 | let header = archive.header(); 50 | 51 | let tags = archive.tags(); 52 | let tags_index = archive.tags_index(); 53 | let strings = archive.stringtable(); 54 | 55 | let highways = archive.ways().iter().filter(|way| { 56 | way.tags().any(|idx| { 57 | // A way reference a range of tags by storing a contiguous range of 58 | // indexes in `tags_index`. Each of these references a tag in `tags`. 59 | // This is a common pattern when flattening 1 to n relations. 
60 | let tag = &tags[tags_index[idx as usize].value() as usize]; 61 | strings.substring_raw(tag.key_idx() as usize) == b"highway" 62 | }) 63 | }); 64 | 65 | let nodes = archive.nodes(); 66 | let nodes_index = archive.nodes_index(); 67 | 68 | let lengths = highways.filter_map(|way| { 69 | let coords = way.refs().map(|idx| { 70 | // A way references a range of nodes by storing a contiguous range of 71 | // indexes in `nodes_index`. Each of these references a node in `nodes`. 72 | // This is a common pattern when flattening 1 to n relations. 73 | Some(Coords::from_node( 74 | &nodes[nodes_index[idx as usize].value()? as usize], 75 | header.coord_scale(), 76 | )) 77 | }); 78 | let length: Option = coords 79 | .clone() 80 | .zip(coords.skip(1)) 81 | .map(|(from, to)| Some(haversine_distance(from?, to?))) 82 | .fold_options(0.0, |acc, x| acc + x); 83 | length 84 | }); 85 | 86 | let length: f64 = lengths.sum(); 87 | println!("Length: {:.0} km", length / 1000.0); 88 | 89 | Ok(()) 90 | } 91 | -------------------------------------------------------------------------------- /osmflat/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny(missing_docs)] 2 | #![allow(clippy::all)] // generated code is not clippy friendly 3 | 4 | //! Flat OpenStreetMap (OSM) data format providing an efficient *random* data 5 | //! access through [memory mapped files]. 6 | //! 7 | //! The data format is described and implemented in [flatdata]. The [schema] 8 | //! describes the fundamental OSM data structures: nodes, ways, relations and 9 | //! tags as simple non-nested data structures. The relations between these are 10 | //! expressed through indexes. 11 | //! 12 | //! ## Examples 13 | //! 14 | //! Open a flatdata archive (compiled from pbf with [`osmflatc`]) and iterate 15 | //! through nodes: 16 | //! 17 | //! ```rust,no_run 18 | //! use osmflat::{FileResourceStorage, Osm}; 19 | //! 20 | //! fn main() { 21 | //! 
let storage = FileResourceStorage::new("path/to/archive.osm.flatdata"); 22 | //! let archive = Osm::open(storage).unwrap(); 23 | //! 24 | //! for node in archive.nodes().iter() { 25 | //! println!("{:?}", node); 26 | //! } 27 | //! } 28 | //! ``` 29 | //! 30 | //! For more examples, see the [examples] directory. 31 | //! 32 | //! [flatdata]: https://github.com/heremaps/flatdata 33 | //! [schema]: https://github.com/boxdot/osmflat-rs/blob/master/flatdata/osm.flatdata 34 | //! [memory mapped files]: https://en.wikipedia.org/wiki/Memory-mapped_file 35 | //! [`osmflatc`]: https://github.com/boxdot/osmflat-rs/tree/master/osmflatc 36 | //! [examples]: https://github.com/boxdot/osmflat-rs/tree/master/osmflat/examples 37 | 38 | // generated osm module 39 | include!("osmflat_generated.rs"); 40 | 41 | mod tags; 42 | 43 | pub use crate::osm::*; 44 | pub use crate::tags::*; 45 | 46 | // re-export what is needed from flatdata to use osmflat 47 | pub use flatdata::FileResourceStorage; 48 | #[cfg(feature = "tar")] 49 | pub use flatdata::TarArchiveResourceStorage; 50 | -------------------------------------------------------------------------------- /osmflat/src/tags.rs: -------------------------------------------------------------------------------- 1 | //! All functions in this module operate on raw bytes for performance reasons. 2 | //! It is easy to combine these with `std::str::from_utf8` family of functions, 3 | //! to lift them to operate on `str`. 4 | 5 | use crate::Osm; 6 | use std::ops::Range; 7 | 8 | /// Returns an iterator over tags specified by `range`. 9 | /// 10 | /// When searching for a tag by key consider to use `find_tag` which 11 | /// performs better. 
#[inline]
pub fn iter_tags(archive: &Osm, range: Range<u64>) -> impl Iterator<Item = (&[u8], &[u8])> + Clone {
    let tags = archive.tags();
    let tags_index = archive.tags_index();
    let strings = archive.stringtable();

    // `range` indexes into `tags_index`, which in turn references `tags`;
    // key/value of each tag are byte offsets into the string table.
    range.map(move |idx| {
        let tag = &tags[tags_index[idx as usize].value() as usize];
        let key = strings.substring_raw(tag.key_idx() as usize);
        let val = strings.substring_raw(tag.value_idx() as usize);
        (key, val)
    })
}

/// Finds the first tag in the given `range` which satisfies the predicate
/// applied to the key and value and returns the corresponding value.
///
/// Note that the predicate function is called on the whole key block and value
/// block. These are zero (`\0`) divided blocks of bytes that start at the key
/// resp. value, and contain the rest string data. In particular, the len of
/// the block is *not* the len of the key resp. value. The user is responsible
/// to check or find the zero terminator.
#[inline]
pub fn find_tag_by(
    archive: &Osm,
    mut range: Range<u64>,
    mut predicate: impl FnMut(&[u8], &[u8]) -> bool,
) -> Option<&[u8]> {
    let tags = archive.tags();
    let tags_index = archive.tags_index();
    let strings = archive.stringtable();

    // Pass the whole tail of the string table to `predicate`: this avoids
    // scanning for the terminating `\0` unless the caller actually needs it.
    range.find_map(move |idx| {
        let tag = &tags[tags_index[idx as usize].value() as usize];
        let key_block = &strings.as_bytes()[tag.key_idx() as usize..];
        let value_block = &strings.as_bytes()[tag.value_idx() as usize..];
        if predicate(key_block, value_block) {
            Some(strings.substring_raw(tag.value_idx() as usize))
        } else {
            None
        }
    })
}

/// Finds a tag by its key in the given `range` and returns the corresponding
/// value.
#[inline]
pub fn find_tag<'a>(archive: &'a Osm, range: Range<u64>, key: &[u8]) -> Option<&'a [u8]> {
    // The key matches iff the key block starts with `key` and is terminated
    // right after it by `\0` (or by the end of the string table).
    find_tag_by(archive, range, |key_block, _| {
        key_block.starts_with(key) && *key_block.get(key.len()).unwrap_or(&0) == 0
    })
}

/// Checks if there is a tag in `range` with a given `key` and `value`.
#[inline]
pub fn has_tag(archive: &Osm, range: Range<u64>, key: &[u8], value: &[u8]) -> bool {
    let tags = archive.tags();
    let tags_index = archive.tags_index();
    let strings = archive.stringtable();

    // `idx` is an offset into the string table; it matches iff the block at
    // that offset starts with `value` followed by `\0` (or the table's end).
    let matches = |idx, value| {
        let block = &strings.as_bytes()[idx as usize..];
        block.starts_with(value) && *block.get(value.len()).unwrap_or(&0) == 0
    };

    for idx in range {
        let tag = &tags[tags_index[idx as usize].value() as usize];
        if matches(tag.key_idx(), key) {
            // First matching key decides; tags with duplicate keys are not
            // searched further.
            return matches(tag.value_idx(), value);
        }
    }
    false
}
--------------------------------------------------------------------------------
/osmflatc/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "osmflatc"
version = "0.3.1"
authors = [
    "boxdot ",
    "Christian Vetter ",
    "Gabriel Féron "
]
license = "MIT/Apache-2.0"
description = "Compiler of OpenStreetMap (OSM) Data from osm.pbf format to osm.flatdata format"
repository = "https://github.com/boxdot/osmflat-rs"
keywords = ["serialization", "osm", "openstreetmap", "flatdata"]
categories = ["encoding"]
readme = "README.md"
edition = "2021"

[dependencies]
byteorder = "1.4.3"
bytes = "1.4.0"
clap = { version = "4.1.4", features = ["derive"] }
crossbeam = "0.8.2"
env_logger = "0.11.0"
flatdata = "0.5.3"
flate2 = "1.0.25"
itertools = "0.13.0"
log = "0.4.17"
memmap2 = "0.9.0"
osmflat = "0.3.0"
parking_lot = "0.12.1"
prost = "0.13.2"
prost-derive =
"0.13.2" 32 | prost-types = "0.13.2" 33 | rayon = "1.6.1" 34 | ahash = "0.8.3" 35 | indicatif = "0.17.3" 36 | 37 | [build-dependencies] 38 | prost-build = "0.13.2" 39 | 40 | [dev-dependencies] 41 | proptest = "1.0.0" 42 | -------------------------------------------------------------------------------- /osmflatc/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /osmflatc/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /osmflatc/README.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /osmflatc/build.rs: -------------------------------------------------------------------------------- 1 | extern crate prost_build; 2 | 3 | fn main() { 4 | prost_build::compile_protos( 5 | &["src/proto/osmformat.proto", "src/proto/fileformat.proto"], 6 | &["src/proto"], 7 | ) 8 | .expect("failed to compile protobuf"); 9 | } 10 | -------------------------------------------------------------------------------- /osmflatc/src/args.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use clap::Parser; 4 | 5 | /// Compiler of Open Street Data from osm.pbf format to osm.flatdata format 6 | #[derive(Debug, Parser)] 7 | #[clap(about, version, author)] 8 | pub struct Args { 9 | /// Verbose mode (-v, -vv, -vvv, etc.) 
#[clap(short, long, action = clap::ArgAction::Count)]
    pub verbose: u8,

    /// Input OSM pbf file
    pub input: PathBuf,

    /// Output directory for OSM flatdata archive
    pub output: PathBuf,

    /// Whether to compile the optional ids subs
    #[arg(long = "ids")]
    pub ids: bool,
}
--------------------------------------------------------------------------------
/osmflatc/src/ids.rs:
--------------------------------------------------------------------------------
// Truncated ids within one block cover the range [0, 2^24).
const ID_BLOCK_SIZE: usize = 1 << 24;
const DENSE_LOOKUP_BLOCK_SIZE: usize = 1 << 4;

/// An IdBlock can either be Sparse or Dense
/// Sparse: A sorted list of ids, the position determines the index
/// Dense: A bitset of the whole range. An additional offsets lookup
///        provides fast lookup for the index by storing the sum of
///        set bits every DENSE_LOOKUP_BLOCK_SIZE * 8 bits
#[derive(Debug, Clone)]
enum IdBlock {
    Dense {
        // bitset over the 2^24 possible truncated ids, one bit each
        includes: Vec<u8>,
        // prefix sums of set bits, one entry per DENSE_LOOKUP_BLOCK_SIZE bytes
        offsets: Vec<u32>,
    },
    Sparse(Vec<u32>),
}

impl IdBlock {
    /// Amount of ids in the block
    fn count(&self) -> u32 {
        match self {
            IdBlock::Sparse(ids) => ids.len() as u32,
            IdBlock::Dense { offsets, includes } => {
                // last lookup entry + popcount of the trailing bytes that the
                // lookup table does not cover
                let last_bits: u32 = includes[includes.len() - DENSE_LOOKUP_BLOCK_SIZE..]
                    .iter()
                    .map(|x| x.count_ones())
                    .sum();
                *offsets.last().unwrap() + last_bits
            }
        }
    }

    /// adds a truncated id into the current block
    fn insert(&mut self, x: u32) {
        match self {
            IdBlock::Sparse(ids) => {
                // Stay sparse while the id list is cheaper than the dense
                // bitset; otherwise convert this block to Dense.
                if ids.len() * 8 < ID_BLOCK_SIZE / 8 {
                    ids.push(x)
                } else {
                    let mut dense = IdBlock::Dense {
                        includes: vec![0; ID_BLOCK_SIZE / 8],
                        offsets: vec![0; ID_BLOCK_SIZE / 8 / DENSE_LOOKUP_BLOCK_SIZE],
                    };
                    for id in ids {
                        dense.insert(*id);
                    }
                    dense.insert(x);

                    *self = dense;
                }
            }
            IdBlock::Dense { includes, ..
} => includes[x as usize / 8] |= 1 << (x % 8),
        }
    }

    /// Establishes the lookup tables (only needed for Dense blocks).
    ///
    /// Afterwards `offsets[i]` holds the number of set bits in all bytes
    /// preceding lookup block `i`, so `pos` can answer rank queries quickly.
    fn finalize(&mut self) {
        if let IdBlock::Dense { includes, offsets } = self {
            // per-lookup-block popcounts ...
            for block in 0..offsets.len() - 1 {
                offsets[block + 1] = includes
                    [block * DENSE_LOOKUP_BLOCK_SIZE..(block + 1) * DENSE_LOOKUP_BLOCK_SIZE]
                    .iter()
                    .map(|x| x.count_ones())
                    .sum();
            }
            // ... turned into prefix sums
            for block in 0..offsets.len() - 1 {
                offsets[block + 1] += offsets[block];
            }
        }
    }

    /// Finds the position/index of a truncated id (if it is in the block).
    fn pos(&self, x: u32) -> Option<u32> {
        match self {
            IdBlock::Sparse(ids) => ids.binary_search(&x).ok().map(|x| x as u32),
            IdBlock::Dense { includes, offsets } => {
                if (includes[x as usize / 8] & (1 << (x % 8))) == 0 {
                    None
                } else {
                    // rank(x) = prefix sum up to the lookup block of `x` plus
                    // a popcount of the remaining bits before `x`
                    let offset_pos = x as usize / 8 / DENSE_LOOKUP_BLOCK_SIZE;
                    let start_block = offset_pos * 8 * DENSE_LOOKUP_BLOCK_SIZE;
                    let rest = x as usize % (8 * DENSE_LOOKUP_BLOCK_SIZE);
                    let mut result = offsets[offset_pos];
                    for i in start_block..start_block + rest {
                        result += ((includes[i / 8] & (1 << (i % 8))) != 0) as u32;
                    }
                    Some(result)
                }
            }
        }
    }
}

/// Maps u64 integers to a consecutive range of ids
#[derive(Debug)]
pub struct IdTable {
    // map u64 id x to u32 by storing a sorted mapping table for each value of
    // x / 2^24; the u64 of each entry is the number of ids in all blocks before
    data: Vec<(u64, IdBlock)>,
}

#[derive(Debug, Default)]
pub struct IdTableBuilder {
    // stores the same data as IdTable, but still in process of being built
    data: Vec<IdBlock>,
    last_id: Option<u64>,
    next_id: u64,
}

impl IdTableBuilder {
    pub fn new() -> Self {
        Default::default()
    }

    /// Inserts an Id and returns a mapped index
    ///
    /// Panics if ids are not inserted in strictly increasing order.
    pub fn insert(&mut self, x: u64) -> u64 {
        if let Some(last_id) = self.last_id {
            assert!(last_id < x, "Ids are expected to be sorted");
        }
        self.last_id = Some(x);
        let id_set = (x >> 24) as usize;
        if self.data.len() <= id_set {
            self.data.resize(id_set + 1, IdBlock::Sparse(Vec::new()));
        }
        self.data[id_set].insert((x % (1u64 << 24)) as u32);
        let result = self.next_id;
        self.next_id += 1;
        result
    }

    /// Finalizes all blocks and computes each block's starting index.
    pub fn build(mut self) -> IdTable {
        for ids in &mut self.data {
            ids.finalize();
        }
        let result = self
            .data
            .into_iter()
            .scan(0, |state, ids| {
                let offset = *state;
                *state += ids.count() as u64;
                Some((offset, ids))
            })
            .collect();
        IdTable { data: result }
    }
}

impl IdTable {
    /// Returns the consecutive index assigned to id `x`, or `None` if `x` was
    /// never inserted.
    pub fn get(&self, x: u64) -> Option<u64> {
        let id_set = (x >> 24) as usize;
        // BUGFIX: was `id_set > self.data.len()`, which let
        // `id_set == self.data.len()` fall through and panic on the index
        // below for ids just past the last allocated block.
        if id_set >= self.data.len() {
            return None;
        }
        self.data[id_set]
            .1
            .pos((x % (1u64 << 24)) as u32)
            .map(|pos| self.data[id_set].0 + pos as u64)
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_mapping_of_small_ints() {
        let mut builder = IdTableBuilder::new();
        let mut data = [9, 8, 7, 4, 3, 10, 13];
        data.sort_unstable();
        for x in data.iter() {
            builder.insert(*x);
        }

        let lookup = builder.build();
        for (pos, x) in data.iter().enumerate() {
            let res = lookup.get(*x);
            assert_eq!(res, Some(pos as u64));
        }

        for x in [0, 1, 2, 5, 6, 11, 12, 14].iter() {
            let res = lookup.get(*x);
            assert_eq!(res, None);
        }
    }

    #[test]
    fn test_mapping_of_large_ints() {
        let mut builder = IdTableBuilder::new();
        let mut data = [2, 1, 1_u64 << 33, 1_u64 << 34];
        data.sort_unstable();
        for x in data.iter() {
            builder.insert(*x);
        }

        let lookup = builder.build();
        for (pos, x) in data.iter().enumerate() {
            let res = lookup.get(*x);
| assert_eq!(res, Some(pos as u64)); 198 | } 199 | 200 | for x in [0, 3, (1_u64 << 33) + 1, (1_u64 << 34) + 1, 1_u64 << 35].iter() { 201 | let res = lookup.get(*x); 202 | assert_eq!(res, None); 203 | } 204 | } 205 | 206 | #[test] 207 | fn test_large_indices() { 208 | let mut builder = IdTableBuilder::new(); 209 | let mut data = [2, 1, 1_u64 << 33, 1_u64 << 34]; 210 | data.sort_unstable(); 211 | for x in data.iter() { 212 | builder.insert(*x); 213 | } 214 | 215 | let lookup = builder.build(); 216 | for (pos, x) in data.iter().enumerate() { 217 | let res = lookup.get(*x); 218 | assert_eq!(res, Some(pos as u64)); 219 | } 220 | 221 | for x in [0, 3, (1_u64 << 33) + 1, (1_u64 << 34) + 1, 1_u64 << 35].iter() { 222 | let res = lookup.get(*x); 223 | assert_eq!(res, None); 224 | } 225 | } 226 | 227 | #[test] 228 | fn test_dense() { 229 | let mut builder = IdTableBuilder::new(); 230 | let mut data = Vec::new(); 231 | for i in 0..ID_BLOCK_SIZE { 232 | data.push(i as u64 * 3 + (1_u64 << 34)); 233 | } 234 | data.sort_unstable(); 235 | for x in data.iter() { 236 | builder.insert(*x); 237 | } 238 | 239 | let lookup = builder.build(); 240 | for i in 0..ID_BLOCK_SIZE * 3 { 241 | let res = lookup.get(i as u64 + (1_u64 << 34)); 242 | if i % 3 == 0 { 243 | assert_eq!(Some(i as u64 / 3), res); 244 | } else { 245 | assert_eq!(None, res); 246 | } 247 | } 248 | } 249 | } 250 | -------------------------------------------------------------------------------- /osmflatc/src/main.rs: -------------------------------------------------------------------------------- 1 | mod args; 2 | mod ids; 3 | mod osmpbf; 4 | mod parallel; 5 | mod stats; 6 | mod strings; 7 | 8 | use crate::osmpbf::{build_block_index, read_block, BlockIndex, BlockType}; 9 | use crate::stats::Stats; 10 | use crate::strings::StringTable; 11 | 12 | use clap::Parser; 13 | use flatdata::FileResourceStorage; 14 | use indicatif::{ProgressBar, ProgressStyle}; 15 | use itertools::Itertools; 16 | use log::{error, info}; 17 | use 
memmap2::Mmap; 18 | 19 | use ahash::AHashMap; 20 | use std::collections::hash_map; 21 | use std::fs::File; 22 | use std::io; 23 | use std::str; 24 | 25 | type Error = Box; 26 | 27 | fn serialize_header( 28 | header_block: &osmpbf::HeaderBlock, 29 | coord_scale: i32, 30 | builder: &osmflat::OsmBuilder, 31 | stringtable: &mut StringTable, 32 | ) -> io::Result<()> { 33 | let mut header = osmflat::Header::new(); 34 | 35 | header.set_coord_scale(coord_scale); 36 | 37 | if let Some(ref bbox) = header_block.bbox { 38 | header.set_bbox_left((bbox.left / (1000000000 / coord_scale) as i64) as i32); 39 | header.set_bbox_right((bbox.right / (1000000000 / coord_scale) as i64) as i32); 40 | header.set_bbox_top((bbox.top / (1000000000 / coord_scale) as i64) as i32); 41 | header.set_bbox_bottom((bbox.bottom / (1000000000 / coord_scale) as i64) as i32); 42 | }; 43 | 44 | header.set_writingprogram_idx(stringtable.insert("osmflatc")); 45 | 46 | if let Some(ref source) = header_block.source { 47 | header.set_source_idx(stringtable.insert(source)); 48 | } 49 | 50 | if let Some(timestamp) = header_block.osmosis_replication_timestamp { 51 | header.set_replication_timestamp(timestamp); 52 | } 53 | 54 | if let Some(number) = header_block.osmosis_replication_sequence_number { 55 | header.set_replication_sequence_number(number); 56 | } 57 | 58 | if let Some(ref url) = header_block.osmosis_replication_base_url { 59 | header.set_replication_base_url_idx(stringtable.insert(url)); 60 | } 61 | 62 | builder.set_header(&header)?; 63 | Ok(()) 64 | } 65 | 66 | #[derive(PartialEq, Eq, Copy, Clone)] 67 | struct I40 { 68 | x: [u8; 5], 69 | } 70 | 71 | impl I40 { 72 | fn from_u64(x: u64) -> Self { 73 | let x = x.to_le_bytes(); 74 | debug_assert_eq!((x[5], x[6], x[7]), (0, 0, 0)); 75 | Self { 76 | x: [x[0], x[1], x[2], x[3], x[4]], 77 | } 78 | } 79 | 80 | fn to_u64(self) -> u64 { 81 | let extented = [ 82 | self.x[0], self.x[1], self.x[2], self.x[3], self.x[4], 0, 0, 0, 83 | ]; 84 | 
u64::from_le_bytes(extented)
    }
}

#[allow(clippy::derived_hash_with_manual_eq)]
impl std::hash::Hash for I40 {
    fn hash<H>(&self, h: &mut H)
    where
        H: std::hash::Hasher,
    {
        // We manually implement Hash like this, since [u8; 5] is slower to hash
        // than u64 for some/many hash functions
        self.to_u64().hash(h)
    }
}

/// Holds tags external vector and deduplicates tags.
struct TagSerializer<'a> {
    tags: flatdata::ExternalVector<'a, osmflat::Tag>,
    tags_index: flatdata::ExternalVector<'a, osmflat::TagIndex>,
    dedup: AHashMap<(I40, I40), I40>, // deduplication table: (key_idx, val_idx) -> pos
}

impl<'a> TagSerializer<'a> {
    /// Opens the `tags` and `tags_index` resources of the archive for writing.
    fn new(builder: &'a osmflat::OsmBuilder) -> io::Result<Self> {
        Ok(Self {
            tags: builder.start_tags()?,
            tags_index: builder.start_tags_index()?,
            dedup: AHashMap::new(),
        })
    }

    /// Appends one reference to `tags_index`; the (key_idx, val_idx) pair
    /// itself is appended to `tags` only the first time it is seen.
    fn serialize(&mut self, key_idx: u64, val_idx: u64) -> Result<(), Error> {
        let idx = match self
            .dedup
            .entry((I40::from_u64(key_idx), I40::from_u64(val_idx)))
        {
            hash_map::Entry::Occupied(entry) => entry.get().to_u64(),
            hash_map::Entry::Vacant(entry) => {
                // New (key, value) combination: append to `tags` and remember
                // its position for future lookups.
                let idx = self.tags.len() as u64;
                let tag = self.tags.grow()?;
                tag.set_key_idx(key_idx);
                tag.set_value_idx(val_idx);
                entry.insert(I40::from_u64(idx));
                idx
            }
        };

        let tag_index = self.tags_index.grow()?;
        tag_index.set_value(idx);

        Ok(())
    }

    /// Index that the next serialized tag reference will receive.
    fn next_index(&self) -> u64 {
        self.tags_index.len() as u64
    }

    /// Flushes both vectors to storage; panics if closing fails.
    fn close(self) {
        if let Err(e) = self.tags.close() {
            panic!("failed to close tags: {}", e);
        }
        if let Err(e) = self.tags_index.close() {
            panic!("failed to close tags index: {}", e);
        }
    }
}

/// adds all strings in a table to the lookup and returns a vector of
references to be used instead 154 | fn add_string_table( 155 | pbf_stringtable: &osmpbf::StringTable, 156 | stringtable: &mut StringTable, 157 | ) -> Result, Error> { 158 | let mut result = Vec::with_capacity(pbf_stringtable.s.len()); 159 | for x in &pbf_stringtable.s { 160 | let string = str::from_utf8(x)?; 161 | result.push(stringtable.insert(string)); 162 | } 163 | Ok(result) 164 | } 165 | 166 | fn serialize_dense_nodes( 167 | block: &osmpbf::PrimitiveBlock, 168 | granularity: i32, 169 | nodes: &mut flatdata::ExternalVector, 170 | node_ids: &mut Option>, 171 | nodes_id_to_idx: &mut ids::IdTableBuilder, 172 | stringtable: &mut StringTable, 173 | tags: &mut TagSerializer, 174 | ) -> Result { 175 | let mut stats = Stats::default(); 176 | let string_refs = add_string_table(&block.stringtable, stringtable)?; 177 | for group in block.primitivegroup.iter() { 178 | let dense_nodes = group.dense.as_ref().unwrap(); 179 | 180 | let pbf_granularity = block.granularity.unwrap_or(100); 181 | let lat_offset = block.lat_offset.unwrap_or(0); 182 | let lon_offset = block.lon_offset.unwrap_or(0); 183 | let mut lat = 0; 184 | let mut lon = 0; 185 | 186 | let mut tags_offset = 0; 187 | 188 | let mut id = 0; 189 | for i in 0..dense_nodes.id.len() { 190 | id += dense_nodes.id[i]; 191 | 192 | let index = nodes_id_to_idx.insert(id as u64); 193 | assert_eq!(index as usize, nodes.len()); 194 | 195 | let node = nodes.grow()?; 196 | if let Some(ids) = node_ids { 197 | ids.grow()?.set_value(id as u64); 198 | } 199 | 200 | lat += dense_nodes.lat[i]; 201 | lon += dense_nodes.lon[i]; 202 | node.set_lat( 203 | ((lat_offset + (i64::from(pbf_granularity) * lat)) / granularity as i64) as i32, 204 | ); 205 | node.set_lon( 206 | ((lon_offset + (i64::from(pbf_granularity) * lon)) / granularity as i64) as i32, 207 | ); 208 | 209 | if tags_offset < dense_nodes.keys_vals.len() { 210 | node.set_tag_first_idx(tags.next_index()); 211 | loop { 212 | let k = dense_nodes.keys_vals[tags_offset]; 213 | 
tags_offset += 1; 214 | 215 | if k == 0 { 216 | break; // separator 217 | } 218 | 219 | let v = dense_nodes.keys_vals[tags_offset]; 220 | tags_offset += 1; 221 | 222 | tags.serialize(string_refs[k as usize], string_refs[v as usize])?; 223 | } 224 | } 225 | } 226 | assert_eq!(tags_offset, dense_nodes.keys_vals.len()); 227 | stats.num_nodes += dense_nodes.id.len(); 228 | } 229 | Ok(stats) 230 | } 231 | 232 | fn resolve_ways( 233 | block: &osmpbf::PrimitiveBlock, 234 | nodes_id_to_idx: &ids::IdTable, 235 | ) -> (Vec>, Stats) { 236 | let mut result = Vec::new(); 237 | let mut stats = Stats::default(); 238 | for group in &block.primitivegroup { 239 | for pbf_way in &group.ways { 240 | let mut node_ref = 0; 241 | for delta in &pbf_way.refs { 242 | node_ref += delta; 243 | let idx = nodes_id_to_idx.get(node_ref as u64); 244 | stats.num_unresolved_node_ids += idx.is_none() as usize; 245 | 246 | result.push(idx); 247 | } 248 | } 249 | } 250 | (result, stats) 251 | } 252 | 253 | #[allow(clippy::too_many_arguments)] 254 | fn serialize_ways( 255 | block: &osmpbf::PrimitiveBlock, 256 | nodes_id_to_idx: &[Option], 257 | ways: &mut flatdata::ExternalVector, 258 | way_ids: &mut Option>, 259 | ways_id_to_idx: &mut ids::IdTableBuilder, 260 | stringtable: &mut StringTable, 261 | tags: &mut TagSerializer, 262 | nodes_index: &mut flatdata::ExternalVector, 263 | ) -> Result { 264 | let mut stats = Stats::default(); 265 | let string_refs = add_string_table(&block.stringtable, stringtable)?; 266 | let mut nodes_idx = nodes_id_to_idx.iter().cloned(); 267 | for group in &block.primitivegroup { 268 | for pbf_way in &group.ways { 269 | let index = ways_id_to_idx.insert(pbf_way.id as u64); 270 | assert_eq!(index as usize, ways.len()); 271 | 272 | let way = ways.grow()?; 273 | if let Some(ids) = way_ids { 274 | ids.grow()?.set_value(pbf_way.id as u64); 275 | } 276 | 277 | debug_assert_eq!(pbf_way.keys.len(), pbf_way.vals.len(), "invalid input data"); 278 | 
way.set_tag_first_idx(tags.next_index()); 279 | 280 | for i in 0..pbf_way.keys.len() { 281 | tags.serialize( 282 | string_refs[pbf_way.keys[i] as usize], 283 | string_refs[pbf_way.vals[i] as usize], 284 | )?; 285 | } 286 | 287 | way.set_ref_first_idx(nodes_index.len() as u64); 288 | for _ in &pbf_way.refs { 289 | nodes_index.grow()?.set_value(nodes_idx.next().unwrap()); 290 | } 291 | } 292 | stats.num_ways += group.ways.len(); 293 | } 294 | Ok(stats) 295 | } 296 | 297 | fn build_relations_index(data: &[u8], block_index: I) -> Result 298 | where 299 | I: ExactSizeIterator + Send + 'static, 300 | { 301 | let mut result = ids::IdTableBuilder::new(); 302 | let pb = ProgressBar::new(block_index.len() as u64) 303 | .with_style(pb_style()) 304 | .with_prefix("Building relations index"); 305 | parallel::parallel_process( 306 | block_index, 307 | |idx| read_block(data, &idx), 308 | |block: Result| -> Result<(), Error> { 309 | for group in &block?.primitivegroup { 310 | for relation in &group.relations { 311 | result.insert(relation.id as u64); 312 | } 313 | } 314 | pb.inc(1); 315 | Ok(()) 316 | }, 317 | )?; 318 | pb.finish(); 319 | 320 | Ok(result.build()) 321 | } 322 | 323 | #[allow(clippy::too_many_arguments)] 324 | fn serialize_relations( 325 | block: &osmpbf::PrimitiveBlock, 326 | nodes_id_to_idx: &ids::IdTable, 327 | ways_id_to_idx: &ids::IdTable, 328 | relations_id_to_idx: &ids::IdTable, 329 | stringtable: &mut StringTable, 330 | relations: &mut flatdata::ExternalVector, 331 | relation_ids: &mut Option>, 332 | relation_members: &mut flatdata::MultiVector, 333 | tags: &mut TagSerializer, 334 | ) -> Result { 335 | let mut stats = Stats::default(); 336 | let string_refs = add_string_table(&block.stringtable, stringtable)?; 337 | for group in &block.primitivegroup { 338 | for pbf_relation in &group.relations { 339 | let relation = relations.grow()?; 340 | if let Some(ids) = relation_ids { 341 | ids.grow()?.set_value(pbf_relation.id as u64); 342 | } 343 | 344 | 
debug_assert_eq!(
                pbf_relation.keys.len(),
                pbf_relation.vals.len(),
                "invalid input data"
            );
            relation.set_tag_first_idx(tags.next_index());
            for i in 0..pbf_relation.keys.len() {
                tags.serialize(
                    string_refs[pbf_relation.keys[i] as usize],
                    string_refs[pbf_relation.vals[i] as usize],
                )?;
            }

            debug_assert!(
                pbf_relation.roles_sid.len() == pbf_relation.memids.len()
                    && pbf_relation.memids.len() == pbf_relation.types.len(),
                "invalid input data"
            );

            // Member ids are delta-encoded in the pbf data.
            let mut memid = 0;
            let mut members = relation_members.grow()?;
            for i in 0..pbf_relation.roles_sid.len() {
                memid += pbf_relation.memids[i];

                let member_type = osmpbf::relation::MemberType::try_from(pbf_relation.types[i]);
                debug_assert!(member_type.is_ok());

                match member_type.unwrap() {
                    osmpbf::relation::MemberType::Node => {
                        let idx = nodes_id_to_idx.get(memid as u64);
                        // BUGFIX: accumulate (`+=`) instead of overwriting
                        // (`=`), consistent with `resolve_ways`; otherwise
                        // the unresolved counters only reflected the last
                        // member seen.
                        stats.num_unresolved_node_ids += idx.is_none() as usize;

                        let member = members.add_node_member();
                        member.set_node_idx(idx);
                        member.set_role_idx(string_refs[pbf_relation.roles_sid[i] as usize]);
                    }
                    osmpbf::relation::MemberType::Way => {
                        let idx = ways_id_to_idx.get(memid as u64);
                        // BUGFIX: `+=` instead of `=` (see above).
                        stats.num_unresolved_way_ids += idx.is_none() as usize;

                        let member = members.add_way_member();
                        member.set_way_idx(idx);
                        member.set_role_idx(string_refs[pbf_relation.roles_sid[i] as usize]);
                    }
                    osmpbf::relation::MemberType::Relation => {
                        let idx = relations_id_to_idx.get(memid as u64);
                        // BUGFIX: `+=` instead of `=` (see above).
                        stats.num_unresolved_rel_ids += idx.is_none() as usize;

                        let member = members.add_relation_member();
                        member.set_relation_idx(idx);
                        member.set_role_idx(string_refs[pbf_relation.roles_sid[i] as usize]);
                    }
                }
            }
            stats.num_relations += 1;
        }
    }
    Ok(stats)
}

#[allow(clippy::too_many_arguments)] 405 | fn serialize_dense_node_blocks( 406 | builder: &osmflat::OsmBuilder, 407 | granularity: i32, 408 | mut node_ids: Option>, 409 | blocks: Vec, 410 | data: &[u8], 411 | tags: &mut TagSerializer, 412 | stringtable: &mut StringTable, 413 | stats: &mut Stats, 414 | ) -> Result { 415 | let mut nodes_id_to_idx = ids::IdTableBuilder::new(); 416 | let mut nodes = builder.start_nodes()?; 417 | let pb = ProgressBar::new(blocks.len() as u64) 418 | .with_style(pb_style()) 419 | .with_prefix("Converting dense nodes"); 420 | parallel::parallel_process( 421 | blocks.into_iter(), 422 | |idx| read_block(data, &idx), 423 | |block| -> Result { 424 | let block = block?; 425 | *stats += serialize_dense_nodes( 426 | &block, 427 | granularity, 428 | &mut nodes, 429 | &mut node_ids, 430 | &mut nodes_id_to_idx, 431 | stringtable, 432 | tags, 433 | )?; 434 | 435 | pb.inc(1); 436 | Ok(block) 437 | }, 438 | )?; 439 | pb.finish(); 440 | 441 | // fill tag_first_idx of the sentry, since it contains the end of the tag range 442 | // of the last node 443 | nodes.grow()?.set_tag_first_idx(tags.next_index()); 444 | nodes.close()?; 445 | if let Some(ids) = node_ids { 446 | ids.close()?; 447 | } 448 | info!("Dense nodes converted."); 449 | info!("Building dense nodes index..."); 450 | let nodes_id_to_idx = nodes_id_to_idx.build(); 451 | info!("Dense nodes index built."); 452 | Ok(nodes_id_to_idx) 453 | } 454 | 455 | type PrimitiveBlockWithIds = (osmpbf::PrimitiveBlock, (Vec>, Stats)); 456 | 457 | #[allow(clippy::too_many_arguments)] 458 | fn serialize_way_blocks( 459 | builder: &osmflat::OsmBuilder, 460 | mut way_ids: Option>, 461 | blocks: Vec, 462 | data: &[u8], 463 | nodes_id_to_idx: &ids::IdTable, 464 | tags: &mut TagSerializer, 465 | stringtable: &mut StringTable, 466 | stats: &mut Stats, 467 | ) -> Result { 468 | let mut ways_id_to_idx = ids::IdTableBuilder::new(); 469 | let mut ways = builder.start_ways()?; 470 | let pb = ProgressBar::new(blocks.len() as 
u64) 471 | .with_style(pb_style()) 472 | .with_prefix("Converting ways"); 473 | let mut nodes_index = builder.start_nodes_index()?; 474 | parallel::parallel_process( 475 | blocks.into_iter(), 476 | |idx| { 477 | let block: osmpbf::PrimitiveBlock = read_block(data, &idx)?; 478 | let ids = resolve_ways(&block, nodes_id_to_idx); 479 | Ok((block, ids)) 480 | }, 481 | |block: io::Result| -> Result { 482 | let (block, (ids, stats_resolve)) = block?; 483 | *stats += stats_resolve; 484 | *stats += serialize_ways( 485 | &block, 486 | &ids, 487 | &mut ways, 488 | &mut way_ids, 489 | &mut ways_id_to_idx, 490 | stringtable, 491 | tags, 492 | &mut nodes_index, 493 | )?; 494 | pb.inc(1); 495 | 496 | Ok(block) 497 | }, 498 | )?; 499 | 500 | { 501 | let sentinel = ways.grow()?; 502 | sentinel.set_tag_first_idx(tags.next_index()); 503 | sentinel.set_ref_first_idx(nodes_index.len() as u64); 504 | } 505 | ways.close()?; 506 | if let Some(ids) = way_ids { 507 | ids.close()?; 508 | } 509 | nodes_index.close()?; 510 | 511 | pb.finish(); 512 | info!("Ways converted."); 513 | info!("Building ways index..."); 514 | let ways_id_to_idx = ways_id_to_idx.build(); 515 | info!("Way index built."); 516 | Ok(ways_id_to_idx) 517 | } 518 | 519 | #[allow(clippy::too_many_arguments)] 520 | fn serialize_relation_blocks( 521 | builder: &osmflat::OsmBuilder, 522 | mut relation_ids: Option>, 523 | blocks: Vec, 524 | data: &[u8], 525 | nodes_id_to_idx: &ids::IdTable, 526 | ways_id_to_idx: &ids::IdTable, 527 | tags: &mut TagSerializer, 528 | stringtable: &mut StringTable, 529 | stats: &mut Stats, 530 | ) -> Result<(), Error> { 531 | // We need to build the index of relation ids first, since relations can refer 532 | // again to relations. 
let relations_id_to_idx = build_relations_index(data, blocks.clone().into_iter())?;

    let mut relations = builder.start_relations()?;
    let mut relation_members = builder.start_relation_members()?;

    let pb = ProgressBar::new(blocks.len() as u64)
        .with_style(pb_style())
        .with_prefix("Converting relations");
    parallel::parallel_process(
        blocks.into_iter(),
        |idx| read_block(data, &idx),
        |block| -> Result<osmpbf::PrimitiveBlock, Error> {
            let block = block?;
            *stats += serialize_relations(
                &block,
                nodes_id_to_idx,
                ways_id_to_idx,
                &relations_id_to_idx,
                stringtable,
                &mut relations,
                &mut relation_ids,
                &mut relation_members,
                tags,
            )?;
            pb.inc(1);
            Ok(block)
        },
    )?;

    {
        // Sentinel entry: its tag_first_idx marks the end of the tag range of
        // the last relation.
        let sentinel = relations.grow()?;
        sentinel.set_tag_first_idx(tags.next_index());
    }

    relations.close()?;
    if let Some(ids) = relation_ids {
        ids.close()?;
    }
    relation_members.close()?;

    pb.finish();
    info!("Relations converted.");

    Ok(())
}

/// Greatest common divisor of two non-negative integers (Euclid's algorithm).
fn gcd(a: i32, b: i32) -> i32 {
    let (mut x, mut y) = (a.min(b), a.max(b));
    // BUGFIX: iterate until x == 0 (was `x > 1`). Stopping at x == 1 returned
    // the previous remainder instead of 1 for coprime inputs, e.g.
    // gcd(3, 5) yielded 2. With `x > 0` the invariant gcd(x, y) is preserved
    // until x == 0, at which point y is the gcd.
    while x > 0 {
        y %= x;
        std::mem::swap(&mut x, &mut y);
    }
    y
}

fn run(args: args::Args) -> Result<(), Error> {
    let input_file = File::open(&args.input)?;
    // NOTE(review): mmap assumes the input file is not truncated/modified
    // concurrently while being read — confirm this is acceptable for the CLI.
    let input_data = unsafe { Mmap::map(&input_file)? };

    let storage = FileResourceStorage::new(args.output.clone());
    let builder = osmflat::OsmBuilder::new(storage.clone())?;

    // TODO: Would be nice not store all these strings in memory, but to flush them
    // from time to time to disk.
597 | let mut stringtable = StringTable::new(); 598 | let mut tags = TagSerializer::new(&builder)?; 599 | 600 | info!( 601 | "Initialized new osmflat archive at: {}", 602 | &args.output.display() 603 | ); 604 | 605 | info!("Building index of PBF blocks..."); 606 | let block_index = build_block_index(&input_data); 607 | let mut greatest_common_granularity = 1000000000; 608 | for block in &block_index { 609 | if block.block_type == BlockType::DenseNodes { 610 | // only DenseNodes have coordinate we need to scale 611 | if let Some(block_granularity) = block.granularity { 612 | greatest_common_granularity = 613 | gcd(greatest_common_granularity, block_granularity as i32); 614 | } 615 | } 616 | } 617 | let coord_scale = 1000000000 / greatest_common_granularity; 618 | info!( 619 | "Greatest common granularity: {}, Coordinate scaling factor: {}", 620 | greatest_common_granularity, coord_scale 621 | ); 622 | 623 | // TODO: move out into a function 624 | let groups = block_index.into_iter().chunk_by(|b| b.block_type); 625 | let mut pbf_header = Vec::new(); 626 | let mut pbf_dense_nodes = Vec::new(); 627 | let mut pbf_ways = Vec::new(); 628 | let mut pbf_relations = Vec::new(); 629 | for (block_type, blocks) in &groups { 630 | match block_type { 631 | BlockType::Header => pbf_header = blocks.collect(), 632 | BlockType::Nodes => panic!("Found nodes block, only dense nodes are supported now"), 633 | BlockType::DenseNodes => pbf_dense_nodes = blocks.collect(), 634 | BlockType::Ways => pbf_ways = blocks.collect(), 635 | BlockType::Relations => pbf_relations = blocks.collect(), 636 | } 637 | } 638 | info!("PBF block index built."); 639 | 640 | // Serialize header 641 | if pbf_header.len() != 1 { 642 | return Err(format!( 643 | "Require exactly one header block, but found {}", 644 | pbf_header.len() 645 | ) 646 | .into()); 647 | } 648 | let idx = &pbf_header[0]; 649 | let pbf_header: osmpbf::HeaderBlock = read_block(&input_data, idx)?; 650 | serialize_header(&pbf_header, 
coord_scale, &builder, &mut stringtable)?; 651 | info!("Header written."); 652 | 653 | let mut stats = Stats::default(); 654 | 655 | let ids_archive; 656 | let mut node_ids = None; 657 | let mut way_ids = None; 658 | let mut relation_ids = None; 659 | if args.ids { 660 | ids_archive = builder.ids()?; 661 | node_ids = Some(ids_archive.start_nodes()?); 662 | way_ids = Some(ids_archive.start_ways()?); 663 | relation_ids = Some(ids_archive.start_relations()?); 664 | } 665 | 666 | let nodes_id_to_idx = serialize_dense_node_blocks( 667 | &builder, 668 | greatest_common_granularity, 669 | node_ids, 670 | pbf_dense_nodes, 671 | &input_data, 672 | &mut tags, 673 | &mut stringtable, 674 | &mut stats, 675 | )?; 676 | 677 | let ways_id_to_idx = serialize_way_blocks( 678 | &builder, 679 | way_ids, 680 | pbf_ways, 681 | &input_data, 682 | &nodes_id_to_idx, 683 | &mut tags, 684 | &mut stringtable, 685 | &mut stats, 686 | )?; 687 | 688 | serialize_relation_blocks( 689 | &builder, 690 | relation_ids, 691 | pbf_relations, 692 | &input_data, 693 | &nodes_id_to_idx, 694 | &ways_id_to_idx, 695 | &mut tags, 696 | &mut stringtable, 697 | &mut stats, 698 | )?; 699 | 700 | // Finalize data structures 701 | tags.close(); // drop the reference to stringtable 702 | 703 | info!("Writing stringtable to disk..."); 704 | builder.set_stringtable(&stringtable.into_bytes())?; 705 | 706 | info!("osmflat archive built."); 707 | 708 | std::mem::drop(builder); 709 | osmflat::Osm::open(storage)?; 710 | 711 | info!("verified that osmflat archive can be opened."); 712 | 713 | println!("{stats}"); 714 | Ok(()) 715 | } 716 | 717 | fn pb_style() -> ProgressStyle { 718 | ProgressStyle::with_template("{prefix:>24} [{bar:23}] {pos}/{len}: {per_sec} {elapsed}") 719 | .unwrap() 720 | .progress_chars("=> ") 721 | } 722 | 723 | fn main() { 724 | let args = args::Args::parse(); 725 | let level = match args.verbose { 726 | 0 => "info", 727 | 1 => "debug", 728 | _ => "trace", 729 | }; 730 | 
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or(level)) 731 | .format_target(false) 732 | .format_module_path(false) 733 | .format_timestamp_nanos() 734 | .init(); 735 | 736 | if let Err(e) = run(args) { 737 | error!("{e}"); 738 | std::process::exit(1); 739 | } 740 | } 741 | -------------------------------------------------------------------------------- /osmflatc/src/osmpbf.rs: -------------------------------------------------------------------------------- 1 | #![allow(unknown_lints, clippy::derive_partial_eq_without_eq)] 2 | 3 | use byteorder::{ByteOrder, NetworkEndian}; 4 | use flate2::read::ZlibDecoder; 5 | use log::info; 6 | use prost::{self, Message}; 7 | use rayon::prelude::*; 8 | 9 | use std::io::{self, Read}; 10 | 11 | include!(concat!(env!("OUT_DIR"), "/osmpbf.rs")); 12 | 13 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 14 | pub enum BlockType { 15 | Header, 16 | Nodes, 17 | DenseNodes, 18 | Ways, 19 | Relations, 20 | } 21 | 22 | /// Decode block type from PrimitiveBlock protobuf message 23 | /// 24 | /// This does not decode any fields, it just checks which tags are present 25 | /// in PrimitiveGroup fields of the message. 26 | /// 27 | /// `blob` should contain decompressed data of an OSMData PrimitiveBlock. 28 | /// 29 | /// Note: We use public API of `prost` crate, which though is not exposed in 30 | /// the crate and marked with comment that it should be only used from 31 | /// `prost::Message`. 
32 | pub fn type_and_granularity_from_osmdata_blob(mut blob: &[u8]) -> io::Result<(BlockType, u64)> { 33 | const PRIMITIVE_GROUP_TAG: u32 = 2; 34 | const GRANULARITY_TAG: u32 = 17; 35 | const NODES_TAG: u32 = 1; 36 | const DENSE_NODES_TAG: u32 = 2; 37 | const WAY_STAG: u32 = 3; 38 | const RELATIONS_TAG: u32 = 4; 39 | const CHANGESETS_TAG: u32 = 5; 40 | 41 | let mut block_type = None; 42 | let mut granularity = 100; // default value 43 | while !blob.is_empty() { 44 | // decode fields of PrimitiveBlock 45 | let (key, wire_type) = prost::encoding::decode_key(&mut blob)?; 46 | let mut blob_copy = blob; 47 | if key == PRIMITIVE_GROUP_TAG { 48 | // We found a PrimitiveGroup field. There could be several of them, but 49 | // follwoing the specs of OSMPBF, all of them will have the same single 50 | // optional field, which defines the type of the block. 51 | 52 | // Decode the number of primitive groups. 53 | let _ = prost::encoding::decode_varint(&mut blob_copy)?; 54 | // Decode the tag of the first primitive group defining the type. 
55 | let (tag, _wire_type) = prost::encoding::decode_key(&mut blob_copy)?; 56 | block_type = match tag { 57 | NODES_TAG => Some(BlockType::Nodes), 58 | DENSE_NODES_TAG => Some(BlockType::DenseNodes), 59 | WAY_STAG => Some(BlockType::Ways), 60 | RELATIONS_TAG => Some(BlockType::Relations), 61 | CHANGESETS_TAG => { 62 | panic!("found block containing unsupported changesets"); 63 | } 64 | _ => { 65 | panic!("invalid input data: malformed primitive block"); 66 | } 67 | }; 68 | } else if key == GRANULARITY_TAG { 69 | granularity = prost::encoding::decode_varint(&mut blob_copy)?; 70 | } 71 | // skip payload 72 | prost::encoding::skip_field( 73 | wire_type, 74 | key, 75 | &mut blob, 76 | prost::encoding::DecodeContext::default(), 77 | )?; 78 | } 79 | match block_type { 80 | None => panic!("Found block without primitive group"), 81 | Some(x) => Ok((x, granularity)), 82 | } 83 | } 84 | 85 | #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] 86 | pub struct BlockIndex { 87 | pub block_type: BlockType, 88 | pub granularity: Option, 89 | pub blob_start: usize, 90 | pub blob_len: usize, 91 | } 92 | 93 | struct BlockIndexIterator<'a> { 94 | data: &'a [u8], 95 | cursor: usize, 96 | } 97 | 98 | enum BlobInfo { 99 | Header(BlockIndex), 100 | Unknown(usize, usize, Vec), 101 | } 102 | 103 | impl<'a> BlockIndexIterator<'a> { 104 | fn new(data: &'a [u8]) -> Self { 105 | Self { data, cursor: 0 } 106 | } 107 | 108 | fn read(&mut self, len: usize) -> &[u8] { 109 | let data = &self.data[self.cursor..self.cursor + len]; 110 | self.cursor += len; 111 | data 112 | } 113 | 114 | fn next_blob(&mut self) -> Result { 115 | // read size of blob header 116 | let blob_header_len: i32 = NetworkEndian::read_i32(self.read(4)); 117 | 118 | // read blob header 119 | let blob_header = BlobHeader::decode(self.read(blob_header_len as usize))?; 120 | 121 | let blob_start = self.cursor; 122 | let blob_len = blob_header.datasize as usize; 123 | 124 | if blob_header.r#type == "OSMHeader" { 125 | 
self.cursor += blob_len; 126 | Ok(BlobInfo::Header(BlockIndex { 127 | block_type: BlockType::Header, 128 | granularity: None, 129 | blob_start, 130 | blob_len, 131 | })) 132 | } else if blob_header.r#type == "OSMData" { 133 | // read blob 134 | Ok(BlobInfo::Unknown( 135 | blob_start, 136 | blob_len, 137 | self.read(blob_header.datasize as usize).to_vec(), 138 | )) 139 | } else { 140 | panic!("unknown blob type"); 141 | } 142 | } 143 | } 144 | 145 | impl<'a> Iterator for BlockIndexIterator<'a> { 146 | type Item = Result; 147 | fn next(&mut self) -> Option { 148 | if self.cursor < self.data.len() { 149 | Some(self.next_blob()) 150 | } else { 151 | None 152 | } 153 | } 154 | } 155 | 156 | pub fn read_block( 157 | data: &[u8], 158 | idx: &BlockIndex, 159 | ) -> Result { 160 | let blob = Blob::decode(&data[idx.blob_start..idx.blob_start + idx.blob_len])?; 161 | 162 | let mut blob_buf = Vec::new(); 163 | let blob_data = if blob.raw.is_some() { 164 | blob.raw.as_ref().unwrap() 165 | } else if blob.zlib_data.is_some() { 166 | // decompress zlib data 167 | let data: &Vec = blob.zlib_data.as_ref().unwrap(); 168 | let mut decoder = ZlibDecoder::new(&data[..]); 169 | decoder.read_to_end(&mut blob_buf)?; 170 | &blob_buf 171 | } else { 172 | return Err(io::Error::new( 173 | io::ErrorKind::InvalidData, 174 | "unknown compression", 175 | )); 176 | }; 177 | Ok(T::decode(blob_data.as_slice())?) 
178 | } 179 | 180 | fn blob_type_and_granularity_from_blob_info( 181 | blob_start: usize, 182 | blob_len: usize, 183 | blob: Vec, 184 | ) -> Result { 185 | let blob = Blob::decode(blob.as_slice())?; 186 | 187 | let mut blob_buf = Vec::new(); 188 | let blob_data = if blob.raw.is_some() { 189 | // use raw bytes 190 | blob.raw.as_ref().unwrap() 191 | } else if blob.zlib_data.is_some() { 192 | // decompress zlib data 193 | let data: &Vec = blob.zlib_data.as_ref().unwrap(); 194 | let mut decoder = ZlibDecoder::new(&data[..]); 195 | decoder.read_to_end(&mut blob_buf)?; 196 | &blob_buf 197 | } else { 198 | panic!("can only read raw or zlib compressed blob"); 199 | }; 200 | assert_eq!( 201 | blob_data.len(), 202 | blob.raw_size.unwrap_or(blob_data.len() as i32) as usize 203 | ); 204 | 205 | let (block_type, granularity) = type_and_granularity_from_osmdata_blob(&blob_data[..])?; 206 | Ok(BlockIndex { 207 | block_type, 208 | granularity: Some(granularity), 209 | blob_start, 210 | blob_len, 211 | }) 212 | } 213 | 214 | pub fn build_block_index(pbf_data: &[u8]) -> Vec { 215 | let mut result: Vec = BlockIndexIterator::new(pbf_data) 216 | .par_bridge() 217 | .filter_map(|blob| { 218 | let block = match blob { 219 | Ok(BlobInfo::Header(b)) => Ok(b), 220 | Ok(BlobInfo::Unknown(start, len, blob)) => { 221 | blob_type_and_granularity_from_blob_info(start, len, blob) 222 | } 223 | Err(e) => Err(e), 224 | }; 225 | match block { 226 | Ok(b) => Some(b), 227 | Err(e) => { 228 | eprintln!("Skipping block due to error: {e}"); 229 | None 230 | } 231 | } 232 | }) 233 | .collect(); 234 | result.par_sort_unstable(); 235 | info!("Found {} blocks", result.len()); 236 | result 237 | } 238 | -------------------------------------------------------------------------------- /osmflatc/src/parallel.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Reverse; 2 | use std::collections::BTreeMap; 3 | use std::sync::{mpsc::sync_channel, Arc}; 4 | 5 | use 
/// Processes the items of `iter` in parallel while consuming the results
/// strictly in input order.
///
/// `produce` runs on a pool of `rayon::current_num_threads()` worker
/// threads; `consume` runs on the calling thread, once per item, in the
/// original iteration order. Whatever `consume` returns ("garbage") is
/// shipped to a dedicated thread just to be dropped there, so expensive
/// deallocations don't stall the ordered consumer loop.
///
/// Backpressure: the shared counter (seeded with `2 * num_threads`) plus
/// the condvar stop producers from running more than `2 * num_threads`
/// items ahead of the consumer; the `sync_channel` of the same capacity
/// bounds finished-but-unconsumed results.
///
/// Returns the first error produced by `consume`, if any.
///
/// NOTE(review): if `consume` returns an error, producers still blocked in
/// `cond.wait` are never notified again — this looks like it could hang the
/// final join on the error path; confirm before relying on error recovery.
pub fn parallel_process(
    iter: Iter,
    produce: Producer,
    mut consume: Consumer,
) -> Result<(), Error>
where
    Iter: Iterator + Send,
    Producer: Fn(Item) -> Data + Sync,
    Data: Send,
    Consumer: FnMut(Data) -> Result,
    Garbage: Send + 'static,
{
    let num_threads = rayon::current_num_threads();

    // Shared, lock-protected source of (index, item) pairs for the workers.
    let iter = Arc::new(Mutex::new(iter.enumerate()));
    // Admission counter + condvar: a producer may emit item `i` only once the
    // counter exceeds `i`; the consumer bumps it once per consumed item.
    let next = Arc::new((Mutex::new(2 * num_threads), Condvar::new()));

    crossbeam::scope(|s| {
        let (sender, receiver) = sync_channel(2 * num_threads);
        for _ in 0..num_threads {
            let sender = sender.clone();
            let iter = iter.clone();
            s.spawn(|_| {
                let sender = sender;
                let iter = iter;
                loop {
                    // Pull the next (index, item); lock released before producing.
                    let (i, item) = {
                        match iter.lock().next() {
                            None => break,
                            Some(x) => x,
                        }
                    };

                    let data = produce(item);

                    // Throttle: wait until the consumer has admitted index `i`.
                    let (counter, cond) = &*next;
                    {
                        let mut guard = counter.lock();
                        while *guard <= i {
                            cond.wait(&mut guard);
                        }
                    }

                    sender.send((i, data)).unwrap();
                }
            });
        }
        drop(sender); // drop to make sure iteration will finish once all senders are out of scope

        let (garbage_sender, garbage_receiver) = sync_channel(2 * num_threads);

        std::thread::spawn(move || {
            // we move dropping of heavy objects to other threads as they can have a lot
            // of allocations (e.g. Vec)
            for garbage in garbage_receiver {
                std::mem::drop(garbage);
            }
        });

        // Re-order out-of-order arrivals: park them in `pending` until the
        // next expected index shows up, then drain the consecutive run.
        let mut pending = BTreeMap::new();
        let mut next_idx = 0;
        for result in receiver {
            pending.insert(Reverse(result.0), result.1);
            while let Some(data) = pending.remove(&Reverse(next_idx)) {
                {
                    // Admit one more in-flight item and wake waiting producers.
                    let mut guard = next.0.lock();
                    *guard += 1;
                    next.1.notify_all();
                }

                next_idx += 1;
                let garbage = consume(data)?;
                garbage_sender.send(garbage).unwrap();
            }
        }
        Ok(())
    })
    .expect("thread panicked")
}
36 | optional bytes zlib_data = 3; 37 | 38 | // PROPOSED feature for LZMA compressed data. SUPPORT IS NOT REQUIRED. 39 | optional bytes lzma_data = 4; 40 | 41 | // Formerly used for bzip2 compressed data. Depreciated in 2010. 42 | optional bytes OBSOLETE_bzip2_data = 5 [deprecated=true]; // Don't reuse this tag number. 43 | } 44 | 45 | /* A file contains an sequence of fileblock headers, each prefixed by 46 | their length in network byte order, followed by a data block 47 | containing the actual data. types staring with a "_" are reserved. 48 | */ 49 | 50 | message BlobHeader { 51 | required string type = 1; 52 | optional bytes indexdata = 2; 53 | required int32 datasize = 3; 54 | } 55 | 56 | 57 | -------------------------------------------------------------------------------- /osmflatc/src/proto/osmformat.proto: -------------------------------------------------------------------------------- 1 | /** Copyright (c) 2010 Scott A. Crosby. 2 | 3 | This program is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU Lesser General Public License as 5 | published by the Free Software Foundation, either version 3 of the 6 | License, or (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU Lesser General Public License for more details. 12 | 13 | You should have received a copy of the GNU Lesser General Public License 14 | along with this program. If not, see . 15 | 16 | */ 17 | 18 | syntax = "proto2"; 19 | 20 | option optimize_for = LITE_RUNTIME; 21 | option java_package = "org.openstreetmap.osmosis.osmbinary"; 22 | package OSMPBF; 23 | 24 | /* OSM Binary file format 25 | 26 | This is the master schema file of the OSM binary file format. This 27 | file is designed to support limited random-access and future 28 | extendability. 
29 | 30 | A binary OSM file consists of a sequence of FileBlocks (please see 31 | fileformat.proto). The first fileblock contains a serialized instance 32 | of HeaderBlock, followed by a sequence of PrimitiveBlock blocks that 33 | contain the primitives. 34 | 35 | Each primitiveblock is designed to be independently parsable. It 36 | contains a string table storing all strings in that block (keys and 37 | values in tags, roles in relations, usernames, etc.) as well as 38 | metadata containing the precision of coordinates or timestamps in that 39 | block. 40 | 41 | A primitiveblock contains a sequence of primitive groups, each 42 | containing primitives of the same type (nodes, densenodes, ways, 43 | relations). Coordinates are stored in signed 64-bit integers. Lat&lon 44 | are measured in units nanodegrees. The default of 45 | granularity of 100 nanodegrees corresponds to about 1cm on the ground, 46 | and a full lat or lon fits into 32 bits. 47 | 48 | Converting an integer to a lattitude or longitude uses the formula: 49 | $OUT = IN * granularity / 10**9$. Many encoding schemes use delta 50 | coding when representing nodes and relations. 51 | 52 | */ 53 | 54 | ////////////////////////////////////////////////////////////////////////// 55 | ////////////////////////////////////////////////////////////////////////// 56 | 57 | /* Contains the file header. */ 58 | 59 | message HeaderBlock { 60 | optional HeaderBBox bbox = 1; 61 | /* Additional tags to aid in parsing this dataset */ 62 | repeated string required_features = 4; 63 | repeated string optional_features = 5; 64 | 65 | optional string writingprogram = 16; 66 | optional string source = 17; // From the bbox field. 67 | 68 | /* Tags that allow continuing an Osmosis replication */ 69 | 70 | // replication timestamp, expressed in seconds since the epoch, 71 | // otherwise the same value as in the "timestamp=..." 
field 72 | // in the state.txt file used by Osmosis 73 | optional int64 osmosis_replication_timestamp = 32; 74 | 75 | // replication sequence number (sequenceNumber in state.txt) 76 | optional int64 osmosis_replication_sequence_number = 33; 77 | 78 | // replication base URL (from Osmosis' configuration.txt file) 79 | optional string osmosis_replication_base_url = 34; 80 | } 81 | 82 | 83 | /** The bounding box field in the OSM header. BBOX, as used in the OSM 84 | header. Units are always in nanodegrees -- they do not obey 85 | granularity rules. */ 86 | 87 | message HeaderBBox { 88 | required sint64 left = 1; 89 | required sint64 right = 2; 90 | required sint64 top = 3; 91 | required sint64 bottom = 4; 92 | } 93 | 94 | 95 | /////////////////////////////////////////////////////////////////////// 96 | /////////////////////////////////////////////////////////////////////// 97 | 98 | 99 | message PrimitiveBlock { 100 | required StringTable stringtable = 1; 101 | repeated PrimitiveGroup primitivegroup = 2; 102 | 103 | // Granularity, units of nanodegrees, used to store coordinates in this block 104 | optional int32 granularity = 17 [default=100]; 105 | // Offset value between the output coordinates coordinates and the granularity grid in unites of nanodegrees. 106 | optional int64 lat_offset = 19 [default=0]; 107 | optional int64 lon_offset = 20 [default=0]; 108 | 109 | // Granularity of dates, normally represented in units of milliseconds since the 1970 epoch. 110 | optional int32 date_granularity = 18 [default=1000]; 111 | 112 | 113 | // Proposed extension: 114 | //optional BBox bbox = XX; 115 | } 116 | 117 | // Group of OSMPrimitives. All primitives in a group must be the same type. 
118 | message PrimitiveGroup { 119 | repeated Node nodes = 1; 120 | optional DenseNodes dense = 2; 121 | repeated Way ways = 3; 122 | repeated Relation relations = 4; 123 | repeated ChangeSet changesets = 5; 124 | } 125 | 126 | 127 | /** String table, contains the common strings in each block. 128 | 129 | Note that we reserve index '0' as a delimiter, so the entry at that 130 | index in the table is ALWAYS blank and unused. 131 | 132 | */ 133 | message StringTable { 134 | repeated bytes s = 1; 135 | } 136 | 137 | /* Optional metadata that may be included into each primitive. */ 138 | message Info { 139 | optional int32 version = 1 [default = -1]; 140 | optional int64 timestamp = 2; 141 | optional int64 changeset = 3; 142 | optional int32 uid = 4; 143 | optional uint32 user_sid = 5; // String IDs 144 | 145 | // The visible flag is used to store history information. It indicates that 146 | // the current object version has been created by a delete operation on the 147 | // OSM API. 148 | // When a writer sets this flag, it MUST add a required_features tag with 149 | // value "HistoricalInformation" to the HeaderBlock. 150 | // If this flag is not available for some object it MUST be assumed to be 151 | // true if the file has the required_features tag "HistoricalInformation" 152 | // set. 153 | optional bool visible = 6; 154 | } 155 | 156 | /** Optional metadata that may be included into each primitive. Special dense format used in DenseNodes. */ 157 | message DenseInfo { 158 | repeated int32 version = 1 [packed = true]; 159 | repeated sint64 timestamp = 2 [packed = true]; // DELTA coded 160 | repeated sint64 changeset = 3 [packed = true]; // DELTA coded 161 | repeated sint32 uid = 4 [packed = true]; // DELTA coded 162 | repeated sint32 user_sid = 5 [packed = true]; // String IDs for usernames. DELTA coded 163 | 164 | // The visible flag is used to store history information. 
It indicates that 165 | // the current object version has been created by a delete operation on the 166 | // OSM API. 167 | // When a writer sets this flag, it MUST add a required_features tag with 168 | // value "HistoricalInformation" to the HeaderBlock. 169 | // If this flag is not available for some object it MUST be assumed to be 170 | // true if the file has the required_features tag "HistoricalInformation" 171 | // set. 172 | repeated bool visible = 6 [packed = true]; 173 | } 174 | 175 | 176 | // THIS IS STUB DESIGN FOR CHANGESETS. NOT USED RIGHT NOW. 177 | // TODO: REMOVE THIS? 178 | message ChangeSet { 179 | required int64 id = 1; 180 | // 181 | // // Parallel arrays. 182 | // repeated uint32 keys = 2 [packed = true]; // String IDs. 183 | // repeated uint32 vals = 3 [packed = true]; // String IDs. 184 | // 185 | // optional Info info = 4; 186 | 187 | // optional int64 created_at = 8; 188 | // optional int64 closetime_delta = 9; 189 | // optional bool open = 10; 190 | // optional HeaderBBox bbox = 11; 191 | } 192 | 193 | 194 | message Node { 195 | required sint64 id = 1; 196 | // Parallel arrays. 197 | repeated uint32 keys = 2 [packed = true]; // String IDs. 198 | repeated uint32 vals = 3 [packed = true]; // String IDs. 199 | 200 | optional Info info = 4; // May be omitted in omitmeta 201 | 202 | required sint64 lat = 8; 203 | required sint64 lon = 9; 204 | } 205 | 206 | /* Used to densly represent a sequence of nodes that do not have any tags. 207 | 208 | We represent these nodes columnwise as five columns: ID's, lats, and 209 | lons, all delta coded. When metadata is not omitted, 210 | 211 | We encode keys & vals for all nodes as a single array of integers 212 | containing key-stringid and val-stringid, using a stringid of 0 as a 213 | delimiter between nodes. 
214 | 215 | ( ( )* '0' )* 216 | */ 217 | 218 | message DenseNodes { 219 | repeated sint64 id = 1 [packed = true]; // DELTA coded 220 | 221 | //repeated Info info = 4; 222 | optional DenseInfo denseinfo = 5; 223 | 224 | repeated sint64 lat = 8 [packed = true]; // DELTA coded 225 | repeated sint64 lon = 9 [packed = true]; // DELTA coded 226 | 227 | // Special packing of keys and vals into one array. May be empty if all nodes in this block are tagless. 228 | repeated int32 keys_vals = 10 [packed = true]; 229 | } 230 | 231 | 232 | message Way { 233 | required int64 id = 1; 234 | // Parallel arrays. 235 | repeated uint32 keys = 2 [packed = true]; 236 | repeated uint32 vals = 3 [packed = true]; 237 | 238 | optional Info info = 4; 239 | 240 | repeated sint64 refs = 8 [packed = true]; // DELTA coded 241 | } 242 | 243 | message Relation { 244 | enum MemberType { 245 | NODE = 0; 246 | WAY = 1; 247 | RELATION = 2; 248 | } 249 | required int64 id = 1; 250 | 251 | // Parallel arrays. 252 | repeated uint32 keys = 2 [packed = true]; 253 | repeated uint32 vals = 3 [packed = true]; 254 | 255 | optional Info info = 4; 256 | 257 | // Parallel arrays 258 | repeated int32 roles_sid = 8 [packed = true]; 259 | repeated sint64 memids = 9 [packed = true]; // DELTA encoded 260 | repeated MemberType types = 10 [packed = true]; 261 | } 262 | 263 | -------------------------------------------------------------------------------- /osmflatc/src/stats.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::ops::AddAssign; 3 | 4 | #[derive(Debug, Default)] 5 | pub struct Stats { 6 | pub num_nodes: usize, 7 | pub num_ways: usize, 8 | pub num_relations: usize, 9 | pub num_unresolved_node_ids: usize, 10 | pub num_unresolved_way_ids: usize, 11 | pub num_unresolved_rel_ids: usize, 12 | } 13 | 14 | impl AddAssign for Stats { 15 | #[inline] 16 | fn add_assign(&mut self, other: Self) { 17 | self.num_nodes += other.num_nodes; 18 | self.num_ways += 
other.num_ways; 19 | self.num_relations += other.num_relations; 20 | self.num_unresolved_node_ids += other.num_unresolved_node_ids; 21 | self.num_unresolved_way_ids += other.num_unresolved_way_ids; 22 | self.num_unresolved_rel_ids += other.num_unresolved_rel_ids; 23 | } 24 | } 25 | 26 | impl fmt::Display for Stats { 27 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 28 | write!( 29 | f, 30 | r#"Converted: 31 | nodes: {} 32 | ways: {} 33 | relations: {} 34 | Unresolved ids: 35 | nodes: {} 36 | ways: {} 37 | relations: {}"#, 38 | self.num_nodes, 39 | self.num_ways, 40 | self.num_relations, 41 | self.num_unresolved_node_ids, 42 | self.num_unresolved_way_ids, 43 | self.num_unresolved_rel_ids 44 | ) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /osmflatc/src/strings.rs: -------------------------------------------------------------------------------- 1 | use ahash::AHashMap; 2 | 3 | #[derive(Debug, Clone, Copy)] 4 | struct TerminatedStringPtr { 5 | ptr: *const u8, 6 | } 7 | 8 | // We use this (unsafe) wrapper to get the most compact hashmap possible 9 | // Using a "&'static str" would be bigger due to the length stored 10 | // Using a String would allocate a lot of individual blocks 11 | // Using a small-string-optimized structure would create large objects 12 | impl TerminatedStringPtr { 13 | /// Safety: 14 | /// Requires the data pointed to to: 15 | /// * Be \0 terminated 16 | /// * Outlive TerminatedStringPtr 17 | unsafe fn from_ptr(ptr: *const u8) -> Self { 18 | Self { ptr } 19 | } 20 | 21 | fn as_bytes(&self) -> &[u8] { 22 | // Safety: 23 | // If constructed properly from a 0-terminated string that outlives this instance this is safe 24 | unsafe { std::ffi::CStr::from_ptr(self.ptr as *const i8).to_bytes() } 25 | } 26 | } 27 | 28 | impl PartialEq for TerminatedStringPtr { 29 | fn eq(&self, other: &TerminatedStringPtr) -> bool { 30 | self.as_bytes() == other.as_bytes() 31 | } 32 | } 33 | 34 | 
impl std::hash::Hash for TerminatedStringPtr { 35 | fn hash(&self, h: &mut H) 36 | where 37 | H: std::hash::Hasher, 38 | { 39 | self.as_bytes().hash(h) 40 | } 41 | } 42 | 43 | impl Eq for TerminatedStringPtr {} 44 | 45 | impl std::borrow::Borrow<[u8]> for TerminatedStringPtr { 46 | fn borrow(&self) -> &[u8] { 47 | self.as_bytes() 48 | } 49 | } 50 | 51 | #[derive(Debug, Default)] 52 | pub struct StringTable { 53 | // Append only, we will never reallocate any data inside 54 | data: Vec>, 55 | 56 | // The hashmap references strings in the data block 57 | // Since we cannot prove to the compiler that the strings 58 | // will be "alive" long enough we have to manage lifetime ourselves 59 | indexed_data: AHashMap, 60 | 61 | size_in_bytes: u64, 62 | } 63 | 64 | impl StringTable { 65 | pub fn new() -> Self { 66 | Default::default() 67 | } 68 | 69 | /// Inserts a string into string table and returns its index. 70 | /// 71 | /// If the string was already inserted before, the string is deduplicated 72 | /// and the index to the previous string is returned. 
73 | pub fn insert(&mut self, s: &str) -> u64 { 74 | // Horrible news, we cannot use entry API since it does not support Borrow 75 | // See: https://github.com/rust-lang/rust/issues/56167 76 | if let Some(&idx) = self.indexed_data.get(s.as_bytes()) { 77 | return idx; 78 | } 79 | 80 | let idx = self.size_in_bytes; 81 | if self 82 | .data 83 | .last() 84 | .filter(|x| x.len() + s.len() < x.capacity()) // str-len + \0 85 | .is_none() 86 | { 87 | self.data 88 | .push(Vec::with_capacity((1024 * 1024 * 4).max(s.len() + 1))); 89 | } 90 | // unwrap is ok here, since we just ensured that there is always one entry 91 | let buffer = self.data.last_mut().unwrap(); 92 | let pos = buffer.len(); 93 | let ptr_before = buffer.as_ptr(); 94 | buffer.extend(s.as_bytes()); 95 | buffer.push(0); 96 | // Safety: We must never reallocate the buffer 97 | debug_assert_eq!(ptr_before, buffer.as_ptr()); 98 | let key = unsafe { 99 | // convert back to str (safe since we know that it is valid UTF, it was created from a str) 100 | let key: &str = std::str::from_utf8_unchecked(&buffer[pos..]); 101 | // safe since we make sure to never reallocate/free any buffer 102 | let key_ptr = key.as_ptr(); 103 | TerminatedStringPtr::from_ptr(key_ptr) 104 | }; 105 | self.indexed_data.insert(key, idx); 106 | 107 | self.size_in_bytes += s.len() as u64 + 1; 108 | idx 109 | } 110 | 111 | pub fn into_bytes(self) -> Vec { 112 | let Self { 113 | data, 114 | indexed_data, 115 | size_in_bytes, 116 | } = self; 117 | std::mem::drop(indexed_data); 118 | 119 | let mut result = Vec::with_capacity(size_in_bytes as usize); 120 | for buffer in data { 121 | result.extend(buffer); // also drops buffer 122 | } 123 | result 124 | } 125 | } 126 | 127 | #[cfg(test)] 128 | mod test { 129 | use super::StringTable; 130 | use proptest::prelude::*; 131 | use std::collections::HashSet; 132 | 133 | #[test] 134 | fn test_simple_insert() { 135 | let mut st = StringTable::new(); 136 | assert_eq!(st.insert("hello"), 0); 137 | 
assert_eq!(st.insert("world"), 6); 138 | assert_eq!(st.insert("world"), 6); 139 | assert_eq!(st.insert("!"), 6 + 6); 140 | assert_eq!(st.insert("!"), 6 + 6); 141 | assert_eq!(st.insert("!"), 6 + 6); 142 | 143 | let bytes = st.into_bytes(); 144 | println!("{}", ::std::str::from_utf8(&bytes).unwrap()); 145 | assert_eq!(bytes, b"hello\0world\0!\0"); 146 | } 147 | 148 | #[test] 149 | fn test_large_insert() { 150 | let mut st = StringTable::new(); 151 | assert_eq!(st.insert("hello"), 0); 152 | assert_eq!(st.insert(&str::repeat("x", 1024 * 1024 * 5)), 6); 153 | assert_eq!(st.insert("huh"), 1024 * 1024 * 5 + 1 + 6); 154 | assert_eq!(st.insert(&str::repeat("x", 1024 * 1024 * 5)), 6); 155 | assert_eq!(st.insert("hello"), 0); 156 | 157 | let bytes = st.into_bytes(); 158 | assert_eq!( 159 | bytes, 160 | ("hello\0".to_string() + &str::repeat("x", 1024 * 1024 * 5) + "\0huh\0").as_bytes() 161 | ); 162 | } 163 | 164 | #[derive(Debug, Default)] 165 | struct ReferenceStringTable { 166 | words: HashSet, 167 | data: Vec, 168 | } 169 | 170 | impl ReferenceStringTable { 171 | fn insert(&mut self, input: String) { 172 | if !self.words.contains(&input) { 173 | self.words.insert(input.clone()); 174 | self.data.extend(input.as_bytes()); 175 | self.data.push(b'\0'); 176 | } 177 | } 178 | } 179 | 180 | proptest! 
{ 181 | #[test] 182 | fn sequence_of_insert(ref seq in prop::collection::vec("[^\x00]*", 1..100)) 183 | { 184 | let mut st = StringTable::new(); 185 | let mut reference_st = ReferenceStringTable::default(); 186 | for input in seq { 187 | st.insert(input); 188 | reference_st.insert(input.into()); 189 | } 190 | assert_eq!(st.into_bytes(), reference_st.data); 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | wrap_comments = true 2 | reorder_imports = true 3 | reorder_modules = true 4 | --------------------------------------------------------------------------------