├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── flatdata └── osm.flatdata ├── osmflat ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── examples │ ├── README.md │ ├── berlin-features.png │ ├── berlin-features.svg │ ├── berlin-roads.png │ ├── cities.rs │ ├── count.rs │ ├── debug.rs │ ├── pub-names.rs │ ├── read.rs │ ├── render-features.rs │ ├── render-roads.rs │ └── road-length.rs └── src │ ├── lib.rs │ ├── osmflat_generated.rs │ └── tags.rs ├── osmflatc ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── build.rs └── src │ ├── args.rs │ ├── ids.rs │ ├── main.rs │ ├── osmpbf.rs │ ├── parallel.rs │ ├── proto │ ├── fileformat.proto │ └── osmformat.proto │ ├── stats.rs │ └── strings.rs └── rustfmt.toml /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | pull_request: 4 | push: 5 | branches: 6 | - master 7 | schedule: 8 | - cron: '00 01 * * *' 9 | 10 | env: 11 | CARGO_INCREMENTAL: 0 12 | RUST_BACKTRACE: short 13 | RUSTFLAGS: "-D warnings -W rust-2021-compatibility" 14 | RUSTUP_MAX_RETRIES: 10 15 | 16 | jobs: 17 | rust: 18 | name: Rust 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | matrix: 22 | build: [stable, macos, win-msvc, win-gnu] 23 | include: 24 | - build: stable 25 | os: ubuntu-latest 26 | rust: stable 27 | - build: macos 28 | os: macOS-latest 29 | rust: stable 30 | - build: win-msvc 31 | os: windows-2019 32 | rust: stable 33 | - build: win-gnu 34 | os: windows-2019 35 | rust: stable-x86_64-gnu 36 | env: 37 | RUSTFLAGS: -D warnings 38 | CARGO_INCREMENTAL: 0 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v1 42 | with: 43 | fetch-depth: 1 44 | - name: Install Protoc 45 | uses: arduino/setup-protoc@v1 46 | with: 47 | repo-token: ${{ secrets.GITHUB_TOKEN }} 48 | - name: Install Rust 49 | uses: hecrj/setup-rust-action@v1 50 | with: 51 | rust-version: 
${{ matrix.rust }} 52 | - run: cargo build --all-targets 53 | - run: cargo test 54 | - run: cargo doc 55 | 56 | rustfmt: 57 | name: rustfmt 58 | runs-on: ubuntu-latest 59 | steps: 60 | - name: Checkout repository 61 | uses: actions/checkout@v1 62 | with: 63 | fetch-depth: 1 64 | - name: Install rustfmt 65 | run: rustup component add rustfmt 66 | - name: Check formatting 67 | run: cargo fmt -- --check 68 | 69 | clippy: 70 | name: clippy 71 | runs-on: ubuntu-latest 72 | steps: 73 | - name: Checkout repository 74 | uses: actions/checkout@v1 75 | with: 76 | fetch-depth: 1 77 | - name: Install Protoc 78 | uses: arduino/setup-protoc@v1 79 | - name: Install clippy 80 | run: rustup component add clippy 81 | - name: Clippy 82 | run: cargo clippy --all-targets 83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Cargo.lock 2 | /target 3 | **/*.rs.bk 4 | /data 5 | **/.DS_Store 6 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "osmflat", 4 | "osmflatc", 5 | ] 6 | resolver = "2" 7 | 8 | [patch.crates-io] 9 | osmflat = { path = "osmflat" } 10 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 The Rust Project Developers 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # osmflat ![][ci] 2 | 3 | ![berlin-features] 4 | 5 | Flat OpenStreetMap (OSM) data format providing an efficient *random* data 6 | access through [memory mapped files]. 7 | 8 | The data format is described and implemented in [flatdata]. The [schema] 9 | describes the fundamental OSM data structures: nodes, ways, relations and 10 | tags as simple non-nested data structures. The relations between these are 11 | expressed through indexes. 12 | 13 | ## Compiler 14 | 15 | Besides the library for working with osmflat archives, the crate `osmflatc` 16 | contains an OSM [pbf format][PBF format] to osmflat data compiler. 17 | 18 | To compile OSM data from pbf to osmflat use: 19 | 20 | ```shell 21 | cargo run --release -- input.osm.pbf output.osm.flatdata 22 | ``` 23 | 24 | The output is a flatdata which is a directory consisting of several 25 | files. The schema is also part of the archive. It is checked every time the 26 | archive is opened. This guarantees that the compiler which was used to produce 27 | the archive fits to the schema used for reading it. The archive data is not 28 | compressed. 29 | 30 | ## Using data 31 | 32 | You can use any [flatdata] supported language for reading an osmflat archive. 33 | For reading the data in Rust, we provide the `osmflat` crate. 
34 | 35 | First, add this to your Cargo.toml: 36 | 37 | ```toml 38 | [dependencies] 39 | osmflat = "0.3.0" 40 | ``` 41 | 42 | Now, you can open an osmflat archive as any other flatdata archive and read its 43 | data: 44 | 45 | ```rust 46 | use osmflat::{FileResourceStorage, Osm}; 47 | 48 | fn main() { 49 | let storage = FileResourceStorage::new("path/to/archive.osm.flatdata"); 50 | let archive = Osm::open(storage).unwrap(); 51 | 52 | for node in archive.nodes().iter() { 53 | println!("{:?}", node); 54 | } 55 | } 56 | ``` 57 | 58 | ## Examples 59 | 60 | Check the [osmflat/examples] directory. Feel free to add another example, if 61 | you have an idea what to do with the amazing OSM data in a few lines of code. 😁 62 | 63 | The above map was rendered by `osmflat/examples/render-roads.rs` in ~ 170 loc from 64 | the osmflat archive based on the [latest][latest-berlin-map] Berlin OSM data. 65 | 66 | ## License 67 | 68 | * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or 69 | http://www.apache.org/licenses/LICENSE-2.0) 70 | * MIT License ([LICENSE-MIT](LICENSE-MIT) or 71 | http://opensource.org/licenses/MIT) 72 | 73 | The files [src/proto/fileformat.proto](src/proto/fileformat.proto) and 74 | [src/proto/osmformat.proto](src/proto/osmformat.proto) are copies from the 75 | [OSM-binary] project and are under the LGPLv3 license. 76 | 77 | ### Contribution 78 | 79 | Unless you explicitly state otherwise, any contribution intentionally submitted 80 | for inclusion in this document by you, as defined in the Apache-2.0 license, 81 | shall be dual licensed as above, without any additional terms or conditions. 
82 | 83 | [flatdata]: https://github.com/heremaps/flatdata 84 | [schema]: flatdata/osm.flatdata 85 | [memory mapped files]: https://en.wikipedia.org/wiki/Memory-mapped_file 86 | [PBF format]: https://wiki.openstreetmap.org/wiki/PBF_Format 87 | [osmflat/examples]: osmflat/examples 88 | [latest-berlin-map]: http://download.geofabrik.de/europe/germany/berlin.html 89 | [OSM-binary]: https://github.com/scrosby/OSM-binary 90 | [ci]: https://github.com/boxdot/osmflat-rs/workflows/ci/badge.svg 91 | [berlin-features]: https://github.com/boxdot/osmflat-rs/blob/master/osmflat/examples/berlin-features.png 92 | -------------------------------------------------------------------------------- /flatdata/osm.flatdata: -------------------------------------------------------------------------------- 1 | /// OSM data types and archive 2 | 3 | namespace osm { 4 | 5 | // Max 40 bits value used to indicate null references. 6 | /** 7 | * Special value which represents an invalid index. 8 | */ 9 | const u64 INVALID_IDX = 0xFFFFFFFFFF; 10 | 11 | /** 12 | * Metadata attached to the archive. 13 | */ 14 | struct Header { 15 | /** 16 | * All coordinates in this archive are scaled by this constant 17 | * To get the original degree-based coordinate back compute (latitude/coord_scale,longitude/coord_scale) 18 | */ 19 | coord_scale: i32; 20 | 21 | /// Bounding box (min longitude scaled with `header.coord_scale`) 22 | bbox_left: i32 : 32; 23 | /// Bounding box (max longitude scaled with `header.coord_scale`) 24 | bbox_right: i32 : 32; 25 | /// Bounding box (max latitude scaled with `header.coord_scale`) 26 | bbox_top: i32 : 32; 27 | /// Bounding box (min latitude scaled with `header.coord_scale`) 28 | bbox_bottom: i32 : 32; 29 | 30 | /// Writing program used to write the data (reference to `stringtable`). 31 | writingprogram_idx: u64 : 40; 32 | /// The origin (source) of the data. 33 | source_idx: u64 : 40; 34 | 35 | /** 36 | * Replication timestamp, expressed in seconds since the epoch. 
37 | * See [`state.txt`]. 38 | * 39 | * [`state.txt`]: https://wiki.openstreetmap.org/wiki/Planet.osm/diffs#Minute.2C_Hour.2C_and_Day_Files_Organisation 40 | */ 41 | replication_timestamp: i64 : 64; 42 | /** 43 | * Replication sequence number (`sequenceNumber` from [`state.txt`]). 44 | * 45 | * [`state.txt`]: https://wiki.openstreetmap.org/wiki/Planet.osm/diffs#Minute.2C_Hour.2C_and_Day_Files_Organisation 46 | */ 47 | replication_sequence_number: i64 : 64; 48 | /** 49 | * Replication base URL (reference to `stringtable`). 50 | */ 51 | replication_base_url_idx: u64 : 40; 52 | } 53 | 54 | /** 55 | * A `(key, value)` attached to a `Node`, `Way`, or `Relation`. 56 | */ 57 | struct Tag { 58 | /// Key index in `stringtable` 59 | key_idx: u64 : 40; 60 | /// Value index in `stringtable` 61 | value_idx: u64 : 40; 62 | } 63 | 64 | /** 65 | * A node is one of the core elements in the OpenStreetMap data model. 66 | * 67 | * It consists of a single point in space defined by its latitude, longitude and node id. 68 | * 69 | * See <https://wiki.openstreetmap.org/wiki/Node>. 70 | */ 71 | struct Node { 72 | /// Latitude (scaled with `header.coord_scale`). 73 | lat: i32 : 32; 74 | /// Longitude (scaled with `header.coord_scale`). 75 | lon: i32 : 32; 76 | /** 77 | * Range of tags attached to this node. 78 | * 79 | * The values of the range are indexes in the `tags_index` vector. 80 | */ 81 | @range(tags) 82 | tag_first_idx: u64 : 40; 83 | } 84 | 85 | /** 86 | * Index of a node. 87 | */ 88 | struct NodeIndex { 89 | /// Index in the `nodes` vector. 90 | @optional(INVALID_IDX) 91 | value: u64 : 40; 92 | } 93 | 94 | /** 95 | * A way is an ordered list of nodes. 96 | * 97 | * See <https://wiki.openstreetmap.org/wiki/Way>. 98 | */ 99 | struct Way { 100 | /** 101 | * Range of tags attached to this way. 102 | * 103 | * The values of the range are indexes in the `tags_index` vector. 104 | */ 105 | @range(tags) 106 | tag_first_idx: u64 : 40; 107 | /** 108 | * Range of nodes this way consists of. 109 | * 110 | * The values of the range are indexes in the `nodes_index` vector. 
111 | */ 112 | @range(refs) 113 | ref_first_idx: u64 : 40; 114 | } 115 | 116 | /** 117 | * Index of a tag. 118 | */ 119 | struct TagIndex { 120 | /// Index in the `tags` vector. 121 | value: u64 : 40; 122 | } 123 | 124 | /// Node member of a relation. 125 | struct NodeMember { 126 | /// Index of the node in the `nodes` vector. 127 | @optional(INVALID_IDX) 128 | node_idx: u64 : 40; 129 | /** 130 | * Optional textual field describing the function of the node in the relation. 131 | * 132 | * Index in `stringtable`. 133 | */ 134 | role_idx: u64 : 40; 135 | } 136 | 137 | /// Way member of a relation. 138 | struct WayMember { 139 | /// Index of the way in the `ways` vector. 140 | @optional(INVALID_IDX) 141 | way_idx: u64 : 40; 142 | /** 143 | * Optional textual field describing the function of the way in the relation. 144 | * 145 | * Index in `stringtable`. 146 | */ 147 | role_idx: u64 : 40; 148 | } 149 | 150 | /// Relation member of a relation. 151 | struct RelationMember { 152 | /// Index of the relation in the `relations` vector. 153 | @optional(INVALID_IDX) 154 | relation_idx: u64 : 40; 155 | /** 156 | * Optional textual field describing the function of the relation in the parent relation. 157 | * 158 | * Index in `stringtable`. 159 | */ 160 | role_idx: u64 : 40; 161 | } 162 | 163 | /** 164 | * A relation is an ordered list of one or more nodes, ways and/or relations as members. 165 | * 166 | * See <https://wiki.openstreetmap.org/wiki/Relation>. 167 | */ 168 | struct Relation { 169 | /** 170 | * Range of tags attached to this relation. 171 | * 172 | * The values of the range are indexes in the `tags_index` vector. 
173 | */ 174 | @range(tags) 175 | tag_first_idx: u64 : 40; 176 | } 177 | 178 | struct Id { 179 | value: u64 : 40; 180 | } 181 | 182 | /** 183 | * An optional sub-archive storing the original OSM ids of nodes, ways, and relations 184 | */ 185 | archive Ids { 186 | /** 187 | * List of OSM ids of all nodes in the parent archive 188 | * nodes[i] has its id stored in ids.nodes[i] 189 | */ 190 | nodes: vector< Id >; 191 | 192 | /** 193 | * List of OSM ids of all ways in the parent archive 194 | * ways[i] has its id stored in ids.ways[i] 195 | */ 196 | ways: vector< Id >; 197 | 198 | /** 199 | * List of OSM ids of all relations in the parent archive 200 | * relations[i] has its id stored in ids.relations[i] 201 | */ 202 | relations: vector< Id >; 203 | } 204 | 205 | /** 206 | * OSM data archive 207 | * 208 | * Relations and relation members are indexed with the same index, i.e. 209 | * a relation at index `i` in the vector `relations` has the members 210 | * at index `i` in the multivector `relation_members`. 211 | * 212 | * All 1:n relationships are modeled in-place by using an additional index. This is a 213 | * common pattern in flatdata. For example, a node might have multiple tags attached 214 | * to it. To model this, a node in `nodes` references the first tag attached to it 215 | * by storing an index in the `tags_index` vector. The next node in `nodes` again 216 | * references its first tag, which is the last tag (exclusive) of the previous node. 217 | * 218 | * ```text 219 | * nodes: [ ..., n_1, n_2, ... ] 220 | * | | 221 | * | +-------+ 222 | * v v 223 | * tags_index: [ ..., t_11, t_12, ..., t_1n, t_21, ... t_2m, ... ] 224 | * ``` 225 | */ 226 | @bound_implicitly(Relations: relations, relation_members) 227 | archive Osm { 228 | /** 229 | * Header which contains the metadata attached to the archive. 
230 | */ 231 | @explicit_reference( Header.writingprogram_idx, stringtable ) 232 | @explicit_reference( Header.source_idx, stringtable ) 233 | @explicit_reference( Header.replication_base_url_idx, stringtable ) 234 | header: Header; 235 | 236 | /** 237 | * List of nodes. 238 | * 239 | * A node references a range of tags in the `tags_index` vector. 240 | */ 241 | @explicit_reference( Node.tag_first_idx, tags_index ) 242 | nodes: vector; 243 | 244 | /** 245 | * List of ways. 246 | * 247 | * A way references 248 | * 249 | * * a range of tags in the `tags_index` vector, and 250 | * * a range of nodes in the `nodes_index` vector. 251 | */ 252 | @explicit_reference( Way.tag_first_idx, tags_index ) 253 | @explicit_reference( Way.ref_first_idx, nodes_index ) 254 | ways: vector; 255 | 256 | /** 257 | * List of relations. 258 | * 259 | * A relation references a range of tags in `tags_index` vectors. 260 | * Members are attached to a relation implicitly: members that belong to a 261 | * relation at index `i` are at index `i` in the `relation_members` multivector. 262 | */ 263 | @explicit_reference( Relation.tag_first_idx, tags_index ) 264 | relations: vector; 265 | 266 | /** 267 | * Members attached to relations. 268 | * 269 | * An index in this multivector corresponds to an index in the `relations` vector. 270 | * 271 | * A member has a variadic type: `NodeMember`, `WayMember` or `RelationMember`. 272 | * Each type references its role in the `stringtable` raw data. Additionally, 273 | * 274 | * * a node member references a node in the `nodes` vector, 275 | * * a way member references a way in the `ways` vector, 276 | * * a relation member references a relation in the `relations` vector. 
277 | */ 278 | @explicit_reference( NodeMember.node_idx, nodes ) 279 | @explicit_reference( NodeMember.role_idx, stringtable ) 280 | @explicit_reference( WayMember.way_idx, ways ) 281 | @explicit_reference( WayMember.role_idx, stringtable ) 282 | @explicit_reference( RelationMember.relation_idx, relations ) 283 | @explicit_reference( RelationMember.role_idx, stringtable ) 284 | relation_members: multivector<40, NodeMember, WayMember, RelationMember>; 285 | 286 | /** 287 | * List of tags. 288 | * 289 | * A tag references its key and value in the `stringtable` raw data. 290 | */ 291 | @explicit_reference( Tag.key_idx, stringtable ) 292 | @explicit_reference( Tag.value_idx, stringtable ) 293 | tags: vector; 294 | 295 | /** 296 | * Auxiliary index of tags to model 1:n relationships between nodes, ways, relations 297 | * and tags. 298 | */ 299 | @explicit_reference( TagIndex.value, tags ) 300 | tags_index: vector; 301 | 302 | /** 303 | * Auxiliary index of nodes to model 1:n relationship between ways and nodes. 304 | */ 305 | @explicit_reference( NodeIndex.value, nodes ) 306 | nodes_index: vector; 307 | 308 | /** 309 | * List of strings separated by `\0`. 310 | */ 311 | stringtable: raw_data; 312 | 313 | @optional 314 | ids: archive Ids; 315 | } 316 | } // namespace osm 317 | -------------------------------------------------------------------------------- /osmflat/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "osmflat" 3 | version = "0.3.1" 4 | authors = [ 5 | "boxdot ", 6 | "Christian Vetter ", 7 | "Gabriel Féron " 8 | ] 9 | license = "MIT/Apache-2.0" 10 | description = "OpenStreetMap (OSM) data format providing an efficient random data access through memory mapped files." 
11 | repository = "https://github.com/boxdot/osmflat-rs" 12 | keywords = ["serialization", "osm", "openstreetmap", "flatdata"] 13 | categories = ["encoding"] 14 | readme = "README.md" 15 | edition = "2021" 16 | 17 | [dependencies] 18 | flatdata = "0.5.3" 19 | 20 | [dev-dependencies] 21 | clap = { version = "4.1.4", features = ["derive"] } 22 | itertools = "0.13.0" 23 | png = "0.17.7" 24 | serde = { version = "1.0.152", features = ["derive"] } 25 | serde_json = "1.0.91" 26 | smallvec = "1.10.0" 27 | svg = "0.17.0" 28 | 29 | [features] 30 | default = [] 31 | tar = ["flatdata/tar"] 32 | -------------------------------------------------------------------------------- /osmflat/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /osmflat/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /osmflat/README.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /osmflat/examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This is a collection of examples showing how to use the `osmflat` library. 4 | Some of the examples were ported from the `libosmium`'s 5 | [examples directory]. 6 | 7 | The source code in this directory is under public domain, and can be freely 8 | copied and modified. 9 | 10 | ## Getting started 11 | 12 | * `read` - reads the contents of the input archive. 13 | * `count` - counts the number of nodes, ways, and relations in the input archive. 14 | * `debug` - dumps the contents of the input archive in a debug format. 
15 | 16 | ## Simple 17 | 18 | * `pub-names` - shows the names and addresses of all pubs. 19 | * `road-length` - calculates the length of the road network in the input archive. 20 | 21 | ## Rendering 22 | 23 | * `render-roads` - renders all roads by using a simple Bresenham line algorithm as PNG. 24 |

25 | Berlin Roads 26 |

27 | * `render-features` - renders selected features from the input archive as SVG. 28 |

29 | Berlin Features 30 |

31 | 32 | [examples directory]: https://github.com/osmcode/libosmium/tree/master/examples 33 | -------------------------------------------------------------------------------- /osmflat/examples/berlin-features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boxdot/osmflat-rs/53439c6d891d04033408f8adba376a5e82d7e82f/osmflat/examples/berlin-features.png -------------------------------------------------------------------------------- /osmflat/examples/berlin-roads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boxdot/osmflat-rs/53439c6d891d04033408f8adba376a5e82d7e82f/osmflat/examples/berlin-roads.png -------------------------------------------------------------------------------- /osmflat/examples/cities.rs: -------------------------------------------------------------------------------- 1 | //! Scans all OSM nodes and extracts list of cities with name and 2 | //! population in JSON format. 3 | //! 4 | //! LICENSE 5 | //! 6 | //! The code in this example file is released into the Public Domain. 
7 | 8 | use osmflat::{find_tag, has_tag, Osm}; 9 | use serde::Serialize; 10 | use std::str; 11 | 12 | #[derive(Debug, Default, Serialize)] 13 | struct City<'a> { 14 | name: &'a str, 15 | population: usize, 16 | } 17 | 18 | fn main() -> Result<(), Box> { 19 | let archive_dir = std::env::args() 20 | .nth(1) 21 | .ok_or("USAGE: cities ")?; 22 | let archive = Osm::open(osmflat::FileResourceStorage::new(archive_dir))?; 23 | 24 | // Iterate through all nodes 25 | let cities: Vec = archive 26 | .nodes() 27 | .iter() 28 | // filter nodes that does not have a place=city tag 29 | .filter(|node| has_tag(&archive, node.tags(), b"place", b"city")) 30 | .filter_map(|node| { 31 | // try to collect population and country 32 | Some(City { 33 | name: str::from_utf8(find_tag(&archive, node.tags(), b"name")?).ok()?, 34 | population: str::from_utf8(find_tag(&archive, node.tags(), b"population")?) 35 | .ok()? 36 | .parse() 37 | .ok()?, 38 | }) 39 | }) 40 | .collect(); 41 | 42 | let stdout = std::io::stdout(); 43 | serde_json::to_writer(stdout.lock(), &cities)?; 44 | 45 | Ok(()) 46 | } 47 | -------------------------------------------------------------------------------- /osmflat/examples/count.rs: -------------------------------------------------------------------------------- 1 | //! Counts the number of nodes, ways, and relations in the input archive. 2 | //! 3 | //! LICENSE 4 | //! 5 | //! The code in this example file is released into the Public Domain. 
6 | 7 | use osmflat::{FileResourceStorage, Osm}; 8 | 9 | fn main() -> Result<(), Box> { 10 | let archive_dir = std::env::args() 11 | .nth(1) 12 | .ok_or("USAGE: count ")?; 13 | let archive = Osm::open(FileResourceStorage::new(archive_dir))?; 14 | 15 | println!("Nodes: {}", archive.nodes().len()); 16 | println!("Ways: {}", archive.ways().len()); 17 | println!("Relations: {}", archive.relations().len()); 18 | 19 | Ok(()) 20 | } 21 | -------------------------------------------------------------------------------- /osmflat/examples/debug.rs: -------------------------------------------------------------------------------- 1 | //! Dumps the contents of the input archive in a debug format. 2 | //! 3 | //! Demonstrates 4 | //! 5 | //! * iteration through all fundamental types 6 | //! * accessing of fields and following of references 7 | //! 8 | //! LICENSE 9 | //! 10 | //! The code in this example file is released into the Public Domain. 11 | 12 | use clap::Parser; 13 | use osmflat::{iter_tags, FileResourceStorage, Osm, RelationMembersRef}; 14 | 15 | use std::path::PathBuf; 16 | use std::str::{self, Utf8Error}; 17 | 18 | #[derive(Debug)] 19 | struct Header<'ar> { 20 | #[allow(unused)] 21 | bbox: (f64, f64, f64, f64), 22 | #[allow(unused)] 23 | writingprogram: &'ar str, 24 | #[allow(unused)] 25 | source: &'ar str, 26 | #[allow(unused)] 27 | replication_timestamp: i64, 28 | #[allow(unused)] 29 | replication_sequence_number: i64, 30 | #[allow(unused)] 31 | replication_base_url: &'ar str, 32 | } 33 | 34 | #[derive(Debug)] 35 | struct Node<'ar> { 36 | #[allow(unused)] 37 | id: Option, 38 | #[allow(unused)] 39 | lat: f64, 40 | #[allow(unused)] 41 | lon: f64, 42 | #[allow(unused)] 43 | tags: Vec<(&'ar str, &'ar str)>, 44 | } 45 | 46 | #[derive(Debug)] 47 | struct Way<'ar> { 48 | #[allow(unused)] 49 | id: Option, 50 | #[allow(unused)] 51 | tags: Vec<(&'ar str, &'ar str)>, 52 | #[allow(unused)] 53 | nodes: Vec>, 54 | } 55 | 56 | #[derive(Debug)] 57 | struct Relation<'ar> { 58 |
#[allow(unused)] 59 | id: Option, 60 | #[allow(unused)] 61 | tags: Vec<(&'ar str, &'ar str)>, 62 | #[allow(unused)] 63 | members: Vec>, 64 | } 65 | 66 | #[derive(Debug)] 67 | struct Member<'ar> { 68 | #[allow(unused)] 69 | r#type: Type, 70 | #[allow(unused)] 71 | idx: Option, 72 | #[allow(unused)] 73 | role: &'ar str, 74 | } 75 | 76 | #[derive(Debug)] 77 | enum Type { 78 | Node, 79 | Way, 80 | Relation, 81 | } 82 | 83 | impl<'ar> Member<'ar> { 84 | fn new_slice( 85 | archive: &'ar Osm, 86 | relation_idx: usize, 87 | ) -> impl Iterator, Utf8Error>> { 88 | let strings = archive.stringtable(); 89 | archive 90 | .relation_members() 91 | .at(relation_idx) 92 | .map(move |member| { 93 | let res = match member { 94 | RelationMembersRef::NodeMember(m) => Member { 95 | r#type: Type::Node, 96 | idx: m.node_idx(), 97 | role: strings.substring(m.role_idx() as usize)?, 98 | }, 99 | RelationMembersRef::WayMember(m) => Member { 100 | r#type: Type::Way, 101 | idx: m.way_idx(), 102 | role: strings.substring(m.role_idx() as usize)?, 103 | }, 104 | RelationMembersRef::RelationMember(m) => Member { 105 | r#type: Type::Relation, 106 | idx: m.relation_idx(), 107 | role: strings.substring(m.role_idx() as usize)?, 108 | }, 109 | }; 110 | Ok(res) 111 | }) 112 | } 113 | } 114 | 115 | /// output osmflatdata: nodes, ways, and/or relations 116 | #[derive(Debug, Parser)] 117 | struct Args { 118 | /// input osmflat archive 119 | input: PathBuf, 120 | /// which types to print: (n)odes, (w)ays, or (r)elations 121 | #[arg(long, default_value = "nwr")] 122 | types: String, 123 | /// amount of entities to print 124 | #[arg(long)] 125 | num: Option, 126 | } 127 | 128 | fn main() -> Result<(), Box> { 129 | let args = Args::parse(); 130 | let archive = Osm::open(FileResourceStorage::new(args.input))?; 131 | 132 | let header = archive.header(); 133 | let strings = archive.stringtable(); 134 | 135 | let scale_coord = |x| x as f64 / header.coord_scale() as f64; 136 | 137 | // print header 138 | let header 
= Header { 139 | bbox: ( 140 | scale_coord(header.bbox_left()), 141 | scale_coord(header.bbox_right()), 142 | scale_coord(header.bbox_top()), 143 | scale_coord(header.bbox_bottom()), 144 | ), 145 | writingprogram: strings.substring(header.writingprogram_idx() as usize)?, 146 | source: strings.substring(header.source_idx() as usize)?, 147 | replication_timestamp: header.replication_timestamp(), 148 | replication_sequence_number: header.replication_sequence_number(), 149 | replication_base_url: strings.substring(header.replication_base_url_idx() as usize)?, 150 | }; 151 | println!("{header:#?}"); 152 | 153 | let collect_utf8_tags = |tags| -> Vec<(&str, &str)> { 154 | iter_tags(&archive, tags) 155 | .filter_map(|(k, v)| match (str::from_utf8(k), str::from_utf8(v)) { 156 | (Ok(k), Ok(v)) => Some((k, v)), 157 | _ => None, 158 | }) 159 | .collect() 160 | }; 161 | 162 | // print nodes 163 | let mut node_ids = archive.ids().map(|x| x.nodes()).into_iter().flatten(); 164 | if args.types.contains('n') { 165 | for node in archive.nodes().iter().take(args.num.unwrap_or(usize::MAX)) { 166 | let node = Node { 167 | id: node_ids.next().map(|x| x.value()), 168 | lat: scale_coord(node.lat()), 169 | lon: scale_coord(node.lon()), 170 | tags: collect_utf8_tags(node.tags()), 171 | }; 172 | 173 | println!("{node:#?}"); 174 | } 175 | } 176 | 177 | // print ways 178 | let nodes_index = archive.nodes_index(); 179 | let mut way_ids = archive.ids().map(|x| x.ways()).into_iter().flatten(); 180 | if args.types.contains('w') { 181 | for way in archive.ways().iter().take(args.num.unwrap_or(usize::MAX)) { 182 | let way = Way { 183 | id: way_ids.next().map(|x| x.value()), 184 | tags: collect_utf8_tags(way.tags()), 185 | nodes: way 186 | .refs() 187 | .map(|idx| nodes_index[idx as usize].value()) 188 | .collect(), 189 | }; 190 | 191 | println!("{way:#?}"); 192 | } 193 | } 194 | 195 | // print relations 196 | let mut relation_ids = archive.ids().map(|x| x.ways()).into_iter().flatten(); 197 | if 
args.types.contains('r') { 198 | for (relation_idx, relation) in archive.relations()[..3] 199 | .iter() 200 | .take(args.num.unwrap_or(usize::MAX)) 201 | .enumerate() 202 | { 203 | let members: Result, _> = Member::new_slice(&archive, relation_idx).collect(); 204 | let relation = Relation { 205 | id: relation_ids.next().map(|x| x.value()), 206 | tags: collect_utf8_tags(relation.tags()), 207 | members: members?, 208 | }; 209 | 210 | println!("{relation:#?}"); 211 | } 212 | } 213 | 214 | Ok(()) 215 | } 216 | -------------------------------------------------------------------------------- /osmflat/examples/pub-names.rs: -------------------------------------------------------------------------------- 1 | //! Shows the names and addresses of all pubs. 2 | //! 3 | //! Demonstrates 4 | //! 5 | //! * iteration through tags belonging to a node and a way 6 | //! * accessing of tags by key 7 | //! * filtering of tags 8 | //! 9 | //! LICENSE 10 | //! 11 | //! The code in this example file is released into the Public Domain. 
12 | 13 | use osmflat::{find_tag, has_tag, iter_tags, FileResourceStorage, Osm}; 14 | use std::str; 15 | 16 | fn main() -> Result<(), Box> { 17 | let archive_dir = std::env::args() 18 | .nth(1) 19 | .ok_or("USAGE: pub_names ")?; 20 | let archive = Osm::open(FileResourceStorage::new(archive_dir))?; 21 | 22 | let nodes_tags = archive.nodes().iter().map(|node| node.tags()); 23 | let ways_tags = archive.ways().iter().map(|way| way.tags()); 24 | 25 | for tag_range in nodes_tags.chain(ways_tags) { 26 | if has_tag(&archive, tag_range.clone(), b"amenity", b"pub") { 27 | let name = find_tag(&archive, tag_range.clone(), b"name"); 28 | let name = name.map(|s| str::from_utf8(s).unwrap_or("broken pub name")); 29 | println!("{}", name.unwrap_or("unknown pub name")); 30 | 31 | let addrs = iter_tags(&archive, tag_range).filter(|(k, _)| k.starts_with(b"addr:")); 32 | for (k, v) in addrs { 33 | if let (Ok(addr_type), Ok(addr)) = (str::from_utf8(k), str::from_utf8(v)) { 34 | println!(" {addr_type}: {addr}"); 35 | } 36 | } 37 | } 38 | } 39 | 40 | Ok(()) 41 | } 42 | -------------------------------------------------------------------------------- /osmflat/examples/read.rs: -------------------------------------------------------------------------------- 1 | //! Reads the contents of the input archive. 2 | //! 3 | //! LICENSE 4 | //! 5 | //! The code in this example file is released into the Public Domain. 
6 | 7 | use osmflat::{FileResourceStorage, Osm}; 8 | 9 | fn main() -> Result<(), Box> { 10 | let archive_dir = std::env::args() 11 | .nth(1) 12 | .ok_or("USAGE: read ")?; 13 | let archive = Osm::open(FileResourceStorage::new(archive_dir))?; 14 | 15 | for _node in archive.nodes() { 16 | // do nothing 17 | } 18 | 19 | for _way in archive.ways() { 20 | // do nothing 21 | } 22 | 23 | for _relation in archive.relations() { 24 | // do nothing 25 | } 26 | 27 | Ok(()) 28 | } 29 | -------------------------------------------------------------------------------- /osmflat/examples/render-features.rs: -------------------------------------------------------------------------------- 1 | //! Renders selected features from the input archive as svg. 2 | //! 3 | //! For supported features check `Category` enum and `classify` function. 4 | //! 5 | //! For each feature, we retrieve the coordinates lazily from osm nodes, and 6 | //! then produce polylines styled based on the category, cf. `render_svg` 7 | //! function. The coordinates are in lon, lat. 8 | //! 9 | //! Inside of svg we just use the coordinates as is (except for swapped x/y 10 | //! axes), plus we apply a transformation to adjust the coordinates to the 11 | //! viewport. Obviously, it is slower to render such an svg on the screen. 12 | //! However, the final svg already contains so many polylines that having already 13 | //! transformed coordinates does not change much. If you need speed when showing 14 | //! the svg, feel free to apply simplifications in this program. 15 | //! 16 | //! LICENSE 17 | //! 18 | //! The code in this example file is released into the Public Domain.
19 | 20 | use clap::Parser; 21 | use osmflat::{iter_tags, FileResourceStorage, Node, Osm, Relation, RelationMembersRef, Way}; 22 | use smallvec::{smallvec, SmallVec}; 23 | use svg::{node::element, Document}; 24 | 25 | use std::f64; 26 | use std::fmt::Write; 27 | use std::io; 28 | use std::ops::Range; 29 | use std::path::PathBuf; 30 | use std::str; 31 | 32 | /// Geographic coordinates represented by (latitude, longitude). 33 | #[derive(Debug, Clone, Copy, Default, PartialEq, PartialOrd)] 34 | struct GeoCoord { 35 | lat: f64, 36 | lon: f64, 37 | } 38 | 39 | impl GeoCoord { 40 | fn min(self, other: Self) -> Self { 41 | Self { 42 | lat: self.lat.min(other.lat), 43 | lon: self.lon.min(other.lon), 44 | } 45 | } 46 | 47 | fn max(self, other: Self) -> Self { 48 | Self { 49 | lat: self.lat.max(other.lat), 50 | lon: self.lon.max(other.lon), 51 | } 52 | } 53 | } 54 | 55 | /// Convert osmflat Node into GeoCoord. 56 | impl GeoCoord { 57 | fn from_node(node: &Node, coord_scale: i32) -> Self { 58 | Self { 59 | lat: node.lat() as f64 / coord_scale as f64, 60 | lon: node.lon() as f64 / coord_scale as f64, 61 | } 62 | } 63 | } 64 | 65 | /// Polyline which can be transformed into an iterator over `GeoCoord`'s. 
66 | struct Polyline { 67 | inner: SmallVec<[Range; 4]>, 68 | } 69 | 70 | impl From> for Polyline { 71 | fn from(range: Range) -> Self { 72 | Self { 73 | inner: smallvec![range], 74 | } 75 | } 76 | } 77 | 78 | impl Polyline { 79 | #[allow(clippy::iter_overeager_cloned)] 80 | fn into_iter(self, archive: &Osm) -> Option + '_> { 81 | let nodes_index = archive.nodes_index(); 82 | let nodes = archive.nodes(); 83 | let mut indices = self.inner.iter().cloned().flatten(); 84 | let scale = archive.header().coord_scale(); 85 | if indices.any(|idx| nodes_index[idx as usize].value().is_none()) { 86 | None 87 | } else { 88 | let indices = self.inner.into_iter().flatten(); 89 | Some(indices.map(move |idx| { 90 | GeoCoord::from_node( 91 | &nodes[nodes_index[idx as usize].value().unwrap() as usize], 92 | scale, 93 | ) 94 | })) 95 | } 96 | } 97 | } 98 | 99 | /// Categories of features we support in this renderer. 100 | #[derive(Debug, Clone, Copy)] 101 | enum Category { 102 | Road, 103 | Park, 104 | River(u32), // River with width 105 | Water, 106 | } 107 | 108 | /// Feature in osmflat. 109 | /// 110 | /// Idx points either into ways or relations, depending on the `Category`. 
111 | struct Feature { 112 | idx: usize, 113 | cat: Category, 114 | } 115 | 116 | impl Feature { 117 | fn into_polyline(self, archive: &Osm) -> Option { 118 | match self.cat { 119 | Category::Road | Category::River(_) => { 120 | Some(way_into_polyline(&archive.ways()[self.idx])) 121 | } 122 | Category::Park | Category::Water => multipolygon_into_polyline(archive, self.idx), 123 | } 124 | } 125 | } 126 | 127 | fn way_into_polyline(way: &Way) -> Polyline { 128 | Polyline { 129 | inner: smallvec![way.refs()], 130 | } 131 | } 132 | 133 | fn multipolygon_into_polyline(archive: &Osm, idx: usize) -> Option { 134 | let members = archive.relation_members().at(idx); 135 | let strings = archive.stringtable(); 136 | let ways = archive.ways(); 137 | 138 | let inner: Option; 4]>> = members 139 | .filter_map(|m| match m { 140 | RelationMembersRef::WayMember(way_member) 141 | if strings.substring(way_member.role_idx() as usize) == Ok("outer") => 142 | { 143 | Some(way_member.way_idx().map(|idx| ways[idx as usize].refs())) 144 | } 145 | _ => None, 146 | }) 147 | .collect(); 148 | inner.map(|inner| Polyline { inner }) 149 | } 150 | 151 | /// Classifies all features from osmflat we want to render. 152 | fn classify(archive: &Osm) -> impl Iterator + '_ { 153 | let ways = archive.ways().iter().enumerate(); 154 | let ways = ways 155 | .filter_map(move |(idx, way)| classify_way(archive, way).map(|cat| Feature { idx, cat })); 156 | let rels = archive.relations().iter().enumerate(); 157 | let rels = rels.filter_map(move |(idx, rel)| { 158 | classify_relation(archive, rel).map(|cat| Feature { idx, cat }) 159 | }); 160 | ways.chain(rels) 161 | } 162 | 163 | fn classify_way(archive: &Osm, way: &Way) -> Option { 164 | // Filter all ways that have less than 2 nodes. 
165 | // A way with fewer than 2 nodes cannot form a line segment; the range is half-open, so "less than 2 nodes" is end < start + 2. 166 | if way.refs().end <= way.refs().start + 1 { 167 | return None; 168 | } 169 | 170 | const UNWANTED_HIGHWAY_TYPES: [&[u8]; 9] = [ 171 | b"pedestrian", 172 | b"steps", 173 | b"footway", 174 | b"construction", 175 | b"bic", 176 | b"cycleway", 177 | b"layby", 178 | b"bridleway", 179 | b"path", 180 | ]; 181 | 182 | // Filter all ways that do not have a highway tag. Also check for specific 183 | // values. 184 | for (key, val) in iter_tags(archive, way.tags()) { 185 | if key == b"highway" { 186 | if UNWANTED_HIGHWAY_TYPES.contains(&val) { 187 | return None; 188 | } 189 | return Some(Category::Road); 190 | } else if key == b"waterway" { 191 | for (key, val) in iter_tags(archive, way.tags()) { 192 | if key == b"width" || key == b"maxwidth" { 193 | let width: u32 = str::from_utf8(val).ok()?.parse().ok()?; 194 | return Some(Category::River(width)); 195 | } 196 | } 197 | return Some(Category::River(1)); 198 | } 199 | } 200 | None 201 | } 202 | 203 | fn classify_relation(archive: &Osm, relation: &Relation) -> Option { 204 | let mut is_multipolygon = false; 205 | let mut is_park = false; 206 | let mut is_lake = false; 207 | 208 | for (key, val) in iter_tags(archive, relation.tags()) { 209 | if key == b"type" && val == b"multipolygon" { 210 | if is_park { 211 | return Some(Category::Park); 212 | } 213 | if is_lake { 214 | return Some(Category::Water); 215 | } 216 | is_multipolygon = true; 217 | } 218 | if (key == b"leisure" && val == b"park") 219 | || (key == b"landuse" && (val == b"recreation_ground" || val == b"forest")) 220 | { 221 | if is_multipolygon { 222 | return Some(Category::Park); 223 | } 224 | is_park = true; 225 | } 226 | if key == b"water" && val == b"lake" { 227 | if is_multipolygon { 228 | return Some(Category::Water); 229 | } 230 | is_lake = true; 231 | } 232 | } 233 | None 234 | } 235 | 236 | /// Renders svg from classified polylines. 237 | fn render_svg

( 237 | archive: &Osm, 238 | classified_polylines: P, 239 | output: PathBuf, 240 | width: u32, 241 | height: u32, 242 | ) -> Result<(), io::Error> 243 | where 244 | P: Iterator, 245 | { 246 | let mut document = Document::new().set("viewBox", (0, 0, width, height)); 247 | let mut road_group = element::Group::new() 248 | .set("stroke", "#001F3F") 249 | .set("stroke-width", "0.3") 250 | .set("fill", "none"); 251 | let mut park_group = element::Group::new() 252 | .set("stroke", "#3D9970") 253 | .set("fill", "#3D9970") 254 | .set("fill-opacity", 0.3); 255 | let mut river_group = element::Group::new() 256 | .set("stroke", "#0074D9") 257 | .set("fill", "none") 258 | .set("stroke-opacity", 0.8); 259 | let mut lake_group = element::Group::new() 260 | .set("stroke", "#0074D9") 261 | .set("fill", "#0074D9") 262 | .set("fill-opacity", 0.3); 263 | 264 | let mut min_coord = GeoCoord { 265 | lat: f64::MAX, 266 | lon: f64::MAX, 267 | }; 268 | let mut max_coord = GeoCoord { 269 | lat: f64::MIN, 270 | lon: f64::MIN, 271 | }; 272 | 273 | let mut points = String::new(); // reuse string buffer inside the for-loop 274 | for (poly, cat) in classified_polylines { 275 | points.clear(); 276 | let poly_iter = match poly.into_iter(archive) { 277 | Some(x) => x, 278 | None => continue, 279 | }; 280 | for coord in poly_iter { 281 | // collect extent 282 | min_coord = min_coord.min(coord); 283 | max_coord = max_coord.max(coord); 284 | // accumulate polyline points 285 | write!(&mut points, "{:.5},{:.5} ", coord.lon, coord.lat) 286 | .expect("failed to write coordinates"); 287 | } 288 | 289 | let polyline = element::Polyline::new().set("points", &points[..]); 290 | 291 | match cat { 292 | Category::Road => { 293 | road_group = road_group.add(polyline); 294 | } 295 | Category::River(width) => { 296 | river_group = river_group.add(polyline).set("stroke-width", width); 297 | } 298 | Category::Park => { 299 | park_group = park_group.add(polyline); 300 | } 301 | Category::Water => { 302 | lake_group = 
lake_group.add(polyline); 303 | } 304 | } 305 | } 306 | 307 | let mut transform = element::Group::new().set( 308 | "transform", 309 | format!( 310 | "scale({:.5} {:.5}) translate({:.5} {:.5})", /* Note: svg transformations are 311 | * applied from right to left */ 312 | f64::from(width) / (max_coord.lon - min_coord.lon), 313 | f64::from(height) / (min_coord.lat - max_coord.lat), // invert y-axis 314 | -min_coord.lon, 315 | -max_coord.lat, 316 | ), 317 | ); 318 | 319 | transform = transform 320 | .add(road_group) 321 | .add(river_group) 322 | .add(lake_group) 323 | .add(park_group); 324 | 325 | let style = element::Style::new( 326 | r#" 327 | text { 328 | font-family: arial; 329 | font-size: 8px; 330 | color: #001F3F; 331 | opacity: 0.3; 332 | } 333 | 334 | polyline { 335 | vector-effect: non-scaling-stroke; 336 | } 337 | "#, 338 | ); 339 | 340 | let notice = element::Text::new("© OpenStreetMap Contributors") 341 | .set("x", width.saturating_sub(10)) 342 | .set("y", height.saturating_sub(10)) 343 | .set("text-anchor", "end"); 344 | 345 | document = document.add(style).add(transform).add(notice); 346 | svg::save(output, &document) 347 | } 348 | 349 | /// render map features as a SVG 350 | #[derive(Debug, Parser)] 351 | #[clap(name = "render-features")] 352 | struct Args { 353 | /// osmflat archive 354 | osmflat_archive: PathBuf, 355 | 356 | /// SVG filename to output 357 | #[clap(long, short = 'o')] 358 | output: PathBuf, 359 | 360 | /// width of the image 361 | #[clap(long, default_value = "800")] 362 | width: u32, 363 | 364 | /// height of the image 365 | #[clap(long, default_value = "600")] 366 | height: u32, 367 | } 368 | 369 | fn main() -> Result<(), Box> { 370 | let args = Args::parse(); 371 | 372 | let storage = FileResourceStorage::new(args.osmflat_archive); 373 | let archive = Osm::open(storage)?; 374 | 375 | let features = classify(&archive); 376 | let archive_inner = archive.clone(); 377 | let classified_polylines = features.filter_map(move |f| { 378 | let 
cat = f.cat; 379 | f.into_polyline(&archive_inner).map(|p| (p, cat)) 380 | }); 381 | render_svg( 382 | &archive, 383 | classified_polylines, 384 | args.output, 385 | args.width, 386 | args.height, 387 | )?; 388 | Ok(()) 389 | } 390 | -------------------------------------------------------------------------------- /osmflat/examples/render-roads.rs: -------------------------------------------------------------------------------- 1 | //! Renders all roads by using a simple Bresenham line algorithm. 2 | //! 3 | //! LICENSE 4 | //! 5 | //! The code in this example file is released into the Public Domain. 6 | 7 | use osmflat::{find_tag_by, FileResourceStorage, Node, Osm, Way}; 8 | 9 | use clap::Parser; 10 | use itertools::Itertools; 11 | 12 | use std::f64::consts::PI; 13 | use std::fs::File; 14 | use std::io::BufWriter; 15 | use std::path::PathBuf; 16 | 17 | /// Geographic coordinates represented by (latitude, longitude). 18 | #[derive(Debug, Clone, Copy, Default, PartialEq, PartialOrd)] 19 | struct GeoCoord { 20 | lat: f64, 21 | lon: f64, 22 | } 23 | 24 | /// Convert osmflat Node into GeoCoord. 
25 | impl GeoCoord { 26 | fn from_node(node: &Node, coord_scale: i32) -> Self { 27 | Self { 28 | lat: node.lat() as f64 / coord_scale as f64, 29 | lon: node.lon() as f64 / coord_scale as f64, 30 | } 31 | } 32 | } 33 | 34 | #[derive(Debug)] 35 | struct Image { 36 | w: u32, 37 | h: u32, 38 | data: Vec, 39 | } 40 | 41 | impl Image { 42 | fn new(w: u32, h: u32) -> Self { 43 | Self { 44 | w, 45 | h, 46 | data: vec![255; (w * h) as usize], 47 | } 48 | } 49 | 50 | fn set_black(&mut self, x: u32, y: u32) { 51 | self.data[(y * self.w + x) as usize] = 0; 52 | } 53 | } 54 | 55 | fn compute_bounds(mut iter: impl Iterator) -> (GeoCoord, GeoCoord) { 56 | let first_coord = iter.next().unwrap_or_default(); 57 | iter.fold((first_coord, first_coord), |(min, max), coord| { 58 | ( 59 | GeoCoord { 60 | lat: min.lat.min(coord.lat), 61 | lon: min.lon.min(coord.lon), 62 | }, 63 | GeoCoord { 64 | lat: max.lat.max(coord.lat), 65 | lon: max.lon.max(coord.lon), 66 | }, 67 | ) 68 | }) 69 | } 70 | 71 | fn map_transform( 72 | (width, height): (u32, u32), 73 | (min, max): (GeoCoord, GeoCoord), 74 | ) -> impl FnMut(GeoCoord) -> (i32, i32) + Copy { 75 | move |coord: GeoCoord| { 76 | ( 77 | ((coord.lon - min.lon) * f64::from(width) / (max.lon - min.lon)) as i32, 78 | ((max.lat - coord.lat) * f64::from(height) / (max.lat - min.lat)) as i32, 79 | ) 80 | } 81 | } 82 | 83 | fn way_coords<'a>(archive: &'a Osm, way: &Way) -> Option + 'a> { 84 | let nodes = archive.nodes(); 85 | let nodes_index = archive.nodes_index(); 86 | let path = way.refs().map(move |i| &nodes_index[i as usize]); 87 | let scale = archive.header().coord_scale(); 88 | if path.clone().any(|node| node.value().is_none()) { 89 | None 90 | } else { 91 | Some( 92 | path.map(move |node| { 93 | GeoCoord::from_node(&nodes[node.value().unwrap() as usize], scale) 94 | }), 95 | ) 96 | } 97 | } 98 | 99 | fn way_filter(way: &Way, archive: &Osm) -> bool { 100 | const UNWANTED_HIGHWAY_TYPES: [&[u8]; 9] = [ 101 | b"pedestrian\0", 102 | b"steps\0", 103 | 
b"footway\0", 104 | b"construction\0", 105 | b"bic\0", 106 | b"cycleway\0", 107 | b"layby\0", 108 | b"bridleway\0", 109 | b"path\0", 110 | ]; 111 | 112 | // Filter all ways that do not have desirable highway tag. 113 | find_tag_by(archive, way.tags(), |key_block, val_block| { 114 | key_block.starts_with(b"highway\0") 115 | && !UNWANTED_HIGHWAY_TYPES 116 | .iter() 117 | .any(|t| val_block.starts_with(t)) 118 | }) 119 | .is_some() 120 | } 121 | 122 | fn roads(archive: &Osm) -> impl Iterator { 123 | archive 124 | .ways() 125 | .iter() 126 | .filter(move |&way| way_filter(way, archive)) 127 | } 128 | 129 | /// Bresenham's line algorithm 130 | /// 131 | /// https://en.wikipedia.org/wiki/Bresenham%27s_line_algorithm 132 | fn bresenham(mut x0: i32, mut y0: i32, x1: i32, y1: i32) -> impl Iterator { 133 | let dx = (x1 - x0).abs(); 134 | let sx = if x0 < x1 { 1 } else { -1 }; 135 | let dy = -(y1 - y0).abs(); 136 | let sy = if y0 < y1 { 1 } else { -1 }; 137 | let mut err = dx + dy; 138 | 139 | std::iter::from_fn(move || { 140 | if x0 == x1 && y0 == y1 { 141 | return None; 142 | } 143 | let res = (x0, y0); 144 | let e2 = 2 * err; 145 | if e2 >= dy { 146 | err += dy; 147 | x0 += sx; 148 | } 149 | if e2 <= dx { 150 | err += dx; 151 | y0 += sy; 152 | } 153 | Some(res) 154 | }) 155 | } 156 | 157 | fn render(archive: &Osm, width: u32) -> Image { 158 | // compute extent 159 | let coords = roads(archive) 160 | .filter_map(|way| way_coords(archive, way)) 161 | .flatten(); 162 | let (min, max) = compute_bounds(coords); 163 | 164 | // compute ratio and height 165 | let ratio = (max.lat - min.lat) / (max.lon - min.lon) / (max.lat / 180. 
* PI).cos(); 166 | let height = (f64::from(width) * ratio) as u32; 167 | 168 | // create world -> raster transformation 169 | let t = map_transform((width - 1, height - 1), (min, max)); 170 | 171 | // draw 172 | let mut image = Image::new(width, height); 173 | 174 | let line_segments = roads(archive) 175 | .filter_map(|way| Some(way_coords(archive, way)?.map(t).tuple_windows())) 176 | .flatten(); 177 | 178 | for ((x0, y0), (x1, y1)) in line_segments { 179 | for (x, y) in bresenham(x0, y0, x1, y1) { 180 | image.set_black(x as u32, y as u32); 181 | } 182 | } 183 | 184 | image 185 | } 186 | 187 | /// Renders roads as a PNG 188 | #[derive(Debug, Parser)] 189 | struct Args { 190 | /// input osmflat archive 191 | input: PathBuf, 192 | /// output PNG filename 193 | #[clap(long, short = 'o')] 194 | output: PathBuf, 195 | /// width of the image (height is derived from ratio) 196 | #[clap(long, short = 'w', default_value = "4320")] 197 | width: u32, 198 | } 199 | 200 | fn main() -> Result<(), Box> { 201 | let args = Args::parse(); 202 | 203 | let archive = Osm::open(FileResourceStorage::new(args.input))?; 204 | 205 | let image = render(&archive, args.width); 206 | 207 | let buf = BufWriter::new(File::create(&args.output)?); 208 | let mut encoder = png::Encoder::new(buf, image.w, image.h); 209 | encoder.set_color(png::ColorType::Grayscale); 210 | encoder.set_depth(png::BitDepth::Eight); 211 | let mut writer = encoder.write_header()?; 212 | writer.write_image_data(&image.data[..])?; 213 | 214 | Ok(()) 215 | } 216 | -------------------------------------------------------------------------------- /osmflat/examples/road-length.rs: -------------------------------------------------------------------------------- 1 | //! Calculates the length of the road network (everything tagged `highway=*`) 2 | //! in the input archive. 3 | //! 4 | //! Demonstrates 5 | //! 6 | //! * iteration through ways 7 | //! * accessing of tags belonging to a way 8 | //! 
* accessing of nodes belonging to a way 9 | //! * length calculation on the Earth using the haversine function 10 | //! 11 | //! LICENSE 12 | //! 13 | //! The code in this example file is released into the Public Domain. 14 | 15 | use itertools::Itertools; 16 | use osmflat::{FileResourceStorage, Node, Osm}; 17 | 18 | struct Coords { 19 | lat: f64, 20 | lon: f64, 21 | } 22 | 23 | impl Coords { 24 | fn from_node(node: &Node, coord_scale: i32) -> Self { 25 | Self { 26 | lat: node.lat() as f64 / coord_scale as f64, 27 | lon: node.lon() as f64 / coord_scale as f64, 28 | } 29 | } 30 | } 31 | 32 | fn haversine_distance(c1: Coords, c2: Coords) -> f64 { 33 | /// Earth's radius for WGS84 in meters 34 | const EARTH_RADIUS_IN_METERS: f64 = 6_372_797.560_856; 35 | 36 | let mut lonh = ((c1.lon - c2.lon).to_radians() * 0.5).sin(); 37 | lonh *= lonh; 38 | let mut lath = ((c1.lat - c2.lat).to_radians() * 0.5).sin(); 39 | lath *= lath; 40 | let tmp = c1.lat.to_radians().cos() * c2.lat.to_radians().cos(); 41 | 2.0 * EARTH_RADIUS_IN_METERS * (lath + tmp * lonh).sqrt().asin() 42 | } 43 | 44 | fn main() -> Result<(), Box> { 45 | let archive_dir = std::env::args() 46 | .nth(1) 47 | .ok_or("USAGE: road_length ")?; 48 | let archive = Osm::open(FileResourceStorage::new(archive_dir))?; 49 | let header = archive.header(); 50 | 51 | let tags = archive.tags(); 52 | let tags_index = archive.tags_index(); 53 | let strings = archive.stringtable(); 54 | 55 | let highways = archive.ways().iter().filter(|way| { 56 | way.tags().any(|idx| { 57 | // A way reference a range of tags by storing a contiguous range of 58 | // indexes in `tags_index`. Each of these references a tag in `tags`. 59 | // This is a common pattern when flattening 1 to n relations. 
60 | let tag = &tags[tags_index[idx as usize].value() as usize]; 61 | strings.substring_raw(tag.key_idx() as usize) == b"highway" 62 | }) 63 | }); 64 | 65 | let nodes = archive.nodes(); 66 | let nodes_index = archive.nodes_index(); 67 | 68 | let lengths = highways.filter_map(|way| { 69 | let coords = way.refs().map(|idx| { 70 | // A way references a range of nodes by storing a contiguous range of 71 | // indexes in `nodes_index`. Each of these references a node in `nodes`. 72 | // This is a common pattern when flattening 1 to n relations. 73 | Some(Coords::from_node( 74 | &nodes[nodes_index[idx as usize].value()? as usize], 75 | header.coord_scale(), 76 | )) 77 | }); 78 | let length: Option = coords 79 | .clone() 80 | .zip(coords.skip(1)) 81 | .map(|(from, to)| Some(haversine_distance(from?, to?))) 82 | .fold_options(0.0, |acc, x| acc + x); 83 | length 84 | }); 85 | 86 | let length: f64 = lengths.sum(); 87 | println!("Length: {:.0} km", length / 1000.0); 88 | 89 | Ok(()) 90 | } 91 | -------------------------------------------------------------------------------- /osmflat/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny(missing_docs)] 2 | #![allow(clippy::all)] // generated code is not clippy friendly 3 | 4 | //! Flat OpenStreetMap (OSM) data format providing an efficient *random* data 5 | //! access through [memory mapped files]. 6 | //! 7 | //! The data format is described and implemented in [flatdata]. The [schema] 8 | //! describes the fundamental OSM data structures: nodes, ways, relations and 9 | //! tags as simple non-nested data structures. The relations between these are 10 | //! expressed through indexes. 11 | //! 12 | //! ## Examples 13 | //! 14 | //! Open a flatdata archive (compiled from pbf with [`osmflatc`]) and iterate 15 | //! through nodes: 16 | //! 17 | //! ```rust,no_run 18 | //! use osmflat::{FileResourceStorage, Osm}; 19 | //! 20 | //! fn main() { 21 | //! 
let storage = FileResourceStorage::new("path/to/archive.osm.flatdata"); 22 | //! let archive = Osm::open(storage).unwrap(); 23 | //! 24 | //! for node in archive.nodes().iter() { 25 | //! println!("{:?}", node); 26 | //! } 27 | //! } 28 | //! ``` 29 | //! 30 | //! For more examples, see the [examples] directory. 31 | //! 32 | //! [flatdata]: https://github.com/heremaps/flatdata 33 | //! [schema]: https://github.com/boxdot/osmflat-rs/blob/master/flatdata/osm.flatdata 34 | //! [memory mapped files]: https://en.wikipedia.org/wiki/Memory-mapped_file 35 | //! [`osmflatc`]: https://github.com/boxdot/osmflat-rs/tree/master/osmflatc 36 | //! [examples]: https://github.com/boxdot/osmflat-rs/tree/master/osmflat/examples 37 | 38 | // generated osm module 39 | include!("osmflat_generated.rs"); 40 | 41 | mod tags; 42 | 43 | pub use crate::osm::*; 44 | pub use crate::tags::*; 45 | 46 | // re-export what is needed from flatdata to use osmflat 47 | pub use flatdata::FileResourceStorage; 48 | #[cfg(feature = "tar")] 49 | pub use flatdata::TarArchiveResourceStorage; 50 | -------------------------------------------------------------------------------- /osmflat/src/tags.rs: -------------------------------------------------------------------------------- 1 | //! All functions in this module operate on raw bytes for performance reasons. 2 | //! It is easy to combine these with `std::str::from_utf8` family of functions, 3 | //! to lift them to operate on `str`. 4 | 5 | use crate::Osm; 6 | use std::ops::Range; 7 | 8 | /// Returns an iterator over tags specified by `range`. 9 | /// 10 | /// When searching for a tag by key consider to use `find_tag` which 11 | /// performs better. 
#[inline]
pub fn iter_tags(archive: &Osm, range: Range<u64>) -> impl Iterator<Item = (&[u8], &[u8])> + Clone {
    let tags = archive.tags();
    let tags_index = archive.tags_index();
    let strings = archive.stringtable();

    // `range` indexes into `tags_index`, which in turn references `tags`;
    // key/value of each tag are byte offsets into the string table.
    range.map(move |idx| {
        let tag = &tags[tags_index[idx as usize].value() as usize];
        let key = strings.substring_raw(tag.key_idx() as usize);
        let val = strings.substring_raw(tag.value_idx() as usize);
        (key, val)
    })
}

/// Finds the first tag in the given `range` which satisfies the predicate
/// applied to the key and value and returns the corresponding value.
///
/// Note that the predicate function is called on the whole key block and value
/// block. These are zero (`\0`) divided blocks of bytes that start at the key
/// resp. value, and contain the rest string data. In particular, the len of
/// the block is *not* the len of the key resp. value. The user is responsible
/// to check or find the zero terminator.
#[inline]
pub fn find_tag_by(
    archive: &Osm,
    mut range: Range<u64>,
    mut predicate: impl FnMut(&[u8], &[u8]) -> bool,
) -> Option<&[u8]> {
    let tags = archive.tags();
    let tags_index = archive.tags_index();
    let strings = archive.stringtable();

    // Pass the whole tail of the string table to `predicate`: this avoids
    // scanning for the terminating `\0` unless the caller actually needs it.
    range.find_map(move |idx| {
        let tag = &tags[tags_index[idx as usize].value() as usize];
        let key_block = &strings.as_bytes()[tag.key_idx() as usize..];
        let value_block = &strings.as_bytes()[tag.value_idx() as usize..];
        if predicate(key_block, value_block) {
            Some(strings.substring_raw(tag.value_idx() as usize))
        } else {
            None
        }
    })
}

/// Finds a tag by its key in the given `range` and returns the corresponding
/// value.
#[inline]
pub fn find_tag<'a>(archive: &'a Osm, range: Range<u64>, key: &[u8]) -> Option<&'a [u8]> {
    // The key matches iff the key block starts with `key` and is terminated
    // right after it by `\0` (or by the end of the string table).
    find_tag_by(archive, range, |key_block, _| {
        key_block.starts_with(key) && *key_block.get(key.len()).unwrap_or(&0) == 0
    })
}

/// Checks if there is a tag in `range` with a given `key` and `value`.
#[inline]
pub fn has_tag(archive: &Osm, range: Range<u64>, key: &[u8], value: &[u8]) -> bool {
    let tags = archive.tags();
    let tags_index = archive.tags_index();
    let strings = archive.stringtable();

    // `idx` is an offset into the string table; it matches iff the block at
    // that offset starts with `value` followed by `\0` (or the table's end).
    let matches = |idx, value| {
        let block = &strings.as_bytes()[idx as usize..];
        block.starts_with(value) && *block.get(value.len()).unwrap_or(&0) == 0
    };

    for idx in range {
        let tag = &tags[tags_index[idx as usize].value() as usize];
        if matches(tag.key_idx(), key) {
            // First matching key decides; tags with duplicate keys are not
            // searched further.
            return matches(tag.value_idx(), value);
        }
    }
    false
}
--------------------------------------------------------------------------------
/osmflatc/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "osmflatc"
version = "0.3.1"
authors = [
    "boxdot ",
    "Christian Vetter ",
    "Gabriel Féron "
]
license = "MIT/Apache-2.0"
description = "Compiler of OpenStreetMap (OSM) Data from osm.pbf format to osm.flatdata format"
repository = "https://github.com/boxdot/osmflat-rs"
keywords = ["serialization", "osm", "openstreetmap", "flatdata"]
categories = ["encoding"]
readme = "README.md"
edition = "2021"

[dependencies]
byteorder = "1.4.3"
bytes = "1.4.0"
clap = { version = "4.1.4", features = ["derive"] }
crossbeam = "0.8.2"
env_logger = "0.11.0"
flatdata = "0.5.3"
flate2 = "1.0.25"
itertools = "0.13.0"
log = "0.4.17"
memmap2 = "0.9.0"
osmflat = "0.3.0"
parking_lot = "0.12.1"
prost = "0.13.2"
prost-derive =
"0.13.2" 32 | prost-types = "0.13.2" 33 | rayon = "1.6.1" 34 | ahash = "0.8.3" 35 | indicatif = "0.17.3" 36 | 37 | [build-dependencies] 38 | prost-build = "0.13.2" 39 | 40 | [dev-dependencies] 41 | proptest = "1.0.0" 42 | -------------------------------------------------------------------------------- /osmflatc/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /osmflatc/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /osmflatc/README.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /osmflatc/build.rs: -------------------------------------------------------------------------------- 1 | extern crate prost_build; 2 | 3 | fn main() { 4 | prost_build::compile_protos( 5 | &["src/proto/osmformat.proto", "src/proto/fileformat.proto"], 6 | &["src/proto"], 7 | ) 8 | .expect("failed to compile protobuf"); 9 | } 10 | -------------------------------------------------------------------------------- /osmflatc/src/args.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use clap::Parser; 4 | 5 | /// Compiler of Open Street Data from osm.pbf format to osm.flatdata format 6 | #[derive(Debug, Parser)] 7 | #[clap(about, version, author)] 8 | pub struct Args { 9 | /// Verbose mode (-v, -vv, -vvv, etc.) 
#[clap(short, long, action = clap::ArgAction::Count)]
    pub verbose: u8,

    /// Input OSM pbf file
    pub input: PathBuf,

    /// Output directory for OSM flatdata archive
    pub output: PathBuf,

    /// Whether to compile the optional ids subs
    #[arg(long = "ids")]
    pub ids: bool,
}
--------------------------------------------------------------------------------
/osmflatc/src/ids.rs:
--------------------------------------------------------------------------------
// Truncated ids within one block cover the range [0, 2^24).
const ID_BLOCK_SIZE: usize = 1 << 24;
const DENSE_LOOKUP_BLOCK_SIZE: usize = 1 << 4;

/// An IdBlock can either be Sparse or Dense
/// Sparse: A sorted list of ids, the position determines the index
/// Dense: A bitset of the whole range. An additional offsets lookup
///        provides fast lookup for the index by storing the sum of
///        set bits every DENSE_LOOKUP_BLOCK_SIZE * 8 bits
#[derive(Debug, Clone)]
enum IdBlock {
    Dense {
        // bitset over the 2^24 possible truncated ids, one bit each
        includes: Vec<u8>,
        // prefix sums of set bits, one entry per DENSE_LOOKUP_BLOCK_SIZE bytes
        offsets: Vec<u32>,
    },
    Sparse(Vec<u32>),
}

impl IdBlock {
    /// Amount of ids in the block
    fn count(&self) -> u32 {
        match self {
            IdBlock::Sparse(ids) => ids.len() as u32,
            IdBlock::Dense { offsets, includes } => {
                // last lookup entry + popcount of the trailing bytes that the
                // lookup table does not cover
                let last_bits: u32 = includes[includes.len() - DENSE_LOOKUP_BLOCK_SIZE..]
                    .iter()
                    .map(|x| x.count_ones())
                    .sum();
                *offsets.last().unwrap() + last_bits
            }
        }
    }

    /// adds a truncated id into the current block
    fn insert(&mut self, x: u32) {
        match self {
            IdBlock::Sparse(ids) => {
                // Stay sparse while the id list is cheaper than the dense
                // bitset; otherwise convert this block to Dense.
                if ids.len() * 8 < ID_BLOCK_SIZE / 8 {
                    ids.push(x)
                } else {
                    let mut dense = IdBlock::Dense {
                        includes: vec![0; ID_BLOCK_SIZE / 8],
                        offsets: vec![0; ID_BLOCK_SIZE / 8 / DENSE_LOOKUP_BLOCK_SIZE],
                    };
                    for id in ids {
                        dense.insert(*id);
                    }
                    dense.insert(x);

                    *self = dense;
                }
            }
            IdBlock::Dense { includes, ..
} => includes[x as usize / 8] |= 1 << (x % 8),
        }
    }

    /// Establishes the lookup tables (only needed for Dense blocks).
    ///
    /// Afterwards `offsets[i]` holds the number of set bits in all bytes
    /// preceding lookup block `i`, so `pos` can answer rank queries quickly.
    fn finalize(&mut self) {
        if let IdBlock::Dense { includes, offsets } = self {
            // per-lookup-block popcounts ...
            for block in 0..offsets.len() - 1 {
                offsets[block + 1] = includes
                    [block * DENSE_LOOKUP_BLOCK_SIZE..(block + 1) * DENSE_LOOKUP_BLOCK_SIZE]
                    .iter()
                    .map(|x| x.count_ones())
                    .sum();
            }
            // ... turned into prefix sums
            for block in 0..offsets.len() - 1 {
                offsets[block + 1] += offsets[block];
            }
        }
    }

    /// Finds the position/index of a truncated id (if it is in the block).
    fn pos(&self, x: u32) -> Option<u32> {
        match self {
            IdBlock::Sparse(ids) => ids.binary_search(&x).ok().map(|x| x as u32),
            IdBlock::Dense { includes, offsets } => {
                if (includes[x as usize / 8] & (1 << (x % 8))) == 0 {
                    None
                } else {
                    // rank(x) = prefix sum up to the lookup block of `x` plus
                    // a popcount of the remaining bits before `x`
                    let offset_pos = x as usize / 8 / DENSE_LOOKUP_BLOCK_SIZE;
                    let start_block = offset_pos * 8 * DENSE_LOOKUP_BLOCK_SIZE;
                    let rest = x as usize % (8 * DENSE_LOOKUP_BLOCK_SIZE);
                    let mut result = offsets[offset_pos];
                    for i in start_block..start_block + rest {
                        result += ((includes[i / 8] & (1 << (i % 8))) != 0) as u32;
                    }
                    Some(result)
                }
            }
        }
    }
}

/// Maps u64 integers to a consecutive range of ids
#[derive(Debug)]
pub struct IdTable {
    // map u64 id x to u32 by storing a sorted mapping table for each value of
    // x / 2^24; the u64 of each entry is the number of ids in all blocks before
    data: Vec<(u64, IdBlock)>,
}

#[derive(Debug, Default)]
pub struct IdTableBuilder {
    // stores the same data as IdTable, but still in process of being built
    data: Vec<IdBlock>,
    last_id: Option<u64>,
    next_id: u64,
}

impl IdTableBuilder {
    pub fn new() -> Self {
        Default::default()
    }

    /// Inserts an Id and returns a mapped index
    ///
    /// Panics if ids are not inserted in strictly increasing order.
    pub fn insert(&mut self, x: u64) -> u64 {
        if let Some(last_id) = self.last_id {
            assert!(last_id < x, "Ids are expected to be sorted");
        }
        self.last_id = Some(x);
        let id_set = (x >> 24) as usize;
        if self.data.len() <= id_set {
            self.data.resize(id_set + 1, IdBlock::Sparse(Vec::new()));
        }
        self.data[id_set].insert((x % (1u64 << 24)) as u32);
        let result = self.next_id;
        self.next_id += 1;
        result
    }

    /// Finalizes all blocks and computes each block's starting index.
    pub fn build(mut self) -> IdTable {
        for ids in &mut self.data {
            ids.finalize();
        }
        let result = self
            .data
            .into_iter()
            .scan(0, |state, ids| {
                let offset = *state;
                *state += ids.count() as u64;
                Some((offset, ids))
            })
            .collect();
        IdTable { data: result }
    }
}

impl IdTable {
    /// Returns the consecutive index assigned to id `x`, or `None` if `x` was
    /// never inserted.
    pub fn get(&self, x: u64) -> Option<u64> {
        let id_set = (x >> 24) as usize;
        // BUGFIX: was `id_set > self.data.len()`, which let
        // `id_set == self.data.len()` fall through and panic on the index
        // below for ids just past the last allocated block.
        if id_set >= self.data.len() {
            return None;
        }
        self.data[id_set]
            .1
            .pos((x % (1u64 << 24)) as u32)
            .map(|pos| self.data[id_set].0 + pos as u64)
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_mapping_of_small_ints() {
        let mut builder = IdTableBuilder::new();
        let mut data = [9, 8, 7, 4, 3, 10, 13];
        data.sort_unstable();
        for x in data.iter() {
            builder.insert(*x);
        }

        let lookup = builder.build();
        for (pos, x) in data.iter().enumerate() {
            let res = lookup.get(*x);
            assert_eq!(res, Some(pos as u64));
        }

        for x in [0, 1, 2, 5, 6, 11, 12, 14].iter() {
            let res = lookup.get(*x);
            assert_eq!(res, None);
        }
    }

    #[test]
    fn test_mapping_of_large_ints() {
        let mut builder = IdTableBuilder::new();
        let mut data = [2, 1, 1_u64 << 33, 1_u64 << 34];
        data.sort_unstable();
        for x in data.iter() {
            builder.insert(*x);
        }

        let lookup = builder.build();
        for (pos, x) in data.iter().enumerate() {
            let res = lookup.get(*x);
| assert_eq!(res, Some(pos as u64)); 198 | } 199 | 200 | for x in [0, 3, (1_u64 << 33) + 1, (1_u64 << 34) + 1, 1_u64 << 35].iter() { 201 | let res = lookup.get(*x); 202 | assert_eq!(res, None); 203 | } 204 | } 205 | 206 | #[test] 207 | fn test_large_indices() { 208 | let mut builder = IdTableBuilder::new(); 209 | let mut data = [2, 1, 1_u64 << 33, 1_u64 << 34]; 210 | data.sort_unstable(); 211 | for x in data.iter() { 212 | builder.insert(*x); 213 | } 214 | 215 | let lookup = builder.build(); 216 | for (pos, x) in data.iter().enumerate() { 217 | let res = lookup.get(*x); 218 | assert_eq!(res, Some(pos as u64)); 219 | } 220 | 221 | for x in [0, 3, (1_u64 << 33) + 1, (1_u64 << 34) + 1, 1_u64 << 35].iter() { 222 | let res = lookup.get(*x); 223 | assert_eq!(res, None); 224 | } 225 | } 226 | 227 | #[test] 228 | fn test_dense() { 229 | let mut builder = IdTableBuilder::new(); 230 | let mut data = Vec::new(); 231 | for i in 0..ID_BLOCK_SIZE { 232 | data.push(i as u64 * 3 + (1_u64 << 34)); 233 | } 234 | data.sort_unstable(); 235 | for x in data.iter() { 236 | builder.insert(*x); 237 | } 238 | 239 | let lookup = builder.build(); 240 | for i in 0..ID_BLOCK_SIZE * 3 { 241 | let res = lookup.get(i as u64 + (1_u64 << 34)); 242 | if i % 3 == 0 { 243 | assert_eq!(Some(i as u64 / 3), res); 244 | } else { 245 | assert_eq!(None, res); 246 | } 247 | } 248 | } 249 | } 250 | -------------------------------------------------------------------------------- /osmflatc/src/main.rs: -------------------------------------------------------------------------------- 1 | mod args; 2 | mod ids; 3 | mod osmpbf; 4 | mod parallel; 5 | mod stats; 6 | mod strings; 7 | 8 | use crate::osmpbf::{build_block_index, read_block, BlockIndex, BlockType}; 9 | use crate::stats::Stats; 10 | use crate::strings::StringTable; 11 | 12 | use clap::Parser; 13 | use flatdata::FileResourceStorage; 14 | use indicatif::{ProgressBar, ProgressStyle}; 15 | use itertools::Itertools; 16 | use log::{error, info}; 17 | use 
memmap2::Mmap; 18 | 19 | use ahash::AHashMap; 20 | use std::collections::hash_map; 21 | use std::fs::File; 22 | use std::io; 23 | use std::str; 24 | 25 | type Error = Box; 26 | 27 | fn serialize_header( 28 | header_block: &osmpbf::HeaderBlock, 29 | coord_scale: i32, 30 | builder: &osmflat::OsmBuilder, 31 | stringtable: &mut StringTable, 32 | ) -> io::Result<()> { 33 | let mut header = osmflat::Header::new(); 34 | 35 | header.set_coord_scale(coord_scale); 36 | 37 | if let Some(ref bbox) = header_block.bbox { 38 | header.set_bbox_left((bbox.left / (1000000000 / coord_scale) as i64) as i32); 39 | header.set_bbox_right((bbox.right / (1000000000 / coord_scale) as i64) as i32); 40 | header.set_bbox_top((bbox.top / (1000000000 / coord_scale) as i64) as i32); 41 | header.set_bbox_bottom((bbox.bottom / (1000000000 / coord_scale) as i64) as i32); 42 | }; 43 | 44 | header.set_writingprogram_idx(stringtable.insert("osmflatc")); 45 | 46 | if let Some(ref source) = header_block.source { 47 | header.set_source_idx(stringtable.insert(source)); 48 | } 49 | 50 | if let Some(timestamp) = header_block.osmosis_replication_timestamp { 51 | header.set_replication_timestamp(timestamp); 52 | } 53 | 54 | if let Some(number) = header_block.osmosis_replication_sequence_number { 55 | header.set_replication_sequence_number(number); 56 | } 57 | 58 | if let Some(ref url) = header_block.osmosis_replication_base_url { 59 | header.set_replication_base_url_idx(stringtable.insert(url)); 60 | } 61 | 62 | builder.set_header(&header)?; 63 | Ok(()) 64 | } 65 | 66 | #[derive(PartialEq, Eq, Copy, Clone)] 67 | struct I40 { 68 | x: [u8; 5], 69 | } 70 | 71 | impl I40 { 72 | fn from_u64(x: u64) -> Self { 73 | let x = x.to_le_bytes(); 74 | debug_assert_eq!((x[5], x[6], x[7]), (0, 0, 0)); 75 | Self { 76 | x: [x[0], x[1], x[2], x[3], x[4]], 77 | } 78 | } 79 | 80 | fn to_u64(self) -> u64 { 81 | let extented = [ 82 | self.x[0], self.x[1], self.x[2], self.x[3], self.x[4], 0, 0, 0, 83 | ]; 84 | 
u64::from_le_bytes(extented)
    }
}

#[allow(clippy::derived_hash_with_manual_eq)]
impl std::hash::Hash for I40 {
    fn hash<H>(&self, h: &mut H)
    where
        H: std::hash::Hasher,
    {
        // We manually implement Hash like this, since [u8; 5] is slower to hash
        // than u64 for some/many hash functions
        self.to_u64().hash(h)
    }
}

/// Holds tags external vector and deduplicates tags.
struct TagSerializer<'a> {
    tags: flatdata::ExternalVector<'a, osmflat::Tag>,
    tags_index: flatdata::ExternalVector<'a, osmflat::TagIndex>,
    dedup: AHashMap<(I40, I40), I40>, // deduplication table: (key_idx, val_idx) -> pos
}

impl<'a> TagSerializer<'a> {
    /// Opens the `tags` and `tags_index` resources of the archive for writing.
    fn new(builder: &'a osmflat::OsmBuilder) -> io::Result<Self> {
        Ok(Self {
            tags: builder.start_tags()?,
            tags_index: builder.start_tags_index()?,
            dedup: AHashMap::new(),
        })
    }

    /// Appends one reference to `tags_index`; the (key_idx, val_idx) pair
    /// itself is appended to `tags` only the first time it is seen.
    fn serialize(&mut self, key_idx: u64, val_idx: u64) -> Result<(), Error> {
        let idx = match self
            .dedup
            .entry((I40::from_u64(key_idx), I40::from_u64(val_idx)))
        {
            hash_map::Entry::Occupied(entry) => entry.get().to_u64(),
            hash_map::Entry::Vacant(entry) => {
                // New (key, value) combination: append to `tags` and remember
                // its position for future lookups.
                let idx = self.tags.len() as u64;
                let tag = self.tags.grow()?;
                tag.set_key_idx(key_idx);
                tag.set_value_idx(val_idx);
                entry.insert(I40::from_u64(idx));
                idx
            }
        };

        let tag_index = self.tags_index.grow()?;
        tag_index.set_value(idx);

        Ok(())
    }

    /// Index that the next serialized tag reference will receive.
    fn next_index(&self) -> u64 {
        self.tags_index.len() as u64
    }

    /// Flushes both vectors to storage; panics if closing fails.
    fn close(self) {
        if let Err(e) = self.tags.close() {
            panic!("failed to close tags: {}", e);
        }
        if let Err(e) = self.tags_index.close() {
            panic!("failed to close tags index: {}", e);
        }
    }
}

/// adds all strings in a table to the lookup and returns a vector of
references to be used instead 154 | fn add_string_table( 155 | pbf_stringtable: &osmpbf::StringTable, 156 | stringtable: &mut StringTable, 157 | ) -> Result, Error> { 158 | let mut result = Vec::with_capacity(pbf_stringtable.s.len()); 159 | for x in &pbf_stringtable.s { 160 | let string = str::from_utf8(x)?; 161 | result.push(stringtable.insert(string)); 162 | } 163 | Ok(result) 164 | } 165 | 166 | fn serialize_dense_nodes( 167 | block: &osmpbf::PrimitiveBlock, 168 | granularity: i32, 169 | nodes: &mut flatdata::ExternalVector, 170 | node_ids: &mut Option>, 171 | nodes_id_to_idx: &mut ids::IdTableBuilder, 172 | stringtable: &mut StringTable, 173 | tags: &mut TagSerializer, 174 | ) -> Result { 175 | let mut stats = Stats::default(); 176 | let string_refs = add_string_table(&block.stringtable, stringtable)?; 177 | for group in block.primitivegroup.iter() { 178 | let dense_nodes = group.dense.as_ref().unwrap(); 179 | 180 | let pbf_granularity = block.granularity.unwrap_or(100); 181 | let lat_offset = block.lat_offset.unwrap_or(0); 182 | let lon_offset = block.lon_offset.unwrap_or(0); 183 | let mut lat = 0; 184 | let mut lon = 0; 185 | 186 | let mut tags_offset = 0; 187 | 188 | let mut id = 0; 189 | for i in 0..dense_nodes.id.len() { 190 | id += dense_nodes.id[i]; 191 | 192 | let index = nodes_id_to_idx.insert(id as u64); 193 | assert_eq!(index as usize, nodes.len()); 194 | 195 | let node = nodes.grow()?; 196 | if let Some(ids) = node_ids { 197 | ids.grow()?.set_value(id as u64); 198 | } 199 | 200 | lat += dense_nodes.lat[i]; 201 | lon += dense_nodes.lon[i]; 202 | node.set_lat( 203 | ((lat_offset + (i64::from(pbf_granularity) * lat)) / granularity as i64) as i32, 204 | ); 205 | node.set_lon( 206 | ((lon_offset + (i64::from(pbf_granularity) * lon)) / granularity as i64) as i32, 207 | ); 208 | 209 | if tags_offset < dense_nodes.keys_vals.len() { 210 | node.set_tag_first_idx(tags.next_index()); 211 | loop { 212 | let k = dense_nodes.keys_vals[tags_offset]; 213 | 
tags_offset += 1; 214 | 215 | if k == 0 { 216 | break; // separator 217 | } 218 | 219 | let v = dense_nodes.keys_vals[tags_offset]; 220 | tags_offset += 1; 221 | 222 | tags.serialize(string_refs[k as usize], string_refs[v as usize])?; 223 | } 224 | } 225 | } 226 | assert_eq!(tags_offset, dense_nodes.keys_vals.len()); 227 | stats.num_nodes += dense_nodes.id.len(); 228 | } 229 | Ok(stats) 230 | } 231 | 232 | fn resolve_ways( 233 | block: &osmpbf::PrimitiveBlock, 234 | nodes_id_to_idx: &ids::IdTable, 235 | ) -> (Vec>, Stats) { 236 | let mut result = Vec::new(); 237 | let mut stats = Stats::default(); 238 | for group in &block.primitivegroup { 239 | for pbf_way in &group.ways { 240 | let mut node_ref = 0; 241 | for delta in &pbf_way.refs { 242 | node_ref += delta; 243 | let idx = nodes_id_to_idx.get(node_ref as u64); 244 | stats.num_unresolved_node_ids += idx.is_none() as usize; 245 | 246 | result.push(idx); 247 | } 248 | } 249 | } 250 | (result, stats) 251 | } 252 | 253 | #[allow(clippy::too_many_arguments)] 254 | fn serialize_ways( 255 | block: &osmpbf::PrimitiveBlock, 256 | nodes_id_to_idx: &[Option], 257 | ways: &mut flatdata::ExternalVector, 258 | way_ids: &mut Option>, 259 | ways_id_to_idx: &mut ids::IdTableBuilder, 260 | stringtable: &mut StringTable, 261 | tags: &mut TagSerializer, 262 | nodes_index: &mut flatdata::ExternalVector, 263 | ) -> Result { 264 | let mut stats = Stats::default(); 265 | let string_refs = add_string_table(&block.stringtable, stringtable)?; 266 | let mut nodes_idx = nodes_id_to_idx.iter().cloned(); 267 | for group in &block.primitivegroup { 268 | for pbf_way in &group.ways { 269 | let index = ways_id_to_idx.insert(pbf_way.id as u64); 270 | assert_eq!(index as usize, ways.len()); 271 | 272 | let way = ways.grow()?; 273 | if let Some(ids) = way_ids { 274 | ids.grow()?.set_value(pbf_way.id as u64); 275 | } 276 | 277 | debug_assert_eq!(pbf_way.keys.len(), pbf_way.vals.len(), "invalid input data"); 278 | 
way.set_tag_first_idx(tags.next_index()); 279 | 280 | for i in 0..pbf_way.keys.len() { 281 | tags.serialize( 282 | string_refs[pbf_way.keys[i] as usize], 283 | string_refs[pbf_way.vals[i] as usize], 284 | )?; 285 | } 286 | 287 | way.set_ref_first_idx(nodes_index.len() as u64); 288 | for _ in &pbf_way.refs { 289 | nodes_index.grow()?.set_value(nodes_idx.next().unwrap()); 290 | } 291 | } 292 | stats.num_ways += group.ways.len(); 293 | } 294 | Ok(stats) 295 | } 296 | 297 | fn build_relations_index(data: &[u8], block_index: I) -> Result 298 | where 299 | I: ExactSizeIterator + Send + 'static, 300 | { 301 | let mut result = ids::IdTableBuilder::new(); 302 | let pb = ProgressBar::new(block_index.len() as u64) 303 | .with_style(pb_style()) 304 | .with_prefix("Building relations index"); 305 | parallel::parallel_process( 306 | block_index, 307 | |idx| read_block(data, &idx), 308 | |block: Result| -> Result<(), Error> { 309 | for group in &block?.primitivegroup { 310 | for relation in &group.relations { 311 | result.insert(relation.id as u64); 312 | } 313 | } 314 | pb.inc(1); 315 | Ok(()) 316 | }, 317 | )?; 318 | pb.finish(); 319 | 320 | Ok(result.build()) 321 | } 322 | 323 | #[allow(clippy::too_many_arguments)] 324 | fn serialize_relations( 325 | block: &osmpbf::PrimitiveBlock, 326 | nodes_id_to_idx: &ids::IdTable, 327 | ways_id_to_idx: &ids::IdTable, 328 | relations_id_to_idx: &ids::IdTable, 329 | stringtable: &mut StringTable, 330 | relations: &mut flatdata::ExternalVector, 331 | relation_ids: &mut Option>, 332 | relation_members: &mut flatdata::MultiVector, 333 | tags: &mut TagSerializer, 334 | ) -> Result { 335 | let mut stats = Stats::default(); 336 | let string_refs = add_string_table(&block.stringtable, stringtable)?; 337 | for group in &block.primitivegroup { 338 | for pbf_relation in &group.relations { 339 | let relation = relations.grow()?; 340 | if let Some(ids) = relation_ids { 341 | ids.grow()?.set_value(pbf_relation.id as u64); 342 | } 343 | 344 | 
debug_assert_eq!(
                pbf_relation.keys.len(),
                pbf_relation.vals.len(),
                "invalid input data"
            );
            relation.set_tag_first_idx(tags.next_index());
            for i in 0..pbf_relation.keys.len() {
                tags.serialize(
                    string_refs[pbf_relation.keys[i] as usize],
                    string_refs[pbf_relation.vals[i] as usize],
                )?;
            }

            debug_assert!(
                pbf_relation.roles_sid.len() == pbf_relation.memids.len()
                    && pbf_relation.memids.len() == pbf_relation.types.len(),
                "invalid input data"
            );

            // Member ids are delta-encoded in the pbf data.
            let mut memid = 0;
            let mut members = relation_members.grow()?;
            for i in 0..pbf_relation.roles_sid.len() {
                memid += pbf_relation.memids[i];

                let member_type = osmpbf::relation::MemberType::try_from(pbf_relation.types[i]);
                debug_assert!(member_type.is_ok());

                match member_type.unwrap() {
                    osmpbf::relation::MemberType::Node => {
                        let idx = nodes_id_to_idx.get(memid as u64);
                        // BUGFIX: accumulate (`+=`) instead of overwriting
                        // (`=`), consistent with `resolve_ways`; otherwise
                        // the unresolved counters only reflected the last
                        // member seen.
                        stats.num_unresolved_node_ids += idx.is_none() as usize;

                        let member = members.add_node_member();
                        member.set_node_idx(idx);
                        member.set_role_idx(string_refs[pbf_relation.roles_sid[i] as usize]);
                    }
                    osmpbf::relation::MemberType::Way => {
                        let idx = ways_id_to_idx.get(memid as u64);
                        // BUGFIX: `+=` instead of `=` (see above).
                        stats.num_unresolved_way_ids += idx.is_none() as usize;

                        let member = members.add_way_member();
                        member.set_way_idx(idx);
                        member.set_role_idx(string_refs[pbf_relation.roles_sid[i] as usize]);
                    }
                    osmpbf::relation::MemberType::Relation => {
                        let idx = relations_id_to_idx.get(memid as u64);
                        // BUGFIX: `+=` instead of `=` (see above).
                        stats.num_unresolved_rel_ids += idx.is_none() as usize;

                        let member = members.add_relation_member();
                        member.set_relation_idx(idx);
                        member.set_role_idx(string_refs[pbf_relation.roles_sid[i] as usize]);
                    }
                }
            }
            stats.num_relations += 1;
        }
    }
    Ok(stats)
}

#[allow(clippy::too_many_arguments)] 405 | fn serialize_dense_node_blocks( 406 | builder: &osmflat::OsmBuilder, 407 | granularity: i32, 408 | mut node_ids: Option>, 409 | blocks: Vec, 410 | data: &[u8], 411 | tags: &mut TagSerializer, 412 | stringtable: &mut StringTable, 413 | stats: &mut Stats, 414 | ) -> Result { 415 | let mut nodes_id_to_idx = ids::IdTableBuilder::new(); 416 | let mut nodes = builder.start_nodes()?; 417 | let pb = ProgressBar::new(blocks.len() as u64) 418 | .with_style(pb_style()) 419 | .with_prefix("Converting dense nodes"); 420 | parallel::parallel_process( 421 | blocks.into_iter(), 422 | |idx| read_block(data, &idx), 423 | |block| -> Result { 424 | let block = block?; 425 | *stats += serialize_dense_nodes( 426 | &block, 427 | granularity, 428 | &mut nodes, 429 | &mut node_ids, 430 | &mut nodes_id_to_idx, 431 | stringtable, 432 | tags, 433 | )?; 434 | 435 | pb.inc(1); 436 | Ok(block) 437 | }, 438 | )?; 439 | pb.finish(); 440 | 441 | // fill tag_first_idx of the sentry, since it contains the end of the tag range 442 | // of the last node 443 | nodes.grow()?.set_tag_first_idx(tags.next_index()); 444 | nodes.close()?; 445 | if let Some(ids) = node_ids { 446 | ids.close()?; 447 | } 448 | info!("Dense nodes converted."); 449 | info!("Building dense nodes index..."); 450 | let nodes_id_to_idx = nodes_id_to_idx.build(); 451 | info!("Dense nodes index built."); 452 | Ok(nodes_id_to_idx) 453 | } 454 | 455 | type PrimitiveBlockWithIds = (osmpbf::PrimitiveBlock, (Vec>, Stats)); 456 | 457 | #[allow(clippy::too_many_arguments)] 458 | fn serialize_way_blocks( 459 | builder: &osmflat::OsmBuilder, 460 | mut way_ids: Option>, 461 | blocks: Vec, 462 | data: &[u8], 463 | nodes_id_to_idx: &ids::IdTable, 464 | tags: &mut TagSerializer, 465 | stringtable: &mut StringTable, 466 | stats: &mut Stats, 467 | ) -> Result { 468 | let mut ways_id_to_idx = ids::IdTableBuilder::new(); 469 | let mut ways = builder.start_ways()?; 470 | let pb = ProgressBar::new(blocks.len() as 
u64) 471 | .with_style(pb_style()) 472 | .with_prefix("Converting ways"); 473 | let mut nodes_index = builder.start_nodes_index()?; 474 | parallel::parallel_process( 475 | blocks.into_iter(), 476 | |idx| { 477 | let block: osmpbf::PrimitiveBlock = read_block(data, &idx)?; 478 | let ids = resolve_ways(&block, nodes_id_to_idx); 479 | Ok((block, ids)) 480 | }, 481 | |block: io::Result| -> Result { 482 | let (block, (ids, stats_resolve)) = block?; 483 | *stats += stats_resolve; 484 | *stats += serialize_ways( 485 | &block, 486 | &ids, 487 | &mut ways, 488 | &mut way_ids, 489 | &mut ways_id_to_idx, 490 | stringtable, 491 | tags, 492 | &mut nodes_index, 493 | )?; 494 | pb.inc(1); 495 | 496 | Ok(block) 497 | }, 498 | )?; 499 | 500 | { 501 | let sentinel = ways.grow()?; 502 | sentinel.set_tag_first_idx(tags.next_index()); 503 | sentinel.set_ref_first_idx(nodes_index.len() as u64); 504 | } 505 | ways.close()?; 506 | if let Some(ids) = way_ids { 507 | ids.close()?; 508 | } 509 | nodes_index.close()?; 510 | 511 | pb.finish(); 512 | info!("Ways converted."); 513 | info!("Building ways index..."); 514 | let ways_id_to_idx = ways_id_to_idx.build(); 515 | info!("Way index built."); 516 | Ok(ways_id_to_idx) 517 | } 518 | 519 | #[allow(clippy::too_many_arguments)] 520 | fn serialize_relation_blocks( 521 | builder: &osmflat::OsmBuilder, 522 | mut relation_ids: Option>, 523 | blocks: Vec, 524 | data: &[u8], 525 | nodes_id_to_idx: &ids::IdTable, 526 | ways_id_to_idx: &ids::IdTable, 527 | tags: &mut TagSerializer, 528 | stringtable: &mut StringTable, 529 | stats: &mut Stats, 530 | ) -> Result<(), Error> { 531 | // We need to build the index of relation ids first, since relations can refer 532 | // again to relations. 
let relations_id_to_idx = build_relations_index(data, blocks.clone().into_iter())?;

    let mut relations = builder.start_relations()?;
    let mut relation_members = builder.start_relation_members()?;

    let pb = ProgressBar::new(blocks.len() as u64)
        .with_style(pb_style())
        .with_prefix("Converting relations");
    parallel::parallel_process(
        blocks.into_iter(),
        |idx| read_block(data, &idx),
        |block| -> Result<osmpbf::PrimitiveBlock, Error> {
            let block = block?;
            *stats += serialize_relations(
                &block,
                nodes_id_to_idx,
                ways_id_to_idx,
                &relations_id_to_idx,
                stringtable,
                &mut relations,
                &mut relation_ids,
                &mut relation_members,
                tags,
            )?;
            pb.inc(1);
            Ok(block)
        },
    )?;

    {
        // Sentinel entry: its tag_first_idx marks the end of the tag range of
        // the last relation.
        let sentinel = relations.grow()?;
        sentinel.set_tag_first_idx(tags.next_index());
    }

    relations.close()?;
    if let Some(ids) = relation_ids {
        ids.close()?;
    }
    relation_members.close()?;

    pb.finish();
    info!("Relations converted.");

    Ok(())
}

/// Greatest common divisor of two non-negative integers (Euclid's algorithm).
fn gcd(a: i32, b: i32) -> i32 {
    let (mut x, mut y) = (a.min(b), a.max(b));
    // BUGFIX: iterate until x == 0 (was `x > 1`). Stopping at x == 1 returned
    // the previous remainder instead of 1 for coprime inputs, e.g.
    // gcd(3, 5) yielded 2. With `x > 0` the invariant gcd(x, y) is preserved
    // until x == 0, at which point y is the gcd.
    while x > 0 {
        y %= x;
        std::mem::swap(&mut x, &mut y);
    }
    y
}

fn run(args: args::Args) -> Result<(), Error> {
    let input_file = File::open(&args.input)?;
    // NOTE(review): mmap assumes the input file is not truncated/modified
    // concurrently while being read — confirm this is acceptable for the CLI.
    let input_data = unsafe { Mmap::map(&input_file)? };

    let storage = FileResourceStorage::new(args.output.clone());
    let builder = osmflat::OsmBuilder::new(storage.clone())?;

    // TODO: Would be nice not store all these strings in memory, but to flush them
    // from time to time to disk.
597 | let mut stringtable = StringTable::new(); 598 | let mut tags = TagSerializer::new(&builder)?; 599 | 600 | info!( 601 | "Initialized new osmflat archive at: {}", 602 | &args.output.display() 603 | ); 604 | 605 | info!("Building index of PBF blocks..."); 606 | let block_index = build_block_index(&input_data); 607 | let mut greatest_common_granularity = 1000000000; 608 | for block in &block_index { 609 | if block.block_type == BlockType::DenseNodes { 610 | // only DenseNodes have coordinate we need to scale 611 | if let Some(block_granularity) = block.granularity { 612 | greatest_common_granularity = 613 | gcd(greatest_common_granularity, block_granularity as i32); 614 | } 615 | } 616 | } 617 | let coord_scale = 1000000000 / greatest_common_granularity; 618 | info!( 619 | "Greatest common granularity: {}, Coordinate scaling factor: {}", 620 | greatest_common_granularity, coord_scale 621 | ); 622 | 623 | // TODO: move out into a function 624 | let groups = block_index.into_iter().chunk_by(|b| b.block_type); 625 | let mut pbf_header = Vec::new(); 626 | let mut pbf_dense_nodes = Vec::new(); 627 | let mut pbf_ways = Vec::new(); 628 | let mut pbf_relations = Vec::new(); 629 | for (block_type, blocks) in &groups { 630 | match block_type { 631 | BlockType::Header => pbf_header = blocks.collect(), 632 | BlockType::Nodes => panic!("Found nodes block, only dense nodes are supported now"), 633 | BlockType::DenseNodes => pbf_dense_nodes = blocks.collect(), 634 | BlockType::Ways => pbf_ways = blocks.collect(), 635 | BlockType::Relations => pbf_relations = blocks.collect(), 636 | } 637 | } 638 | info!("PBF block index built."); 639 | 640 | // Serialize header 641 | if pbf_header.len() != 1 { 642 | return Err(format!( 643 | "Require exactly one header block, but found {}", 644 | pbf_header.len() 645 | ) 646 | .into()); 647 | } 648 | let idx = &pbf_header[0]; 649 | let pbf_header: osmpbf::HeaderBlock = read_block(&input_data, idx)?; 650 | serialize_header(&pbf_header, 
coord_scale, &builder, &mut stringtable)?; 651 | info!("Header written."); 652 | 653 | let mut stats = Stats::default(); 654 | 655 | let ids_archive; 656 | let mut node_ids = None; 657 | let mut way_ids = None; 658 | let mut relation_ids = None; 659 | if args.ids { 660 | ids_archive = builder.ids()?; 661 | node_ids = Some(ids_archive.start_nodes()?); 662 | way_ids = Some(ids_archive.start_ways()?); 663 | relation_ids = Some(ids_archive.start_relations()?); 664 | } 665 | 666 | let nodes_id_to_idx = serialize_dense_node_blocks( 667 | &builder, 668 | greatest_common_granularity, 669 | node_ids, 670 | pbf_dense_nodes, 671 | &input_data, 672 | &mut tags, 673 | &mut stringtable, 674 | &mut stats, 675 | )?; 676 | 677 | let ways_id_to_idx = serialize_way_blocks( 678 | &builder, 679 | way_ids, 680 | pbf_ways, 681 | &input_data, 682 | &nodes_id_to_idx, 683 | &mut tags, 684 | &mut stringtable, 685 | &mut stats, 686 | )?; 687 | 688 | serialize_relation_blocks( 689 | &builder, 690 | relation_ids, 691 | pbf_relations, 692 | &input_data, 693 | &nodes_id_to_idx, 694 | &ways_id_to_idx, 695 | &mut tags, 696 | &mut stringtable, 697 | &mut stats, 698 | )?; 699 | 700 | // Finalize data structures 701 | tags.close(); // drop the reference to stringtable 702 | 703 | info!("Writing stringtable to disk..."); 704 | builder.set_stringtable(&stringtable.into_bytes())?; 705 | 706 | info!("osmflat archive built."); 707 | 708 | std::mem::drop(builder); 709 | osmflat::Osm::open(storage)?; 710 | 711 | info!("verified that osmflat archive can be opened."); 712 | 713 | println!("{stats}"); 714 | Ok(()) 715 | } 716 | 717 | fn pb_style() -> ProgressStyle { 718 | ProgressStyle::with_template("{prefix:>24} [{bar:23}] {pos}/{len}: {per_sec} {elapsed}") 719 | .unwrap() 720 | .progress_chars("=> ") 721 | } 722 | 723 | fn main() { 724 | let args = args::Args::parse(); 725 | let level = match args.verbose { 726 | 0 => "info", 727 | 1 => "debug", 728 | _ => "trace", 729 | }; 730 | 
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or(level)) 731 | .format_target(false) 732 | .format_module_path(false) 733 | .format_timestamp_nanos() 734 | .init(); 735 | 736 | if let Err(e) = run(args) { 737 | error!("{e}"); 738 | std::process::exit(1); 739 | } 740 | } 741 | -------------------------------------------------------------------------------- /osmflatc/src/osmpbf.rs: -------------------------------------------------------------------------------- 1 | #![allow(unknown_lints, clippy::derive_partial_eq_without_eq)] 2 | 3 | use byteorder::{ByteOrder, NetworkEndian}; 4 | use flate2::read::ZlibDecoder; 5 | use log::info; 6 | use prost::{self, Message}; 7 | use rayon::prelude::*; 8 | 9 | use std::io::{self, Read}; 10 | 11 | include!(concat!(env!("OUT_DIR"), "/osmpbf.rs")); 12 | 13 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 14 | pub enum BlockType { 15 | Header, 16 | Nodes, 17 | DenseNodes, 18 | Ways, 19 | Relations, 20 | } 21 | 22 | /// Decode block type from PrimitiveBlock protobuf message 23 | /// 24 | /// This does not decode any fields, it just checks which tags are present 25 | /// in PrimitiveGroup fields of the message. 26 | /// 27 | /// `blob` should contain decompressed data of an OSMData PrimitiveBlock. 28 | /// 29 | /// Note: We use public API of `prost` crate, which though is not exposed in 30 | /// the crate and marked with comment that it should be only used from 31 | /// `prost::Message`. 
32 | pub fn type_and_granularity_from_osmdata_blob(mut blob: &[u8]) -> io::Result<(BlockType, u64)> { 33 | const PRIMITIVE_GROUP_TAG: u32 = 2; 34 | const GRANULARITY_TAG: u32 = 17; 35 | const NODES_TAG: u32 = 1; 36 | const DENSE_NODES_TAG: u32 = 2; 37 | const WAY_STAG: u32 = 3; 38 | const RELATIONS_TAG: u32 = 4; 39 | const CHANGESETS_TAG: u32 = 5; 40 | 41 | let mut block_type = None; 42 | let mut granularity = 100; // default value 43 | while !blob.is_empty() { 44 | // decode fields of PrimitiveBlock 45 | let (key, wire_type) = prost::encoding::decode_key(&mut blob)?; 46 | let mut blob_copy = blob; 47 | if key == PRIMITIVE_GROUP_TAG { 48 | // We found a PrimitiveGroup field. There could be several of them, but 49 | // follwoing the specs of OSMPBF, all of them will have the same single 50 | // optional field, which defines the type of the block. 51 | 52 | // Decode the number of primitive groups. 53 | let _ = prost::encoding::decode_varint(&mut blob_copy)?; 54 | // Decode the tag of the first primitive group defining the type. 
55 | let (tag, _wire_type) = prost::encoding::decode_key(&mut blob_copy)?; 56 | block_type = match tag { 57 | NODES_TAG => Some(BlockType::Nodes), 58 | DENSE_NODES_TAG => Some(BlockType::DenseNodes), 59 | WAY_STAG => Some(BlockType::Ways), 60 | RELATIONS_TAG => Some(BlockType::Relations), 61 | CHANGESETS_TAG => { 62 | panic!("found block containing unsupported changesets"); 63 | } 64 | _ => { 65 | panic!("invalid input data: malformed primitive block"); 66 | } 67 | }; 68 | } else if key == GRANULARITY_TAG { 69 | granularity = prost::encoding::decode_varint(&mut blob_copy)?; 70 | } 71 | // skip payload 72 | prost::encoding::skip_field( 73 | wire_type, 74 | key, 75 | &mut blob, 76 | prost::encoding::DecodeContext::default(), 77 | )?; 78 | } 79 | match block_type { 80 | None => panic!("Found block without primitive group"), 81 | Some(x) => Ok((x, granularity)), 82 | } 83 | } 84 | 85 | #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] 86 | pub struct BlockIndex { 87 | pub block_type: BlockType, 88 | pub granularity: Option, 89 | pub blob_start: usize, 90 | pub blob_len: usize, 91 | } 92 | 93 | struct BlockIndexIterator<'a> { 94 | data: &'a [u8], 95 | cursor: usize, 96 | } 97 | 98 | enum BlobInfo { 99 | Header(BlockIndex), 100 | Unknown(usize, usize, Vec), 101 | } 102 | 103 | impl<'a> BlockIndexIterator<'a> { 104 | fn new(data: &'a [u8]) -> Self { 105 | Self { data, cursor: 0 } 106 | } 107 | 108 | fn read(&mut self, len: usize) -> &[u8] { 109 | let data = &self.data[self.cursor..self.cursor + len]; 110 | self.cursor += len; 111 | data 112 | } 113 | 114 | fn next_blob(&mut self) -> Result { 115 | // read size of blob header 116 | let blob_header_len: i32 = NetworkEndian::read_i32(self.read(4)); 117 | 118 | // read blob header 119 | let blob_header = BlobHeader::decode(self.read(blob_header_len as usize))?; 120 | 121 | let blob_start = self.cursor; 122 | let blob_len = blob_header.datasize as usize; 123 | 124 | if blob_header.r#type == "OSMHeader" { 125 | 
self.cursor += blob_len; 126 | Ok(BlobInfo::Header(BlockIndex { 127 | block_type: BlockType::Header, 128 | granularity: None, 129 | blob_start, 130 | blob_len, 131 | })) 132 | } else if blob_header.r#type == "OSMData" { 133 | // read blob 134 | Ok(BlobInfo::Unknown( 135 | blob_start, 136 | blob_len, 137 | self.read(blob_header.datasize as usize).to_vec(), 138 | )) 139 | } else { 140 | panic!("unknown blob type"); 141 | } 142 | } 143 | } 144 | 145 | impl<'a> Iterator for BlockIndexIterator<'a> { 146 | type Item = Result; 147 | fn next(&mut self) -> Option { 148 | if self.cursor < self.data.len() { 149 | Some(self.next_blob()) 150 | } else { 151 | None 152 | } 153 | } 154 | } 155 | 156 | pub fn read_block( 157 | data: &[u8], 158 | idx: &BlockIndex, 159 | ) -> Result { 160 | let blob = Blob::decode(&data[idx.blob_start..idx.blob_start + idx.blob_len])?; 161 | 162 | let mut blob_buf = Vec::new(); 163 | let blob_data = if blob.raw.is_some() { 164 | blob.raw.as_ref().unwrap() 165 | } else if blob.zlib_data.is_some() { 166 | // decompress zlib data 167 | let data: &Vec = blob.zlib_data.as_ref().unwrap(); 168 | let mut decoder = ZlibDecoder::new(&data[..]); 169 | decoder.read_to_end(&mut blob_buf)?; 170 | &blob_buf 171 | } else { 172 | return Err(io::Error::new( 173 | io::ErrorKind::InvalidData, 174 | "unknown compression", 175 | )); 176 | }; 177 | Ok(T::decode(blob_data.as_slice())?) 
178 | } 179 | 180 | fn blob_type_and_granularity_from_blob_info( 181 | blob_start: usize, 182 | blob_len: usize, 183 | blob: Vec, 184 | ) -> Result { 185 | let blob = Blob::decode(blob.as_slice())?; 186 | 187 | let mut blob_buf = Vec::new(); 188 | let blob_data = if blob.raw.is_some() { 189 | // use raw bytes 190 | blob.raw.as_ref().unwrap() 191 | } else if blob.zlib_data.is_some() { 192 | // decompress zlib data 193 | let data: &Vec = blob.zlib_data.as_ref().unwrap(); 194 | let mut decoder = ZlibDecoder::new(&data[..]); 195 | decoder.read_to_end(&mut blob_buf)?; 196 | &blob_buf 197 | } else { 198 | panic!("can only read raw or zlib compressed blob"); 199 | }; 200 | assert_eq!( 201 | blob_data.len(), 202 | blob.raw_size.unwrap_or(blob_data.len() as i32) as usize 203 | ); 204 | 205 | let (block_type, granularity) = type_and_granularity_from_osmdata_blob(&blob_data[..])?; 206 | Ok(BlockIndex { 207 | block_type, 208 | granularity: Some(granularity), 209 | blob_start, 210 | blob_len, 211 | }) 212 | } 213 | 214 | pub fn build_block_index(pbf_data: &[u8]) -> Vec { 215 | let mut result: Vec = BlockIndexIterator::new(pbf_data) 216 | .par_bridge() 217 | .filter_map(|blob| { 218 | let block = match blob { 219 | Ok(BlobInfo::Header(b)) => Ok(b), 220 | Ok(BlobInfo::Unknown(start, len, blob)) => { 221 | blob_type_and_granularity_from_blob_info(start, len, blob) 222 | } 223 | Err(e) => Err(e), 224 | }; 225 | match block { 226 | Ok(b) => Some(b), 227 | Err(e) => { 228 | eprintln!("Skipping block due to error: {e}"); 229 | None 230 | } 231 | } 232 | }) 233 | .collect(); 234 | result.par_sort_unstable(); 235 | info!("Found {} blocks", result.len()); 236 | result 237 | } 238 | -------------------------------------------------------------------------------- /osmflatc/src/parallel.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Reverse; 2 | use std::collections::BTreeMap; 3 | use std::sync::{mpsc::sync_channel, Arc}; 4 | 5 | use 
/// Processes the items of `iter` in parallel while consuming the results
/// strictly in input order.
///
/// `produce` runs on a pool of `rayon::current_num_threads()` worker
/// threads; `consume` runs on the calling thread, once per item, in the
/// original iteration order. Whatever `consume` returns ("garbage") is
/// shipped to a dedicated thread just to be dropped there, so expensive
/// deallocations don't stall the ordered consumer loop.
///
/// Backpressure: the shared counter (seeded with `2 * num_threads`) plus
/// the condvar stop producers from running more than `2 * num_threads`
/// items ahead of the consumer; the `sync_channel` of the same capacity
/// bounds finished-but-unconsumed results.
///
/// Returns the first error produced by `consume`, if any.
///
/// NOTE(review): if `consume` returns an error, producers still blocked in
/// `cond.wait` are never notified again — this looks like it could hang the
/// final join on the error path; confirm before relying on error recovery.
pub fn parallel_process(
    iter: Iter,
    produce: Producer,
    mut consume: Consumer,
) -> Result<(), Error>
where
    Iter: Iterator + Send,
    Producer: Fn(Item) -> Data + Sync,
    Data: Send,
    Consumer: FnMut(Data) -> Result,
    Garbage: Send + 'static,
{
    let num_threads = rayon::current_num_threads();

    // Shared, lock-protected source of (index, item) pairs for the workers.
    let iter = Arc::new(Mutex::new(iter.enumerate()));
    // Admission counter + condvar: a producer may emit item `i` only once the
    // counter exceeds `i`; the consumer bumps it once per consumed item.
    let next = Arc::new((Mutex::new(2 * num_threads), Condvar::new()));

    crossbeam::scope(|s| {
        let (sender, receiver) = sync_channel(2 * num_threads);
        for _ in 0..num_threads {
            let sender = sender.clone();
            let iter = iter.clone();
            s.spawn(|_| {
                let sender = sender;
                let iter = iter;
                loop {
                    // Pull the next (index, item); lock released before producing.
                    let (i, item) = {
                        match iter.lock().next() {
                            None => break,
                            Some(x) => x,
                        }
                    };

                    let data = produce(item);

                    // Throttle: wait until the consumer has admitted index `i`.
                    let (counter, cond) = &*next;
                    {
                        let mut guard = counter.lock();
                        while *guard <= i {
                            cond.wait(&mut guard);
                        }
                    }

                    sender.send((i, data)).unwrap();
                }
            });
        }
        drop(sender); // drop to make sure iteration will finish once all senders are out of scope

        let (garbage_sender, garbage_receiver) = sync_channel(2 * num_threads);

        std::thread::spawn(move || {
            // we move dropping of heavy objects to other threads as they can have a lot
            // of allocations (e.g. Vec)
            for garbage in garbage_receiver {
                std::mem::drop(garbage);
            }
        });

        // Re-order out-of-order arrivals: park them in `pending` until the
        // next expected index shows up, then drain the consecutive run.
        let mut pending = BTreeMap::new();
        let mut next_idx = 0;
        for result in receiver {
            pending.insert(Reverse(result.0), result.1);
            while let Some(data) = pending.remove(&Reverse(next_idx)) {
                {
                    // Admit one more in-flight item and wake waiting producers.
                    let mut guard = next.0.lock();
                    *guard += 1;
                    next.1.notify_all();
                }

                next_idx += 1;
                let garbage = consume(data)?;
                garbage_sender.send(garbage).unwrap();
            }
        }
        Ok(())
    })
    .expect("thread panicked")
}
36 | optional bytes zlib_data = 3; 37 | 38 | // PROPOSED feature for LZMA compressed data. SUPPORT IS NOT REQUIRED. 39 | optional bytes lzma_data = 4; 40 | 41 | // Formerly used for bzip2 compressed data. Depreciated in 2010. 42 | optional bytes OBSOLETE_bzip2_data = 5 [deprecated=true]; // Don't reuse this tag number. 43 | } 44 | 45 | /* A file contains an sequence of fileblock headers, each prefixed by 46 | their length in network byte order, followed by a data block 47 | containing the actual data. types staring with a "_" are reserved. 48 | */ 49 | 50 | message BlobHeader { 51 | required string type = 1; 52 | optional bytes indexdata = 2; 53 | required int32 datasize = 3; 54 | } 55 | 56 | 57 | -------------------------------------------------------------------------------- /osmflatc/src/proto/osmformat.proto: -------------------------------------------------------------------------------- 1 | /** Copyright (c) 2010 Scott A. Crosby. 2 | 3 | This program is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU Lesser General Public License as 5 | published by the Free Software Foundation, either version 3 of the 6 | License, or (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU Lesser General Public License for more details. 12 | 13 | You should have received a copy of the GNU Lesser General Public License 14 | along with this program. If not, see . 15 | 16 | */ 17 | 18 | syntax = "proto2"; 19 | 20 | option optimize_for = LITE_RUNTIME; 21 | option java_package = "org.openstreetmap.osmosis.osmbinary"; 22 | package OSMPBF; 23 | 24 | /* OSM Binary file format 25 | 26 | This is the master schema file of the OSM binary file format. This 27 | file is designed to support limited random-access and future 28 | extendability. 
29 | 30 | A binary OSM file consists of a sequence of FileBlocks (please see 31 | fileformat.proto). The first fileblock contains a serialized instance 32 | of HeaderBlock, followed by a sequence of PrimitiveBlock blocks that 33 | contain the primitives. 34 | 35 | Each primitiveblock is designed to be independently parsable. It 36 | contains a string table storing all strings in that block (keys and 37 | values in tags, roles in relations, usernames, etc.) as well as 38 | metadata containing the precision of coordinates or timestamps in that 39 | block. 40 | 41 | A primitiveblock contains a sequence of primitive groups, each 42 | containing primitives of the same type (nodes, densenodes, ways, 43 | relations). Coordinates are stored in signed 64-bit integers. Lat&lon 44 | are measured in units nanodegrees. The default of 45 | granularity of 100 nanodegrees corresponds to about 1cm on the ground, 46 | and a full lat or lon fits into 32 bits. 47 | 48 | Converting an integer to a lattitude or longitude uses the formula: 49 | $OUT = IN * granularity / 10**9$. Many encoding schemes use delta 50 | coding when representing nodes and relations. 51 | 52 | */ 53 | 54 | ////////////////////////////////////////////////////////////////////////// 55 | ////////////////////////////////////////////////////////////////////////// 56 | 57 | /* Contains the file header. */ 58 | 59 | message HeaderBlock { 60 | optional HeaderBBox bbox = 1; 61 | /* Additional tags to aid in parsing this dataset */ 62 | repeated string required_features = 4; 63 | repeated string optional_features = 5; 64 | 65 | optional string writingprogram = 16; 66 | optional string source = 17; // From the bbox field. 67 | 68 | /* Tags that allow continuing an Osmosis replication */ 69 | 70 | // replication timestamp, expressed in seconds since the epoch, 71 | // otherwise the same value as in the "timestamp=..." 
field 72 | // in the state.txt file used by Osmosis 73 | optional int64 osmosis_replication_timestamp = 32; 74 | 75 | // replication sequence number (sequenceNumber in state.txt) 76 | optional int64 osmosis_replication_sequence_number = 33; 77 | 78 | // replication base URL (from Osmosis' configuration.txt file) 79 | optional string osmosis_replication_base_url = 34; 80 | } 81 | 82 | 83 | /** The bounding box field in the OSM header. BBOX, as used in the OSM 84 | header. Units are always in nanodegrees -- they do not obey 85 | granularity rules. */ 86 | 87 | message HeaderBBox { 88 | required sint64 left = 1; 89 | required sint64 right = 2; 90 | required sint64 top = 3; 91 | required sint64 bottom = 4; 92 | } 93 | 94 | 95 | /////////////////////////////////////////////////////////////////////// 96 | /////////////////////////////////////////////////////////////////////// 97 | 98 | 99 | message PrimitiveBlock { 100 | required StringTable stringtable = 1; 101 | repeated PrimitiveGroup primitivegroup = 2; 102 | 103 | // Granularity, units of nanodegrees, used to store coordinates in this block 104 | optional int32 granularity = 17 [default=100]; 105 | // Offset value between the output coordinates coordinates and the granularity grid in unites of nanodegrees. 106 | optional int64 lat_offset = 19 [default=0]; 107 | optional int64 lon_offset = 20 [default=0]; 108 | 109 | // Granularity of dates, normally represented in units of milliseconds since the 1970 epoch. 110 | optional int32 date_granularity = 18 [default=1000]; 111 | 112 | 113 | // Proposed extension: 114 | //optional BBox bbox = XX; 115 | } 116 | 117 | // Group of OSMPrimitives. All primitives in a group must be the same type. 
118 | message PrimitiveGroup { 119 | repeated Node nodes = 1; 120 | optional DenseNodes dense = 2; 121 | repeated Way ways = 3; 122 | repeated Relation relations = 4; 123 | repeated ChangeSet changesets = 5; 124 | } 125 | 126 | 127 | /** String table, contains the common strings in each block. 128 | 129 | Note that we reserve index '0' as a delimiter, so the entry at that 130 | index in the table is ALWAYS blank and unused. 131 | 132 | */ 133 | message StringTable { 134 | repeated bytes s = 1; 135 | } 136 | 137 | /* Optional metadata that may be included into each primitive. */ 138 | message Info { 139 | optional int32 version = 1 [default = -1]; 140 | optional int64 timestamp = 2; 141 | optional int64 changeset = 3; 142 | optional int32 uid = 4; 143 | optional uint32 user_sid = 5; // String IDs 144 | 145 | // The visible flag is used to store history information. It indicates that 146 | // the current object version has been created by a delete operation on the 147 | // OSM API. 148 | // When a writer sets this flag, it MUST add a required_features tag with 149 | // value "HistoricalInformation" to the HeaderBlock. 150 | // If this flag is not available for some object it MUST be assumed to be 151 | // true if the file has the required_features tag "HistoricalInformation" 152 | // set. 153 | optional bool visible = 6; 154 | } 155 | 156 | /** Optional metadata that may be included into each primitive. Special dense format used in DenseNodes. */ 157 | message DenseInfo { 158 | repeated int32 version = 1 [packed = true]; 159 | repeated sint64 timestamp = 2 [packed = true]; // DELTA coded 160 | repeated sint64 changeset = 3 [packed = true]; // DELTA coded 161 | repeated sint32 uid = 4 [packed = true]; // DELTA coded 162 | repeated sint32 user_sid = 5 [packed = true]; // String IDs for usernames. DELTA coded 163 | 164 | // The visible flag is used to store history information. 
It indicates that 165 | // the current object version has been created by a delete operation on the 166 | // OSM API. 167 | // When a writer sets this flag, it MUST add a required_features tag with 168 | // value "HistoricalInformation" to the HeaderBlock. 169 | // If this flag is not available for some object it MUST be assumed to be 170 | // true if the file has the required_features tag "HistoricalInformation" 171 | // set. 172 | repeated bool visible = 6 [packed = true]; 173 | } 174 | 175 | 176 | // THIS IS STUB DESIGN FOR CHANGESETS. NOT USED RIGHT NOW. 177 | // TODO: REMOVE THIS? 178 | message ChangeSet { 179 | required int64 id = 1; 180 | // 181 | // // Parallel arrays. 182 | // repeated uint32 keys = 2 [packed = true]; // String IDs. 183 | // repeated uint32 vals = 3 [packed = true]; // String IDs. 184 | // 185 | // optional Info info = 4; 186 | 187 | // optional int64 created_at = 8; 188 | // optional int64 closetime_delta = 9; 189 | // optional bool open = 10; 190 | // optional HeaderBBox bbox = 11; 191 | } 192 | 193 | 194 | message Node { 195 | required sint64 id = 1; 196 | // Parallel arrays. 197 | repeated uint32 keys = 2 [packed = true]; // String IDs. 198 | repeated uint32 vals = 3 [packed = true]; // String IDs. 199 | 200 | optional Info info = 4; // May be omitted in omitmeta 201 | 202 | required sint64 lat = 8; 203 | required sint64 lon = 9; 204 | } 205 | 206 | /* Used to densly represent a sequence of nodes that do not have any tags. 207 | 208 | We represent these nodes columnwise as five columns: ID's, lats, and 209 | lons, all delta coded. When metadata is not omitted, 210 | 211 | We encode keys & vals for all nodes as a single array of integers 212 | containing key-stringid and val-stringid, using a stringid of 0 as a 213 | delimiter between nodes. 
214 | 215 | ( ( )* '0' )* 216 | */ 217 | 218 | message DenseNodes { 219 | repeated sint64 id = 1 [packed = true]; // DELTA coded 220 | 221 | //repeated Info info = 4; 222 | optional DenseInfo denseinfo = 5; 223 | 224 | repeated sint64 lat = 8 [packed = true]; // DELTA coded 225 | repeated sint64 lon = 9 [packed = true]; // DELTA coded 226 | 227 | // Special packing of keys and vals into one array. May be empty if all nodes in this block are tagless. 228 | repeated int32 keys_vals = 10 [packed = true]; 229 | } 230 | 231 | 232 | message Way { 233 | required int64 id = 1; 234 | // Parallel arrays. 235 | repeated uint32 keys = 2 [packed = true]; 236 | repeated uint32 vals = 3 [packed = true]; 237 | 238 | optional Info info = 4; 239 | 240 | repeated sint64 refs = 8 [packed = true]; // DELTA coded 241 | } 242 | 243 | message Relation { 244 | enum MemberType { 245 | NODE = 0; 246 | WAY = 1; 247 | RELATION = 2; 248 | } 249 | required int64 id = 1; 250 | 251 | // Parallel arrays. 252 | repeated uint32 keys = 2 [packed = true]; 253 | repeated uint32 vals = 3 [packed = true]; 254 | 255 | optional Info info = 4; 256 | 257 | // Parallel arrays 258 | repeated int32 roles_sid = 8 [packed = true]; 259 | repeated sint64 memids = 9 [packed = true]; // DELTA encoded 260 | repeated MemberType types = 10 [packed = true]; 261 | } 262 | 263 | -------------------------------------------------------------------------------- /osmflatc/src/stats.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::ops::AddAssign; 3 | 4 | #[derive(Debug, Default)] 5 | pub struct Stats { 6 | pub num_nodes: usize, 7 | pub num_ways: usize, 8 | pub num_relations: usize, 9 | pub num_unresolved_node_ids: usize, 10 | pub num_unresolved_way_ids: usize, 11 | pub num_unresolved_rel_ids: usize, 12 | } 13 | 14 | impl AddAssign for Stats { 15 | #[inline] 16 | fn add_assign(&mut self, other: Self) { 17 | self.num_nodes += other.num_nodes; 18 | self.num_ways += 
other.num_ways; 19 | self.num_relations += other.num_relations; 20 | self.num_unresolved_node_ids += other.num_unresolved_node_ids; 21 | self.num_unresolved_way_ids += other.num_unresolved_way_ids; 22 | self.num_unresolved_rel_ids += other.num_unresolved_rel_ids; 23 | } 24 | } 25 | 26 | impl fmt::Display for Stats { 27 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 28 | write!( 29 | f, 30 | r#"Converted: 31 | nodes: {} 32 | ways: {} 33 | relations: {} 34 | Unresolved ids: 35 | nodes: {} 36 | ways: {} 37 | relations: {}"#, 38 | self.num_nodes, 39 | self.num_ways, 40 | self.num_relations, 41 | self.num_unresolved_node_ids, 42 | self.num_unresolved_way_ids, 43 | self.num_unresolved_rel_ids 44 | ) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /osmflatc/src/strings.rs: -------------------------------------------------------------------------------- 1 | use ahash::AHashMap; 2 | 3 | #[derive(Debug, Clone, Copy)] 4 | struct TerminatedStringPtr { 5 | ptr: *const u8, 6 | } 7 | 8 | // We use this (unsafe) wrapper to get the most compact hashmap possible 9 | // Using a "&'static str" would be bigger due to the length stored 10 | // Using a String would allocate a lot of individual blocks 11 | // Using a small-string-optimized structure would create large objects 12 | impl TerminatedStringPtr { 13 | /// Safety: 14 | /// Requires the data pointed to to: 15 | /// * Be \0 terminated 16 | /// * Outlive TerminatedStringPtr 17 | unsafe fn from_ptr(ptr: *const u8) -> Self { 18 | Self { ptr } 19 | } 20 | 21 | fn as_bytes(&self) -> &[u8] { 22 | // Safety: 23 | // If constructed properly from a 0-terminated string that outlives this instance this is safe 24 | unsafe { std::ffi::CStr::from_ptr(self.ptr as *const i8).to_bytes() } 25 | } 26 | } 27 | 28 | impl PartialEq for TerminatedStringPtr { 29 | fn eq(&self, other: &TerminatedStringPtr) -> bool { 30 | self.as_bytes() == other.as_bytes() 31 | } 32 | } 33 | 34 | 
impl std::hash::Hash for TerminatedStringPtr { 35 | fn hash(&self, h: &mut H) 36 | where 37 | H: std::hash::Hasher, 38 | { 39 | self.as_bytes().hash(h) 40 | } 41 | } 42 | 43 | impl Eq for TerminatedStringPtr {} 44 | 45 | impl std::borrow::Borrow<[u8]> for TerminatedStringPtr { 46 | fn borrow(&self) -> &[u8] { 47 | self.as_bytes() 48 | } 49 | } 50 | 51 | #[derive(Debug, Default)] 52 | pub struct StringTable { 53 | // Append only, we will never reallocate any data inside 54 | data: Vec>, 55 | 56 | // The hashmap references strings in the data block 57 | // Since we cannot prove to the compiler that the strings 58 | // will be "alive" long enough we have to manage lifetime ourselves 59 | indexed_data: AHashMap, 60 | 61 | size_in_bytes: u64, 62 | } 63 | 64 | impl StringTable { 65 | pub fn new() -> Self { 66 | Default::default() 67 | } 68 | 69 | /// Inserts a string into string table and returns its index. 70 | /// 71 | /// If the string was already inserted before, the string is deduplicated 72 | /// and the index to the previous string is returned. 
73 | pub fn insert(&mut self, s: &str) -> u64 { 74 | // Horrible news, we cannot use entry API since it does not support Borrow 75 | // See: https://github.com/rust-lang/rust/issues/56167 76 | if let Some(&idx) = self.indexed_data.get(s.as_bytes()) { 77 | return idx; 78 | } 79 | 80 | let idx = self.size_in_bytes; 81 | if self 82 | .data 83 | .last() 84 | .filter(|x| x.len() + s.len() < x.capacity()) // str-len + \0 85 | .is_none() 86 | { 87 | self.data 88 | .push(Vec::with_capacity((1024 * 1024 * 4).max(s.len() + 1))); 89 | } 90 | // unwrap is ok here, since we just ensured that there is always one entry 91 | let buffer = self.data.last_mut().unwrap(); 92 | let pos = buffer.len(); 93 | let ptr_before = buffer.as_ptr(); 94 | buffer.extend(s.as_bytes()); 95 | buffer.push(0); 96 | // Safety: We must never reallocate the buffer 97 | debug_assert_eq!(ptr_before, buffer.as_ptr()); 98 | let key = unsafe { 99 | // convert back to str (safe since we know that it is valid UTF, it was created from a str) 100 | let key: &str = std::str::from_utf8_unchecked(&buffer[pos..]); 101 | // safe since we make sure to never reallocate/free any buffer 102 | let key_ptr = key.as_ptr(); 103 | TerminatedStringPtr::from_ptr(key_ptr) 104 | }; 105 | self.indexed_data.insert(key, idx); 106 | 107 | self.size_in_bytes += s.len() as u64 + 1; 108 | idx 109 | } 110 | 111 | pub fn into_bytes(self) -> Vec { 112 | let Self { 113 | data, 114 | indexed_data, 115 | size_in_bytes, 116 | } = self; 117 | std::mem::drop(indexed_data); 118 | 119 | let mut result = Vec::with_capacity(size_in_bytes as usize); 120 | for buffer in data { 121 | result.extend(buffer); // also drops buffer 122 | } 123 | result 124 | } 125 | } 126 | 127 | #[cfg(test)] 128 | mod test { 129 | use super::StringTable; 130 | use proptest::prelude::*; 131 | use std::collections::HashSet; 132 | 133 | #[test] 134 | fn test_simple_insert() { 135 | let mut st = StringTable::new(); 136 | assert_eq!(st.insert("hello"), 0); 137 | 
assert_eq!(st.insert("world"), 6); 138 | assert_eq!(st.insert("world"), 6); 139 | assert_eq!(st.insert("!"), 6 + 6); 140 | assert_eq!(st.insert("!"), 6 + 6); 141 | assert_eq!(st.insert("!"), 6 + 6); 142 | 143 | let bytes = st.into_bytes(); 144 | println!("{}", ::std::str::from_utf8(&bytes).unwrap()); 145 | assert_eq!(bytes, b"hello\0world\0!\0"); 146 | } 147 | 148 | #[test] 149 | fn test_large_insert() { 150 | let mut st = StringTable::new(); 151 | assert_eq!(st.insert("hello"), 0); 152 | assert_eq!(st.insert(&str::repeat("x", 1024 * 1024 * 5)), 6); 153 | assert_eq!(st.insert("huh"), 1024 * 1024 * 5 + 1 + 6); 154 | assert_eq!(st.insert(&str::repeat("x", 1024 * 1024 * 5)), 6); 155 | assert_eq!(st.insert("hello"), 0); 156 | 157 | let bytes = st.into_bytes(); 158 | assert_eq!( 159 | bytes, 160 | ("hello\0".to_string() + &str::repeat("x", 1024 * 1024 * 5) + "\0huh\0").as_bytes() 161 | ); 162 | } 163 | 164 | #[derive(Debug, Default)] 165 | struct ReferenceStringTable { 166 | words: HashSet, 167 | data: Vec, 168 | } 169 | 170 | impl ReferenceStringTable { 171 | fn insert(&mut self, input: String) { 172 | if !self.words.contains(&input) { 173 | self.words.insert(input.clone()); 174 | self.data.extend(input.as_bytes()); 175 | self.data.push(b'\0'); 176 | } 177 | } 178 | } 179 | 180 | proptest! 
{ 181 | #[test] 182 | fn sequence_of_insert(ref seq in prop::collection::vec("[^\x00]*", 1..100)) 183 | { 184 | let mut st = StringTable::new(); 185 | let mut reference_st = ReferenceStringTable::default(); 186 | for input in seq { 187 | st.insert(input); 188 | reference_st.insert(input.into()); 189 | } 190 | assert_eq!(st.into_bytes(), reference_st.data); 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | wrap_comments = true 2 | reorder_imports = true 3 | reorder_modules = true 4 | --------------------------------------------------------------------------------