├── src ├── test │ ├── resources │ │ └── sample.pbf │ └── java │ │ └── com │ │ └── wolt │ │ └── osm │ │ └── parallelpbf │ │ ├── encoder │ │ ├── OsmEncoderTest.java │ │ ├── OsmEntityEncoderTest.java │ │ ├── WayEncoderTest.java │ │ ├── OsmHeaderEncoderTest.java │ │ ├── RelationEncoderTest.java │ │ ├── StringTableEncoderTest.java │ │ └── DenseNodesEncoderTest.java │ │ ├── parser │ │ ├── WayParserTest.java │ │ ├── TagParserTest.java │ │ ├── RelationParserTest.java │ │ ├── NodeParserTest.java │ │ └── InfoParserTest.java │ │ ├── ParallelBinaryWriterExample.java │ │ ├── blob │ │ ├── ReadHeaderLengthTest.java │ │ ├── ReadBlobTest.java │ │ ├── BlobWriterTest.java │ │ └── ReadHeaderTest.java │ │ ├── ParalelBinaryParserExample.java │ │ ├── io │ │ ├── OSMDataReaderTest.java │ │ ├── OSMHeaderReaderTest.java │ │ ├── OSMReaderTest.java │ │ └── OSMWriterTest.java │ │ ├── TestObjectsFactory.java │ │ └── ParallelBinaryParserIT.java └── main │ ├── java │ └── com │ │ └── wolt │ │ └── osm │ │ └── parallelpbf │ │ ├── encoder │ │ ├── package-info.java │ │ ├── OsmEncoder.java │ │ ├── OsmEntityEncoder.java │ │ ├── OsmHeaderEncoder.java │ │ ├── StringTableEncoder.java │ │ ├── WayEncoder.java │ │ ├── RelationEncoder.java │ │ └── DenseNodesEncoder.java │ │ ├── io │ │ ├── package-info.java │ │ ├── OSMReader.java │ │ ├── OSMHeaderReader.java │ │ ├── OSMDataReader.java │ │ └── OSMWriter.java │ │ ├── parser │ │ ├── package-info.java │ │ ├── WayParser.java │ │ ├── RelationParser.java │ │ ├── BaseParser.java │ │ └── NodeParser.java │ │ ├── blob │ │ ├── package-info.java │ │ ├── BlobInformation.java │ │ ├── BlobWriter.java │ │ └── BlobReader.java │ │ ├── package-info.java │ │ ├── entity │ │ ├── package-info.java │ │ ├── BoundBox.java │ │ ├── OsmEntity.java │ │ ├── Way.java │ │ ├── Relation.java │ │ ├── Node.java │ │ ├── Info.java │ │ ├── Header.java │ │ └── RelationMember.java │ │ └── ParallelBinaryWriter.java │ └── proto │ ├── fileformat.proto │ └── osmformat.proto ├── spotbugsExclude.xml ├── 
.gitignore ├── checkstyle.xml ├── README.md └── pom.xml /src/test/resources/sample.pbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/woltapp/parallelpbf/HEAD/src/test/resources/sample.pbf -------------------------------------------------------------------------------- /spotbugsExclude.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/encoder/OsmEncoderTest.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import static org.junit.jupiter.api.Assertions.*; 6 | 7 | class OsmEncoderTest { 8 | 9 | @Test 10 | void testNanoScale() { 11 | double input = 100.5009; 12 | 13 | long actual = OsmEncoder.doubleToNanoScaled(input); 14 | assertEquals(100500900000L, actual); 15 | } 16 | } -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/encoder/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. 
If not, see . 16 | */ 17 | 18 | /** 19 | * Binary blobs encoding. 20 | */ 21 | 22 | package com.wolt.osm.parallelpbf.encoder; 23 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/io/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | /** 19 | * Binary blobs reading, writing and processing. 20 | */ 21 | package com.wolt.osm.parallelpbf.io; 22 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/parser/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | /** 19 | * Collection of OSM primitives parsers. 20 | */ 21 | package com.wolt.osm.parallelpbf.parser; 22 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/blob/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | /** 19 | * BlobHeader, Blob and stream reading operations. 20 | */ 21 | package com.wolt.osm.parallelpbf.blob; 22 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 
8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | /** 19 | * Parallel OSM PBF format parser. 20 | * 21 | * See https://github.com/akashihi/parallelpbf for the details and usage example. 22 | */ 23 | package com.wolt.osm.parallelpbf; 24 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/entity/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | /** 19 | * Definition of OSM data entities (v0.6 API compatible), 20 | * that will be extracted from the PBF file for the future 21 | * processing. 
22 | */ 23 | package com.wolt.osm.parallelpbf.entity; 24 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/encoder/OsmEncoder.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | /** 4 | * Base class for all encoders, provides common encoding functions. 5 | */ 6 | public abstract class OsmEncoder { 7 | /** 8 | * Coordinates grid default granularity. 9 | */ 10 | public static final int GRANULARITY = 100; 11 | 12 | /** 13 | * Single tag entry (key or value) is an integer index, 14 | * so 4 bytes per entry. 15 | */ 16 | protected static final int TAG_ENTRY_SIZE = 4; 17 | 18 | /** 19 | * Single member entry (key or value) is a long value, 20 | * so 8 bytes per entry keeping both of them. 21 | */ 22 | protected static final int MEMBER_ENTRY_SIZE = 8; 23 | 24 | /** 25 | * Conversion from nano- to non-scaled. 26 | */ 27 | private static final double NANO = 1e9; 28 | 29 | /** 30 | * Convert double to nano-scaled long. 31 | * @param value double to convert. 32 | * @return value multiplied by 1e9 and then rounded. 
33 | */ 34 | protected static long doubleToNanoScaled(final double value) { 35 | return Math.round(value * NANO); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/encoder/OsmEntityEncoderTest.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import com.wolt.osm.parallelpbf.TestObjectsFactory; 4 | import com.wolt.osm.parallelpbf.entity.Node; 5 | import crosby.binary.Osmformat; 6 | import org.junit.jupiter.api.Test; 7 | 8 | import static org.junit.jupiter.api.Assertions.assertThrows; 9 | 10 | class OsmEntityEncoderTest { 11 | private static class OsmEntityEncoderImpl extends OsmEntityEncoder { 12 | 13 | @Override 14 | public void addImpl(Node entity) { 15 | 16 | } 17 | 18 | @Override 19 | public int estimateSize() { 20 | return 0; 21 | } 22 | 23 | @Override 24 | public Osmformat.PrimitiveGroup.Builder writeImpl() { 25 | return Osmformat.PrimitiveGroup.newBuilder(); 26 | } 27 | } 28 | 29 | @Test 30 | public void testNoUseAfterWrite() { 31 | OsmEntityEncoderImpl testedObject = new OsmEntityEncoderImpl(); 32 | testedObject.add(TestObjectsFactory.node()); 33 | testedObject.write(); 34 | assertThrows(IllegalStateException.class, () -> testedObject.add(TestObjectsFactory.node())); 35 | } 36 | } -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/entity/BoundBox.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 
8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.entity; 19 | 20 | import lombok.Data; 21 | 22 | /** 23 | * Wrapper of the bounding box of the map parsed. 24 | */ 25 | @Data 26 | public final class BoundBox { 27 | /** 28 | * Lesser longitude of a bounding box. 29 | */ 30 | private final double left; 31 | 32 | /** 33 | * Lesser latitude of a bounding box. 34 | */ 35 | private final double top; 36 | 37 | /** 38 | * Bigger longitude of a bounding box. 39 | */ 40 | private final double right; 41 | 42 | /** 43 | * Bigger latitude of a bounding box. 44 | */ 45 | private final double bottom; 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/encoder/WayEncoderTest.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import com.wolt.osm.parallelpbf.TestObjectsFactory; 4 | import crosby.binary.Osmformat; 5 | import org.junit.jupiter.api.BeforeEach; 6 | import org.junit.jupiter.api.Test; 7 | 8 | import static org.junit.jupiter.api.Assertions.assertEquals; 9 | 10 | class WayEncoderTest { 11 | private StringTableEncoder stringEncoder; 12 | 13 | @BeforeEach 14 | public void setUp() { 15 | stringEncoder = new StringTableEncoder(); 16 | } 17 | 18 | @Test 19 | public void testWaySize() { 20 | WayEncoder testedObject = new WayEncoder(stringEncoder); 21 | testedObject.add(TestObjectsFactory.way()); 22 | 23 | assertEquals(40, testedObject.estimateSize()); 24 | } 25 | 26 | @Test 27 | public void testWrite() { 28 | WayEncoder testedObject = new 
WayEncoder(stringEncoder); 29 | testedObject.add(TestObjectsFactory.way()); 30 | 31 | Osmformat.PrimitiveGroup actual = testedObject.write().build(); 32 | 33 | Osmformat.Way w = actual.getWays(0); 34 | assertEquals(1, w.getId()); 35 | assertEquals(1, w.getKeys(0)); 36 | assertEquals(1, w.getVals(0)); 37 | 38 | assertEquals(3, w.getRefs(0)); 39 | assertEquals(3, w.getRefs(1)); 40 | assertEquals(-4, w.getRefs(2)); 41 | 42 | assertEquals(3, w.getInfo().getVersion()); 43 | assertEquals(5, w.getInfo().getChangeset()); 44 | assertEquals(1, w.getInfo().getUid()); 45 | assertEquals(true, w.getInfo().getVisible()); 46 | } 47 | } -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/entity/OsmEntity.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.entity; 19 | 20 | import lombok.Data; 21 | 22 | import java.util.HashMap; 23 | import java.util.Map; 24 | 25 | /** 26 | * Base class for the all OSM entities. 27 | * 28 | * All OSM v0.6 API entities have id and tags, 29 | * presented as unique keys with their values. 30 | * 31 | * For a PBF format we also store metadata for the entity. 
32 | * @see Info 33 | */ 34 | @Data 35 | public abstract class OsmEntity { 36 | /** 37 | * Entry id. 38 | */ 39 | private final long id; 40 | 41 | /** 42 | * Entry tags map. May be empty. 43 | */ 44 | private Map tags = new HashMap<>(); 45 | 46 | /** 47 | * Entry metadata, can be null. 48 | * 49 | * @see Info 50 | */ 51 | private Info info; 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/blob/BlobInformation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.blob; 19 | 20 | import lombok.Data; 21 | 22 | /** 23 | * A better wrapper over BlobHeader. 24 | * 25 | * Keeps blob size and blob type together for future processing. 26 | */ 27 | @Data 28 | public class BlobInformation { 29 | /* OSM PBF Fileformat block types. See https://wiki.openstreetmap.org/wiki/PBF_Format for the details */ 30 | /** 31 | * OSMData type block. 32 | */ 33 | public static final String TYPE_OSM_DATA = "OSMData"; 34 | /** 35 | * OSMHeader type block. 36 | */ 37 | public static final String TYPE_OSM_HEADER = "OSMHeader"; 38 | /** 39 | * Data blob size. 40 | */ 41 | private final Integer size; 42 | 43 | /** 44 | * Data blob type. 
45 | */ 46 | private final String type; 47 | } 48 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/encoder/OsmHeaderEncoderTest.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import com.google.protobuf.InvalidProtocolBufferException; 4 | import com.wolt.osm.parallelpbf.entity.BoundBox; 5 | import com.wolt.osm.parallelpbf.entity.Header; 6 | import crosby.binary.Osmformat; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import static org.junit.jupiter.api.Assertions.*; 10 | 11 | class OsmHeaderEncoderTest { 12 | @Test 13 | void testHeaderNoBBox() throws InvalidProtocolBufferException { 14 | byte[] blob = OsmHeaderEncoder.encodeHeader(null); 15 | 16 | Osmformat.HeaderBlock actual = Osmformat.HeaderBlock.parseFrom(blob); 17 | 18 | assertFalse(actual.hasBbox()); 19 | 20 | assertTrue(actual.hasWritingprogram()); 21 | assertEquals("parallelpbf", actual.getWritingprogram()); 22 | 23 | assertTrue(actual.getRequiredFeaturesList().contains(Header.FEATURE_OSM_SCHEMA)); 24 | assertTrue(actual.getRequiredFeaturesList().contains(Header.FEATURE_DENSE_NODES)); 25 | } 26 | 27 | @Test 28 | void testHeaderBBox() throws InvalidProtocolBufferException { 29 | BoundBox bbox = new BoundBox(1, 2, 4 ,8); 30 | 31 | byte[] blob = OsmHeaderEncoder.encodeHeader(bbox); 32 | 33 | Osmformat.HeaderBlock actual = Osmformat.HeaderBlock.parseFrom(blob); 34 | 35 | assertTrue(actual.hasBbox()); 36 | 37 | assertEquals(1000000000L, actual.getBbox().getLeft()); 38 | assertEquals(2000000000L, actual.getBbox().getTop()); 39 | assertEquals(4000000000L, actual.getBbox().getRight()); 40 | assertEquals(8000000000L, actual.getBbox().getBottom()); 41 | } 42 | } -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/entity/Way.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.entity; 19 | 20 | import lombok.Data; 21 | import lombok.EqualsAndHashCode; 22 | import lombok.ToString; 23 | 24 | import java.util.LinkedList; 25 | import java.util.List; 26 | 27 | /** 28 | * OSM Way entity. 29 | * 30 | * Way is an ordered, therefore directed, collection of nodes. 31 | * 32 | * @see Node 33 | */ 34 | @Data 35 | @EqualsAndHashCode(callSuper = true) 36 | @ToString(callSuper = true) 37 | public final class Way extends OsmEntity { 38 | /** 39 | * Constructs Way setting mandatory fields. 40 | * @param id Required object id. 41 | */ 42 | public Way(final long id) { 43 | super(id); 44 | } 45 | 46 | /** 47 | * Ordered list of nodes, making way. Should contain at least one node. 48 | * 49 | * @see Node 50 | */ 51 | private final List nodes = new LinkedList<>(); 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/entity/Relation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 
3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.entity; 19 | 20 | import lombok.Data; 21 | import lombok.EqualsAndHashCode; 22 | import lombok.ToString; 23 | 24 | import java.util.LinkedList; 25 | import java.util.List; 26 | 27 | /** 28 | * OSM Relation entity. 29 | * 30 | * Groups several OSM entities (including other relations) 31 | * to the single logical entity. 32 | * 33 | * @see RelationMember 34 | */ 35 | @Data 36 | @EqualsAndHashCode(callSuper = true) 37 | @ToString(callSuper = true) 38 | public final class Relation extends OsmEntity { 39 | /** 40 | * Entity constructor. 41 | * @param id Sets required object id during construction. 42 | */ 43 | public Relation(final long id) { 44 | super(id); 45 | } 46 | 47 | /** 48 | * Ordered list of relation members. Can be empty. 49 | */ 50 | private final List members = new LinkedList<>(); 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/entity/Node.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 
3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.entity; 19 | 20 | import lombok.Data; 21 | import lombok.EqualsAndHashCode; 22 | import lombok.ToString; 23 | 24 | /** 25 | * OSM Node entity. 26 | * 27 | * Node is the most basic building block of the OSM database. 28 | */ 29 | @Data 30 | @EqualsAndHashCode(callSuper = true) 31 | @ToString(callSuper = true) 32 | public final class Node extends OsmEntity { 33 | /** 34 | * Constructs Node setting mandatory fields. 35 | * @param id Required node id. 36 | * @param latitude Node latitude. 37 | * @param longitude Node longitude 38 | */ 39 | public Node(final long id, final double latitude, final double longitude) { 40 | super(id); 41 | this.lat = latitude; 42 | this.lon = longitude; 43 | } 44 | 45 | /** 46 | * Node latitude. 47 | */ 48 | private final double lat; 49 | 50 | /** 51 | * Node longitude. 52 | */ 53 | private final double lon; 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/encoder/OsmEntityEncoder.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import com.wolt.osm.parallelpbf.entity.OsmEntity; 4 | import crosby.binary.Osmformat; 5 | 6 | /** 7 | * Entity specific extension of OsmEncoder. 
8 | * @param Type of entity that encoder supports. 9 | */ 10 | public abstract class OsmEntityEncoder extends OsmEncoder { 11 | /** 12 | * 'Write was called' flag. 13 | */ 14 | private boolean built = false; 15 | 16 | /** 17 | * Type specific write implementation. 18 | * @param entity Osm entity to add to the encoder. 19 | */ 20 | protected abstract void addImpl(T entity); 21 | 22 | /** 23 | * Type specific group writer implementation. 24 | * @return Group with entities of T type. 25 | */ 26 | protected abstract Osmformat.PrimitiveGroup.Builder writeImpl(); 27 | 28 | /** 29 | * Add entity to the encoder. 30 | * @param entity Entity to add. 31 | * @throws IllegalStateException when call after write() call. 32 | */ 33 | public void add(final T entity) { 34 | if (built) { 35 | throw new IllegalStateException("Encoder content is already written"); 36 | } 37 | addImpl(entity); 38 | } 39 | 40 | /** 41 | * Provides approximate size of the future blob. 42 | * @return Estimated approximate maximum size of a blob. 43 | */ 44 | public abstract int estimateSize(); 45 | 46 | /** 47 | * Build a blob from the collected data. Encoder will become 48 | * unusable after that call. 49 | * @return OSM PBF primitiveBlock blob. 50 | */ 51 | public Osmformat.PrimitiveGroup.Builder write() { 52 | built = true; 53 | return writeImpl(); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/encoder/OsmHeaderEncoder.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import com.wolt.osm.parallelpbf.entity.BoundBox; 4 | import com.wolt.osm.parallelpbf.entity.Header; 5 | import crosby.binary.Osmformat; 6 | 7 | /** 8 | * HeaderBlock encoder. 9 | */ 10 | public final class OsmHeaderEncoder extends OsmEncoder { 11 | /** 12 | * Wraps bound box to OSM PBF entity. 13 | * @param boundBox Bound box to wrap. 
14 | * @return HeaderBBox entity. 15 | */ 16 | private static Osmformat.HeaderBBox encodeBoundBox(final BoundBox boundBox) { 17 | return Osmformat.HeaderBBox.newBuilder() 18 | .setLeft(doubleToNanoScaled(boundBox.getLeft())) 19 | .setTop(doubleToNanoScaled(boundBox.getTop())) 20 | .setRight(doubleToNanoScaled(boundBox.getRight())) 21 | .setBottom(doubleToNanoScaled(boundBox.getBottom())) 22 | .build(); 23 | } 24 | 25 | /** 26 | * Generates OSM PBF header and add (optional) bounding box to it. 27 | * Header values are predefined and can't be set right now. 28 | * @param boundBox Bounding box to include into header. May be null. 29 | * @return array of bytes with binary representation of the header. 30 | */ 31 | public static byte[] encodeHeader(final BoundBox boundBox) { 32 | Osmformat.HeaderBlock.Builder blob = Osmformat.HeaderBlock.newBuilder(); 33 | 34 | if (boundBox != null) { 35 | blob.setBbox(encodeBoundBox(boundBox)); 36 | } 37 | 38 | blob.addRequiredFeatures(Header.FEATURE_OSM_SCHEMA); 39 | blob.addRequiredFeatures(Header.FEATURE_DENSE_NODES); 40 | 41 | blob.setWritingprogram("parallelpbf"); 42 | 43 | return blob.build().toByteArray(); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/parser/WayParserTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.parser; 19 | 20 | import com.wolt.osm.parallelpbf.entity.Way; 21 | import com.wolt.osm.parallelpbf.TestObjectsFactory; 22 | import lombok.var; 23 | import org.junit.jupiter.api.Test; 24 | 25 | import java.util.function.Consumer; 26 | 27 | import static org.junit.jupiter.api.Assertions.assertEquals; 28 | import static org.junit.jupiter.api.Assertions.assertTrue; 29 | 30 | class WayParserTest { 31 | private final Consumer checker = (way) -> { 32 | assertEquals(1, way.getId()); 33 | assertEquals(TestObjectsFactory.info, way.getInfo()); 34 | 35 | var tags = way.getTags(); 36 | assertTrue(tags.containsKey("tag")); 37 | assertEquals("value", tags.get("tag")); 38 | 39 | assertEquals(9000, way.getNodes().get(0).longValue()); 40 | }; 41 | 42 | @Test 43 | void testWayParse() { 44 | var testedObject = new WayParser(checker, TestObjectsFactory.stringTable); 45 | testedObject.parse(TestObjectsFactory.wayMessage); 46 | } 47 | } -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/ParallelBinaryWriterExample.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf; 2 | 3 | import ch.qos.logback.classic.Level; 4 | import ch.qos.logback.classic.Logger; 5 | import com.wolt.osm.parallelpbf.entity.*; 6 | import lombok.SneakyThrows; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import java.io.*; 10 | 11 | public class ParallelBinaryWriterExample { 12 | private ParallelBinaryWriter writer; 13 | 14 | private void processNodes(Node node) { 15 | writer.write(node); 16 | } 17 | 18 | private void processWays(Way way) { 19 | writer.write(way); 20 | } 21 | 22 | private void processRelations(Relation relation) { 23 | writer.write(relation); 24 | } 25 | 26 | @SneakyThrows 27 | private void 
closeOnComplete() { 28 | writer.close(); 29 | } 30 | 31 | /** Re-encodes the bundled sample.pbf into parallel.pbf inside java.io.tmpdir. Both streams are opened in try-with-resources so they are closed even when starting the writer or parsing throws. */ private void execute() throws IOException { 32 | Logger root = (Logger) LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME); 33 | root.setLevel(Level.TRACE); 34 | 35 | String outputFilename = System.getProperty("java.io.tmpdir")+"/parallel.pbf"; 36 | File outputFile = new File(outputFilename); 37 | if (outputFile.exists()) { 38 | outputFile.delete(); 39 | } 40 | outputFile.createNewFile(); 41 | try (OutputStream output = new FileOutputStream(outputFile); 42 | InputStream input = Thread.currentThread().getContextClassLoader().getResourceAsStream("sample.pbf")) { 43 | writer = new ParallelBinaryWriter(output,1, null); 44 | writer.start(); 45 | 46 | /* The onComplete callback closes the writer; the streams are closed afterwards by the try block. NOTE(review): as before, this assumes parse() returns only after onComplete has run - confirm. */ 47 | new ParallelBinaryParser(input, 1) 48 | .onComplete(this::closeOnComplete) 49 | .onNode(this::processNodes) 50 | .onWay(this::processWays) 51 | .onRelation(this::processRelations) 52 | .parse(); 53 | } 54 | } 55 | 56 | public static void main(String[] args) throws IOException { 57 | new ParallelBinaryWriterExample().execute(); 58 | }} 59 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/parser/TagParserTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 
16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.parser; 19 | 20 | import com.wolt.osm.parallelpbf.TestObjectsFactory; 21 | import crosby.binary.Osmformat; 22 | import lombok.var; 23 | import org.junit.jupiter.api.Tag; 24 | import org.junit.jupiter.api.Test; 25 | 26 | import java.util.Collections; 27 | import java.util.function.Consumer; 28 | 29 | import static org.junit.jupiter.api.Assertions.assertEquals; 30 | import static org.junit.jupiter.api.Assertions.assertTrue; 31 | 32 | @Tag("BaseParser") 33 | class TagParserTest { 34 | 35 | /** Minimal concrete BaseParser whose parse() does nothing; it exists only so the test can call the inherited parseTags(). */ static class TagParser extends BaseParser> { 36 | 37 | public TagParser(Consumer callback, Osmformat.StringTable stringTable) { 38 | super(callback, stringTable); 39 | } 40 | 41 | @Override 42 | public void parse(Object message) { } 43 | } 44 | 45 | /** Resolves key index 3 and value index 4 against the shared string table and expects the tag "tag"="value". */ @Test 46 | void testParseTags() { 47 | var testedObject = new TagParser(null, TestObjectsFactory.stringTable); 48 | var actual = testedObject.parseTags(Collections.singletonList(3), Collections.singletonList(4)); 49 | 50 | testedObject.parse(null); 51 | assertTrue(actual.containsKey("tag")); 52 | /* assertEquals takes (expected, actual); the swapped order produced a misleading failure message */ assertEquals("value", actual.get("tag")); 53 | } 54 | } -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/encoder/RelationEncoderTest.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import com.wolt.osm.parallelpbf.TestObjectsFactory; 4 | import com.wolt.osm.parallelpbf.entity.Relation; 5 | import com.wolt.osm.parallelpbf.entity.RelationMember; 6 | import crosby.binary.Osmformat; 7 | import org.junit.jupiter.api.BeforeEach; 8 | import org.junit.jupiter.api.Test; 9 | 10 | import static org.junit.jupiter.api.Assertions.assertEquals; 11 | 12 | class RelationEncoderTest { 13 | private StringTableEncoder stringEncoder; 14 | 15 | @BeforeEach 16 | public void setUp() { 17 | stringEncoder = new StringTableEncoder(); 18 | } 19 | 20 | @Test 21 | 
void testRelationSize() { /* A single fixture relation is expected to be estimated at 55 bytes. */ 22 | RelationEncoder testedObject = new RelationEncoder(stringEncoder); 23 | testedObject.add(TestObjectsFactory.relation()); 24 | 25 | assertEquals(55, testedObject.estimateSize()); 26 | } 27 | 28 | /** Encodes the fixture relation and checks the id, the tag key/value string indices, the member role indices, member types, member ids and the info fields of the resulting message. */ @Test 29 | public void testWrite() { 30 | RelationEncoder testedObject = new RelationEncoder(stringEncoder); 31 | testedObject.add(TestObjectsFactory.relation()); 32 | 33 | Osmformat.PrimitiveGroup actual = testedObject.write().build(); 34 | 35 | Osmformat.Relation r = actual.getRelations(0); 36 | assertEquals(1, r.getId()); 37 | assertEquals(1, r.getKeys(0)); 38 | assertEquals(1, r.getVals(0)); 39 | 40 | assertEquals(1, r.getRolesSid(0)); 41 | assertEquals(2, r.getRolesSid(1)); 42 | assertEquals(3, r.getRolesSid(2)); 43 | 44 | assertEquals(Osmformat.Relation.MemberType.WAY, r.getTypes(0)); 45 | assertEquals(Osmformat.Relation.MemberType.RELATION, r.getTypes(1)); 46 | assertEquals(Osmformat.Relation.MemberType.NODE, r.getTypes(2)); 47 | 48 | assertEquals(2, r.getMemids(0)); 49 | assertEquals(1, r.getMemids(1)); 50 | assertEquals(3, r.getMemids(2)); 51 | 52 | assertEquals(3, r.getInfo().getVersion()); 53 | assertEquals(1, r.getInfo().getUid()); 54 | assertEquals(5, r.getInfo().getChangeset()); 55 | 56 | 57 | 58 | } 59 | } -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/entity/Info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.entity; 19 | 20 | import lombok.Data; 21 | 22 | /** 23 | * Metadata of an OSM entity. Stores author and versioning information. 24 | * 25 | * Most publicly available PBF files will not contain author id, 26 | * name and changeset id due to privacy regulations. 27 | * 28 | */ 29 | @Data 30 | public final class Info { 31 | /** 32 | * Author id of that object version. Stored as a primitive int, so it is never null. 33 | */ 34 | private final int uid; 35 | 36 | /** 37 | * Author username of that object version. May be null. 38 | */ 39 | private final String username; 40 | 41 | /** 42 | * Object version. A value of '-1' marks a 43 | * missing version; being a primitive int it is never null. 44 | */ 45 | private final int version; 46 | 47 | /** 48 | * Object version creation timestamp in milliseconds 49 | * since epoch time. 50 | */ 51 | private final long timestamp; 52 | 53 | /** 54 | * Changeset related to that version. 55 | */ 56 | private final long changeset; 57 | 58 | /** 59 | * If set false, it indicates that the current object version 60 | * has been created by a delete operation on the OSM API. 61 | * 62 | * Stored as a primitive boolean, so it is never null. NOTE(review): confirm that the parser supplies 'true' when the flag is absent from the file. 63 | */ 64 | private final boolean visible; 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/parser/WayParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 
8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.parser; 19 | 20 | import com.wolt.osm.parallelpbf.entity.Way; 21 | import crosby.binary.Osmformat; 22 | import lombok.extern.slf4j.Slf4j; 23 | 24 | import java.util.function.Consumer; 25 | 26 | /** 27 | * Implements OSM Way parser. 28 | * Converts a protobuf Way message into a Way entity and hands it to the callback. 29 | */ 30 | @Slf4j 31 | public final class WayParser extends BaseParser> { 32 | /** 33 | * Parent compatible constructor that sets callback and string table. 34 | * @param callback Callback to call on successful parse. 35 | * @param stringTable String table to use while parsing. 36 | */ 37 | public WayParser(final Consumer callback, final Osmformat.StringTable stringTable) { 38 | super(callback, stringTable); 39 | } 40 | 41 | /** Builds a Way from the message (id, tags, info, node ids), logs it at debug level and passes it to the callback. */ @Override 42 | public void parse(final Osmformat.Way message) { 43 | long nodeId = 0; 44 | Way way = new Way(message.getId()); 45 | way.setTags(parseTags(message.getKeysList(), message.getValsList())); 46 | way.setInfo(parseInfo(message)); 47 | for (Long node : message.getRefsList()) { 48 | nodeId += node; /* refs are applied as running offsets: each value is added to the previous node id */ 49 | way.getNodes().add(nodeId); 50 | } 51 | if (log.isDebugEnabled()) { 52 | log.debug(way.toString()); 53 | } 54 | getCallback().accept(way); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/blob/ReadHeaderLengthTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 
3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.blob; 19 | 20 | import lombok.var; 21 | import org.junit.jupiter.api.Tag; 22 | import org.junit.jupiter.api.Test; 23 | 24 | import java.io.ByteArrayInputStream; 25 | import java.io.IOException; 26 | import java.nio.ByteBuffer; 27 | 28 | import static org.junit.jupiter.api.Assertions.*; 29 | 30 | /** Tests BlobReader.readBlobHeaderLength() against a 4-byte length prefix produced with ByteBuffer.putInt. */ @Tag("BlobReader") 31 | class ReadHeaderLengthTest { 32 | /** The value written into the 4-byte prefix is expected to be returned unchanged. */ @Test 33 | void testReadBlob() throws IOException { 34 | final Integer testLength = 0xDEADCAFE; 35 | var bytes = ByteBuffer.allocate(4).putInt(testLength).array(); 36 | 37 | var blobStream = new ByteArrayInputStream(bytes); 38 | 39 | var testedObject = new BlobReader(blobStream); 40 | var actual = testedObject.readBlobHeaderLength(); 41 | 42 | assertTrue(actual.isPresent()); 43 | assertEquals(testLength, actual.get()); 44 | } 45 | 46 | /** A length of 65 * 1024 is expected to be rejected with an empty result. */ @Test 47 | void testReadBlobTooBig() throws IOException { 48 | final Integer testLength = 65 * 1024; 49 | var bytes = ByteBuffer.allocate(4).putInt(testLength).array(); 50 | 51 | var blobStream = new ByteArrayInputStream(bytes); 52 | 53 | var testedObject = new BlobReader(blobStream); 54 | var actual = testedObject.readBlobHeaderLength(); 55 | 56 | assertFalse(actual.isPresent()); 57 | } 58 | } -------------------------------------------------------------------------------- 
/src/test/java/com/wolt/osm/parallelpbf/encoder/StringTableEncoderTest.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import com.google.protobuf.ByteString; 4 | import com.wolt.osm.parallelpbf.TestObjectsFactory; 5 | import crosby.binary.Osmformat; 6 | import org.junit.jupiter.api.Test; 7 | 8 | import static org.junit.jupiter.api.Assertions.assertEquals; 9 | 10 | /** Unit tests for StringTableEncoder. */ class StringTableEncoderTest { 11 | /** Asking for the same string twice must return the same index. */ @Test 12 | public void testStableIndex() { 13 | StringTableEncoder testedObject = new StringTableEncoder(); 14 | int index = testedObject.getStringIndex(TestObjectsFactory.testTag); 15 | int actual = testedObject.getStringIndex(TestObjectsFactory.testTag); 16 | 17 | assertEquals(index, actual); 18 | } 19 | 20 | /** Looking up an already indexed string again must not grow the reported size. */ @Test 21 | public void testStringTableSize() { 22 | StringTableEncoder testedObject = new StringTableEncoder(); 23 | testedObject.getStringIndex(TestObjectsFactory.testTag); 24 | assertEquals(4, testedObject.getStringSize()); 25 | testedObject.getStringIndex(TestObjectsFactory.testTag); 26 | assertEquals(4, testedObject.getStringSize()); 27 | } 28 | 29 | /** Every returned index must resolve to the original string in the built table. */ @Test 30 | public void testStringPresence() { 31 | String first = "first"; 32 | String second = "second"; 33 | 34 | StringTableEncoder testedObject = new StringTableEncoder(); 35 | int firstIndex = testedObject.getStringIndex(first); 36 | int secondIndex = testedObject.getStringIndex(second); 37 | int thirdIndex = testedObject.getStringIndex(second); 38 | 39 | Osmformat.StringTable strings = testedObject.getStrings().build(); 40 | 41 | assertEquals(first, strings.getS(firstIndex).toStringUtf8()); 42 | assertEquals(second, strings.getS(secondIndex).toStringUtf8()); 43 | assertEquals(second, strings.getS(thirdIndex).toStringUtf8()); 44 | } 45 | 46 | /** Entry 0 of the built table must stay the empty string even after other strings are added. */ @Test 47 | public void testFirstStringReserved() { 48 | StringTableEncoder testedObject = new StringTableEncoder(); 49 | testedObject.getStringIndex(TestObjectsFactory.testTag); 
50 | 51 | assertEquals(ByteString.EMPTY,testedObject.getStrings().build().getS(0)); 52 | } 53 | } -------------------------------------------------------------------------------- /src/main/proto/fileformat.proto: -------------------------------------------------------------------------------- 1 | /** Copyright (c) 2010 Scott A. Crosby. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | 9 | */ 10 | 11 | option optimize_for = LITE_RUNTIME; 12 | option java_package = "crosby.binary"; 13 | package OSMPBF; 14 | 15 | //protoc --java_out=../.. fileformat.proto 16 | 17 | 18 | // 19 | // STORAGE LAYER: Storing primitives. 20 | // 21 | 22 | /* Payload of one file block: raw bytes, or a compressed form of the data together with raw_size. */ message Blob { 23 | optional bytes raw = 1; // No compression 24 | optional int32 raw_size = 2; // When compressed, the uncompressed size 25 | 26 | // Possible compressed versions of the data. 27 | optional bytes zlib_data = 3; 28 | 29 | // PROPOSED feature for LZMA compressed data. SUPPORT IS NOT REQUIRED. 
30 | optional bytes lzma_data = 4; 31 | 32 | // Formerly used for bzip2 compressed data. Deprecated in 2010. 33 | optional bytes OBSOLETE_bzip2_data = 5 [deprecated=true]; // Don't reuse this tag number. 34 | } 35 | 36 | /* A file contains a sequence of fileblock headers, each prefixed by 37 | their length in network byte order, followed by a data block 38 | containing the actual data. Types starting with a "_" are reserved. 39 | */ 40 | 41 | message BlobHeader { 42 | required string type = 1; 43 | optional bytes indexdata = 2; 44 | required int32 datasize = 3; 45 | } 46 | 47 | 48 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/parser/RelationParserTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 
16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.parser; 19 | 20 | import com.wolt.osm.parallelpbf.TestObjectsFactory; 21 | import com.wolt.osm.parallelpbf.entity.Relation; 22 | import com.wolt.osm.parallelpbf.entity.RelationMember; 23 | import lombok.var; 24 | import org.junit.jupiter.api.Assertions; 25 | import org.junit.jupiter.api.Test; 26 | 27 | import java.util.function.Consumer; 28 | 29 | import static org.junit.jupiter.api.Assertions.*; 30 | 31 | /** Feeds the shared relation fixture through RelationParser and checks the Relation handed to the callback. */ class RelationParserTest { 32 | /** Callback passed to the parser: asserts the id, the info, the "tag"="value" tag and the first member (id 9000, type NODE, role "fail"). NOTE(review): these assertions only run if the parser invokes the callback; the test does not fail when it is never called. */ private final Consumer checker = (relation) -> { 33 | assertEquals(1, relation.getId()); 34 | Assertions.assertEquals(TestObjectsFactory.info, relation.getInfo()); 35 | 36 | var tags = relation.getTags(); 37 | assertTrue(tags.containsKey("tag")); 38 | assertEquals("value", tags.get("tag")); 39 | 40 | var actualMember = relation.getMembers().get(0); 41 | assertEquals(9000, actualMember.getId().longValue()); 42 | Assertions.assertEquals(RelationMember.Type.NODE, actualMember.getType()); 43 | assertEquals("fail", actualMember.getRole()); 44 | assertNotNull(relation.getInfo()); 45 | assertEquals("test", relation.getInfo().getUsername()); 46 | }; 47 | 48 | @Test 49 | void testRelationParse() { 50 | var testedObject = new RelationParser(checker, TestObjectsFactory.stringTable); 51 | testedObject.parse(TestObjectsFactory.relationMessage); 52 | } 53 | } -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/encoder/StringTableEncoder.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import com.google.protobuf.ByteString; 4 | import crosby.binary.Osmformat; 5 | 6 | import java.util.HashMap; 7 | import java.util.Map; 8 | 9 | /** 10 | * Encodes StringTable for the whole blob. 11 | * 12 | * Encoder is stateful and shouldn't be used after 'getStrings' call is issued. 13 | * Encoder is not thread-safe. 
14 | */ 15 | public class StringTableEncoder { 16 | /** 17 | * Keeps current maximum string index value. 18 | */ 19 | private int stringIndex = 0; 20 | 21 | /** 22 | * Size of strings kept in the string table, counted in UTF-8 encoded bytes. 23 | */ 24 | private int stringTableSize = 0; 25 | 26 | /** 27 | * Reverse index mapping - for string already stored in the table it will map 28 | * string values back to their indices. 29 | */ 30 | private Map indexMap = new HashMap<>(); 31 | 32 | /** 33 | * The string table. 34 | */ 35 | private Osmformat.StringTable.Builder strings = Osmformat.StringTable.newBuilder(); 36 | 37 | /** 38 | * Adds string to the string table and adds the size of its UTF-8 encoding to the stringtable size. 39 | * @param str String to add. 40 | * @return String index in table. 41 | */ 42 | private int addStringToTable(final String str) { 43 | /* Count the encoded bytes, not String.length(): length() counts UTF-16 code units and undercounts any non-ASCII text. */ final ByteString encoded = ByteString.copyFromUtf8(str); 44 | stringTableSize = stringTableSize + encoded.size(); strings.addS(encoded); 45 | return ++stringIndex; 46 | } 47 | 48 | /** 49 | * Default constructor. 50 | */ 51 | public StringTableEncoder() { 52 | strings.addS(ByteString.EMPTY); //First entry with index 0 is always empty. 53 | } 54 | 55 | /** 56 | * String table accessor. 57 | * @return string table value. 58 | */ 59 | public Osmformat.StringTable.Builder getStrings() { 60 | return strings; 61 | } 62 | 63 | /** 64 | * Finds stringtable index for a supplied string. Will return either existing index for a string 65 | * or add string to the stringtable and emit a new index. 66 | * @param s String to index. 67 | * @return Strings index in the stringtable. 68 | */ 69 | public int getStringIndex(final String s) { 70 | return indexMap.computeIfAbsent(s, this::addStringToTable); 71 | } 72 | 73 | /** 74 | * Returns current estimated size of the string table. 75 | * @return size in bytes. 
76 | */ 77 | public int getStringSize() { 78 | return stringTableSize; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/entity/Header.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.entity; 19 | 20 | import lombok.Data; 21 | 22 | import java.util.List; 23 | 24 | /** 25 | * OSM PBF file header. It is not a part of the OSM v0.6 API. 26 | */ 27 | @Data 28 | public class Header { 29 | /* 30 | Definitions of OSM PBF features. See https://wiki.openstreetmap.org/wiki/PBF_Format for details. 31 | */ 32 | /** 33 | * "OsmSchema-V0.6" — File contains data with the OSM v0.6 schema. 34 | */ 35 | public static final String FEATURE_OSM_SCHEMA = "OsmSchema-V0.6"; 36 | 37 | /** 38 | * "DenseNodes" — File contains dense nodes and dense info. 39 | */ 40 | public static final String FEATURE_DENSE_NODES = "DenseNodes"; 41 | 42 | /** 43 | * "HistoricalInformation" — File contains historical OSM data. 44 | */ 45 | public static final String FEATURE_HISTORICAL_INFORMATION = "HistoricalInformation"; 46 | 47 | /** 48 | * List of features, required to read and process PBF data. 
49 | */ 50 | private final List requiredFeatures; 51 | /** 52 | * List of optional features that may be present in the PBF file. 53 | * 54 | * Please note that those features may include sorting features, 55 | * like 'Sort.Type_then_ID', 'Sort.Geographic' etc. 56 | * Due to the asynchronous nature of parallel processing, 57 | * that order is not guaranteed to be kept during 58 | * the reading procedure. 59 | */ 60 | private final List optionalFeatures; 61 | 62 | /** 63 | * Optional name of the PBF file's origination program. May be null. 64 | */ 65 | private String writingProgram; 66 | 67 | /** 68 | * Optional source information. May be null. 69 | */ 70 | private String source; 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/parser/RelationParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 
16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.parser; 19 | 20 | import com.wolt.osm.parallelpbf.entity.Relation; 21 | import com.wolt.osm.parallelpbf.entity.RelationMember; 22 | import crosby.binary.Osmformat; 23 | import lombok.extern.slf4j.Slf4j; 24 | import lombok.var; 25 | 26 | import java.util.function.Consumer; 27 | 28 | /** 29 | * Implements OSM Relation parser. 30 | * Converts a protobuf Relation message into a Relation entity and hands it to the callback. 31 | */ 32 | @Slf4j 33 | public final class RelationParser extends BaseParser> { 34 | /** 35 | * Parent compatible constructor that sets callback and string table. 36 | * @param callback Callback to call on successful parse. 37 | * @param stringTable String table to use while parsing. 38 | */ 39 | public RelationParser(final Consumer callback, final Osmformat.StringTable stringTable) { 40 | super(callback, stringTable); 41 | } 42 | 43 | /** Builds a Relation from the message (id, tags, info, members), logs it at debug level and passes it to the callback. */ @Override 44 | public void parse(final Osmformat.Relation message) { 45 | long memberId = 0; 46 | var relation = new Relation(message.getId()); 47 | relation.setTags(parseTags(message.getKeysList(), message.getValsList())); 48 | relation.setInfo(parseInfo(message)); 49 | /* NOTE(review): the loop is bounded by the roles_sid count only; memids and types are read with the same index and are assumed to have the same length. */ for (int indx = 0; indx < message.getRolesSidCount(); ++indx) { 50 | String role = getStringTable().getS(message.getRolesSid(indx)).toStringUtf8(); 51 | memberId += message.getMemids(indx); /* memids are applied as running offsets: each value is added to the previous member id */ 52 | var type = RelationMember.Type.get(message.getTypes(indx).getNumber()); 53 | var member = new RelationMember(memberId, role, type); 54 | relation.getMembers().add(member); 55 | } 56 | 57 | if (log.isDebugEnabled()) { 58 | log.debug(relation.toString()); 59 | } 60 | getCallback().accept(relation); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/blob/ReadBlobTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 
3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.blob; 19 | 20 | import lombok.var; 21 | import org.apache.commons.io.IOUtils; 22 | import org.junit.jupiter.api.Tag; 23 | import org.junit.jupiter.api.Test; 24 | 25 | import java.io.IOException; 26 | import java.io.InputStream; 27 | import java.nio.charset.StandardCharsets; 28 | 29 | import static org.easymock.EasyMock.*; 30 | import static org.junit.jupiter.api.Assertions.*; 31 | 32 | /** Tests BlobReader.readBlob() for a complete read, a stream that is too short and a stream that fails. */ @Tag("BlobReader") 33 | class ReadBlobTest { 34 | 35 | /** Reading exactly the available number of bytes is expected to return them unchanged. */ @Test 36 | void testReadBlob() throws IOException { 37 | final String testString = "test blob"; 38 | 39 | var blobStream = IOUtils.toInputStream(testString, "UTF-8"); 40 | 41 | var testedObject = new BlobReader(blobStream); 42 | var actual = testedObject.readBlob(testString.length()); 43 | 44 | assertTrue(actual.isPresent()); 45 | assertEquals(testString, new String(actual.get(), StandardCharsets.UTF_8)); 46 | } 47 | 48 | /** Requesting twice as many bytes as the stream holds is expected to yield an empty result. */ @Test 49 | void testReadBlobShort() throws IOException { 50 | final String testString = "test blob"; 51 | 52 | var blobStream = IOUtils.toInputStream(testString, "UTF-8"); 53 | 54 | var testedObject = new BlobReader(blobStream); 55 | var actual = testedObject.readBlob(testString.length()*2); 56 | 57 | assertFalse(actual.isPresent()); 58 | } 59 | 60 | @Test 61 | void testReadBlobFailure() throws IOException { 62 | final String 
testString = "test blob"; 63 | /* The mocked stream throws IOException on read; readBlob is expected to report that as an empty result. */ 64 | InputStream blobStream = mock(InputStream.class); 65 | expect(blobStream.read(anyObject())).andStubThrow(new IOException()); 66 | replay(blobStream); 67 | 68 | var testedObject = new BlobReader(blobStream); 69 | var actual = testedObject.readBlob(testString.length()); 70 | 71 | assertFalse(actual.isPresent()); 72 | } 73 | } -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/blob/BlobWriterTest.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.blob; 2 | 3 | import com.google.protobuf.InvalidProtocolBufferException; 4 | import crosby.binary.Fileformat; 5 | import org.junit.jupiter.api.Test; 6 | 7 | import java.io.ByteArrayOutputStream; 8 | import java.nio.ByteBuffer; 9 | import java.util.Arrays; 10 | import java.util.zip.DataFormatException; 11 | import java.util.zip.Inflater; 12 | 13 | import static org.junit.jupiter.api.Assertions.*; 14 | 15 | /** Tests BlobWriter by decoding what it wrote: a 4-byte length prefix, a BlobHeader and, for data blobs, a Blob message. */ class BlobWriterTest { 16 | @Test 17 | void testWriter() throws InvalidProtocolBufferException, DataFormatException { 18 | String expected = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt"; 19 | ByteArrayOutputStream output = new ByteArrayOutputStream(); 20 | 21 | BlobWriter testedObject = new BlobWriter(output); 22 | /* NOTE(review): getBytes() without a charset uses the platform default encoding. */ assertTrue(testedObject.writeData(expected.getBytes())); 23 | 24 | byte[] data = output.toByteArray(); 25 | 26 | ByteBuffer size = ByteBuffer.wrap(data); 27 | /* the first four bytes hold the length of the BlobHeader that follows */ assertEquals(11, size.getInt()); 28 | 29 | 30 | byte[] headerBinary = Arrays.copyOfRange(data, 4, 4 + 11); 31 | Fileformat.BlobHeader blobHeader = Fileformat.BlobHeader.parseFrom(headerBinary); 32 | assertEquals(BlobInformation.TYPE_OSM_DATA, blobHeader.getType()); 33 | assertEquals(81, blobHeader.getDatasize()); 34 | 35 | byte[] blobBinary = Arrays.copyOfRange(data, 15, 15 + 81); 36 | Fileformat.Blob blob = 
Fileformat.Blob.parseFrom(blobBinary); 37 | assertEquals(89, blob.getRawSize()); 38 | 39 | byte[] content = blob.getZlibData().toByteArray(); 40 | byte[] uncompressedString = new byte[89]; 41 | Inflater inflater = new Inflater(); 42 | inflater.setInput(content); 43 | /* inflate() returns the number of bytes produced; the whole raw payload must come back in one call */ assertEquals(89, inflater.inflate(uncompressedString)); 44 | /* release the inflater's resources; the former finished() call only queried state and its result was discarded */ inflater.end(); 45 | 46 | assertEquals(expected, new String(uncompressedString)); 47 | } 48 | 49 | /** Writes a one-byte header blob and checks the length prefix (13) and the OSMHeader blob type. */ @Test 50 | void testWriterHeader() throws InvalidProtocolBufferException { 51 | ByteArrayOutputStream output = new ByteArrayOutputStream(); 52 | 53 | BlobWriter testedObject = new BlobWriter(output); 54 | assertTrue(testedObject.writeHeader(new byte[1])); 55 | 56 | byte[] data = output.toByteArray(); 57 | 58 | ByteBuffer size = ByteBuffer.wrap(data); 59 | assertEquals(13, size.getInt()); 60 | 61 | 62 | byte[] headerBinary = Arrays.copyOfRange(data, 4, 4 + 13); 63 | Fileformat.BlobHeader blobHeader = Fileformat.BlobHeader.parseFrom(headerBinary); 64 | assertEquals(BlobInformation.TYPE_OSM_HEADER, blobHeader.getType()); 65 | } 66 | } -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/blob/ReadHeaderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 
16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.blob; 19 | 20 | import crosby.binary.Fileformat; 21 | import lombok.var; 22 | import org.apache.commons.io.IOUtils; 23 | import org.junit.jupiter.api.Tag; 24 | import org.junit.jupiter.api.Test; 25 | 26 | import java.io.ByteArrayInputStream; 27 | import java.io.IOException; 28 | 29 | import static org.junit.jupiter.api.Assertions.*; 30 | 31 | @Tag("BlobReader") 32 | class ReadHeaderTest { 33 | @Test 34 | void testReadHeader() { 35 | var testHeader = Fileformat.BlobHeader.newBuilder() 36 | .setType("OSMHeader") 37 | .setDatasize(1024).build(); 38 | 39 | var blobStream = new ByteArrayInputStream(testHeader.toByteArray()); 40 | 41 | var testedObject = new BlobReader(blobStream); 42 | var actual = testedObject.readBlobHeader(testHeader.getSerializedSize()); 43 | 44 | assertTrue(actual.isPresent()); 45 | assertEquals(testHeader.getType(), actual.get().getType()); 46 | assertEquals(testHeader.getDatasize(), actual.get().getSize().intValue()); 47 | } 48 | 49 | @Test 50 | void testReadHeaderTooBig() { 51 | var testHeader = Fileformat.BlobHeader.newBuilder() 52 | .setType("OSMHeader") 53 | .setDatasize(33 * 1024 * 1024).build(); 54 | 55 | var blobStream = new ByteArrayInputStream(testHeader.toByteArray()); 56 | 57 | var testedObject = new BlobReader(blobStream); 58 | var actual = testedObject.readBlobHeader(testHeader.getSerializedSize()); 59 | 60 | assertFalse(actual.isPresent()); 61 | } 62 | 63 | @Test 64 | void testReadHeaderInvalidFormat() throws IOException { 65 | var blobStream = IOUtils.toInputStream("test blob", "UTF-8"); 66 | 67 | var testedObject = new BlobReader(blobStream); 68 | var actual = testedObject.readBlobHeader(4); 69 | 70 | assertFalse(actual.isPresent()); 71 | } 72 | } -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/parser/NodeParserTest.java: -------------------------------------------------------------------------------- 1 | /* 
2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.parser; 19 | 20 | import com.wolt.osm.parallelpbf.entity.Node; 21 | import com.wolt.osm.parallelpbf.TestObjectsFactory; 22 | import crosby.binary.Osmformat; 23 | import lombok.var; 24 | import org.junit.jupiter.api.Test; 25 | 26 | import java.util.function.Consumer; 27 | 28 | import static org.junit.jupiter.api.Assertions.assertEquals; 29 | import static org.junit.jupiter.api.Assertions.assertTrue; 30 | 31 | class NodeParserTest { 32 | 33 | private final Consumer checker = (node) -> { 34 | assertEquals(1, node.getId()); 35 | assertEquals(1, node.getLat(), 0.1); 36 | assertEquals(2, node.getLon(), 0.1); 37 | assertEquals(TestObjectsFactory.info, node.getInfo()); 38 | 39 | var tags = node.getTags(); 40 | assertTrue(tags.containsKey("tag")); 41 | assertEquals("value", tags.get("tag")); 42 | }; 43 | 44 | @Test 45 | void testNodeParse() { 46 | var testedObject = new NodeParser(checker, TestObjectsFactory.stringTable, 1, 0, 0, 1); 47 | testedObject.parse(TestObjectsFactory.nodeMessage); 48 | } 49 | 50 | @Test 51 | void testDenseNodeParse() { 52 | var testedObject = new NodeParser(checker, TestObjectsFactory.stringTable, 1, 0, 0, 1); 53 | testedObject.parse(TestObjectsFactory.denseNodesMessage); 54 | } 55 | 56 | @Test 57 | void 
testDenseNodeParseDefaultVisible() { 58 | var denseInfo = Osmformat.DenseInfo.newBuilder() 59 | .addUid(1) 60 | .addUserSid(2) 61 | .addVersion(3) 62 | .addTimestamp(4) 63 | .addChangeset(5) 64 | .build(); 65 | var denseNodes = Osmformat.DenseNodes.newBuilder() 66 | .addId(1) 67 | .addLat(1000000000) 68 | .addLon(2000000000) 69 | .addKeysVals(3).addKeysVals(4).addKeysVals(0) 70 | .setDenseinfo(denseInfo) 71 | .build(); 72 | 73 | var testedObject = new NodeParser(checker, TestObjectsFactory.stringTable, 1, 0, 0, 1); 74 | testedObject.parse(denseNodes); 75 | } 76 | } -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/entity/RelationMember.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.entity; 19 | 20 | import lombok.Data; 21 | 22 | /** 23 | * Single relation participant. 24 | * 25 | * @see Relation 26 | */ 27 | @Data 28 | public final class RelationMember { 29 | /** 30 | * Defines relation member types. 31 | * 32 | * The values of the enum participants are linked to 33 | * the underlying protobuf definitions. 34 | */ 35 | public enum Type { 36 | /** 37 | * Relation member is Node. 
38 | * 39 | * @see Node 40 | */ 41 | NODE(0), 42 | 43 | /** 44 | * Relation member is Way. 45 | * 46 | * @see Way 47 | */ 48 | WAY(1), 49 | 50 | /** 51 | * Relation member is another Relation. 52 | * 53 | * @see Relation 54 | */ 55 | RELATION(2); 56 | 57 | /** 58 | * A related protobuf relation member id. 59 | */ 60 | private final int value; 61 | 62 | /** 63 | * Constructor for enum entry value. 64 | * @param v Protobuf relation member id. 65 | * 66 | * @see crosby.binary.Osmformat.Relation.MemberType 67 | */ 68 | Type(final int v) { 69 | this.value = v; 70 | } 71 | 72 | /** 73 | * Finds proper enum entry by protobuf MemberType value. 74 | * @param v Protobuf relation member id. 75 | * @return Matching enum entry. 76 | * @throws IllegalArgumentException in case of unknown member id. 77 | */ 78 | public static Type get(final int v) { 79 | for (Type t : Type.values()) { 80 | if (t.value == v) { 81 | return t; 82 | } 83 | } 84 | throw new IllegalArgumentException(); 85 | } 86 | } 87 | 88 | /** 89 | * Id of referenced entity. 90 | */ 91 | private final Long id; 92 | 93 | /** 94 | * Role of the referenced entity in the relation. 95 | * Can be null. 96 | */ 97 | private final String role; 98 | 99 | /** 100 | * Type of the referencing entity. 101 | * 102 | * @see RelationMember.Type 103 | */ 104 | private final Type type; 105 | } 106 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/encoder/WayEncoder.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import com.wolt.osm.parallelpbf.entity.Way; 4 | import crosby.binary.Osmformat; 5 | 6 | /** 7 | * Encodes for Way structure. Keeps data for the next blob 8 | * production in RAM and form byte[] blob in request. 9 | * 10 | * Encoder is stateful and can't be used after 'write' call is issued. 11 | * Encoder is not thread-safe. 
12 | */ 13 | public final class WayEncoder extends OsmEntityEncoder { 14 | /** 15 | * Length of all members arrays, calculated as sum of all members entries of each 16 | * way. 17 | */ 18 | private int membersLength = 0; 19 | 20 | /** 21 | * Length of all tags (keys/vals) arrays, calculated as sum of all tags entries of each 22 | * way. 23 | */ 24 | private int tagsLength = 0; 25 | 26 | /** 27 | * Ways builder. 28 | */ 29 | private Osmformat.PrimitiveGroup.Builder ways = Osmformat.PrimitiveGroup.newBuilder(); 30 | 31 | /** 32 | * Block-wide string table encoder. 33 | */ 34 | private final StringTableEncoder stringEncoder; 35 | 36 | /** 37 | * Default constructor. 38 | * @param stringTableEncoder Block-wide string encoder. 39 | */ 40 | public WayEncoder(final StringTableEncoder stringTableEncoder) { 41 | super(); 42 | this.stringEncoder = stringTableEncoder; 43 | } 44 | 45 | /** 46 | * Add wy to the encoder. 47 | * @param w Way to add. 48 | * @throws IllegalStateException when call after write() call. 49 | */ 50 | @Override 51 | protected void addImpl(final Way w) { 52 | Osmformat.Way.Builder way = Osmformat.Way.newBuilder(); 53 | way.setId(w.getId()); 54 | w.getTags().forEach((k, v) -> { 55 | way.addKeys(stringEncoder.getStringIndex(k)); 56 | way.addVals(stringEncoder.getStringIndex(v)); 57 | }); 58 | 59 | tagsLength = tagsLength + w.getTags().size() * MEMBER_ENTRY_SIZE; 60 | 61 | Osmformat.Info info = 62 | w.getInfo() != null ? 
Osmformat.Info.getDefaultInstance().toBuilder() 63 | .setChangeset(w.getInfo().getChangeset()) 64 | .setTimestamp(w.getInfo().getTimestamp()) 65 | .setUid(w.getInfo().getUid()) 66 | .setUserSid(stringEncoder.getStringIndex(w.getInfo().getUsername())) 67 | .setVersion(w.getInfo().getVersion()) 68 | .setVisible(w.getInfo().isVisible()) 69 | .build() : Osmformat.Info.getDefaultInstance(); 70 | way.setInfo(info); 71 | 72 | long member = 0; 73 | for (long node : w.getNodes()) { 74 | way.addRefs(node - member); 75 | member = node; 76 | } 77 | membersLength = membersLength + w.getNodes().size() * MEMBER_ENTRY_SIZE; 78 | 79 | ways.addWays(way); 80 | } 81 | 82 | /** 83 | * Provides approximate size of the future blob. 84 | * Size is calculated as 8 bytes per each way plus 8 bytes per each tag plus 4 bytes each member.. 85 | * As protobuf will compact the values in arrays, actual size expected to be smaller. 86 | * @return Estimated approximate maximum size of a blob. 87 | */ 88 | @Override 89 | public int estimateSize() { 90 | return membersLength + tagsLength + ways.getWaysCount() * MEMBER_ENTRY_SIZE; 91 | } 92 | 93 | @Override 94 | protected Osmformat.PrimitiveGroup.Builder writeImpl() { 95 | return ways; 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/encoder/RelationEncoder.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import com.wolt.osm.parallelpbf.entity.Relation; 4 | import com.wolt.osm.parallelpbf.entity.RelationMember; 5 | import crosby.binary.Osmformat; 6 | 7 | /** 8 | * Encodes for Relation structure. Keeps data for the next blob 9 | * production in RAM and form byte[] blob in request. 10 | * 11 | * Encoder is stateful and can't be used after 'write' call is issued. 12 | * Encoder is not thread-safe. 
13 | */ 14 | public final class RelationEncoder extends OsmEntityEncoder { 15 | /** 16 | * Single relation entry is built of role ide of int, 17 | * member id of long and type with one byte size. 18 | */ 19 | private static final int RELATION_ENTRY_SIZE = 13; 20 | 21 | /** 22 | * Length of all members arrays, calculated as sum of all members entries of each 23 | * way. 24 | */ 25 | private int membersLength = 0; 26 | 27 | /** 28 | * Length of all tags (keys/vals) arrays, calculated as sum of all tags entries of each 29 | * way. 30 | */ 31 | private int tagsLength = 0; 32 | 33 | /** 34 | * Ways builder. 35 | */ 36 | private Osmformat.PrimitiveGroup.Builder relations = Osmformat.PrimitiveGroup.newBuilder(); 37 | 38 | /** 39 | * Block-wide string table encoder. 40 | */ 41 | private final StringTableEncoder stringEncoder; 42 | 43 | /** 44 | * Constructor. 45 | * @param stringTableEncoder Block-wide string encoder. 46 | */ 47 | public RelationEncoder(final StringTableEncoder stringTableEncoder) { 48 | this.stringEncoder = stringTableEncoder; 49 | } 50 | 51 | @Override 52 | protected void addImpl(final Relation r) { 53 | Osmformat.Relation.Builder relation = Osmformat.Relation.newBuilder(); 54 | 55 | relation.setId(r.getId()); 56 | 57 | r.getTags().forEach((k, v) -> { 58 | relation.addKeys(stringEncoder.getStringIndex(k)); 59 | relation.addVals(stringEncoder.getStringIndex(v)); 60 | }); 61 | tagsLength = tagsLength + r.getTags().size() * MEMBER_ENTRY_SIZE; 62 | 63 | Osmformat.Info info = r.getInfo() != null ? 
Osmformat.Info.newBuilder() 64 | .setUserSid(stringEncoder.getStringIndex(r.getInfo().getUsername())) 65 | .setVisible(r.getInfo().isVisible()) 66 | .setUid(r.getInfo().getUid()) 67 | .setVersion(r.getInfo().getVersion()) 68 | .setTimestamp(r.getInfo().getTimestamp()) 69 | .setChangeset(r.getInfo().getChangeset()) 70 | .build() : Osmformat.Info.getDefaultInstance(); 71 | relation.setInfo(info); 72 | 73 | long member = 0; 74 | for (RelationMember rm : r.getMembers()) { 75 | relation.addRolesSid(stringEncoder.getStringIndex(rm.getRole())); 76 | relation.addMemids(rm.getId() - member); 77 | member = rm.getId(); 78 | relation.addTypes(Osmformat.Relation.MemberType.valueOf(rm.getType().ordinal())); 79 | } 80 | membersLength = membersLength + r.getMembers().size() * RELATION_ENTRY_SIZE; 81 | 82 | relations.addRelations(relation); 83 | } 84 | 85 | @Override 86 | public int estimateSize() { 87 | return relations.getRelationsCount() * MEMBER_ENTRY_SIZE + membersLength + tagsLength; 88 | } 89 | 90 | @Override 91 | protected Osmformat.PrimitiveGroup.Builder writeImpl() { 92 | return relations; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/blob/BlobWriter.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.blob; 2 | 3 | import com.google.protobuf.ByteString; 4 | import crosby.binary.Fileformat; 5 | import lombok.RequiredArgsConstructor; 6 | import lombok.extern.slf4j.Slf4j; 7 | 8 | import java.io.IOException; 9 | import java.io.OutputStream; 10 | import java.nio.ByteBuffer; 11 | import java.util.zip.Deflater; 12 | 13 | /** 14 | * Fileformat writer. Should be shared between all the OSMWriters 15 | * as it owns single OutputStream. 16 | *

17 | * Accepts blob to write with the type and serializes it to the 18 | * output stream. Writing to the stream is synchronized, so it 19 | * is thread safe. 20 | */ 21 | @RequiredArgsConstructor 22 | @Slf4j 23 | public class BlobWriter { 24 | /** 25 | * Size of a int, prepending the HeaderBlock. 26 | */ 27 | private static final int INT_SIZE = 4; 28 | /** 29 | * Output data stream. 30 | */ 31 | private final OutputStream output; 32 | 33 | /** 34 | * Blob writing helper. Adds headerBlob and size to the stream. 35 | * Stream is locked during output operation. 36 | * @param blob Blob to write. 37 | * @param type Type of that blob. 38 | * @return false in case of error, true otherwise. 39 | */ 40 | private boolean write(final byte[] blob, final String type) { 41 | // Form headerBlob 42 | byte[] headerBlob = Fileformat.BlobHeader.newBuilder() 43 | .setType(type) 44 | .setDatasize(blob.length) 45 | .build().toByteArray(); 46 | 47 | // Get size of the headerBlob 48 | byte[] size = ByteBuffer.allocate(INT_SIZE).putInt(headerBlob.length).array(); 49 | 50 | // Write it to the output stream 51 | synchronized (output) { 52 | try { 53 | output.write(size); 54 | output.write(headerBlob); 55 | output.write(blob); 56 | } catch (IOException e) { 57 | log.error("Error while writing data blob: {}", e.getMessage(), e); 58 | return false; 59 | } 60 | } 61 | return true; 62 | } 63 | 64 | /** 65 | * Writes data blob to the OutputStream. Blob will be compresed, prepended with HeaderBlob 66 | * and its size. 67 | * OutputFileStream will be locked during IO operation. 68 | * 69 | * @param blob binary blob to write. 70 | * @return false in case of error, true otherwise. 
71 | */ 72 | public boolean writeData(final byte[] blob) { 73 | // Form DataBlob 74 | Deflater compressor = new Deflater(Deflater.BEST_COMPRESSION); 75 | byte[] compressedBlob = new byte[blob.length]; 76 | compressor.setInput(blob); 77 | compressor.finish(); 78 | int compressedBlobLength = compressor.deflate(compressedBlob); 79 | compressor.end(); 80 | byte[] dataBlob = Fileformat.Blob.newBuilder() 81 | .setRawSize(blob.length) 82 | .setZlibData(ByteString.copyFrom(compressedBlob, 0, compressedBlobLength)) 83 | .build().toByteArray(); 84 | 85 | return write(dataBlob, BlobInformation.TYPE_OSM_DATA); 86 | } 87 | 88 | /** 89 | * Writes header blob to the OutputStream. Blob will be prepended with HeaderBlob 90 | * and its size. 91 | * OutputFileStream will be locked during IO operation. 92 | * 93 | * @param blob binary blob to write. 94 | * @return false in case of error, true otherwise. 95 | */ 96 | public boolean writeHeader(final byte[] blob) { 97 | // Form DataBlob 98 | byte[] dataBlob = Fileformat.Blob.newBuilder() 99 | .setRaw(ByteString.copyFrom(blob)) 100 | .build().toByteArray(); 101 | 102 | return write(dataBlob, BlobInformation.TYPE_OSM_HEADER); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/ParalelBinaryParserExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf; 19 | 20 | import ch.qos.logback.classic.Level; 21 | import ch.qos.logback.classic.Logger; 22 | import com.wolt.osm.parallelpbf.entity.*; 23 | import org.slf4j.LoggerFactory; 24 | 25 | import java.io.InputStream; 26 | import java.util.concurrent.atomic.AtomicLong; 27 | 28 | public class ParalelBinaryParserExample { 29 | 30 | private final StringBuilder output = new StringBuilder(); 31 | private final AtomicLong nodesCounter = new AtomicLong(); 32 | private final AtomicLong waysCounter = new AtomicLong(); 33 | private final AtomicLong relationsCounter = new AtomicLong(); 34 | private final AtomicLong changesetsCounter = new AtomicLong(); 35 | 36 | private void processHeader(Header header) { 37 | synchronized (output) { 38 | output.append(header); 39 | output.append("\n"); 40 | } 41 | } 42 | 43 | private void processBoundingBox(BoundBox bbox) { 44 | synchronized (output) { 45 | output.append(bbox); 46 | output.append("\n"); 47 | } 48 | } 49 | 50 | private void processNodes(Node node) { 51 | nodesCounter.incrementAndGet(); 52 | } 53 | 54 | private void processWays(Way way) { 55 | waysCounter.incrementAndGet(); 56 | } 57 | 58 | private void processRelations(Relation way) { 59 | relationsCounter.incrementAndGet(); 60 | } 61 | 62 | private void processChangesets(Long id) { 63 | changesetsCounter.incrementAndGet(); 64 | } 65 | 66 | private void printOnCompletions() { 67 | output.append("Node count: "); 68 | output.append(nodesCounter.get()); 69 | output.append("\n"); 70 | 71 | output.append("Way count: "); 72 | output.append(waysCounter.get()); 73 | output.append("\n"); 74 | 75 | output.append("Relations count: "); 76 | output.append(relationsCounter.get()); 77 | output.append("\n"); 78 | 79 | output.append("Changesets count: "); 80 | 
output.append(changesetsCounter.get()); 81 | output.append("\n"); 82 | 83 | System.out.println("Reading results:"); 84 | System.out.println(output); 85 | } 86 | 87 | private void execute() { 88 | Logger root = (Logger) LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME); 89 | root.setLevel(Level.TRACE); 90 | 91 | InputStream input = Thread.currentThread().getContextClassLoader().getResourceAsStream("sample.pbf"); 92 | new ParallelBinaryParser(input, 1) 93 | .onHeader(this::processHeader) 94 | .onBoundBox(this::processBoundingBox) 95 | .onComplete(this::printOnCompletions) 96 | .onNode(this::processNodes) 97 | .onWay(this::processWays) 98 | .onRelation(this::processRelations) 99 | .onChangeset(this::processChangesets) 100 | .parse(); 101 | } 102 | 103 | public static void main(String[] args) { 104 | new ParalelBinaryParserExample().execute(); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/io/OSMDataReaderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 
16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.io; 19 | 20 | import com.wolt.osm.parallelpbf.entity.Node; 21 | import com.wolt.osm.parallelpbf.entity.Relation; 22 | import com.wolt.osm.parallelpbf.entity.Way; 23 | import com.wolt.osm.parallelpbf.TestObjectsFactory; 24 | import com.google.protobuf.ByteString; 25 | import crosby.binary.Fileformat; 26 | import crosby.binary.Osmformat; 27 | import lombok.var; 28 | import org.junit.jupiter.api.BeforeEach; 29 | import org.junit.jupiter.api.Test; 30 | 31 | import java.util.concurrent.Semaphore; 32 | import java.util.function.Consumer; 33 | 34 | import static org.junit.jupiter.api.Assertions.*; 35 | 36 | class OSMDataReaderTest { 37 | private Semaphore limiter; 38 | 39 | private Osmformat.PrimitiveGroup primitiveGroupMessage = Osmformat.PrimitiveGroup.newBuilder() 40 | .addNodes(TestObjectsFactory.nodeMessage) 41 | .setDense(TestObjectsFactory.denseNodesMessage) 42 | .addWays(TestObjectsFactory.wayMessage) 43 | .addRelations(TestObjectsFactory.relationMessage) 44 | .addChangesets(TestObjectsFactory.changesetMessage) 45 | .build(); 46 | 47 | private Osmformat.PrimitiveBlock primitivesMessage = Osmformat.PrimitiveBlock.newBuilder() 48 | .addPrimitivegroup(primitiveGroupMessage) 49 | .setStringtable(TestObjectsFactory.stringTable) 50 | .setGranularity(1) 51 | .setDateGranularity(1) 52 | .setLatOffset(0) 53 | .setLonOffset(0) 54 | .build(); 55 | 56 | private byte[] blob = Fileformat.Blob.newBuilder().setRaw(primitivesMessage.toByteString()).build().toByteArray(); 57 | 58 | @BeforeEach 59 | void setUp() { 60 | limiter = new Semaphore(0); 61 | } 62 | 63 | @Test 64 | void testInvalidBlob() { 65 | var blob = Fileformat.Blob.newBuilder().setRaw(ByteString.copyFromUtf8("failme")).build().toByteArray(); 66 | 67 | var testedObject = new OSMDataReader(blob, limiter, null, null, null, null); 68 | assertThrows(RuntimeException.class, testedObject::run); 69 | } 70 | 71 | @Test 72 | void testNodesParse() { 73 | Consumer checker = (node 
-> assertEquals(TestObjectsFactory.nodeMessage.getId(), node.getId())); 74 | var testedObject = new OSMDataReader(blob, limiter, checker, null, null, null); 75 | testedObject.run(); 76 | } 77 | 78 | @Test 79 | void testWaysParse() { 80 | Consumer checker = (way -> assertEquals(TestObjectsFactory.wayMessage.getId(), way.getId())); 81 | var testedObject = new OSMDataReader(blob, limiter, null, checker, null, null); 82 | testedObject.run(); 83 | } 84 | 85 | @Test 86 | void testRelationsParse() { 87 | Consumer checker = (relation -> assertEquals(TestObjectsFactory.relationMessage.getId(), relation.getId())); 88 | var testedObject = new OSMDataReader(blob, limiter, null, null, checker, null); 89 | testedObject.run(); 90 | } 91 | 92 | @Test 93 | void testChangesetsParse() { 94 | Consumer checker = (changeset -> assertEquals(TestObjectsFactory.changesetMessage.getId(), changeset.longValue())); 95 | var testedObject = new OSMDataReader(blob, limiter, null, null, null, checker); 96 | testedObject.run(); 97 | } 98 | } -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/ParallelBinaryWriter.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf; 2 | 3 | import com.wolt.osm.parallelpbf.blob.BlobWriter; 4 | import com.wolt.osm.parallelpbf.encoder.OsmHeaderEncoder; 5 | import com.wolt.osm.parallelpbf.entity.BoundBox; 6 | import com.wolt.osm.parallelpbf.entity.OsmEntity; 7 | import com.wolt.osm.parallelpbf.io.OSMWriter; 8 | import lombok.extern.slf4j.Slf4j; 9 | 10 | import java.io.Closeable; 11 | import java.io.OutputStream; 12 | import java.util.LinkedList; 13 | import java.util.concurrent.LinkedBlockingQueue; 14 | 15 | /** 16 | * Parallel OSM PBF format writer. 17 | *

18 | * See https://github.com/woltapp/parallelpbf for the details and usage example. 19 | */ 20 | @Slf4j 21 | public class ParallelBinaryWriter implements Closeable { 22 | /** 23 | * Number of threads to use. 24 | */ 25 | private final int threads; 26 | /** 27 | * Output writer. 28 | */ 29 | private final BlobWriter writer; 30 | 31 | /** 32 | * Writer frontend-to-writing-threads interface. 33 | */ 34 | private final LinkedBlockingQueue writeQueue; 35 | 36 | /** 37 | * List of worker threads. 38 | */ 39 | private final LinkedList workers = new LinkedList<>(); 40 | 41 | /** 42 | * Header writing procedure. 43 | * @param boundBox Optional bounding box to include into header. 44 | * @return false in case of error, true otherwise. 45 | */ 46 | private boolean writeHeader(final BoundBox boundBox) { 47 | return writer.writeHeader(OsmHeaderEncoder.encodeHeader(boundBox)); 48 | } 49 | 50 | /** 51 | * Sets OSM PBF file to write and number of threads to use. 52 | * 53 | * @param outputStream Any OutputStream pointing to the file to write OSM PBF data. 54 | * @param noThreads Number of threads to use. The best results can be achieved when this value 55 | * is set to number of available CPU cores or twice the number of available CPU cores. 56 | * Each thread will use up to 192MB of ram to keep blob data and actually may grow up to 57 | * several hundreds of megabytes. 58 | * @param boundBox Output file bbox. 59 | */ 60 | public ParallelBinaryWriter(final OutputStream outputStream, final int noThreads, final BoundBox boundBox) { 61 | this.writer = new BlobWriter(outputStream); 62 | this.threads = noThreads; 63 | writeQueue = new LinkedBlockingQueue<>(noThreads); 64 | if (!writeHeader(boundBox)) { 65 | throw new RuntimeException("Error while creating writer and writing header"); 66 | } 67 | } 68 | 69 | /** 70 | * Starts writing threads. 
71 | */ 72 | public void start() { 73 | for (int indx = 0; indx < this.threads; ++indx) { 74 | Thread worker = new Thread(new OSMWriter(writer, writeQueue)); 75 | worker.start(); 76 | workers.push(worker); 77 | } 78 | } 79 | 80 | /** 81 | * Write entity to the OSM PBF file. Thread-safe. 82 | * 83 | * @param entity Node/Way/Relation entity, other entity types are ignored. 84 | * @return true if Object queued for writing, false in case of error 85 | */ 86 | public boolean write(final OsmEntity entity) { 87 | try { 88 | writeQueue.put(entity); 89 | } catch (InterruptedException e) { 90 | log.warn("Unable to send entity for writing: {}", e.getMessage(), e); 91 | return false; 92 | } 93 | return true; 94 | } 95 | 96 | /** 97 | * Finishes OSM PBF file. **Must** be called or file may be left unfinished. 98 | */ 99 | @Override 100 | public void close() { 101 | workers.forEach((worker) -> { 102 | worker.interrupt(); 103 | try { 104 | worker.join(); 105 | } catch (InterruptedException e) { 106 | log.warn("Interrupted while waiting for OSMWriter to stop"); 107 | } 108 | }); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/encoder/DenseNodesEncoderTest.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import com.wolt.osm.parallelpbf.TestObjectsFactory; 4 | import com.wolt.osm.parallelpbf.entity.Node; 5 | import crosby.binary.Osmformat; 6 | import org.junit.jupiter.api.BeforeEach; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import static org.junit.jupiter.api.Assertions.assertEquals; 10 | import static org.junit.jupiter.api.Assertions.assertTrue; 11 | 12 | class DenseNodesEncoderTest { 13 | private StringTableEncoder stringEncoder; 14 | 15 | @BeforeEach 16 | public void setUp() { 17 | stringEncoder = new StringTableEncoder(); 18 | } 19 | 20 | @Test 21 | public void testNodeSize() { 22 | 
DenseNodesEncoder testedObject = new DenseNodesEncoder(stringEncoder); 23 | testedObject.add(TestObjectsFactory.node()); 24 | 25 | assertEquals(36, testedObject.estimateSize()); 26 | } 27 | 28 | @Test 29 | public void testWrite() { 30 | DenseNodesEncoder testedObject = new DenseNodesEncoder(stringEncoder); 31 | testedObject.add(TestObjectsFactory.node()); 32 | Osmformat.PrimitiveGroup actual = testedObject.write().build(); 33 | 34 | Osmformat.DenseNodes nodes = actual.getDense(); 35 | assertEquals(1, nodes.getId(0)); 36 | assertEquals(5.0E8, nodes.getLon(0)); 37 | assertEquals(1.0E8, nodes.getLat(0)); 38 | 39 | assertEquals(1, nodes.getKeysVals(0)); 40 | assertEquals(1, nodes.getKeysVals(1)); 41 | assertEquals(0, nodes.getKeysVals(2)); 42 | } 43 | 44 | @Test 45 | public void testTaglessWrite() { 46 | Node node = TestObjectsFactory.node(); 47 | node.getTags().clear(); 48 | DenseNodesEncoder testedObject = new DenseNodesEncoder(stringEncoder); 49 | testedObject.add(node); 50 | testedObject.add(node); 51 | Osmformat.PrimitiveGroup actual = testedObject.write().build(); 52 | 53 | Osmformat.DenseNodes nodes = actual.getDense(); 54 | assertEquals(0, nodes.getKeysValsCount()); 55 | } 56 | 57 | @Test 58 | public void testTagMixWrite() { 59 | Node node = TestObjectsFactory.node(); 60 | node.getTags().clear(); 61 | DenseNodesEncoder testedObject = new DenseNodesEncoder(stringEncoder); 62 | testedObject.add(node); 63 | testedObject.add(TestObjectsFactory.node()); 64 | Osmformat.PrimitiveGroup actual = testedObject.write().build(); 65 | 66 | Osmformat.DenseNodes nodes = actual.getDense(); 67 | assertEquals(4, nodes.getKeysValsCount()); 68 | assertEquals(0, nodes.getKeysVals(0)); 69 | assertEquals(1, nodes.getKeysVals(1)); 70 | assertEquals(1, nodes.getKeysVals(2)); 71 | assertEquals(0, nodes.getKeysVals(3)); 72 | } 73 | 74 | @Test 75 | public void testDeltaCoding() { 76 | Node node1 = new Node(3, 20.0, 60.0); 77 | node1.getTags().put(TestObjectsFactory.testTag, 
TestObjectsFactory.testTag); 78 | Node node2 = new Node(6, 30.0, 20.0); 79 | node2.getTags().put(TestObjectsFactory.testTag, TestObjectsFactory.testTag); 80 | Node node3 = new Node(2, 60.0, 30.0); 81 | node3.getTags().put(TestObjectsFactory.testTag, TestObjectsFactory.testTag); 82 | 83 | DenseNodesEncoder testedObject = new DenseNodesEncoder(stringEncoder); 84 | testedObject.add(node1); 85 | testedObject.add(node2); 86 | testedObject.add(node3); 87 | 88 | Osmformat.PrimitiveGroup actual = testedObject.write().build(); 89 | 90 | Osmformat.DenseNodes nodes = actual.getDense(); 91 | 92 | assertEquals(3, nodes.getId(0)); 93 | assertEquals(3, nodes.getId(1)); 94 | assertEquals(-4, nodes.getId(2)); 95 | 96 | assertEquals(200000000, nodes.getLat(0)); 97 | assertEquals(100000000, nodes.getLat(1)); 98 | assertEquals(300000000, nodes.getLat(2)); 99 | 100 | assertEquals(600000000, nodes.getLon(0)); 101 | assertEquals(-400000000, nodes.getLon(1)); 102 | assertEquals(100000000, nodes.getLon(2)); 103 | } 104 | } -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/io/OSMReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 
16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.io; 19 | 20 | import com.google.protobuf.InvalidProtocolBufferException; 21 | import crosby.binary.Fileformat; 22 | import lombok.extern.slf4j.Slf4j; 23 | 24 | import java.util.concurrent.Semaphore; 25 | import java.util.zip.DataFormatException; 26 | import java.util.zip.Inflater; 27 | 28 | /** 29 | * Base class for Blob handlers. 30 | * Decompresses the blob when needed and passes the raw payload 31 | * to the decoder implemented by the subclass in read(). 32 | * 33 | * This class has no shared context and can therefore safely be 34 | * executed in parallel. 35 | */ 36 | @Slf4j 37 | public abstract class OSMReader implements Runnable { 38 | /** 39 | * Incoming blob to process. 40 | */ 41 | private final byte[] blob; 42 | 43 | /** 44 | * Part of the throttling system. 45 | * The reader releases one permit of this semaphore 46 | * when run() completes, whether it succeeded or failed. The caller 47 | * can therefore wait for permits to become available and so 48 | * avoid overloading the thread pool. 49 | */ 50 | private final Semaphore tasksLimiter; 51 | 52 | /** 53 | * Sets base parameters. 54 | * @param blobValue The blob to parse; the array is stored as is, without copying. 55 | * @param tasksLimiterValue Task limiting semaphore. 56 | */ 57 | @SuppressWarnings("EI_EXPOSE_REP2") 58 | OSMReader(final byte[] blobValue, final Semaphore tasksLimiterValue) { 59 | this.blob = blobValue; 60 | this.tasksLimiter = tasksLimiterValue; 61 | } 62 | 63 | /** 64 | * Parses blob data by decompressing it, if needed, and passing 65 | * the result to the read() function. 66 | * 67 | * @throws RuntimeException if the blob has an unsupported format or any other error happens. 
68 | */ 69 | @Override 70 | public void run() { 71 | try { 72 | Fileformat.Blob blobData = Fileformat.Blob.parseFrom(blob); 73 | byte[] payload; 74 | if (blobData.hasZlibData()) { 75 | payload = decompress(blobData); 76 | } else if (blobData.hasRaw()) { 77 | payload = blobData.getRaw().toByteArray(); 78 | } else { 79 | throw new RuntimeException("Only RAW or ZLib blob formats are supported"); 80 | } 81 | this.read(payload); 82 | } catch (InvalidProtocolBufferException | DataFormatException e) { 83 | log.error("Error parsing Blob: {}", e.getMessage(), e); 84 | throw new RuntimeException(e); 85 | } finally { 86 | tasksLimiter.release(); 87 | } 88 | } 89 | 90 | /** 91 | * Decompresses blob using Zlib algorithm. 92 | * @param blobData binary data to decompress. 93 | * @return uncompressed raw blob data 94 | * @throws DataFormatException in case of decompression error. 95 | * @throws RuntimeException if decompressed size differs from expected one. 96 | */ 97 | private byte[] decompress(final Fileformat.Blob blobData) throws DataFormatException { 98 | byte[] payload; 99 | Inflater decompresser = new Inflater(); 100 | decompresser.setInput(blobData.getZlibData().toByteArray()); 101 | payload = new byte[blobData.getRawSize()]; 102 | int uncompressedSize = decompresser.inflate(payload); 103 | if (uncompressedSize != blobData.getRawSize()) { 104 | log.error("Expected {} bytes after decompression, but got {}", blobData.getRawSize(), uncompressedSize); 105 | throw new RuntimeException("Invalid blob payload size"); 106 | } 107 | return payload; 108 | } 109 | 110 | /** 111 | * Actual decoding should happen here. 112 | * @param message Raw OSMHeader or OSMData blob. 
113 | */ 114 | protected abstract void read(byte[] message); 115 | } 116 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/parser/InfoParserTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.parser; 19 | 20 | import com.wolt.osm.parallelpbf.TestObjectsFactory; 21 | import crosby.binary.Osmformat; 22 | import lombok.var; 23 | import org.junit.jupiter.api.Assertions; 24 | import org.junit.jupiter.api.Tag; 25 | import org.junit.jupiter.api.Test; 26 | 27 | import java.util.function.Consumer; 28 | 29 | import static org.junit.jupiter.api.Assertions.*; 30 | 31 | @Tag("BaseParser") 32 | class InfoParserTest { 33 | 34 | static class InfoParser extends BaseParser> { 35 | 36 | public InfoParser(Consumer callback, Osmformat.StringTable stringTable) { 37 | super(callback, stringTable); 38 | } 39 | 40 | @Override 41 | protected void parse(Object message) { } 42 | } 43 | 44 | @Test 45 | void testNodeInfoMissing() { 46 | var node = Osmformat.Node.newBuilder() 47 | .setId(1) 48 | .setLat(2) 49 | .setLon(3) 50 | .build(); 51 | 52 | var testedObject = new InfoParser(null, TestObjectsFactory.stringTable); 53 | 54 | 
assertNull(testedObject.parseInfo(node)); 55 | } 56 | 57 | @Test 58 | void testNodeInfo() { 59 | var node = Osmformat.Node.newBuilder() 60 | .setId(1) 61 | .setLat(2) 62 | .setLon(3) 63 | .setInfo(TestObjectsFactory.infoMessage) 64 | .build(); 65 | 66 | var testedObject = new InfoParser(null, TestObjectsFactory.stringTable); 67 | 68 | var actual = testedObject.parseInfo(node); 69 | Assertions.assertEquals(TestObjectsFactory.info, actual); 70 | } 71 | 72 | @Test 73 | void testWayInfoMissing() { 74 | var way = Osmformat.Way.newBuilder() 75 | .setId(1) 76 | .build(); 77 | 78 | var testedObject = new InfoParser(null, TestObjectsFactory.stringTable); 79 | 80 | assertNull(testedObject.parseInfo(way)); 81 | } 82 | 83 | @Test 84 | void testWayInfo() { 85 | var way = Osmformat.Way.newBuilder() 86 | .setId(1) 87 | .setInfo(TestObjectsFactory.infoMessage) 88 | .build(); 89 | 90 | var testedObject = new InfoParser(null, TestObjectsFactory.stringTable); 91 | 92 | var actual = testedObject.parseInfo(way); 93 | Assertions.assertEquals(TestObjectsFactory.info, actual); 94 | } 95 | 96 | @Test 97 | void testWayInfoWithNullVisibleFlag() { 98 | var way = Osmformat.Way.newBuilder() 99 | .setId(1) 100 | .setInfo(TestObjectsFactory.infoMessageWithNullVisibleFlag) 101 | .build(); 102 | 103 | var testedObject = new InfoParser(null, TestObjectsFactory.stringTable); 104 | 105 | var actual = testedObject.parseInfo(way); 106 | Assertions.assertEquals(TestObjectsFactory.info, actual); 107 | } 108 | 109 | @Test 110 | void testRelationInfoMissing() { 111 | var relation = Osmformat.Relation.newBuilder() 112 | .setId(1) 113 | .build(); 114 | 115 | var testedObject = new InfoParser(null, TestObjectsFactory.stringTable); 116 | 117 | assertNull(testedObject.parseInfo(relation)); 118 | } 119 | 120 | @Test 121 | void testRelationInfo() { 122 | var relation = Osmformat.Relation.newBuilder() 123 | .setId(1) 124 | .setInfo(TestObjectsFactory.infoMessage) 125 | .build(); 126 | 127 | var testedObject = new 
InfoParser(null, TestObjectsFactory.stringTable); 128 | 129 | var actual = testedObject.parseInfo(relation); 130 | Assertions.assertEquals(TestObjectsFactory.info, actual); 131 | } 132 | } -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/encoder/DenseNodesEncoder.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.encoder; 2 | 3 | import com.wolt.osm.parallelpbf.entity.Node; 4 | import crosby.binary.Osmformat; 5 | import java.util.concurrent.TimeUnit; 6 | 7 | /** 8 | * Encoder for the DenseNodes structure. Keeps the data of the next blob 9 | * in RAM and produces the DenseNodes primitive group on request. 10 | * 11 | * Encoder is stateful and can't be used after 'write' call is issued. 12 | * Encoder is not thread-safe. 13 | */ 14 | public final class DenseNodesEncoder extends OsmEntityEncoder { 15 | /** 16 | * A single node uses 3 long values: id, lat, lon. 17 | * So a single node will use 24 bytes. 18 | */ 19 | private static final int NODE_ENTRY_SIZE = 24; 20 | 21 | /** 22 | * Block-wide string table encoder. 23 | */ 24 | private final StringTableEncoder stringEncoder; 25 | 26 | /** 27 | * Id of the previously added node, used for delta coding. 28 | */ 29 | private long id = 0; 30 | 31 | /** 32 | * Scaled latitude of the previously added node, used for delta coding. 33 | */ 34 | private long lat = 0; 35 | 36 | /** 37 | * Scaled longitude of the previously added node, used for delta coding. 38 | */ 39 | private long lon = 0; 40 | 41 | /** 42 | * Current value of UserStringId for delta coding. 43 | */ 44 | private int infoUserSid = 0; 45 | 46 | /** 47 | * Current value of Changeset for delta coding. 48 | */ 49 | private long infoChangeset = 0L; 50 | 51 | /** 52 | * Current value of Uid for delta coding. 53 | */ 54 | private int infoUid = 0; 55 | 56 | /** 57 | * Current value of Timestamp for delta coding. 58 | */ 59 | private long infoTimestamp = 0L; 60 | 61 | /** 62 | * DenseNodes builder accumulating all added nodes. 
63 | */ 64 | private Osmformat.DenseNodes.Builder nodes = Osmformat.DenseNodes.newBuilder(); 65 | 66 | /** 67 | * Default constructor. 68 | * @param stringTableEncoder Block-wide sting encoder. 69 | */ 70 | public DenseNodesEncoder(final StringTableEncoder stringTableEncoder) { 71 | super(); 72 | this.stringEncoder = stringTableEncoder; 73 | } 74 | 75 | /** 76 | * Adds a node to the encoder. 77 | * @param node Node to add. 78 | * @throws IllegalStateException when call after write() call. 79 | */ 80 | @Override 81 | protected void addImpl(final Node node) { 82 | node.getTags().forEach((k, v) -> { 83 | nodes.addKeysVals(stringEncoder.getStringIndex(k)); 84 | nodes.addKeysVals(stringEncoder.getStringIndex(v)); 85 | }); 86 | nodes.addKeysVals(0); //Index zero means 'end of tags for node' 87 | 88 | nodes.addId(node.getId() - id); 89 | id = node.getId(); 90 | 91 | if (node.getInfo() != null) { 92 | int newUserSid = stringEncoder.getStringIndex(node.getInfo().getUsername()); 93 | nodes.getDenseinfoBuilder().addVersion(node.getInfo().getVersion()) 94 | .addChangeset(node.getInfo().getChangeset() - infoChangeset).addUid(node.getInfo().getUid() - infoUid) 95 | .addUserSid(newUserSid - infoUserSid) 96 | .addTimestamp(TimeUnit.MICROSECONDS.toMillis(node.getInfo().getTimestamp() - infoTimestamp)) 97 | .addVisible(node.getInfo().isVisible()); 98 | infoChangeset = node.getInfo().getChangeset(); 99 | infoUid = node.getInfo().getUid(); 100 | infoTimestamp = node.getInfo().getTimestamp(); 101 | infoUserSid = newUserSid; 102 | } 103 | 104 | long latMillis = doubleToNanoScaled(node.getLat() / GRANULARITY); 105 | long lonMillis = doubleToNanoScaled(node.getLon() / GRANULARITY); 106 | 107 | nodes.addLat(latMillis - lat); 108 | nodes.addLon(lonMillis - lon); 109 | lat = latMillis; 110 | lon = lonMillis; 111 | } 112 | 113 | /** 114 | * Provides approximate size of the future blob. 115 | * Size is calculated as 24 bytes per each node plus 4 bytes per each tag, including closing tags. 
116 | * As protobuf will compact the values in arrays, the actual size is expected to be smaller. 117 | * @return Estimated approximate maximum size of a blob. 118 | */ 119 | @Override 120 | public int estimateSize() { 121 | return nodes.getIdCount() * NODE_ENTRY_SIZE + nodes.getKeysValsCount() * TAG_ENTRY_SIZE; 122 | } 123 | /** Builds the primitive group holding all accumulated dense nodes; the keys/vals array is dropped entirely when it contains nothing but zero end-of-tags markers. */ 124 | @Override 125 | protected Osmformat.PrimitiveGroup.Builder writeImpl() { 126 | if (nodes.getKeysValsList().stream().noneMatch(i -> i != 0)) { 127 | // Exceptional case - all nodes in the block are tagless, meaning 128 | // that tags array must be empty 129 | nodes.clearKeysVals(); 130 | } 131 | return Osmformat.PrimitiveGroup.newBuilder().setDense(nodes); 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/io/OSMHeaderReaderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 
16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.io; 19 | 20 | import com.wolt.osm.parallelpbf.entity.BoundBox; 21 | import com.wolt.osm.parallelpbf.entity.Header; 22 | import com.google.protobuf.ByteString; 23 | import crosby.binary.Fileformat; 24 | import crosby.binary.Osmformat; 25 | import lombok.var; 26 | import org.junit.jupiter.api.BeforeEach; 27 | import org.junit.jupiter.api.Test; 28 | 29 | import java.util.concurrent.Semaphore; 30 | import java.util.function.Consumer; 31 | 32 | import static org.junit.jupiter.api.Assertions.*; 33 | 34 | class OSMHeaderReaderTest { 35 | 36 | private Semaphore limiter; 37 | 38 | @BeforeEach 39 | void setUp() { 40 | limiter = new Semaphore(0); 41 | } 42 | 43 | @Test 44 | void testInvalidBlob() { 45 | var blob = Fileformat.Blob.newBuilder().setRaw(ByteString.copyFromUtf8("failme")).build().toByteArray(); 46 | 47 | var testedObject = new OSMHeaderReader(blob, limiter, null, null); 48 | assertThrows(RuntimeException.class, testedObject::run); 49 | } 50 | 51 | @Test 52 | void testFailOnRequiredFeature() { 53 | var header = Osmformat.HeaderBlock.newBuilder() 54 | .addRequiredFeatures("failme") 55 | .build().toByteArray(); 56 | var blob = Fileformat.Blob.newBuilder().setRaw(ByteString.copyFrom(header)).build().toByteArray(); 57 | 58 | var testedObject = new OSMHeaderReader(blob, limiter, null, null); 59 | assertThrows(RuntimeException.class, testedObject::run); 60 | } 61 | 62 | @Test 63 | void testHeaderParse() { 64 | var headerMessage = Osmformat.HeaderBlock.newBuilder() 65 | .addRequiredFeatures("DenseNodes") 66 | .addOptionalFeatures("test_feature") 67 | .setSource("test") 68 | .setWritingprogram("junit5") 69 | .build().toByteArray(); 70 | var blob = Fileformat.Blob.newBuilder().setRaw(ByteString.copyFrom(headerMessage)).build().toByteArray(); 71 | 72 | Consumer
checker = (header) -> { 73 | assertEquals("DenseNodes", header.getRequiredFeatures().get(0)); 74 | assertEquals("test_feature", header.getOptionalFeatures().get(0)); 75 | assertEquals("test", header.getSource()); 76 | assertEquals("junit5", header.getWritingProgram()); 77 | }; 78 | var testedObject = new OSMHeaderReader(blob, limiter, checker, null); 79 | testedObject.run(); 80 | } 81 | 82 | @Test 83 | void testNoBoundBox() { 84 | var headerMessage = Osmformat.HeaderBlock.newBuilder() 85 | .addRequiredFeatures("DenseNodes") 86 | .build().toByteArray(); 87 | var blob = Fileformat.Blob.newBuilder().setRaw(ByteString.copyFrom(headerMessage)).build().toByteArray(); 88 | 89 | Consumer checker = (bbox) -> fail("BoundBox is missing"); 90 | var testedObject = new OSMHeaderReader(blob, limiter, null, checker); 91 | testedObject.run(); 92 | } 93 | @Test 94 | void testBoundBoxParse() { 95 | var boundBoxMessage = Osmformat.HeaderBBox.newBuilder() 96 | .setLeft(1000000000) 97 | .setTop(2000000000) 98 | .setRight(3000000000L) 99 | .setBottom(4000000000L) 100 | .build(); 101 | var headerMessage = Osmformat.HeaderBlock.newBuilder() 102 | .addRequiredFeatures("DenseNodes") 103 | .setBbox(boundBoxMessage) 104 | .build().toByteArray(); 105 | var blob = Fileformat.Blob.newBuilder().setRaw(ByteString.copyFrom(headerMessage)).build().toByteArray(); 106 | 107 | Consumer checker = (bbox) -> { 108 | assertEquals(1, bbox.getLeft(), 0.1); 109 | assertEquals(2, bbox.getTop(), 0.1); 110 | assertEquals(3, bbox.getRight(), 0.1); 111 | assertEquals(4, bbox.getBottom(), 0.1); 112 | }; 113 | var testedObject = new OSMHeaderReader(blob, limiter, null, checker); 114 | testedObject.run(); 115 | } 116 | 117 | } -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/io/OSMReaderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 
3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.io; 19 | 20 | import com.google.protobuf.ByteString; 21 | import crosby.binary.Fileformat; 22 | import lombok.var; 23 | import org.junit.jupiter.api.BeforeEach; 24 | import org.junit.jupiter.api.Test; 25 | 26 | import java.nio.charset.StandardCharsets; 27 | import java.util.concurrent.Semaphore; 28 | import java.util.zip.Deflater; 29 | 30 | import static org.junit.jupiter.api.Assertions.*; 31 | 32 | class OSMReaderTest { 33 | private static final String testString = "TestString"; 34 | 35 | private Semaphore limiter; 36 | 37 | @BeforeEach 38 | void setUp() { 39 | limiter = new Semaphore(0); 40 | } 41 | 42 | private static class TestReader extends OSMReader { 43 | TestReader(byte[] blobValue, Semaphore tasksLimiterValue) { 44 | super(blobValue, tasksLimiterValue); 45 | } 46 | 47 | @Override 48 | protected void read(byte[] message) { 49 | assertEquals(testString, new String(message, StandardCharsets.UTF_8)); 50 | } 51 | } 52 | 53 | @Test 54 | void testRaw() { 55 | byte[] content = testString.getBytes(StandardCharsets.UTF_8); 56 | var blob = Fileformat.Blob.newBuilder() 57 | .setRaw(ByteString.copyFrom(content)) 58 | .build().toByteArray(); 59 | 60 | var testedObject = new TestReader(blob, limiter); 61 | testedObject.run(); 62 | assertEquals(1, limiter.availablePermits()); 63 | } 64 
| 65 | @Test 66 | void testZlib() { 67 | byte[] content = testString.getBytes(StandardCharsets.UTF_8); 68 | Deflater deflater = new Deflater(); 69 | deflater.setInput(content); 70 | deflater.finish(); 71 | byte[] compressed_content = new byte[content.length * 2]; 72 | int compressedDataLength = deflater.deflate(compressed_content); 73 | var blob = Fileformat.Blob.newBuilder() 74 | .setRawSize(content.length) 75 | .setZlibData(ByteString.copyFrom(compressed_content, 0, compressedDataLength)) 76 | .build().toByteArray(); 77 | 78 | var testedObject = new TestReader(blob, limiter); 79 | testedObject.run(); 80 | assertEquals(1, limiter.availablePermits()); 81 | } 82 | 83 | @Test 84 | void testZlibInvalidRawSize() { 85 | byte[] content = testString.getBytes(StandardCharsets.UTF_8); 86 | Deflater deflater = new Deflater(); 87 | deflater.setInput(content); 88 | deflater.finish(); 89 | byte[] compressed_content = new byte[content.length * 2]; 90 | int compressedDataLength = deflater.deflate(compressed_content); 91 | var blob = Fileformat.Blob.newBuilder() 92 | .setRawSize(9000) 93 | .setZlibData(ByteString.copyFrom(compressed_content, 0, compressedDataLength)) 94 | .build().toByteArray(); 95 | 96 | var testedObject = new TestReader(blob, limiter); 97 | assertThrows(RuntimeException.class, testedObject::run); 98 | } 99 | 100 | @Test 101 | void testZlibInvalidData() { 102 | var blob = Fileformat.Blob.newBuilder() 103 | .setRawSize(1) 104 | .setZlibData(ByteString.copyFromUtf8("test")) 105 | .build().toByteArray(); 106 | 107 | var testedObject = new TestReader(blob, limiter); 108 | assertThrows(RuntimeException.class, testedObject::run); 109 | } 110 | 111 | @Test 112 | void testInvalidCompressionFormat() { 113 | var blob = Fileformat.Blob.newBuilder() 114 | .setRawSize(9000) 115 | .setOBSOLETEBzip2Data(ByteString.copyFromUtf8("")) 116 | .build().toByteArray(); 117 | 118 | var testedObject = new TestReader(blob, limiter); 119 | assertThrows(RuntimeException.class, 
testedObject::run); 120 | } 121 | 122 | @Test 123 | void testInvalidBlobFormat() { 124 | var testedObject = new TestReader("fail".getBytes(StandardCharsets.UTF_8), limiter); 125 | assertThrows(RuntimeException.class, testedObject::run); 126 | } 127 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Maven template 3 | target/ 4 | pom.xml.tag 5 | pom.xml.releaseBackup 6 | pom.xml.versionsBackup 7 | pom.xml.next 8 | release.properties 9 | dependency-reduced-pom.xml 10 | buildNumber.properties 11 | .mvn/timing.properties 12 | # https://github.com/takari/maven-wrapper#usage-without-binary-jar 13 | .mvn/wrapper/maven-wrapper.jar 14 | 15 | ### Linux template 16 | *~ 17 | 18 | # temporary files which can be created if a process still has a handle open of a deleted file 19 | .fuse_hidden* 20 | 21 | # KDE directory preferences 22 | .directory 23 | 24 | # Linux trash folder which might appear on any partition or disk 25 | .Trash-* 26 | 27 | # .nfs files are created when an open file is removed but is still being accessed 28 | .nfs* 29 | 30 | ### Vim template 31 | # Swap 32 | [._]*.s[a-v][a-z] 33 | [._]*.sw[a-p] 34 | [._]s[a-rt-v][a-z] 35 | [._]ss[a-gi-z] 36 | [._]sw[a-p] 37 | 38 | # Session 39 | Session.vim 40 | Sessionx.vim 41 | 42 | # Temporary 43 | .netrwhist 44 | *~ 45 | # Auto-generated tag files 46 | tags 47 | # Persistent undo 48 | [._]*.un~ 49 | 50 | ### Java template 51 | # Compiled class file 52 | *.class 53 | 54 | # Log file 55 | *.log 56 | 57 | # BlueJ files 58 | *.ctxt 59 | 60 | # Mobile Tools for Java (J2ME) 61 | .mtj.tmp/ 62 | 63 | # Package Files # 64 | *.jar 65 | *.war 66 | *.nar 67 | *.ear 68 | *.zip 69 | *.tar.gz 70 | *.rar 71 | 72 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 73 | hs_err_pid* 74 | 75 | ### JetBrains template 76 
| # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 77 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 78 | 79 | # User-specific stuff 80 | .idea/**/workspace.xml 81 | .idea/**/tasks.xml 82 | .idea/**/usage.statistics.xml 83 | .idea/**/dictionaries 84 | .idea/**/shelf 85 | 86 | # Generated files 87 | .idea/**/contentModel.xml 88 | 89 | # Sensitive or high-churn files 90 | .idea/**/dataSources/ 91 | .idea/**/dataSources.ids 92 | .idea/**/dataSources.local.xml 93 | .idea/**/sqlDataSources.xml 94 | .idea/**/dynamic.xml 95 | .idea/**/uiDesigner.xml 96 | .idea/**/dbnavigator.xml 97 | 98 | # Gradle 99 | .idea/**/gradle.xml 100 | .idea/**/libraries 101 | 102 | # Gradle and Maven with auto-import 103 | # When using Gradle or Maven with auto-import, you should exclude module files, 104 | # since they will be recreated, and may cause churn. Uncomment if using 105 | # auto-import. 106 | # .idea/modules.xml 107 | # .idea/*.iml 108 | # .idea/modules 109 | # *.iml 110 | # *.ipr 111 | 112 | # CMake 113 | cmake-build-*/ 114 | 115 | # Mongo Explorer plugin 116 | .idea/**/mongoSettings.xml 117 | 118 | # File-based project format 119 | *.iws 120 | 121 | # IntelliJ 122 | out/ 123 | 124 | # mpeltonen/sbt-idea plugin 125 | .idea_modules/ 126 | 127 | # JIRA plugin 128 | atlassian-ide-plugin.xml 129 | 130 | # Cursive Clojure plugin 131 | .idea/replstate.xml 132 | 133 | # Crashlytics plugin (for Android Studio and IntelliJ) 134 | com_crashlytics_export_strings.xml 135 | crashlytics.properties 136 | crashlytics-build.properties 137 | fabric.properties 138 | 139 | # Editor-based Rest Client 140 | .idea/httpRequests 141 | 142 | # Android studio 3.1+ serialized cache file 143 | .idea/caches/build_file_checksums.ser 144 | 145 | # All other Jetbrains stuff 146 | .idea/ 147 | *.iml 148 | ### Archives template 149 | # It's better to unpack these files and commit the raw source because 150 | # git has its 
own built in compression methods. 151 | *.7z 152 | *.jar 153 | *.rar 154 | *.zip 155 | *.gz 156 | *.tgz 157 | *.bzip 158 | *.bz2 159 | *.xz 160 | *.lzma 161 | *.cab 162 | 163 | # Packing-only formats 164 | *.iso 165 | *.tar 166 | 167 | # Package management formats 168 | *.dmg 169 | *.xpi 170 | *.gem 171 | *.egg 172 | *.deb 173 | *.rpm 174 | *.msi 175 | *.msm 176 | *.msp 177 | 178 | ### Windows template 179 | # Windows thumbnail cache files 180 | Thumbs.db 181 | Thumbs.db:encryptable 182 | ehthumbs.db 183 | ehthumbs_vista.db 184 | 185 | # Dump file 186 | *.stackdump 187 | 188 | # Folder config file 189 | [Dd]esktop.ini 190 | 191 | # Recycle Bin used on file shares 192 | $RECYCLE.BIN/ 193 | 194 | # Windows Installer files 195 | *.cab 196 | *.msi 197 | *.msix 198 | *.msm 199 | *.msp 200 | 201 | # Windows shortcuts 202 | *.lnk 203 | 204 | ### Backup template 205 | *.bak 206 | *.gho 207 | *.ori 208 | *.orig 209 | *.tmp 210 | 211 | ### macOS template 212 | # General 213 | .DS_Store 214 | .AppleDouble 215 | .LSOverride 216 | 217 | # Icon must end with two \r 218 | Icon 219 | 220 | # Thumbnails 221 | ._* 222 | 223 | # Files that might appear in the root of a volume 224 | .DocumentRevisions-V100 225 | .fseventsd 226 | .Spotlight-V100 227 | .TemporaryItems 228 | .Trashes 229 | .VolumeIcon.icns 230 | .com.apple.timemachine.donotpresent 231 | 232 | # Directories potentially created on remote AFP share 233 | .AppleDB 234 | .AppleDesktop 235 | Network Trash Folder 236 | Temporary Items 237 | .apdisk 238 | 239 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/io/OSMWriterTest.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.io; 2 | 3 | import com.google.protobuf.ByteString; 4 | import com.google.protobuf.InvalidProtocolBufferException; 5 | import com.wolt.osm.parallelpbf.TestObjectsFactory; 6 | import 
com.wolt.osm.parallelpbf.blob.BlobWriter; 7 | import com.wolt.osm.parallelpbf.encoder.OsmEncoder; 8 | import com.wolt.osm.parallelpbf.entity.*; 9 | import crosby.binary.Osmformat; 10 | import lombok.SneakyThrows; 11 | import org.junit.jupiter.api.BeforeEach; 12 | import org.junit.jupiter.api.Tag; 13 | import org.junit.jupiter.api.Test; 14 | 15 | import java.io.ByteArrayOutputStream; 16 | import java.io.OutputStream; 17 | import java.util.concurrent.LinkedBlockingQueue; 18 | 19 | import static org.junit.jupiter.api.Assertions.*; 20 | 21 | class OSMWriterTest { 22 | 23 | private ByteArrayOutputStream output; 24 | private BlobWriter writer; 25 | private LinkedBlockingQueue queue; 26 | 27 | private class BlobWriterMock extends BlobWriter { 28 | 29 | public BlobWriterMock(OutputStream output) { 30 | super(output); 31 | } 32 | 33 | @SneakyThrows 34 | @Override 35 | public boolean writeData(byte[] blob) { 36 | output.write(blob); 37 | return true; 38 | } 39 | } 40 | 41 | private static class FakeEntity extends OsmEntity { 42 | public FakeEntity(long id) { 43 | super(id); 44 | } 45 | } 46 | 47 | @BeforeEach 48 | void setUp() { 49 | output = new ByteArrayOutputStream(); 50 | writer = new BlobWriterMock(output); 51 | queue = new LinkedBlockingQueue<>(); 52 | } 53 | 54 | private void writeEntity(OsmEntity entity) throws InterruptedException { 55 | Thread testedObject = new Thread(new OSMWriter(writer, queue)); 56 | testedObject.start(); 57 | 58 | queue.put(entity); 59 | while(!queue.isEmpty()) { 60 | Thread.sleep(1); 61 | } 62 | testedObject.interrupt(); 63 | testedObject.join(); 64 | } 65 | 66 | @Test 67 | void testWriteStrings() throws InterruptedException, InvalidProtocolBufferException { 68 | writeEntity(TestObjectsFactory.way()); 69 | 70 | byte[] blob = output.toByteArray(); 71 | 72 | Osmformat.PrimitiveBlock block = Osmformat.PrimitiveBlock.parseFrom(blob); 73 | 74 | Osmformat.StringTable stringTable = block.getStringtable(); 75 | assertEquals(ByteString.EMPTY, 
stringTable.getS(0)); 76 | assertEquals(TestObjectsFactory.testTag, stringTable.getS(1).toStringUtf8()); 77 | } 78 | 79 | @Test 80 | void testNodeSetsGranularity() throws InterruptedException, InvalidProtocolBufferException { 81 | writeEntity(TestObjectsFactory.node()); 82 | 83 | byte[] blob = output.toByteArray(); 84 | 85 | Osmformat.PrimitiveBlock block = Osmformat.PrimitiveBlock.parseFrom(blob); 86 | 87 | assertEquals(OsmEncoder.GRANULARITY, block.getGranularity()); 88 | assertEquals(0, block.getLonOffset()); 89 | assertEquals(0, block.getLatOffset()); 90 | } 91 | 92 | @Test 93 | void testNoNodeNoGranularity() throws InterruptedException, InvalidProtocolBufferException { 94 | writeEntity(TestObjectsFactory.way()); 95 | 96 | byte[] blob = output.toByteArray(); 97 | 98 | Osmformat.PrimitiveBlock block = Osmformat.PrimitiveBlock.parseFrom(blob); 99 | 100 | assertFalse(block.hasGranularity()); 101 | assertFalse(block.hasLatOffset()); 102 | assertFalse(block.hasLonOffset()); 103 | } 104 | 105 | @Test 106 | void testWriteRelation() throws InterruptedException, InvalidProtocolBufferException { 107 | writeEntity(TestObjectsFactory.relation()); 108 | 109 | byte[] blob = output.toByteArray(); 110 | 111 | Osmformat.PrimitiveBlock block = Osmformat.PrimitiveBlock.parseFrom(blob); 112 | 113 | assertEquals(1, block.getPrimitivegroupList().size()); 114 | assertFalse(block.getPrimitivegroup(0).getRelationsList().isEmpty()); 115 | } 116 | 117 | @Test 118 | void testNoInvalidEntities() throws InterruptedException { 119 | FakeEntity entity = new FakeEntity(1L); 120 | 121 | writeEntity(entity); 122 | 123 | assertEquals(0, output.toByteArray().length); 124 | } 125 | 126 | @Test 127 | @Tag("slow") 128 | void testFlushOnOverflow() throws InterruptedException { 129 | Thread testedObject = new Thread(new OSMWriter(writer, queue)); 130 | testedObject.start(); 131 | 132 | // We flush on 15*1024*1024, each node is 36 bytes + string table is 4 bytes 133 | //That means we have to write more 
then (15*1024*1024-4)/36 = 436907 nodes. 134 | for (int i=0; i<436908; ++i) { 135 | queue.put(TestObjectsFactory.node()); 136 | } 137 | while(!queue.isEmpty()) { 138 | Thread.sleep(1); 139 | } 140 | assertTrue(output.toByteArray().length > 4); //4 as is should contain more then just a test tag 141 | 142 | //Flush only there 143 | testedObject.interrupt(); 144 | testedObject.join(); 145 | } 146 | } -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/parser/BaseParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.parser; 19 | 20 | import com.wolt.osm.parallelpbf.entity.Info; 21 | import crosby.binary.Osmformat; 22 | import lombok.Getter; 23 | import lombok.RequiredArgsConstructor; 24 | 25 | import java.util.HashMap; 26 | import java.util.List; 27 | import java.util.Map; 28 | import java.util.function.Consumer; 29 | 30 | /** 31 | * Base class for OSM message parsers. Stores callback and a shared string table. 32 | * 33 | * @param Type of the message to process. 
34 | * @param Callback Type 35 | */ 36 | @RequiredArgsConstructor 37 | @Getter 38 | public abstract class BaseParser> { 39 | /** 40 | * Callback to call on successful parse of the entity. 41 | */ 42 | private final T callback; 43 | 44 | /** 45 | * Indexed table os strings. 46 | */ 47 | private final Osmformat.StringTable stringTable; 48 | 49 | /** 50 | * Helper that knows how to extract tags from the OSM entity. 51 | *

52 | * Tags are stored as two parallel arrays of indices of keys and values. 53 | * Each index points to the some string in the string table. 54 | * 55 | * @param keys List if tag keys indices in the string table. 56 | * @param values List if tag values indices in the string table. 57 | * @return Map of tags with their values. 58 | */ 59 | Map parseTags(final List keys, final List values) { 60 | HashMap result = new HashMap<>(); 61 | for (int indx = 0; indx < keys.size(); ++indx) { 62 | String key = stringTable.getS(keys.get(indx)).toStringUtf8(); 63 | String value = stringTable.getS(values.get(indx)).toStringUtf8(); 64 | result.put(key, value); 65 | } 66 | return result; 67 | } 68 | 69 | /** 70 | * Checks if OSM entity have Info filled and extracts it from the PBF. 71 | * 72 | * @param message Node message to parse. 73 | * @return Info message if present of null otherwise. 74 | */ 75 | Info parseInfo(final Osmformat.Node message) { 76 | Osmformat.Info infoMessage = null; 77 | if (message.hasInfo()) { 78 | infoMessage = message.getInfo(); 79 | } 80 | return convertInfo(infoMessage); 81 | } 82 | 83 | /** 84 | * Checks if OSM entity have Info filled and extracts it from the PBF. 85 | * 86 | * @param message Way message to parse. 87 | * @return Info message if present of null otherwise. 88 | */ 89 | Info parseInfo(final Osmformat.Way message) { 90 | Osmformat.Info infoMessage = null; 91 | if (message.hasInfo()) { 92 | infoMessage = message.getInfo(); 93 | } 94 | return convertInfo(infoMessage); 95 | } 96 | 97 | /** 98 | * Checks if OSM entity have Info filled and extracts it from the PBF. 99 | * 100 | * @param message Relation message to parse. 101 | * @return Info message if present of null otherwise. 
102 | */ 103 | Info parseInfo(final Osmformat.Relation message) { 104 | Osmformat.Info infoMessage = null; 105 | if (message.hasInfo()) { 106 | infoMessage = message.getInfo(); 107 | } 108 | return convertInfo(infoMessage); 109 | } 110 | 111 | /** 112 | * Parses PBF info message and return Info entity. 113 | * 114 | * @param infoMessage Info message to parse. Can be null. 115 | * @return Info entity or null in case of error or if incoming message was null. 116 | */ 117 | private Info convertInfo(final Osmformat.Info infoMessage) { 118 | if (infoMessage != null) { 119 | String username = stringTable.getS(infoMessage.getUserSid()).toStringUtf8(); 120 | boolean isVisible = !infoMessage.hasVisible() || infoMessage.getVisible(); 121 | return new Info(infoMessage.getUid(), 122 | username, 123 | infoMessage.getVersion(), 124 | infoMessage.getTimestamp(), 125 | infoMessage.getChangeset(), 126 | isVisible); 127 | } 128 | return null; 129 | } 130 | 131 | /** 132 | * Actual parse function, should be implemented for a specific type. 133 | * 134 | * @param message PBF message to parse. 135 | */ 136 | protected abstract void parse(M message); 137 | } 138 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/TestObjectsFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf; 19 | 20 | import com.wolt.osm.parallelpbf.entity.*; 21 | import com.google.protobuf.ByteString; 22 | import crosby.binary.Osmformat; 23 | 24 | public class TestObjectsFactory { 25 | public static final Info info = new Info(1, "test", 3, 4, 5, true); 26 | 27 | public static final Osmformat.Info infoMessage = Osmformat.Info.newBuilder().setUid(1).setUserSid(2).setVersion(3).setTimestamp(4).setChangeset(5).setVisible(true).build(); 28 | public static final Osmformat.Info infoMessageWithNullVisibleFlag = Osmformat.Info.newBuilder() 29 | .setUid(1).setUserSid(2).setVersion(3).setTimestamp(4).setChangeset(5).build(); 30 | 31 | public static final Osmformat.StringTable stringTable = Osmformat.StringTable.newBuilder() 32 | .addS(ByteString.copyFromUtf8("")) 33 | .addS(ByteString.copyFromUtf8("fail")) 34 | .addS(ByteString.copyFromUtf8("test")) 35 | .addS(ByteString.copyFromUtf8("tag")) 36 | .addS(ByteString.copyFromUtf8("value")).build(); 37 | 38 | public static final Osmformat.Node nodeMessage = Osmformat.Node.newBuilder() 39 | .setId(1) 40 | .setLat(1000000000) 41 | .setLon(2000000000) 42 | .addKeys(3) 43 | .addVals(4) 44 | .setInfo(TestObjectsFactory.infoMessage) 45 | .build(); 46 | 47 | public static final Osmformat.DenseInfo denseInfo = Osmformat.DenseInfo.newBuilder() 48 | .addUid(1) 49 | .addUserSid(2) 50 | .addVersion(3) 51 | .addTimestamp(4) 52 | .addChangeset(5) 53 | .addVisible(true) 54 | .build(); 55 | public static final Osmformat.DenseNodes denseNodesMessage = Osmformat.DenseNodes.newBuilder() 56 | .addId(1) 57 | .addLat(1000000000) 58 | .addLon(2000000000) 59 | .addKeysVals(3).addKeysVals(4).addKeysVals(0) 60 | .setDenseinfo(denseInfo) 61 | .build(); 62 | 63 | public static final Osmformat.Way wayMessage = Osmformat.Way.newBuilder() 64 | .setId(1) 65 | .addKeys(3) 66 | .addVals(4) 67 | 
.setInfo(TestObjectsFactory.infoMessage) 68 | .addRefs(9000) 69 | .build(); 70 | 71 | public static final Osmformat.Way wayMessageWithNullVisibleFlag = Osmformat.Way.newBuilder() 72 | .setId(1) 73 | .addKeys(3) 74 | .addVals(4) 75 | .setInfo(TestObjectsFactory.infoMessageWithNullVisibleFlag) 76 | .addRefs(9000) 77 | .build(); 78 | 79 | public static final Osmformat.Relation relationMessage = Osmformat.Relation.newBuilder() 80 | .setId(1) 81 | .addKeys(3) 82 | .addVals(4) 83 | .setInfo(TestObjectsFactory.infoMessage) 84 | .addMemids(9000) 85 | .addTypes(Osmformat.Relation.MemberType.NODE) 86 | .addRolesSid(1) 87 | .build(); 88 | 89 | public static final Osmformat.ChangeSet changesetMessage = Osmformat.ChangeSet.newBuilder().setId(1).build(); 90 | 91 | public static final String testTag = "test"; 92 | 93 | public static Node node() { 94 | Node node = new Node(1, 10.0, 50.0); 95 | node.getTags().put(testTag, testTag); 96 | return node; 97 | } 98 | 99 | public static Way way() { 100 | Way way = new Way(1L); 101 | way.getTags().put(TestObjectsFactory.testTag, TestObjectsFactory.testTag); 102 | way.getNodes().add(3L); 103 | way.getNodes().add(6L); 104 | way.getNodes().add(2L); 105 | way.setInfo(info); 106 | return way; 107 | } 108 | 109 | public static Relation relation() { 110 | RelationMember member1 = new RelationMember(2L, "test", RelationMember.Type.WAY); 111 | RelationMember member2 = new RelationMember(3L, "forward", RelationMember.Type.RELATION); 112 | RelationMember member3 = new RelationMember(6L, "stop", RelationMember.Type.NODE); 113 | Relation relation = new Relation(1L); 114 | relation.getTags().put(TestObjectsFactory.testTag, TestObjectsFactory.testTag); 115 | relation.getMembers().add(member1); 116 | relation.getMembers().add(member2); 117 | relation.getMembers().add(member3); 118 | relation.setInfo(info); 119 | return relation; 120 | } 121 | } 122 | -------------------------------------------------------------------------------- 
/src/main/java/com/wolt/osm/parallelpbf/io/OSMHeaderReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.io; 19 | 20 | import com.google.protobuf.InvalidProtocolBufferException; 21 | import crosby.binary.Osmformat; 22 | import lombok.extern.slf4j.Slf4j; 23 | import com.wolt.osm.parallelpbf.entity.BoundBox; 24 | import com.wolt.osm.parallelpbf.entity.Header; 25 | 26 | import java.util.List; 27 | import java.util.Optional; 28 | import java.util.concurrent.Semaphore; 29 | import java.util.function.Consumer; 30 | 31 | /** 32 | * Implemented parser for OSMHeader message. 33 | * 34 | * @see Header 35 | */ 36 | @Slf4j 37 | public final class OSMHeaderReader extends OSMReader { 38 | /** 39 | * Conversion from nano- to non-scaled. 40 | */ 41 | private static final double NANO = 1e9; 42 | 43 | /** 44 | * Header processing callback. Must be reentrant. 45 | */ 46 | private final Consumer

headerCb; 47 | 48 | /** 49 | * Bounding box processing callback. Must be reentrant. 50 | */ 51 | private final Consumer boundBoxCb; 52 | 53 | /** 54 | * Constructs reader object. 55 | * 56 | * @param blob blob to parse. 57 | * @param tasksLimiter task limiting semaphore. 58 | * @param onHeader Callback to call with a filled Header entity. 59 | * Header parsing will be partially skipped if set to null. 60 | * @param onBoundBox Callback to call if bounding box present in header. 61 | * Bounding box parsing will be skipped completely if set to null 62 | */ 63 | public OSMHeaderReader(final byte[] blob, 64 | final Semaphore tasksLimiter, 65 | final Consumer
onHeader, 66 | final Consumer onBoundBox) { 67 | super(blob, tasksLimiter); 68 | this.headerCb = onHeader; 69 | this.boundBoxCb = onBoundBox; 70 | } 71 | 72 | /** 73 | * Check, that all required features are supported by that parser. 74 | * 75 | * @param features Features list. 76 | * @return true if all required features are supported, false otherwise. 77 | */ 78 | private boolean checkRequiredFeatures(final List features) { 79 | Optional unsupported = features.stream() 80 | .filter(f -> !f.equalsIgnoreCase(Header.FEATURE_OSM_SCHEMA)) 81 | .filter(f -> !f.equalsIgnoreCase(Header.FEATURE_DENSE_NODES)) 82 | .filter(f -> !f.equalsIgnoreCase(Header.FEATURE_HISTORICAL_INFORMATION)) 83 | .findAny(); 84 | unsupported.ifPresent(s -> log.error("Unsupported required feature found: {}", s)); 85 | return !unsupported.isPresent(); 86 | } 87 | 88 | /** 89 | * Parses OSMHeader value and checks required feature list from it. 90 | *

91 | * Check for required features is mandatory and is actually a reason, why we can't 92 | * just skip parsing if no callbacks are set. 93 | * 94 | * @param message Raw OSMHeader blob. 95 | */ 96 | @Override 97 | protected void read(final byte[] message) { 98 | Osmformat.HeaderBlock headerData; 99 | try { 100 | headerData = Osmformat.HeaderBlock.parseFrom(message); 101 | } catch (InvalidProtocolBufferException e) { 102 | log.error("Error parsing OSMHeader block: {}", e.getMessage(), e); 103 | throw new RuntimeException(e); 104 | } 105 | 106 | if (!checkRequiredFeatures(headerData.getRequiredFeaturesList())) { 107 | throw new RuntimeException("Can't proceed with unsupported features"); 108 | } 109 | 110 | if (headerCb != null) { 111 | Header header = new Header(headerData.getRequiredFeaturesList(), headerData.getOptionalFeaturesList()); 112 | if (headerData.hasWritingprogram()) { 113 | header.setWritingProgram(headerData.getWritingprogram()); 114 | } 115 | if (headerData.hasSource()) { 116 | header.setSource(headerData.getSource()); 117 | } 118 | log.debug("Header: {}", header.toString()); 119 | headerCb.accept(header); 120 | } 121 | 122 | if (boundBoxCb != null && headerData.hasBbox()) { 123 | BoundBox bbox = new BoundBox(headerData.getBbox().getLeft() / NANO, 124 | headerData.getBbox().getTop() / NANO, 125 | headerData.getBbox().getRight() / NANO, 126 | headerData.getBbox().getBottom() / NANO); 127 | log.debug("Bounding box: {}", bbox.toString()); 128 | boundBoxCb.accept(bbox); 129 | } 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/io/OSMDataReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 
3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.io; 19 | 20 | import com.wolt.osm.parallelpbf.entity.Node; 21 | import com.wolt.osm.parallelpbf.entity.Relation; 22 | import com.wolt.osm.parallelpbf.entity.Way; 23 | import com.wolt.osm.parallelpbf.parser.NodeParser; 24 | import com.wolt.osm.parallelpbf.parser.RelationParser; 25 | import com.wolt.osm.parallelpbf.parser.WayParser; 26 | import com.google.protobuf.InvalidProtocolBufferException; 27 | import crosby.binary.Osmformat; 28 | import lombok.extern.slf4j.Slf4j; 29 | import lombok.var; 30 | 31 | import java.util.concurrent.Semaphore; 32 | import java.util.function.Consumer; 33 | 34 | /** 35 | * Implemented parser for OSMHeader message. 36 | * 37 | * @see BaseParser 38 | */ 39 | @Slf4j 40 | public class OSMDataReader extends OSMReader { 41 | /** 42 | * Changeset processing callback. Must be reentrant. 43 | */ 44 | private final Consumer changesetsCb; 45 | 46 | /** 47 | * Nodes processing callback. Must be reentrant. 48 | */ 49 | private final Consumer nodesCb; 50 | 51 | /** 52 | * Ways processing callback. Must be reentrant. 53 | */ 54 | private final Consumer waysCb; 55 | 56 | /** 57 | * Relations processing callback. Must be reentrant. 58 | */ 59 | private final Consumer relationsCb; 60 | 61 | /** 62 | * Configures reader with blob and callbacks. 
63 | * @param blob blob to parse. 64 | * @param tasksLimiter task limiting semaphore. 65 | * @param onNodes Callback to call on node parse. May be null, in that case nodes parsing will be skipped. 66 | * @param onWays Callback to call on way parse. May be null, in that case ways parsing will be skipped. 67 | * @param onRelations Callback to call on relation parse. May be null, 68 | * in that case relations parsing will be skipped. 69 | * @param onChangesets Callback to call on changeset parse. May be null, 70 | * in that case changesets parsing will be skipped. 71 | */ 72 | public OSMDataReader(final byte[] blob, 73 | final Semaphore tasksLimiter, 74 | final Consumer onNodes, 75 | final Consumer onWays, 76 | final Consumer onRelations, 77 | final Consumer onChangesets) { 78 | super(blob, tasksLimiter); 79 | this.nodesCb = onNodes; 80 | this.waysCb = onWays; 81 | this.relationsCb = onRelations; 82 | this.changesetsCb = onChangesets; 83 | } 84 | 85 | /** 86 | * Extracts primitives groups from the Blob and parses them. 87 | *

88 | * In case callback for some of the primitives is not set, it will 89 | * be ignored and not parsed. 90 | * 91 | * @param message Raw OSMData blob. 92 | * @throws RuntimeException in case of protobuf parsing error. 93 | */ 94 | @Override 95 | protected void read(final byte[] message) { 96 | Osmformat.PrimitiveBlock primitives; 97 | try { 98 | primitives = Osmformat.PrimitiveBlock.parseFrom(message); 99 | } catch (InvalidProtocolBufferException e) { 100 | log.error("Error parsing OSMData block: {}", e.getMessage(), e); 101 | throw new RuntimeException(e); 102 | } 103 | var stringTable = primitives.getStringtable(); 104 | var groups = primitives.getPrimitivegroupList(); 105 | for (Osmformat.PrimitiveGroup group : groups) { 106 | if (nodesCb != null) { 107 | var parser = new NodeParser(nodesCb, 108 | stringTable, 109 | primitives.getGranularity(), 110 | primitives.getLatOffset(), 111 | primitives.getLonOffset(), 112 | primitives.getDateGranularity()); 113 | group.getNodesList().forEach(parser::parse); 114 | if (group.hasDense()) { 115 | parser.parse(group.getDense()); 116 | } 117 | } 118 | if (waysCb != null) { 119 | var parser = new WayParser(waysCb, stringTable); 120 | group.getWaysList().forEach(parser::parse); 121 | } 122 | if (relationsCb != null) { 123 | var parser = new RelationParser(relationsCb, stringTable); 124 | group.getRelationsList().forEach(parser::parse); 125 | } 126 | if (changesetsCb != null) { 127 | group.getChangesetsList().forEach(changeMessage -> { 128 | long id = changeMessage.getId(); 129 | log.debug("ChangeSet id: {}", id); 130 | changesetsCb.accept(id); 131 | }); 132 | } 133 | } 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/io/OSMWriter.java: -------------------------------------------------------------------------------- 1 | package com.wolt.osm.parallelpbf.io; 2 | 3 | import com.wolt.osm.parallelpbf.blob.BlobWriter; 4 | import 
com.wolt.osm.parallelpbf.encoder.OsmEntityEncoder; 5 | import com.wolt.osm.parallelpbf.encoder.StringTableEncoder; 6 | import com.wolt.osm.parallelpbf.encoder.OsmEncoder; 7 | import com.wolt.osm.parallelpbf.encoder.DenseNodesEncoder; 8 | import com.wolt.osm.parallelpbf.encoder.WayEncoder; 9 | import com.wolt.osm.parallelpbf.encoder.RelationEncoder; 10 | import com.wolt.osm.parallelpbf.entity.Node; 11 | import com.wolt.osm.parallelpbf.entity.OsmEntity; 12 | import com.wolt.osm.parallelpbf.entity.Relation; 13 | import com.wolt.osm.parallelpbf.entity.Way; 14 | import crosby.binary.Osmformat; 15 | import lombok.extern.slf4j.Slf4j; 16 | 17 | import java.util.concurrent.LinkedBlockingQueue; 18 | 19 | /** 20 | * Main handler for the OSM entities. Accepts entities over 21 | * the writer queue and stores them to the corresponding encoder. 22 | * On encoder overflow/close request encoders content is sent 23 | * to the writer. 24 | */ 25 | @Slf4j 26 | public final class OSMWriter implements Runnable { 27 | /** 28 | * Blob should not be bigger then 16M, but we limit to 29 | * 15M for a safety, as we do estimate size approximately. 30 | */ 31 | private static final int LIMIT_BLOB_SIZE = 15 * 1024 * 1024; 32 | 33 | /** 34 | * (Shared) BlobWriter for this OSMWriter. 35 | * BlobWriter.write() call expected to be thread-safe. 36 | */ 37 | private final BlobWriter writer; 38 | 39 | /** 40 | * Writer frontend-to-writing-threads interface. 41 | */ 42 | private final LinkedBlockingQueue writeQueue; 43 | 44 | /** 45 | * Current(!) densenodes block encoder. 46 | */ 47 | private OsmEntityEncoder nodesEncoder; 48 | 49 | /** 50 | * Current(!) ways block encoder. 51 | */ 52 | private OsmEntityEncoder wayEncoder; 53 | 54 | /** 55 | * Current(!) relation block encoder. 56 | */ 57 | private OsmEntityEncoder relationEncoder; 58 | 59 | /** 60 | * Block-wide stringtable encoder. 
61 | */ 62 | private StringTableEncoder stringEncoder; 63 | 64 | 65 | /** 66 | * Writes contents of encoders to the writer 67 | * and resets encoders. 68 | * 69 | * @param nodesSize Estimated size of nodes group. 70 | * @param waysSize Estimated size of ways group. 71 | * @param relationSize Estimated size of relations group. 72 | */ 73 | private void flush(final int nodesSize, final int waysSize, final int relationSize) { 74 | if (nodesSize + waysSize + relationSize > 0) { 75 | Osmformat.PrimitiveBlock.Builder block = Osmformat.PrimitiveBlock.newBuilder() 76 | .setStringtable(stringEncoder.getStrings()); 77 | if (nodesSize > 0) { 78 | block.setGranularity(OsmEncoder.GRANULARITY) 79 | .setLatOffset(0) 80 | .setLonOffset(0) 81 | .addPrimitivegroup(nodesEncoder.write()); 82 | } 83 | if (waysSize > 0) { 84 | block.addPrimitivegroup(wayEncoder.write()); 85 | } 86 | if (relationSize > 0) { 87 | block.addPrimitivegroup(relationEncoder.write()); 88 | } 89 | byte[] blob = block.build().toByteArray(); 90 | writer.writeData(blob); 91 | } 92 | 93 | encodersReset(); 94 | } 95 | 96 | /** 97 | * Encoder reset function. Recreates all the encoders in proper order. 98 | */ 99 | private void encodersReset() { 100 | this.stringEncoder = new StringTableEncoder(); 101 | this.nodesEncoder = new DenseNodesEncoder(this.stringEncoder); 102 | this.wayEncoder = new WayEncoder(this.stringEncoder); 103 | this.relationEncoder = new RelationEncoder(this.stringEncoder); 104 | } 105 | 106 | /** 107 | * OSMWriter constructor. 108 | * 109 | * @param output Shared BlobWriter 110 | * @param queue input queue with entities. 
111 | */ 112 | public OSMWriter(final BlobWriter output, final LinkedBlockingQueue queue) { 113 | this.writer = output; 114 | this.writeQueue = queue; 115 | encodersReset(); 116 | } 117 | 118 | @Override 119 | public void run() { 120 | Thread.currentThread().setName("OSMWriter"); 121 | while (true) { 122 | try { 123 | OsmEntity entity = writeQueue.take(); 124 | if (entity instanceof Node) { 125 | nodesEncoder.add((Node) entity); 126 | } else if (entity instanceof Way) { 127 | wayEncoder.add((Way) entity); 128 | } else if (entity instanceof Relation) { 129 | relationEncoder.add((Relation) entity); 130 | } else { 131 | log.error("Unknown entity type: {}", entity); 132 | } 133 | 134 | int nodesSize = nodesEncoder.estimateSize(); 135 | int waysSize = wayEncoder.estimateSize(); 136 | int relationSize = relationEncoder.estimateSize(); 137 | int blobSize = nodesSize + waysSize + relationSize + stringEncoder.getStringSize(); 138 | if (blobSize > LIMIT_BLOB_SIZE) { 139 | flush(nodesSize, waysSize, relationSize); 140 | } 141 | } catch (InterruptedException e) { 142 | flush(nodesEncoder.estimateSize(), wayEncoder.estimateSize(), relationEncoder.estimateSize()); 143 | log.debug("OSMWriter requested to stop"); 144 | return; 145 | } 146 | } 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/parser/NodeParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 
8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.parser; 19 | 20 | import com.wolt.osm.parallelpbf.entity.Info; 21 | import com.wolt.osm.parallelpbf.entity.Node; 22 | import crosby.binary.Osmformat; 23 | import lombok.extern.slf4j.Slf4j; 24 | import lombok.var; 25 | 26 | import java.util.function.Consumer; 27 | 28 | /** 29 | * Implements OSM Way parser. 30 | * 31 | * Can parse both primitive and dense nodes. 32 | */ 33 | @Slf4j 34 | public final class NodeParser extends BaseParser> { 35 | /** 36 | * Nano degrees scale. 37 | */ 38 | private static final double NANO = .000000001; 39 | 40 | /** 41 | * Granularity, units of nanodegrees, used to store coordinates. 42 | */ 43 | private final int granularity; 44 | 45 | /** 46 | * Offset value between the output coordinates coordinates and the granularity grid, in units of nanodegrees. 47 | * Latitude part. 48 | */ 49 | private final long latOffset; 50 | 51 | /** 52 | * Offset value between the output coordinates coordinates and the granularity grid, in units of nanodegrees. 53 | * Longitude part. 54 | */ 55 | private final long lonOffset; 56 | 57 | /** 58 | * Granularity of dates, normally represented in units of milliseconds since the 1970 epoch. 59 | */ 60 | private final int dateGranularity; 61 | 62 | /** 63 | * Sets all the node parsing parameters from the primitive message. 64 | * @param callback Callback to call on successful parse. 65 | * @param stringTable String table to use while parsing. 66 | * @param granularityValue Grid granularity value. 67 | * @param latOffsetValue Latitude offset of the grid. 
68 | * @param lonOffsetValue Longitude offset of the grid. 69 | * @param dateGranularityValue Date granularity value. 70 | */ 71 | public NodeParser(final Consumer callback, 72 | final Osmformat.StringTable stringTable, 73 | final int granularityValue, 74 | final long latOffsetValue, 75 | final long lonOffsetValue, 76 | final int dateGranularityValue) { 77 | super(callback, stringTable); 78 | this.granularity = granularityValue; 79 | this.latOffset = latOffsetValue; 80 | this.lonOffset = lonOffsetValue; 81 | this.dateGranularity = dateGranularityValue; 82 | } 83 | 84 | @Override 85 | public void parse(final Osmformat.Node message) { 86 | double latitude = NANO * (latOffset + (granularity * message.getLat())); 87 | double longitude = NANO * (lonOffset + (granularity * message.getLon())); 88 | Node node = new Node(message.getId(), latitude, longitude); 89 | node.setTags(parseTags(message.getKeysList(), message.getValsList())); 90 | node.setInfo(parseInfo(message)); 91 | if (log.isDebugEnabled()) { 92 | log.debug(node.toString()); 93 | } 94 | getCallback().accept(node); 95 | } 96 | 97 | /** 98 | * Parses nodes in DenseFormat. 99 | * @param nodes list of DenseNodes messages. 
100 | */ 101 | public void parse(final Osmformat.DenseNodes nodes) { 102 | int tagsKeyValuePointer = 0; 103 | long id = 0; 104 | double latitude = 0; 105 | double longitude = 0; 106 | 107 | long timestamp = 0; 108 | long changeset = 0; 109 | int uid = 0; 110 | int usernameStringId = 0; 111 | for (int indx = 0; indx < nodes.getIdCount(); indx++) { 112 | id += nodes.getId(indx); 113 | latitude += NANO * (latOffset + (granularity * nodes.getLat(indx))); 114 | longitude += NANO * (lonOffset + (granularity * nodes.getLon(indx))); 115 | 116 | Node node = new Node(id, latitude, longitude); 117 | if (nodes.getKeysValsCount() > 0) { 118 | while (true) { 119 | int keyIndex = nodes.getKeysVals(tagsKeyValuePointer); 120 | ++tagsKeyValuePointer; 121 | if (keyIndex == 0) { 122 | break; 123 | } 124 | int valueIndex = nodes.getKeysVals(tagsKeyValuePointer); 125 | ++tagsKeyValuePointer; 126 | String key = getStringTable().getS(keyIndex).toStringUtf8(); 127 | String value = getStringTable().getS(valueIndex).toStringUtf8(); 128 | node.getTags().put(key, value); 129 | } 130 | } 131 | if (nodes.hasDenseinfo()) { 132 | var infoMessage = nodes.getDenseinfo(); 133 | uid += infoMessage.getUid(indx); 134 | usernameStringId += infoMessage.getUserSid(indx); 135 | String username = getStringTable().getS(usernameStringId).toStringUtf8(); 136 | changeset += infoMessage.getChangeset(indx); 137 | timestamp += infoMessage.getTimestamp(indx); 138 | int version = infoMessage.getVersion(indx); 139 | boolean visible; 140 | if (infoMessage.getVisibleCount() > 0) { 141 | visible = infoMessage.getVisible(indx); 142 | } else { 143 | visible = true; 144 | } 145 | Info info = new Info(uid, username, version, timestamp * dateGranularity, changeset, visible); 146 | node.setInfo(info); 147 | } 148 | if (log.isDebugEnabled()) { 149 | log.debug(node.toString()); 150 | } 151 | getCallback().accept(node); 152 | 153 | } 154 | } 155 | } 156 | 
-------------------------------------------------------------------------------- /src/main/java/com/wolt/osm/parallelpbf/blob/BlobReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * Foobar is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with Foobar. If not, see . 16 | */ 17 | 18 | package com.wolt.osm.parallelpbf.blob; 19 | 20 | import com.google.protobuf.InvalidProtocolBufferException; 21 | import crosby.binary.Fileformat; 22 | import lombok.RequiredArgsConstructor; 23 | import lombok.extern.slf4j.Slf4j; 24 | 25 | import java.io.IOException; 26 | import java.io.InputStream; 27 | import java.nio.ByteBuffer; 28 | import java.util.Optional; 29 | 30 | /** 31 | * Handles all stream operations and retrieves Blob* messages. 32 | */ 33 | @Slf4j 34 | @RequiredArgsConstructor 35 | public final class BlobReader { 36 | /** 37 | * The size field have fixed length of 4 bytes. 38 | */ 39 | private static final int SIZE_FIELD_LENGTH = 4; 40 | 41 | /** 42 | * BlobHeader is never bigger then 64K. 43 | */ 44 | private static final int MAX_HEADER_SIZE = 64 * 1024; 45 | 46 | /** 47 | * Blob is never bigger then 32M. 48 | */ 49 | private static final int MAX_BLOB_SIZE = 32 * 1024 * 1024; 50 | 51 | /** 52 | * Input data stream. 
53 | */ 54 | private final InputStream input; 55 | 56 | /** 57 | * Tries to read specified amount of bytes from the stream. 58 | * @param bytesToRead how many bytes should be read. 59 | * @return Buffer of bytesToRead size or empty, 60 | * in case of EOF or IOException. 61 | */ 62 | private Optional readFromStream(final int bytesToRead) { 63 | byte[] buffer = new byte[bytesToRead]; 64 | try { 65 | int bytesRead = input.read(buffer); 66 | if (bytesRead != bytesToRead) { 67 | return Optional.empty(); 68 | } 69 | } catch (IOException e) { 70 | log.error("Error reading from the stream: {}", e.getMessage(), e); 71 | return Optional.empty(); 72 | } 73 | return Optional.of(buffer); 74 | } 75 | 76 | /** 77 | * Reads next blob header length from the current stream position. 78 | * As blob header length is just 4 bytes in network byte order, 79 | * this functions makes no checks and will return garbage 80 | * if called within a wrong stream position. 81 | * 82 | * @return length of next block header or empty if can't be read. 83 | */ 84 | public Optional readBlobHeaderLength() { 85 | Optional blobHeaderLengthBuffer = readFromStream(SIZE_FIELD_LENGTH); 86 | Optional result = blobHeaderLengthBuffer.map(value -> { 87 | ByteBuffer blobHeaderLengthWrapped = ByteBuffer.wrap(value); 88 | int blobHeaderLength = blobHeaderLengthWrapped.getInt(); 89 | log.trace("Read BlobHeaderLength: {}", blobHeaderLength); 90 | return blobHeaderLength; 91 | }); 92 | return result.flatMap(value -> { 93 | if (value > MAX_HEADER_SIZE) { 94 | log.warn("BlobHeader size is too big: {}", value); 95 | return Optional.empty(); 96 | } else { 97 | return result; 98 | } 99 | }); 100 | } 101 | 102 | /** 103 | * Reads next blob header from the current stream position. Size of the header is 104 | * specified in the parameters. As BlobHeader is a protobuf entity, basic validity checking 105 | * is made and 0 will be returned in case of failure. 
Same 0 will be returned if header can't be read fully 106 | * or eof is reached. 107 | * 108 | * @param headerLength Number of bytes to read and interpret as BlobHeader 109 | * @return Size of the following Blob in bytes or empty in case of read error. 110 | */ 111 | public Optional readBlobHeader(final int headerLength) { 112 | Optional blobHeaderBuffer = readFromStream(headerLength); 113 | Optional result = blobHeaderBuffer.flatMap(value -> { 114 | Fileformat.BlobHeader header; 115 | try { 116 | header = Fileformat.BlobHeader.parseFrom(blobHeaderBuffer.get()); 117 | log.trace("Got BlobHeader with type: {}, data size: {}", header.getType(), header.getDatasize()); 118 | return Optional.of(new BlobInformation(header.getDatasize(), header.getType())); 119 | } catch (InvalidProtocolBufferException e) { 120 | log.error("Failed to parse BlobHeader: {}", e.getMessage(), e); 121 | return Optional.empty(); 122 | } 123 | }); 124 | return result.flatMap(value -> { 125 | if (value.getSize() > MAX_BLOB_SIZE) { 126 | log.warn("Blob size is too big: {}", value); 127 | return Optional.empty(); 128 | } else { 129 | return result; 130 | } 131 | }); 132 | } 133 | 134 | /** 135 | * Reads next blob from the current stream position. Size of the blob is 136 | * specified in the parameters. 137 | * @param blobLength Number of bytes to read 138 | * @return Blob value or empty in case of read error 139 | */ 140 | public Optional readBlob(final int blobLength) { 141 | return readFromStream(blobLength); 142 | } 143 | 144 | /** 145 | * Fast forwards input stream to the offset. Used in conjunction with 146 | * partitioning. 147 | * @param offset Number of bytes to skip from the stream. 148 | * @return Optional with `offset` value or empty in case of failure. 
149 | */ 150 | public Optional skip(final Integer offset) { 151 | long left = offset; 152 | try { 153 | while (left != 0) { 154 | long skipped = input.skip(left); 155 | left -= skipped; 156 | } 157 | return Optional.of(offset); 158 | } catch (IOException e) { 159 | log.error("Error fast forwarding the stream: {}", e.getMessage(), e); 160 | return Optional.empty(); 161 | } 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 21 | 22 | 23 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parallel OSM PBF parser 2 | 3 | [OSM PBF format](https://wiki.openstreetmap.org/wiki/PBF_Format) multithreaded reader/writer written in Java. Supports all 4 | current OSM PBF features and options (only for reading) 5 | 6 | ## Rationale 7 | 8 | 9 | The OSMPBF format consists of sequence of independent blobs, containing actual OSM data. 
All existing Java readers 10 | of OSMPBF format read that file sequentially, processing each blob one by one using just a single thread. 11 | Parsing a single blob usually involves decompressing it and calculating OSM entity values from delta-packed 12 | data ([check wiki](https://wiki.openstreetmap.org/wiki/PBF_Format) for details). Obviously, this is more of a 13 | CPU-bound task than an IO-bound task, so loading the CPU up should speed up the processing. The simplest way to do that 14 | is to distribute the work across all the cores. And here we go... 15 | 16 | ## Download 17 | 18 | ### Maven 19 | 20 | ```xml 21 | <dependency> 22 | <groupId>com.wolt.osm</groupId> 23 | <artifactId>parallelpbf</artifactId> 24 | <version>0.3.1</version> 25 | </dependency> 26 | ``` 27 | 28 | ### Gradle 29 | 30 | ```gradle 31 | compile group: 'com.wolt.osm', name: 'parallelpbf', version: '0.3.1' 32 | ``` 33 | 34 | ### SBT 35 | 36 | ```sbt 37 | libraryDependencies += "com.wolt.osm" % "parallelpbf" % "0.3.1" 38 | ``` 39 | 40 | ### GitHub release 41 | 42 | → https://github.com/woltapp/parallelpbf/releases/tag/v0.3.1 43 | 44 | ## Reading 45 | 46 | As parsing is asynchronous, it heavily relies on callbacks. There are 7 callbacks defined: 47 | 48 | * `Consumer<Node> onNode` - is called for each Node in the OSM PBF file. This callback must be reentrant as it will be 49 | called simultaneously from different threads executing in parallel. 50 | 51 | * `Consumer<Way> onWay` - is called for each Way in the OSM PBF file. This callback must be reentrant as it will be 52 | called simultaneously from different threads executing in parallel. 53 | 54 | * `Consumer<Relation> onRelation` - is called for each Relation in the OSM PBF file. This callback must be reentrant as it will be 55 | called simultaneously from different threads executing in parallel. 56 | 57 | * `Consumer<Long> onChangeSet` - is called for each ChangeSet in the OSM PBF file. This callback must be reentrant as it will be 58 | called simultaneously from different threads executing in parallel. 59 | 60 | * `Consumer<Header> onHeader` - is called for the Header object of the OSM PBF file. Each OSM PBF file should have just a single Header object, 61 | so it is safe to assume that this callback will be called just once. 62 | 63 | * `Consumer<BoundBox> onBoundBox` - is called for the Bounding box object of the OSM PBF file. An OSM file may not have a BoundBox object written, 64 | so this callback may never be called. As BoundBox is a part of the OSMPBF Header object, it is safe to assume that this callback will be called at most once. 65 | 66 | * `Runnable onComplete` - called only in case of successful parse completion. All the other callbacks are guaranteed 67 | to finish before calling `onComplete` and no other callbacks will happen after the `onComplete` call. 68 | 69 | Callbacks can be attached to the parser using appropriate calls: 70 | 71 | ```java 72 | InputStream input = Thread.currentThread().getContextClassLoader() 73 | .getResourceAsStream("sample.pbf"); 74 | 75 | new ParallelBinaryParser(input, 1) 76 | .onHeader(this::processHeader) 77 | .onBoundBox(this::processBoundingBox) 78 | .onComplete(this::printOnCompletions) 79 | .onNode(this::processNodes) 80 | .onWay(this::processWays) 81 | .onRelation(this::processRelations) 82 | .onChangeset(this::processChangesets) 83 | .parse(); 84 | ``` 85 | 86 | All callbacks are optional; if you do not set some callback, nothing will break. Parsing of data for a missing callback 87 | will be skipped. So, for example, if you need just relations data, you should not set other callbacks and data blocks carrying 88 | other types of OSM data will be skipped completely, thus saving processing time. 89 | There is an exception to that rule - the Header data block is always parsed, even if no callback is set. 90 | Moreover, if no Node/Way/Relation/Changeset callbacks are set, actual processing of data will be skipped 91 | after finding the first Header block.
92 | 93 | `ParallelBinaryParser` constructor accepts two mandatory arguments: 94 | 95 | * `InputStream input` - InputStream pointing to the beginning of the OSMPBF data. 96 | * `int threads` - Number of threads for parallel processing. Parser will automatically throttle and stop input 97 | reading if all threads are busy. Each thread keeps blob data in memory, so memory usage will be at least 98 | 64MB per thread, but probably a couple of hundred megabytes per thread, depending on the block content. 99 | 100 | There are also two optional arguments for partitioning support: 101 | 102 | * `noPartitions` - Total number of partitions the processed file should be divided into. 103 | * `myShard` - Number of the partition associated with this instance of the parser. 104 | 105 | The partitioning is added to support multi-host parallel loading, when each host reads its own share of data 106 | independently and then somehow combines that data or continues processing it on each host separately. The whole idea 107 | of partitioning here is that we split up the file into some number of partitions (shards) and only process OSMData blocks 108 | from our 'own' shard, skipping all data blocks belonging to the other shards. Even with partitioning enabled, the whole 109 | InputStream will be processed and all OSMHeader blocks will be read and analyzed. 110 | 111 | To start actually processing the input stream, you should call the `parse()` function. It will create all required threads 112 | and start reading data from the input and parsing it. That function is intentionally blocking, but it is safe to 113 | run it in another thread and wait for completion using the `onComplete` callback. 114 | 115 | ### Warning on order instability 116 | 117 | An OSM PBF file can be sorted and stored in an ordered way. Unfortunately, due to the parallel nature of the parser, that 118 | ordering will be broken during parsing and several consecutive parse runs may return data in a different order for 119 | each run.
If order is important for you, you can either sort after parsing or switch back to the single-threaded 120 | parsers. 121 | 122 | 123 | ### Performance comparison 124 | 125 | | Region | Size in GB | Single thread read time in seconds | 24 threads read time in seconds | 126 | |----------------|------------|------------------------------------|---------------------------------| 127 | | Czech republic | 0.7 | 133 | 40 | 128 | | Asia | 7.3 | 2381 | 405 | 129 | | Europe | 21 | 3545 | 953 | 130 | | Planet | 47 | 8204 | 3203 | 131 | 132 | ## Writing 133 | 134 | The Write API differs from the Reading API, as it makes no sense to use callbacks here. The writer object provides three 135 | methods: to start writing, to feed the writer with data and to close the writer. The writing function itself is thread-safe and reentrant, 136 | so it can be used from parallel threads. 137 | 138 | So the correct workflow will be: 139 | 140 | ```java 141 | writer = new ParallelBinaryWriter(output,1, bbox); 142 | writer.start(); 143 | writer.write(node); 144 | writer.close(); 145 | ``` 146 | 147 | `ParallelBinaryWriter` accepts two mandatory arguments and one optional: 148 | 149 | * `OutputStream output` - OutputStream that will hold OSM PBF data. 150 | * `int threads` - Number of threads for parallel processing. Writer will automatically throttle and block on the `.write()` call if all threads are busy. Each thread keeps blob data in memory, so memory usage will be at least 16MB per thread, but may be more, depending on block content. 151 | * `BoundBox boundBox` - Optional BoundBox of the data to be written, can be `null` 152 | 153 | OSM PBF header will be written to the OutputStream during construction. 154 | 155 | The `.start()` call actually spawns writing threads and allows `.write()` calls to be made. The `.start()` call is not thread-safe. 156 | 157 | The `.write(OsmEntity)` call sends the specified entity to one of the writing threads. This call is thread-safe and calling it in parallel 158 | is recommended.
If the writing threads are overloaded, the `.write()` call will block and wait for a free writing thread to handle the request. 159 | 160 | `.close()` will flush the block to the output stream and terminate writing threads. Writer should not be used after calling `.close()` 161 | on it. 162 | 163 | ## Versioning 164 | 165 | We use [SemVer](http://semver.org/) for versioning. For the versions available, see the [tags on this repository](https://github.com/akashihi/parallelpbf/tags). 166 | 167 | ## Authors 168 | 169 | * **Denis Chaplygin** - *Initial work* - [akashihi](https://github.com/akashihi) 170 | * **Scott Crosby** - *.proto definition files* - [scrosby](https://github.com/openstreetmap/OSM-binary) 171 | 172 | ## License 173 | 174 | This project is licensed under the GPLv3 License - see the LICENSE file for details. 175 | The .proto definition files are licensed under the MIT license. 176 | -------------------------------------------------------------------------------- /src/main/proto/osmformat.proto: -------------------------------------------------------------------------------- 1 | /** Copyright (c) 2010 Scott A. Crosby. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | 9 | */ 10 | 11 | option optimize_for = LITE_RUNTIME; 12 | option java_package = "crosby.binary"; 13 | package OSMPBF; 14 | 15 | /* OSM Binary file format 16 | 17 | This is the master schema file of the OSM binary file format. This 18 | file is designed to support limited random-access and future 19 | extendability. 20 | 21 | A binary OSM file consists of a sequence of FileBlocks (please see 22 | fileformat.proto). The first fileblock contains a serialized instance 23 | of HeaderBlock, followed by a sequence of PrimitiveBlock blocks that 24 | contain the primitives. 25 | 26 | Each primitiveblock is designed to be independently parsable. It 27 | contains a string table storing all strings in that block (keys and 28 | values in tags, roles in relations, usernames, etc.) as well as 29 | metadata containing the precision of coordinates or timestamps in that 30 | block. 31 | 32 | A primitiveblock contains a sequence of primitive groups, each 33 | containing primitives of the same type (nodes, densenodes, ways, 34 | relations). Coordinates are stored in signed 64-bit integers. Lat&lon 35 | are measured in units of nanodegrees. The default of 36 | granularity of 100 nanodegrees corresponds to about 1cm on the ground, 37 | and a full lat or lon fits into 32 bits. 38 | 39 | Converting an integer to a latitude or longitude uses the formula: 40 | $OUT = IN * granularity / 10**9$. Many encoding schemes use delta 41 | coding when representing nodes and relations. 42 | 43 | */ 44 | 45 | ////////////////////////////////////////////////////////////////////////// 46 | ////////////////////////////////////////////////////////////////////////// 47 | 48 | /* Contains the file header.
*/ 49 | 50 | message HeaderBlock { 51 | optional HeaderBBox bbox = 1; 52 | /* Additional tags to aid in parsing this dataset */ 53 | repeated string required_features = 4; 54 | repeated string optional_features = 5; 55 | 56 | optional string writingprogram = 16; 57 | optional string source = 17; // From the bbox field. 58 | 59 | /* Tags that allow continuing an Osmosis replication */ 60 | 61 | // replication timestamp, expressed in seconds since the epoch, 62 | // otherwise the same value as in the "timestamp=..." field 63 | // in the state.txt file used by Osmosis 64 | optional int64 osmosis_replication_timestamp = 32; 65 | 66 | // replication sequence number (sequenceNumber in state.txt) 67 | optional int64 osmosis_replication_sequence_number = 33; 68 | 69 | // replication base URL (from Osmosis' configuration.txt file) 70 | optional string osmosis_replication_base_url = 34; 71 | } 72 | 73 | 74 | /** The bounding box field in the OSM header. BBOX, as used in the OSM 75 | header. Units are always in nanodegrees -- they do not obey 76 | granularity rules. */ 77 | 78 | message HeaderBBox { 79 | required sint64 left = 1; 80 | required sint64 right = 2; 81 | required sint64 top = 3; 82 | required sint64 bottom = 4; 83 | } 84 | 85 | 86 | /////////////////////////////////////////////////////////////////////// 87 | /////////////////////////////////////////////////////////////////////// 88 | 89 | 90 | message PrimitiveBlock { 91 | required StringTable stringtable = 1; 92 | repeated PrimitiveGroup primitivegroup = 2; 93 | 94 | // Granularity, units of nanodegrees, used to store coordinates in this block 95 | optional int32 granularity = 17 [default=100]; 96 | // Offset value between the output coordinates and the granularity grid in units of nanodegrees. 97 | optional int64 lat_offset = 19 [default=0]; 98 | optional int64 lon_offset = 20 [default=0]; 99 | 100 | // Granularity of dates, normally represented in units of milliseconds since the 1970 epoch.
101 | optional int32 date_granularity = 18 [default=1000]; 102 | 103 | 104 | // Proposed extension: 105 | //optional BBox bbox = XX; 106 | } 107 | 108 | // Group of OSMPrimitives. All primitives in a group must be the same type. 109 | message PrimitiveGroup { 110 | repeated Node nodes = 1; 111 | optional DenseNodes dense = 2; 112 | repeated Way ways = 3; 113 | repeated Relation relations = 4; 114 | repeated ChangeSet changesets = 5; 115 | } 116 | 117 | 118 | /** String table, contains the common strings in each block. 119 | 120 | Note that we reserve index '0' as a delimiter, so the entry at that 121 | index in the table is ALWAYS blank and unused. 122 | 123 | */ 124 | message StringTable { 125 | repeated bytes s = 1; 126 | } 127 | 128 | /* Optional metadata that may be included into each primitive. */ 129 | message Info { 130 | optional int32 version = 1 [default = -1]; 131 | optional int64 timestamp = 2; 132 | optional int64 changeset = 3; 133 | optional int32 uid = 4; 134 | optional uint32 user_sid = 5; // String IDs 135 | 136 | // The visible flag is used to store history information. It indicates that 137 | // the current object version has been created by a delete operation on the 138 | // OSM API. 139 | // When a writer sets this flag, it MUST add a required_features tag with 140 | // value "HistoricalInformation" to the HeaderBlock. 141 | // If this flag is not available for some object it MUST be assumed to be 142 | // true if the file has the required_features tag "HistoricalInformation" 143 | // set. 144 | optional bool visible = 6; 145 | } 146 | 147 | /** Optional metadata that may be included into each primitive. Special dense format used in DenseNodes.
*/ 148 | message DenseInfo { 149 | repeated int32 version = 1 [packed = true]; 150 | repeated sint64 timestamp = 2 [packed = true]; // DELTA coded 151 | repeated sint64 changeset = 3 [packed = true]; // DELTA coded 152 | repeated sint32 uid = 4 [packed = true]; // DELTA coded 153 | repeated sint32 user_sid = 5 [packed = true]; // String IDs for usernames. DELTA coded 154 | 155 | // The visible flag is used to store history information. It indicates that 156 | // the current object version has been created by a delete operation on the 157 | // OSM API. 158 | // When a writer sets this flag, it MUST add a required_features tag with 159 | // value "HistoricalInformation" to the HeaderBlock. 160 | // If this flag is not available for some object it MUST be assumed to be 161 | // true if the file has the required_features tag "HistoricalInformation" 162 | // set. 163 | repeated bool visible = 6 [packed = true]; 164 | } 165 | 166 | 167 | // THIS IS STUB DESIGN FOR CHANGESETS. NOT USED RIGHT NOW. 168 | // TODO: REMOVE THIS? 169 | message ChangeSet { 170 | required int64 id = 1; 171 | // 172 | // // Parallel arrays. 173 | // repeated uint32 keys = 2 [packed = true]; // String IDs. 174 | // repeated uint32 vals = 3 [packed = true]; // String IDs. 175 | // 176 | // optional Info info = 4; 177 | 178 | // optional int64 created_at = 8; 179 | // optional int64 closetime_delta = 9; 180 | // optional bool open = 10; 181 | // optional HeaderBBox bbox = 11; 182 | } 183 | 184 | 185 | message Node { 186 | required sint64 id = 1; 187 | // Parallel arrays. 188 | repeated uint32 keys = 2 [packed = true]; // String IDs. 189 | repeated uint32 vals = 3 [packed = true]; // String IDs. 190 | 191 | optional Info info = 4; // May be omitted in omitmeta 192 | 193 | required sint64 lat = 8; 194 | required sint64 lon = 9; 195 | } 196 | 197 | /* Used to densely represent a sequence of nodes that do not have any tags.
198 | 199 | We represent these nodes columnwise as five columns: ID's, lats, and 200 | lons, all delta coded. When metadata is not omitted, 201 | 202 | We encode keys & vals for all nodes as a single array of integers 203 | containing key-stringid and val-stringid, using a stringid of 0 as a 204 | delimiter between nodes. 205 | 206 | ( (<keyid> <valid>)* '0' )* 207 | */ 208 | 209 | message DenseNodes { 210 | repeated sint64 id = 1 [packed = true]; // DELTA coded 211 | 212 | //repeated Info info = 4; 213 | optional DenseInfo denseinfo = 5; 214 | 215 | repeated sint64 lat = 8 [packed = true]; // DELTA coded 216 | repeated sint64 lon = 9 [packed = true]; // DELTA coded 217 | 218 | // Special packing of keys and vals into one array. May be empty if all nodes in this block are tagless. 219 | repeated int32 keys_vals = 10 [packed = true]; 220 | } 221 | 222 | 223 | message Way { 224 | required int64 id = 1; 225 | // Parallel arrays. 226 | repeated uint32 keys = 2 [packed = true]; 227 | repeated uint32 vals = 3 [packed = true]; 228 | 229 | optional Info info = 4; 230 | 231 | repeated sint64 refs = 8 [packed = true]; // DELTA coded 232 | } 233 | 234 | message Relation { 235 | enum MemberType { 236 | NODE = 0; 237 | WAY = 1; 238 | RELATION = 2; 239 | } 240 | required int64 id = 1; 241 | 242 | // Parallel arrays.
243 | repeated uint32 keys = 2 [packed = true]; 244 | repeated uint32 vals = 3 [packed = true]; 245 | 246 | optional Info info = 4; 247 | 248 | // Parallel arrays 249 | repeated int32 roles_sid = 8 [packed = true]; // This should have been defined as uint32 for consistency, but it is now too late to change it 250 | repeated sint64 memids = 9 [packed = true]; // DELTA encoded 251 | repeated MemberType types = 10 [packed = true]; 252 | } 253 | 254 | -------------------------------------------------------------------------------- /src/test/java/com/wolt/osm/parallelpbf/ParallelBinaryParserIT.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of parallelpbf. 3 | * 4 | * parallelpbf is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * parallelpbf is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with parallelpbf. If not, see <https://www.gnu.org/licenses/>.
16 | */ 17 | 18 | package com.wolt.osm.parallelpbf; 19 | 20 | import com.wolt.osm.parallelpbf.entity.Node; 21 | import com.wolt.osm.parallelpbf.entity.Relation; 22 | import com.wolt.osm.parallelpbf.entity.RelationMember; 23 | import com.wolt.osm.parallelpbf.entity.Way; 24 | import lombok.SneakyThrows; 25 | import org.junit.jupiter.api.Assertions; 26 | import org.junit.jupiter.api.Test; 27 | 28 | import java.io.*; 29 | import java.util.Arrays; 30 | import java.util.Optional; 31 | import java.util.concurrent.atomic.AtomicInteger; 32 | import java.util.function.Consumer; 33 | 34 | import static org.junit.jupiter.api.Assertions.*; 35 | 36 | class ParallelBinaryParserIT { 37 | /* Reader specific part */ 38 | private Node simpleNode; 39 | private Node taggedNode; 40 | 41 | private Way taggedWay; 42 | 43 | private Relation taggedRelation; 44 | 45 | Consumer nodeChecker = (node) -> { 46 | if (node.getId() == 653970877) { 47 | simpleNode = node; 48 | } 49 | if (node.getId() == 502550970) { 50 | taggedNode = node; 51 | } 52 | }; 53 | 54 | Consumer wayChecker = (way) -> { 55 | if (way.getId() == 158788812) { 56 | taggedWay = way; 57 | } 58 | }; 59 | 60 | Consumer relationsChecker = relation -> { 61 | if (relation.getId() == 31640) { 62 | taggedRelation = relation; 63 | } 64 | }; 65 | 66 | /* Writer specific part */ 67 | private ParallelBinaryWriter writer; 68 | 69 | private void writeNodes(Node node) { 70 | writer.write(node); 71 | } 72 | 73 | private void writeWays(Way way) { 74 | writer.write(way); 75 | } 76 | 77 | private void writeRelations(Relation relation) { 78 | writer.write(relation); 79 | } 80 | 81 | @SneakyThrows 82 | private void closeOnComplete() { 83 | writer.close(); 84 | } 85 | 86 | /* Shared code */ 87 | private void parse(InputStream input) { 88 | new ParallelBinaryParser(input, 1) 89 | .onNode(nodeChecker) 90 | .onWay(wayChecker) 91 | .onRelation(relationsChecker) 92 | .parse(); 93 | } 94 | 95 | private void testSimpleNode() { 96 | assertEquals(51.7636027, 
simpleNode.getLat(), 0.0000001); 97 | assertEquals(-0.22875700000000002, simpleNode.getLon(), 0.0000001); 98 | assertTrue(simpleNode.getTags().isEmpty()); 99 | } 100 | 101 | private void testTaggedNode() { 102 | assertEquals(51.76511770000001, taggedNode.getLat(), 0.0000001); 103 | assertEquals(-0.23366680000000006, taggedNode.getLon(), 0.0000001); 104 | assertFalse(taggedNode.getTags().isEmpty()); 105 | assertTrue(taggedNode.getTags().containsKey("name")); 106 | assertEquals("Oaktree Close", taggedNode.getTags().get("name")); 107 | assertTrue(taggedNode.getTags().containsKey("highway")); 108 | assertEquals("bus_stop", taggedNode.getTags().get("highway")); 109 | 110 | assertEquals("NaPTAN", taggedNode.getInfo().getUsername()); 111 | assertEquals(1, taggedNode.getInfo().getVersion()); 112 | assertEquals(2539009, taggedNode.getInfo().getChangeset()); 113 | assertEquals(104459, taggedNode.getInfo().getUid()); 114 | assertEquals(1253397762000L, taggedNode.getInfo().getTimestamp()); 115 | } 116 | 117 | private void testWay() { 118 | assertEquals(Arrays.asList(1709246789L, 1709246746L, 1709246741L, 1709246791L), taggedWay.getNodes()); 119 | assertFalse(taggedWay.getTags().isEmpty()); 120 | assertTrue(taggedWay.getTags().containsKey("highway")); 121 | assertEquals("footway", taggedWay.getTags().get("highway")); 122 | } 123 | 124 | public void testRelation() { 125 | Optional member = taggedRelation.getMembers().stream().filter(m -> m.getId() == 25896432).findAny(); 126 | assertTrue(member.isPresent()); 127 | assertEquals("forward", member.get().getRole()); 128 | Assertions.assertEquals(RelationMember.Type.WAY, member.get().getType()); 129 | assertFalse(taggedRelation.getTags().isEmpty()); 130 | assertTrue(taggedRelation.getTags().containsKey("route")); 131 | assertEquals("bicycle", taggedRelation.getTags().get("route")); 132 | assertNotNull(taggedRelation.getInfo()); 133 | assertEquals("Mauls", taggedRelation.getInfo().getUsername()); 134 | } 135 | 136 | @Test 137 | void 
testParser() { 138 | InputStream input = Thread.currentThread().getContextClassLoader() 139 | .getResourceAsStream("sample.pbf"); 140 | parse(input); 141 | 142 | testSimpleNode(); 143 | assertEquals(234999, simpleNode.getInfo().getUid()); 144 | assertEquals("Nicholas Shanks", simpleNode.getInfo().getUsername()); 145 | assertEquals(1, simpleNode.getInfo().getVersion()); 146 | assertEquals(1267144226000L, simpleNode.getInfo().getTimestamp()); 147 | assertEquals(3977001, simpleNode.getInfo().getChangeset()); 148 | assertTrue(simpleNode.getInfo().isVisible()); 149 | 150 | testTaggedNode(); 151 | assertEquals(104459, taggedNode.getInfo().getUid()); 152 | assertEquals("NaPTAN", taggedNode.getInfo().getUsername()); 153 | assertEquals(1, taggedNode.getInfo().getVersion()); 154 | assertEquals(1253397762000L, taggedNode.getInfo().getTimestamp()); 155 | assertEquals(2539009, taggedNode.getInfo().getChangeset()); 156 | assertTrue(taggedNode.getInfo().isVisible()); 157 | 158 | testWay(); 159 | assertEquals(470302, taggedWay.getInfo().getUid()); 160 | assertEquals("Kjc", taggedWay.getInfo().getUsername()); 161 | assertEquals(1, taggedWay.getInfo().getVersion()); 162 | assertEquals(1334007464L, taggedWay.getInfo().getTimestamp()); 163 | assertEquals(11245909, taggedWay.getInfo().getChangeset()); 164 | assertTrue(taggedWay.getInfo().isVisible()); 165 | 166 | testRelation(); 167 | assertEquals(24119, taggedRelation.getInfo().getUid()); 168 | assertEquals("Mauls", taggedRelation.getInfo().getUsername()); 169 | assertEquals(81, taggedRelation.getInfo().getVersion()); 170 | assertEquals(1337419064L, taggedRelation.getInfo().getTimestamp()); 171 | assertEquals(11640673, taggedRelation.getInfo().getChangeset()); 172 | assertTrue(taggedRelation.getInfo().isVisible()); 173 | } 174 | 175 | @Test 176 | void testWriter() throws IOException { 177 | String outputFilename = System.getProperty("java.io.tmpdir")+"/parallel.pbf"; 178 | File outputFile = new File(outputFilename); 179 | if 
(outputFile.exists()) { 180 | outputFile.delete(); 181 | } 182 | outputFile.createNewFile(); 183 | OutputStream output = new FileOutputStream(outputFile); 184 | 185 | writer = new ParallelBinaryWriter(output,1, null); 186 | writer.start(); 187 | 188 | InputStream input = Thread.currentThread().getContextClassLoader().getResourceAsStream("sample.pbf"); 189 | new ParallelBinaryParser(input, 1) 190 | .onComplete(this::closeOnComplete) 191 | .onNode(this::writeNodes) 192 | .onWay(this::writeWays) 193 | .onRelation(this::writeRelations) 194 | .parse(); 195 | output.close(); 196 | 197 | InputStream written = new FileInputStream(outputFile); 198 | parse(written); 199 | 200 | testSimpleNode(); 201 | testTaggedNode(); 202 | testWay(); 203 | testParser(); 204 | } 205 | 206 | @Test 207 | void testExceptionProcessing() { 208 | final AtomicInteger completedCount = new AtomicInteger(); 209 | 210 | final AtomicInteger nodeCount = new AtomicInteger(); 211 | assertThrows(RuntimeException.class, () -> { 212 | try (InputStream input = Thread.currentThread().getContextClassLoader().getResourceAsStream("sample.pbf")) { 213 | new ParallelBinaryParser(input, 5).onComplete(completedCount::incrementAndGet).onNode((Node node) -> { 214 | nodeCount.incrementAndGet(); 215 | if (nodeCount.get() > 5) { 216 | throw new RuntimeException("Problem processing node!"); 217 | } 218 | }).onWay(way -> { 219 | }).onRelation(relation -> { 220 | }).parse(); 221 | } 222 | }); 223 | 224 | final AtomicInteger wayCount = new AtomicInteger(); 225 | assertThrows(RuntimeException.class, () -> { 226 | try (InputStream input = Thread.currentThread().getContextClassLoader().getResourceAsStream("sample.pbf")) { 227 | new ParallelBinaryParser(input, 5).onComplete(completedCount::incrementAndGet).onNode(node -> { 228 | }).onWay(way -> { 229 | wayCount.incrementAndGet(); 230 | if (wayCount.get() > 5) { 231 | throw new RuntimeException("Problem processing way!"); 232 | } 233 | }).onRelation(relation -> { 234 | }).parse(); 
235 | } 236 | }); 237 | 238 | final AtomicInteger relationCount = new AtomicInteger(); 239 | assertThrows(RuntimeException.class, () -> { 240 | try (InputStream input = Thread.currentThread().getContextClassLoader().getResourceAsStream("sample.pbf")) { 241 | new ParallelBinaryParser(input, 5).onComplete(completedCount::incrementAndGet).onNode(node -> { 242 | }).onWay(way -> { 243 | }).onRelation(relation -> { 244 | relationCount.incrementAndGet(); 245 | if (relationCount.get() > 2) { 246 | throw new RuntimeException("Problem processing relation!"); 247 | } 248 | }).parse(); 249 | } 250 | }); 251 | 252 | assertEquals(0, completedCount.get()); 253 | } 254 | 255 | } 256 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 4.0.0 6 | 7 | com.wolt.osm 8 | parallelpbf 9 | 0.3.1 10 | jar 11 | 12 | parallelpbf 13 | OpenStreetMap PBF format multithreaded reader 14 | https://github.com/woltapp/parallelpbf 15 | 16 | 17 | 18 | GPLv3 19 | https://github.com/woltapp/parallelpbf/blob/master/LICENSE 20 | repo 21 | 22 | 23 | 24 | 25 | scm:git:https://github.com/woltapp/parallelpbf.git 26 | scm:git:https://github.com/woltapp/parallelpbf.git 27 | https://github.com/woltapp/parallelpbf 28 | v0.1.1 29 | 30 | 31 | 32 | 33 | Denis Chaplygin 34 | denis.chaplygin@wolt.com 35 | Wolt enterprises oy 36 | https://wolt.com/ 37 | 38 | 39 | 40 | 41 | GitHub 42 | https://github.com/akashihi/woltapp/issues 43 | 44 | 45 | 46 | 47 | ossrh 48 | https://oss.sonatype.org/content/repositories/snapshots 49 | 50 | 51 | 52 | 53 | UTF-8 54 | 1.8 55 | 1.8 56 | 57 | 58 | 59 | 60 | com.google.protobuf 61 | protobuf-java 62 | 3.12.2 63 | 64 | 65 | 66 | org.slf4j 67 | slf4j-api 68 | 1.7.28 69 | 70 | 71 | 72 | org.projectlombok 73 | lombok 74 | 1.18.10 75 | provided 76 | 77 | 78 | 79 | ch.qos.logback 80 | logback-classic 81 | 1.2.3 82 | test 83 | 84 | 85 | 86 | org.junit.jupiter 
87 | junit-jupiter-engine 88 | 5.1.0 89 | test 90 | 91 | 92 | 93 | commons-io 94 | commons-io 95 | 2.7 96 | test 97 | 98 | 99 | 100 | org.easymock 101 | easymock 102 | 4.0.2 103 | test 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | maven-clean-plugin 113 | 3.1.0 114 | 115 | 116 | 117 | maven-resources-plugin 118 | 3.0.2 119 | 120 | 121 | maven-compiler-plugin 122 | 3.8.0 123 | 124 | 125 | maven-surefire-plugin 126 | 2.22.1 127 | 128 | 129 | maven-jar-plugin 130 | 3.0.2 131 | 132 | 133 | maven-install-plugin 134 | 2.5.2 135 | 136 | 137 | maven-deploy-plugin 138 | 2.8.2 139 | 140 | 141 | 142 | maven-site-plugin 143 | 3.7.1 144 | 145 | 146 | maven-project-info-reports-plugin 147 | 3.0.0 148 | 149 | 150 | 151 | 152 | 153 | org.apache.maven.plugins 154 | maven-checkstyle-plugin 155 | 3.1.0 156 | 157 | checkstyle.xml 158 | UTF-8 159 | true 160 | true 161 | false 162 | 163 | 164 | 165 | validate 166 | validate 167 | 168 | check 169 | 170 | 171 | 172 | 173 | 174 | org.apache.maven.plugins 175 | maven-pmd-plugin 176 | 3.12.0 177 | 178 | false 179 | 180 | 181 | 182 | validate 183 | validate 184 | 185 | check 186 | cpd-check 187 | 188 | 189 | 190 | 191 | 192 | com.github.spotbugs 193 | spotbugs-maven-plugin 194 | 4.0.4 195 | 196 | ${project.basedir}/spotbugsExclude.xml 197 | 198 | 199 | 200 | validate 201 | compile 202 | 203 | check 204 | 205 | 206 | 207 | 208 | 209 | org.apache.maven.plugins 210 | maven-failsafe-plugin 211 | 3.0.0-M3 212 | 213 | 214 | 215 | integration-test 216 | verify 217 | 218 | 219 | 220 | 221 | 222 | org.apache.maven.plugins 223 | maven-gpg-plugin 224 | 1.5 225 | 226 | 227 | sign-artifacts 228 | verify 229 | 230 | sign 231 | 232 | 233 | 234 | 235 | 236 | org.sonatype.plugins 237 | nexus-staging-maven-plugin 238 | 1.6.7 239 | true 240 | 241 | ossrh 242 | https://oss.sonatype.org/ 243 | true 244 | 245 | 246 | 247 | org.apache.maven.plugins 248 | maven-source-plugin 249 | 2.2.1 250 | 251 | 252 | attach-sources 253 | 254 | jar-no-fork 255 | 256 | 257 
| 258 | 259 | 260 | org.apache.maven.plugins 261 | maven-javadoc-plugin 262 | 2.9.1 263 | 264 | 265 | false 266 | 267 | 268 | 269 | attach-javadocs 270 | 271 | jar 272 | 273 | 274 | 275 | 276 | 277 | com.github.os72 278 | protoc-jar-maven-plugin 279 | 3.11.4 280 | 281 | 282 | generate-sources 283 | 284 | run 285 | 286 | 287 | 3.12.3 288 | 289 | ${project.basedir}/src/main/proto 290 | 291 | ${project.build.directory}/generated-sources 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | --------------------------------------------------------------------------------