├── .dockerignore ├── .travis.yml ├── Dockerfile ├── LICENSE.md ├── Makefile ├── README.md ├── algorithm_mod.hpp ├── create.cpp ├── decode.cpp ├── header.cpp ├── header.hpp ├── jsonpull ├── jsonpull.c └── jsonpull.h ├── merge.cpp ├── merge.hpp ├── mergetool.cpp ├── milo ├── LICENSE.txt └── dtoa_milo.h ├── protozero ├── byteswap.hpp ├── config.hpp ├── exception.hpp ├── iterators.hpp ├── pbf_builder.hpp ├── pbf_message.hpp ├── pbf_reader.hpp ├── pbf_writer.hpp ├── types.hpp ├── varint.hpp └── version.hpp ├── serial.cpp ├── serial.hpp ├── tests ├── 1.json ├── 2.json ├── check-minimum-count.js └── fixture │ ├── bitmap-vector.geojson │ ├── both.geojson │ ├── thresh-1.geojson │ ├── thresh-mp.geojson │ └── thresh.geojson ├── tile.cpp └── tippecanoe ├── mbtiles.cpp ├── mbtiles.hpp ├── mvt.cpp ├── mvt.hpp ├── projection.cpp ├── projection.hpp ├── text.cpp └── text.hpp /.dockerignore: -------------------------------------------------------------------------------- 1 | # Don't copy Dockerfile or git items 2 | .gitignore 3 | .git 4 | .o 5 | Dockerfile -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "6" 4 | 5 | sudo: false 6 | 7 | matrix: 8 | include: 9 | # test on docker+ubuntu 10 | - os: linux 11 | compiler: clang 12 | services: 13 | - docker 14 | sudo: true 15 | dist: trusty 16 | env: DOCKERFILE=Dockerfile 17 | before_install: [] 18 | install: 19 | - docker build -t tile-count-image -f ${DOCKERFILE} . 
20 | script: 21 | - docker run -it tile-count-image 22 | # debug+santizer build 23 | - os: linux 24 | compiler: clang 25 | sudo: true # to workaround https://github.com/mapbox/node-cpp-skel/issues/93 26 | env: CLANG_VERSION='5.0.1' BUILDTYPE=Debug CC="clang" CXX="clang++" CXXFLAGS="-fsanitize=address,undefined,integer -fsanitize-address-use-after-scope -fno-sanitize-recover=all" CFLAGS="-fsanitize=address,undefined,integer -fsanitize-address-use-after-scope -fno-sanitize-recover=all" LDFLAGS="-fsanitize=address,undefined,integer" 27 | addons: 28 | apt: 29 | sources: ['ubuntu-toolchain-r-test' ] 30 | packages: [ 'libstdc++6','libstdc++-5-dev', 'libsqlite3-dev', 'libpng-dev' ] 31 | # coverage+debug build 32 | - os: linux 33 | compiler: clang 34 | env: CLANG_VERSION='5.0.1' BUILDTYPE=Debug CC="clang" CXX="clang++" CXXFLAGS="--coverage" CFLAGS="--coverage" LDFLAGS="--coverage" 35 | addons: 36 | apt: 37 | sources: ['ubuntu-toolchain-r-test' ] 38 | packages: [ 'libstdc++6','libstdc++-5-dev', 'libsqlite3-dev', 'libpng-dev' ] 39 | # release+linux+g++ 40 | - os: linux 41 | compiler: gcc 42 | env: BUILDTYPE=Release CC="gcc-4.9" CXX="g++-4.9" 43 | addons: 44 | apt: 45 | sources: ['ubuntu-toolchain-r-test'] 46 | packages: [ 'g++-4.9', 'libsqlite3-dev', 'libpng-dev' ] 47 | # release+linux+clang++ 48 | - os: linux 49 | compiler: clang 50 | env: CLANG_VERSION='5.0.1' BUILDTYPE=Release CC="clang" CXX="clang++" 51 | addons: 52 | apt: 53 | sources: ['ubuntu-toolchain-r-test' ] 54 | packages: [ 'libstdc++6','libstdc++-5-dev', 'libsqlite3-dev', 'libpng-dev' ] 55 | # release+osx 56 | - os: osx 57 | compiler: clang 58 | env: BUILDTYPE=Release 59 | before_install: 60 | - brew update && brew install libpng || true 61 | # debug+osx 62 | - os: osx 63 | compiler: clang 64 | env: BUILDTYPE=Debug 65 | before_install: 66 | - brew update && brew install libpng || true 67 | 68 | install: 69 | - DEPS_DIR="${TRAVIS_BUILD_DIR}/deps" 70 | - export PATH=${DEPS_DIR}/bin:${PATH} && mkdir -p ${DEPS_DIR} 
71 | - | 72 | if [[ ${CLANG_VERSION:-false} != false ]]; then 73 | export CCOMPILER='clang' 74 | export CXXCOMPILER='clang++' 75 | CLANG_URL="https://mason-binaries.s3.amazonaws.com/${TRAVIS_OS_NAME}-x86_64/clang++/${CLANG_VERSION}.tar.gz" 76 | travis_retry wget --quiet -O - ${CLANG_URL} | tar --strip-components=1 -xz -C ${DEPS_DIR} 77 | fi 78 | - BUILDTYPE=${BUILDTYPE} make -j2 79 | 80 | before_script: 81 | # tests depend on tippecanoe 82 | - export TIPPECANOE_VERSION="1.26.3" 83 | - wget https://github.com/mapbox/tippecanoe/archive/${TIPPECANOE_VERSION}.tar.gz 84 | - tar -xvf ${TIPPECANOE_VERSION}.tar.gz 85 | - cd tippecanoe-${TIPPECANOE_VERSION} 86 | - make 87 | - export PATH=$(pwd):${PATH} 88 | - cd ../ 89 | 90 | script: 91 | - BUILDTYPE=${BUILDTYPE} make test 92 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Start from ubuntu 2 | FROM ubuntu:16.04 3 | 4 | # Update repos and install dependencies 5 | RUN apt-get update \ 6 | && apt-get -y upgrade \ 7 | && apt-get -y install build-essential wget curl libsqlite3-dev zlib1g-dev libpng-dev 8 | 9 | # Create a directory and copy in all files 10 | RUN mkdir -p /tmp/tile-count-src 11 | WORKDIR /tmp/tile-count-src 12 | COPY . 
/tmp/tile-count-src 13 | 14 | # Build tile-count 15 | RUN make \ 16 | && make install 17 | 18 | # Install tippecanoe since the tests depend on it 19 | ENV TIPPECANOE_VERSION="1.26.3" 20 | 21 | RUN wget https://github.com/mapbox/tippecanoe/archive/${TIPPECANOE_VERSION}.tar.gz && \ 22 | tar -xvf ${TIPPECANOE_VERSION}.tar.gz && \ 23 | cd tippecanoe-${TIPPECANOE_VERSION} && \ 24 | make && \ 25 | make install 26 | 27 | RUN curl https://nodejs.org/dist/v4.8.6/node-v4.8.6-linux-x64.tar.gz | tar zxC /usr/local --strip-components=1 28 | 29 | # Run the tests 30 | CMD make test 31 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Mapbox Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | - Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | - Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 18 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 20 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 22 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PREFIX ?= /usr/local 2 | MANDIR ?= $(PREFIX)/share/man/man1/ 3 | BUILDTYPE ?= Release 4 | SHELL = /bin/bash 5 | 6 | # inherit from env if set 7 | CC := $(CC) 8 | CXX := $(CXX) 9 | CFLAGS := $(CFLAGS) 10 | CXXFLAGS := $(CXXFLAGS) -std=c++11 11 | LDFLAGS := $(LDFLAGS) 12 | WARNING_FLAGS := -Wall -Wshadow -Wsign-compare 13 | RELEASE_FLAGS := -O3 -DNDEBUG 14 | DEBUG_FLAGS := -O0 -DDEBUG -fno-inline-functions -fno-omit-frame-pointer 15 | 16 | ifeq ($(BUILDTYPE),Release) 17 | FINAL_FLAGS := -g $(WARNING_FLAGS) $(RELEASE_FLAGS) 18 | else 19 | FINAL_FLAGS := -g $(WARNING_FLAGS) $(DEBUG_FLAGS) 20 | endif 21 | 22 | PGMS := tile-count-create tile-count-decode tile-count-tile tile-count-merge 23 | 24 | all: $(PGMS) 25 | 26 | install: $(PGMS) 27 | cp $(PGMS) $(PREFIX)/bin 28 | 29 | PG= 30 | 31 | H = $(wildcard *.h) $(wildcard *.hpp) 32 | C = $(wildcard *.c) $(wildcard *.cpp) 33 | 34 | INCLUDES = -I/usr/local/include -I. 
35 | LIBS = -L/usr/local/lib 36 | 37 | tile-count-create: tippecanoe/projection.o create.o header.o serial.o merge.o jsonpull/jsonpull.o 38 | $(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread 39 | 40 | tile-count-decode: tippecanoe/projection.o decode.o header.o serial.o 41 | $(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread 42 | 43 | tile-count-tile: tippecanoe/projection.o tile.o header.o serial.o tippecanoe/mbtiles.o tippecanoe/mvt.o tippecanoe/text.o 44 | $(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread -lpng 45 | 46 | tile-count-merge: mergetool.o header.o serial.o merge.o 47 | $(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread 48 | 49 | -include $(wildcard *.d) 50 | 51 | %.o: %.c 52 | $(CC) -MMD $(PG) $(INCLUDES) $(FINAL_FLAGS) $(CFLAGS) -c -o $@ $< 53 | 54 | %.o: %.cpp 55 | $(CXX) -MMD $(PG) $(INCLUDES) $(FINAL_FLAGS) $(CXXFLAGS) -c -o $@ $< 56 | 57 | clean: 58 | rm -f ./tile-count-* *.o *.d */*.o */*.d 59 | 60 | indent: 61 | clang-format -i -style="{BasedOnStyle: Google, IndentWidth: 8, UseTab: Always, AllowShortIfStatementsOnASingleLine: false, ColumnLimit: 0, ContinuationIndentWidth: 8, SpaceAfterCStyleCast: true, IndentCaseLabels: false, AllowShortBlocksOnASingleLine: false, AllowShortFunctionsOnASingleLine: false, SortIncludes: false}" $(C) $(H) 62 | 63 | test: all 64 | rm -rf tests/tmp 65 | mkdir -p tests/tmp 66 | ./tile-count-create -s20 -o tests/tmp/1.count tests/1.json 67 | ./tile-count-create -o tests/tmp/2.count tests/2.json 68 | cat tests/1.json tests/2.json | ./tile-count-create -s16 -o tests/tmp/both.count 69 | # Verify merging of .count files 70 | ./tile-count-merge -s16 -o tests/tmp/merged.count tests/tmp/1.count tests/tmp/2.count 71 | cmp tests/tmp/merged.count tests/tmp/both.count 72 | # Verify merging of hundreds of .count files 73 | ./tile-count-decode 
tests/tmp/merged.count | split -l20 - tests/tmp/split 74 | cat tests/tmp/split?? | ./tile-count-create -o tests/tmp/combined.count 75 | for i in tests/tmp/split??; do ./tile-count-create -o $$i.count $$i; done 76 | ./tile-count-merge -o tests/tmp/merged2.count tests/tmp/split*.count 77 | cmp tests/tmp/merged2.count tests/tmp/combined.count 78 | # Verify merging a list of files from the standard input 79 | ls tests/tmp/split*.count | ./tile-count-merge -F -o tests/tmp/merged3.count 80 | cmp tests/tmp/merged2.count tests/tmp/merged3.count 81 | # Verify merging of vector mbtiles with separate features per bin 82 | ./tile-count-tile -f -1 -y count -s16 -o tests/tmp/1.mbtiles tests/tmp/1.count 83 | ./tile-count-tile -f -1 -y count -s16 -o tests/tmp/2.mbtiles tests/tmp/2.count 84 | ./tile-count-tile -f -1 -y count -s16 -o tests/tmp/both.mbtiles tests/tmp/both.count 85 | ./tile-count-tile -f -1 -y count -s16 -o tests/tmp/merged.mbtiles tests/tmp/1.mbtiles tests/tmp/2.mbtiles 86 | tippecanoe-decode tests/tmp/both.mbtiles | grep -v -e '"bounds"' -e '"center"' -e '"description"' -e '"max_density"' -e '"name"' > tests/tmp/both.geojson 87 | tippecanoe-decode tests/tmp/merged.mbtiles | grep -v -e '"bounds"' -e '"center"' -e '"description"' -e '"max_density"' -e '"name"' > tests/tmp/merged.geojson 88 | cmp tests/tmp/both.geojson tests/tmp/merged.geojson 89 | cmp tests/tmp/both.geojson tests/fixture/both.geojson 90 | # Verify round-trip between normalized vectors and bitmaps 91 | ./tile-count-tile -f -s16 -o tests/tmp/both.mbtiles tests/tmp/both.count 92 | ./tile-count-tile -f -b -o tests/tmp/bitmap.mbtiles tests/tmp/both.mbtiles 93 | ./tile-count-tile -f -o tests/tmp/bitmap-vector.mbtiles tests/tmp/bitmap.mbtiles 94 | tippecanoe-decode tests/tmp/both.mbtiles | grep -v -e '"bounds"' -e '"center"' -e '"description"' -e '"name"' > tests/tmp/both.geojson 95 | tippecanoe-decode tests/tmp/bitmap-vector.mbtiles | grep -v -e '"bounds"' -e '"center"' -e '"description"' -e '"name"' > 
tests/tmp/bitmap-vector.geojson 96 | cmp tests/tmp/both.geojson tests/tmp/bitmap-vector.geojson 97 | cmp tests/tmp/both.geojson tests/fixture/bitmap-vector.geojson 98 | # Verify round trip between (normalized) polygon vectors and point vectors 99 | ./tile-count-tile -f -P -o tests/tmp/both-point.mbtiles tests/tmp/both.mbtiles 100 | ./tile-count-tile -f -o tests/tmp/both-point-poly.mbtiles tests/tmp/both-point.mbtiles 101 | tippecanoe-decode tests/tmp/both-point-poly.mbtiles | grep -v -e '"bounds"' -e '"center"' -e '"description"' -e '"name"' > tests/tmp/both-point-poly.geojson 102 | cmp tests/tmp/both.geojson tests/tmp/both-point-poly.geojson 103 | cmp tests/tmp/both.geojson tests/fixture/bitmap-vector.geojson 104 | # Verify that absolute threshold works 105 | ./tile-count-tile -f -1 -M7 -y count -s16 -o tests/tmp/both.mbtiles tests/tmp/both.count 106 | tippecanoe-decode tests/tmp/both.mbtiles > tests/tmp/both.geojson 107 | cmp tests/tmp/both.geojson tests/fixture/thresh.geojson 108 | ./tests/check-minimum-count.js tests/tmp/both.geojson 7 109 | # Verify absolute threshold with multipolygons 110 | ./tile-count-tile -f -M7 -y count -s16 -o tests/tmp/both.mbtiles tests/tmp/both.count 111 | tippecanoe-decode tests/tmp/both.mbtiles > tests/tmp/both.geojson 112 | cmp tests/tmp/both.geojson tests/fixture/thresh-mp.geojson 113 | ./tests/check-minimum-count.js tests/tmp/both.geojson 7 114 | # Verify that level thresholds produce the same results with bitmap and vector 115 | ./tile-count-tile -f -m7 -s16 -o tests/tmp/vector.mbtiles tests/tmp/both.count 116 | ./tile-count-tile -f -m7 -s16 -b -o tests/tmp/raster.mbtiles tests/tmp/both.count 117 | ./tile-count-tile -f -o tests/tmp/raster-vector.mbtiles tests/tmp/raster.mbtiles 118 | ./tile-count-tile -f -m7 -s16 -1 -o tests/tmp/vector-1.mbtiles tests/tmp/both.count 119 | ./tile-count-tile -f -o tests/tmp/vector-1-vector.mbtiles tests/tmp/vector-1.mbtiles 120 | tippecanoe-decode tests/tmp/vector.mbtiles | grep -v -e '"bounds"' 
-e '"center"' -e '"description"' -e '"name"' > tests/tmp/vector.geojson 121 | tippecanoe-decode tests/tmp/raster-vector.mbtiles | grep -v -e '"bounds"' -e '"center"' -e '"description"' -e '"name"' > tests/tmp/raster-vector.geojson 122 | tippecanoe-decode tests/tmp/vector-1-vector.mbtiles | grep -v -e '"bounds"' -e '"center"' -e '"description"' -e '"name"' > tests/tmp/vector-1-vector.geojson 123 | cmp tests/tmp/vector.geojson tests/tmp/raster-vector.geojson 124 | cmp tests/tmp/vector.geojson tests/tmp/vector-1-vector.geojson 125 | cmp tests/tmp/vector.geojson tests/fixture/thresh-1.geojson 126 | rm -rf tests/tmp 127 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | tile-count 2 | ========== 3 | 4 | A tool for accumulating point counts by tile. 5 | 6 | Installation 7 | ------------ 8 | 9 | Tile-count requires sqlite3 and libpng. 10 | 11 | ``` 12 | $ sudo apt-get install libsqlite3-dev libpng-dev # Linux 13 | $ brew install libpng # MacOS 14 | $ make install 15 | ``` 16 | 17 | Creating a count 18 | ---------------- 19 | 20 | tile-count-create [-q] [-s binsize] -o out.count [file.csv ...] [file.json ...] 21 | 22 | * The `-s` option specifies the maximum precision of the data, so that duplicates 23 | beyond this precision can be pre-summed to make the data file smaller. 24 | * The `-q` option silences the progress indicator. 25 | 26 | If the input is CSV, it is a list of records in the form: 27 | 28 | lon,lat 29 | 30 | or 31 | 32 | lon,lat,count 33 | 34 | If it is JSON, any array of two or more numbers will be treated as a longitude-latitude 35 | pair. This includes GeoJSON Points as well as the points that make up GeoJSON MultiPoints, 36 | LineStrings, MultiLineStrings, Polygons, and MultiPolygons. Beware that it also includes 37 | anything else that might be mistaken for a longitude-latitude pair. 
38 | 39 | The input is first streamed into the internal format specified below (minus the header) 40 | and then 41 | sorted and merged into the same format in quadkey order, with adjacent duplicates 42 | summed. 43 | 44 | Merging counts 45 | -------------- 46 | 47 | tile-count-merge [-q] [-s binsize] -o out.count [-F] in1.count [in2.count ...] 48 | 49 | Produces a new count file from the specified count files, summing the counts for any points 50 | duplicated among them. 51 | 52 | * `-F`: Read a newline-separated list of files to merge from the standard input 53 | * `-s` *binsize*: The precision of all locations in the output file will be reduced as specified. 54 | * `-q`: Silence the progress indicator 55 | 56 | Decoding counts 57 | --------------- 58 | 59 | tile-count-decode in.count ... 60 | 61 | Outputs the `lon,lat,count` CSV that would recreate `in.count`. 62 | 63 | Tiling 64 | ------ 65 | 66 | tile-count-tile [options] -o out.mbtiles in.count 67 | tile-count-tile [options] -o out.mbtiles in.mbtiles [ … in.mbtiles ] 68 | 69 | The features in the `mbtiles` are a grid of squares with a `density` attribute 70 | indicating how many original points were accumulated into that binned point, 71 | normalized according to the densest point in the zoom level. 72 | 73 | If you are merging existing `.mbtiles` files, they all have to have been created 74 | with the same minzoom, maxzoom, and detail. The merged output can be either 75 | bitmap or vector as desired. The `.mbtiles` files being merged should be separated 76 | spatially, not temporally, because merging does not recalculate the reference 77 | brightness where tilesets overlap. Use `tile-count-merge` to combine data sets 78 | that are for the same area. 79 | 80 | ### Output tileset 81 | 82 | * `-n` *layername*: Specify the layer name in vector tile output. The default is `count`. 83 | * `-o` *out.mbtiles*: Specify the name of the output file. 
84 | * `-f`: Delete the output file if it already exists 85 | 86 | ### Zoom levels 87 | 88 | * `-d` *detail*: Make the grid within each tile 2^detail points on each side. The default is 9. 89 | * `-Z` *minzoom*: Specify the minzoom of the tileset. The default is 0. 90 | * `-z` *maxzoom*: Specify the maxzoom of the tileset. 91 | * `-s` *binsize*: Specify the zoom level whose tiles are used as bins. 92 | 93 | You must specify either `-z` (maxzoom) or `-s` (bin size) if you are creating a new tileset 94 | instead of merging existing tilesets. The *maxzoom* plus the *detail* always equals the *bin size*. 95 | 96 | ### Level bucketing 97 | 98 | * `-l` *levels*: Quantize the normalized counts within each tile into the specified number of levels. The default is 50. 99 | * `-m` *level*: Don't include normalized counts that are quantized below the specified level. 100 | * `-M` *count*: Don't include absolute counts that are below the specified count. 101 | * `-g` *gamma*: Scale the counts within each tile to the gamma'th root of their linear value. The default is 2.5. 102 | * `-y density`: Include an attribute in each vector feature indicating the normalized density of points within each bin. This is the default. 103 | * `-y count`: Include an attribute in each vector feature indicating the count of points within each bin. The count is only approximate because the levels are bucketed. 104 | 105 | ### Bitmap tiles 106 | 107 | * `-b`: Create PNG raster tiles instead of vectors. If you are not planning to use these tiles with Mapbox GL, 108 | you will probably also want to specify `-d8` for normal 256x256 web map tile resolution. 109 | * `-c` *rrggbb*: Specify the color to use in raster tiles as a hex color. 110 | * `-w`: Make tiles for a white background instead of a black background. 111 | 112 | ### Vector tiles 113 | 114 | * `-1`: Output an individual polygon for each bin instead of combining them into MultiPolygons. 
115 | * `-P`: Output Points or MultiPoints instead of Polygons or MultiPolygons 116 | 117 | ### Tile size 118 | 119 | * `-k`: Don't enforce the 500K limit on tile size 120 | * `-K`: Raise the minimum count threshold on each tile if necessary to keep it under 500K. 121 | 122 | ### Miscellaneous controls 123 | 124 | * `-p` *cpus*: Use the specified number of parallel tasks. 125 | * `-q`: Silence the progress indicator 126 | * `-B` *multiplier*: Multiply the normalized density by the specified *multiplier* to make it brighter or dimmer. 127 | 128 | Relationship between bin size, maxzoom, and detail 129 | -------------------------------------------------- 130 | 131 | What exactly the "detail" parameter means is often the source of confusion. 132 | It is the difference between the maxzoom and the bin size. 133 | 134 | So, for example, if you have data with a bin size of 23 and want to 135 | tile it with a maxzoom of 16, you should specify a detail of 7, because 136 | 16+7=23. 137 | 138 | Within each tile, the resolution of the tile is 2^detail, so if you 139 | specify a detail of 7, each tile will be a 128x128 grid of pixels or 140 | features, because 2^7=128. 141 | 142 | It is often more useful to work backward from the bin size 143 | to the maxzoom: if you have data with a bin size of 24, and you 144 | want 256x256 tiles, 2^8=256 so you should specify a detail of 8, 145 | and the maxzoom will be 16 because 24-8=16. 
146 | 147 | Internal file format 148 | -------------------- 149 | 150 | The `.count` files contain a header for versioning and identification 151 | followed (currently) by a simple list of 12-byte records containing: 152 | 153 | * 64-bit location quadkey 154 | * 32-bit count 155 | -------------------------------------------------------------------------------- /algorithm_mod.hpp: -------------------------------------------------------------------------------- 1 | // -*- C++ -*- 2 | //===-------------------------- algorithm ---------------------------------===// 3 | // 4 | // The LLVM Compiler Infrastructure 5 | // 6 | // This file is dual licensed under the MIT and the University of Illinois Open 7 | // Source Licenses. See LICENSE.TXT for details. 8 | // 9 | //===----------------------------------------------------------------------===// 10 | 11 | /* 12 | 13 | ============================================================================== 14 | LLVM Release License 15 | ============================================================================== 16 | University of Illinois/NCSA 17 | Open Source License 18 | 19 | Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. 20 | All rights reserved. 21 | 22 | Developed by: 23 | 24 | LLVM Team 25 | 26 | University of Illinois at Urbana-Champaign 27 | 28 | http://llvm.org 29 | 30 | Permission is hereby granted, free of charge, to any person obtaining a copy of 31 | this software and associated documentation files (the "Software"), to deal with 32 | the Software without restriction, including without limitation the rights to 33 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 34 | of the Software, and to permit persons to whom the Software is furnished to do 35 | so, subject to the following conditions: 36 | 37 | * Redistributions of source code must retain the above copyright notice, 38 | this list of conditions and the following disclaimers. 
39 | 40 | * Redistributions in binary form must reproduce the above copyright notice, 41 | this list of conditions and the following disclaimers in the 42 | documentation and/or other materials provided with the distribution. 43 | 44 | * Neither the names of the LLVM Team, University of Illinois at 45 | Urbana-Champaign, nor the names of its contributors may be used to 46 | endorse or promote products derived from this Software without specific 47 | prior written permission. 48 | 49 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 50 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 51 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 52 | CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 53 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 54 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE 55 | SOFTWARE. 56 | 57 | */ 58 | 59 | // Inlined copy of std::lower_bound, with an additional check added 60 | // for the impossible situation where the length of the list being 61 | // searched goes negative. 
62 | 63 | template 64 | _ForwardIterator 65 | __lower_bound1(_ForwardIterator __first, _ForwardIterator __last, const _Tp &__value_, _Compare __comp) { 66 | typedef typename std::iterator_traits<_ForwardIterator>::difference_type difference_type; 67 | difference_type __len = std::distance(__first, __last); 68 | while (__len > 0) { 69 | difference_type __l2 = __len / 2; 70 | _ForwardIterator __m = __first; 71 | std::advance(__m, __l2); 72 | if (__comp(*__m, __value_)) { 73 | __first = ++__m; 74 | __len -= __l2 + 1; 75 | } else 76 | __len = __l2; 77 | } 78 | if (__len < 0) { 79 | fprintf(stderr, "Error: Input file is out of sort\n"); 80 | exit(EXIT_FAILURE); 81 | } 82 | return __first; 83 | } 84 | 85 | template 86 | inline __attribute__((__visibility__("hidden"), __always_inline__)) 87 | _ForwardIterator 88 | lower_bound1(_ForwardIterator __first, _ForwardIterator __last, const _Tp &__value_, _Compare __comp) { 89 | typedef typename std::add_lvalue_reference<_Compare>::type _Comp_ref; 90 | return __lower_bound1<_Comp_ref>(__first, __last, __value_, __comp); 91 | } 92 | 93 | template 94 | inline __attribute__((__visibility__("hidden"), __always_inline__)) 95 | _ForwardIterator 96 | lower_bound1(_ForwardIterator __first, _ForwardIterator __last, const _Tp &__value_) { 97 | return lower_bound1(__first, __last, __value_, 98 | std::less::value_type>()); 99 | } 100 | -------------------------------------------------------------------------------- /create.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "tippecanoe/projection.hpp" 11 | #include "header.hpp" 12 | #include "serial.hpp" 13 | #include "merge.hpp" 14 | 15 | extern "C" { 16 | #include "jsonpull/jsonpull.h" 17 | } 18 | 19 | bool quiet = false; 20 | 21 | void usage(char **argv) { 22 | fprintf(stderr, "Usage: %s -o out.count [-s binsize] [in.csv 
...]\n", argv[0]); 23 | } 24 | 25 | void write_point(FILE *out, long long &seq, double lon, double lat, unsigned long long count) { 26 | if (seq % 100000 == 0) { 27 | if (!quiet) { 28 | fprintf(stderr, "Read %.1f million records\r", seq / 1000000.0); 29 | } 30 | } 31 | seq++; 32 | 33 | long long x, y; 34 | projection->project(lon, lat, 32, &x, &y); 35 | 36 | if (x >= 0 && y >= 0 && x < (1LL << 32) && y < (1LL << 32)) { 37 | unsigned long long index = encode(x, y); 38 | 39 | while (count > MAX_COUNT) { 40 | write64(out, index); 41 | write32(out, MAX_COUNT); 42 | 43 | count -= MAX_COUNT; 44 | } 45 | 46 | write64(out, index); 47 | write32(out, count); 48 | } 49 | } 50 | 51 | void read_json(FILE *out, FILE *in, const char *fname, long long &seq) { 52 | json_pull *jp = json_begin_file(in); 53 | 54 | while (1) { 55 | json_object *j = json_read(jp); 56 | if (j == NULL) { 57 | if (jp->error != NULL) { 58 | fprintf(stderr, "%s:%d: %s\n", fname, jp->line, jp->error); 59 | } 60 | 61 | json_free(jp->root); 62 | break; 63 | } 64 | 65 | if (j->type == JSON_HASH) { 66 | json_free(j); 67 | } else if (j->type == JSON_ARRAY) { 68 | if (j->length >= 2) { 69 | if (j->array[0]->type == JSON_NUMBER && j->array[1]->type == JSON_NUMBER) { 70 | write_point(out, seq, j->array[0]->number, j->array[1]->number, 1); 71 | } 72 | } 73 | json_free(j); 74 | } 75 | } 76 | 77 | json_end(jp); 78 | } 79 | 80 | void read_into(FILE *out, FILE *in, const char *fname, long long &seq) { 81 | int c = getc(in); 82 | if (c != EOF) { 83 | ungetc(c, in); 84 | } 85 | if (c == '{') { 86 | read_json(out, in, fname, seq); 87 | return; 88 | } 89 | 90 | size_t line = 0; 91 | char s[2000]; 92 | while (fgets(s, 2000, in)) { 93 | double lon, lat; 94 | long long count; 95 | 96 | line++; 97 | size_t n = sscanf(s, "%lf,%lf,%lld", &lon, &lat, &count); 98 | if (n == 2) { 99 | count = 1; 100 | } else if (n != 3) { 101 | fprintf(stderr, "%s:%zu: Can't understand %s", fname, line, s); 102 | continue; 103 | } 104 | 105 | if 
(count < 0) { 106 | fprintf(stderr, "%s:%zu: Count is negative in %s\n", fname, line, s); 107 | exit(EXIT_FAILURE); 108 | } 109 | 110 | if (count > (long long) MAX_COUNT) { 111 | fprintf(stderr, "%s:%zu: Count is too large in %s\n", fname, line, s); 112 | exit(EXIT_FAILURE); 113 | } 114 | 115 | write_point(out, seq, lon, lat, count); 116 | } 117 | } 118 | 119 | int indexcmp(const void *p1, const void *p2) { 120 | return memcmp(p1, p2, INDEX_BYTES); 121 | } 122 | 123 | void *run_sort(void *p) { 124 | struct merge *m = (struct merge *) p; 125 | 126 | void *map = mmap(NULL, m->end - m->start, PROT_READ | PROT_WRITE, MAP_PRIVATE, m->fd, m->start); 127 | if (map == MAP_FAILED) { 128 | perror("mmap (sort)"); 129 | exit(EXIT_FAILURE); 130 | } 131 | 132 | qsort(map, (m->end - m->start) / RECORD_BYTES, RECORD_BYTES, indexcmp); 133 | 134 | // Sorting and then copying avoids the need to 135 | // write out intermediate stages of the sort. 136 | 137 | void *map2 = mmap(NULL, m->end - m->start, PROT_READ | PROT_WRITE, MAP_SHARED, m->fd, m->start); 138 | if (map2 == MAP_FAILED) { 139 | perror("mmap (write)"); 140 | exit(EXIT_FAILURE); 141 | } 142 | 143 | memcpy(map2, map, m->end - m->start); 144 | 145 | munmap(map, m->end - m->start); 146 | munmap(map2, m->end - m->start); 147 | 148 | return NULL; 149 | } 150 | 151 | void sort_and_merge(int fd, int out, int zoom, size_t cpus) { 152 | struct stat st; 153 | if (fstat(fd, &st) < 0) { 154 | perror("stat"); 155 | exit(EXIT_FAILURE); 156 | } 157 | 158 | if (st.st_size % RECORD_BYTES != 0) { 159 | fprintf(stderr, "File size not a multiple of record length\n"); 160 | exit(EXIT_FAILURE); 161 | } 162 | 163 | long long to_sort = st.st_size; 164 | int bytes = RECORD_BYTES; 165 | 166 | int page = sysconf(_SC_PAGESIZE); 167 | long long unit = (50 * 1024 * 1024 / bytes) * bytes; 168 | while (unit % page != 0) { 169 | unit += bytes; 170 | } 171 | 172 | size_t nmerges = (to_sort + unit - 1) / unit; 173 | struct merge merges[nmerges]; 174 | 175 | 
long long start; 176 | for (start = 0; start < to_sort; start += unit) { 177 | long long end = start + unit; 178 | if (end > to_sort) { 179 | end = to_sort; 180 | } 181 | 182 | merges[start / unit].start = start; 183 | merges[start / unit].end = end; 184 | merges[start / unit].fd = fd; 185 | } 186 | 187 | for (size_t i = 0; i < nmerges; i += cpus) { 188 | if (!quiet) { 189 | fprintf(stderr, "Sorting part %zu of %zu \r", i + 1, nmerges); 190 | } 191 | 192 | pthread_t pthreads[cpus]; 193 | for (size_t j = 0; j < cpus && i + j < nmerges; j++) { 194 | if (pthread_create(&pthreads[j], NULL, run_sort, &merges[i + j]) != 0) { 195 | perror("pthread_create (sort)"); 196 | exit(EXIT_FAILURE); 197 | } 198 | } 199 | 200 | for (size_t j = 0; j < cpus && i + j < nmerges; j++) { 201 | void *retval; 202 | 203 | if (pthread_join(pthreads[j], &retval) != 0) { 204 | perror("pthread_join (sort)"); 205 | exit(EXIT_FAILURE); 206 | } 207 | } 208 | } 209 | 210 | if (write(out, header_text, HEADER_LEN) != HEADER_LEN) { 211 | perror("write header"); 212 | exit(EXIT_FAILURE); 213 | } 214 | 215 | if (to_sort > 0) { 216 | void *map = mmap(NULL, to_sort, PROT_READ, MAP_SHARED, fd, 0); 217 | if (map == MAP_FAILED) { 218 | perror("mmap (for merge)"); 219 | exit(EXIT_FAILURE); 220 | } 221 | 222 | for (size_t i = 0; i < nmerges; i++) { 223 | merges[i].map = (unsigned char *) map; 224 | } 225 | 226 | do_merge(merges, nmerges, out, bytes, to_sort / bytes, zoom, quiet, cpus, 0, 0); 227 | munmap(map, st.st_size); 228 | } 229 | } 230 | 231 | int main(int argc, char **argv) { 232 | extern int optind; 233 | extern char *optarg; 234 | 235 | char *outfile = NULL; 236 | int zoom = 32; 237 | size_t cpus = sysconf(_SC_NPROCESSORS_ONLN); 238 | 239 | int i; 240 | while ((i = getopt(argc, argv, "fs:o:p:q")) != -1) { 241 | switch (i) { 242 | case 's': 243 | zoom = atoi(optarg); 244 | break; 245 | 246 | case 'o': 247 | outfile = optarg; 248 | break; 249 | 250 | case 'p': 251 | cpus = atoi(optarg); 252 | break; 253 
| 254 | case 'q': 255 | quiet = true; 256 | break; 257 | 258 | default: 259 | usage(argv); 260 | exit(EXIT_FAILURE); 261 | } 262 | } 263 | 264 | if (outfile == NULL) { 265 | usage(argv); 266 | exit(EXIT_FAILURE); 267 | } 268 | 269 | if (zoom < 0 || zoom > 32) { 270 | fprintf(stderr, "%s: zoom (-s) must be in the range 0 to 32, not %d\n", argv[0], zoom); 271 | exit(EXIT_FAILURE); 272 | } 273 | 274 | int fd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, 0777); 275 | if (fd < 0) { 276 | perror(outfile); 277 | exit(EXIT_FAILURE); 278 | } 279 | int fd2 = dup(fd); 280 | if (fd2 < 0) { 281 | perror("dup output file"); 282 | exit(EXIT_FAILURE); 283 | } 284 | FILE *fp = fdopen(fd2, "wb"); 285 | if (fp == NULL) { 286 | perror("fdopen output file"); 287 | exit(EXIT_FAILURE); 288 | } 289 | if (unlink(outfile) != 0) { 290 | perror("unlink output file"); 291 | exit(EXIT_FAILURE); 292 | } 293 | 294 | long long seq = 0; 295 | if (optind == argc) { 296 | read_into(fp, stdin, "standard input", seq); 297 | } else { 298 | for (; optind < argc; optind++) { 299 | FILE *in = fopen(argv[optind], "r"); 300 | if (in == NULL) { 301 | perror(argv[optind]); 302 | exit(EXIT_FAILURE); 303 | } else { 304 | read_into(fp, in, argv[optind], seq); 305 | fclose(in); 306 | } 307 | } 308 | } 309 | if (!quiet) { 310 | fprintf(stderr, "Total of %lld\n", seq); 311 | } 312 | 313 | if (fflush(fp) != 0) { 314 | perror("flush output file"); 315 | exit(EXIT_FAILURE); 316 | } 317 | if (fclose(fp) != 0) { 318 | perror("close output file"); 319 | exit(EXIT_FAILURE); 320 | } 321 | 322 | int f = open(outfile, O_CREAT | O_TRUNC | O_RDWR, 0777); 323 | if (f < 0) { 324 | perror(outfile); 325 | exit(EXIT_FAILURE); 326 | } 327 | sort_and_merge(fd, f, zoom, cpus); 328 | if (close(f) != 0) { 329 | perror("close"); 330 | } 331 | 332 | return 0; 333 | } 334 | -------------------------------------------------------------------------------- /decode.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "tippecanoe/projection.hpp" 7 | #include "header.hpp" 8 | #include "serial.hpp" 9 | #include "milo/dtoa_milo.h" 10 | 11 | void usage(char **argv) { 12 | fprintf(stderr, "Usage: %s file.count ...\n", argv[0]); 13 | } 14 | 15 | int main(int argc, char **argv) { 16 | extern int optind; 17 | 18 | int i; 19 | while ((i = getopt(argc, argv, "")) != -1) { 20 | switch (i) { 21 | default: 22 | usage(argv); 23 | exit(EXIT_FAILURE); 24 | } 25 | } 26 | 27 | if (optind == argc) { 28 | usage(argv); 29 | exit(EXIT_FAILURE); 30 | } 31 | 32 | for (; optind < argc; optind++) { 33 | FILE *f = fopen(argv[optind], "rb"); 34 | if (f == NULL) { 35 | perror(optind[argv]); 36 | exit(EXIT_FAILURE); 37 | } 38 | 39 | char header[HEADER_LEN]; 40 | if (fread(header, HEADER_LEN, 1, f) != 1) { 41 | perror("read header"); 42 | exit(EXIT_FAILURE); 43 | } 44 | 45 | if (memcmp(header, header_text, HEADER_LEN) != 0) { 46 | fprintf(stderr, "%s: not a tile-count file\n", argv[optind]); 47 | exit(EXIT_FAILURE); 48 | } 49 | 50 | unsigned char buf[RECORD_BYTES]; 51 | while (fread(buf, RECORD_BYTES, 1, f) == 1) { 52 | unsigned long long index = read64(buf); 53 | unsigned long long count = read32(buf + INDEX_BYTES); 54 | 55 | unsigned x, y; 56 | decode(index, &x, &y); 57 | 58 | double lon, lat; 59 | projection->unproject(x, y, 32, &lon, &lat); 60 | printf("%s,%s,%llu\n", milo::dtoa_milo(lon).c_str(), milo::dtoa_milo(lat).c_str(), count); 61 | } 62 | 63 | fclose(f); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /header.cpp: -------------------------------------------------------------------------------- 1 | #include "header.hpp" 2 | 3 | const char header_text[HEADER_LEN] = "tile-count v2 "; // and implicit null 4 | -------------------------------------------------------------------------------- 
/header.hpp: -------------------------------------------------------------------------------- 1 | #define HEADER_LEN 16 2 | extern const char header_text[HEADER_LEN]; 3 | 4 | #define INDEX_BYTES 8 5 | #define COUNT_BYTES 4 6 | #define RECORD_BYTES (INDEX_BYTES + COUNT_BYTES) 7 | 8 | #define MAX_COUNT (1ULL << 31) 9 | -------------------------------------------------------------------------------- /jsonpull/jsonpull.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE // for asprintf() 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "jsonpull.h" 8 | 9 | #define BUFFER 10000 10 | 11 | json_pull *json_begin(ssize_t (*read)(struct json_pull *, char *buffer, size_t n), void *source) { 12 | json_pull *j = malloc(sizeof(json_pull)); 13 | if (j == NULL) { 14 | perror("Out of memory"); 15 | exit(EXIT_FAILURE); 16 | } 17 | 18 | j->error = NULL; 19 | j->line = 1; 20 | j->container = NULL; 21 | j->root = NULL; 22 | 23 | j->read = read; 24 | j->source = source; 25 | j->buffer_head = 0; 26 | j->buffer_tail = 0; 27 | 28 | j->buffer = malloc(BUFFER); 29 | if (j->buffer == NULL) { 30 | perror("Out of memory"); 31 | exit(EXIT_FAILURE); 32 | } 33 | 34 | return j; 35 | } 36 | 37 | static inline int peek(json_pull *j) { 38 | if (j->buffer_head < j->buffer_tail) { 39 | return j->buffer[j->buffer_head]; 40 | } else { 41 | j->buffer_head = 0; 42 | j->buffer_tail = j->read(j, j->buffer, BUFFER); 43 | if (j->buffer_head >= j->buffer_tail) { 44 | return EOF; 45 | } 46 | return j->buffer[j->buffer_head]; 47 | } 48 | } 49 | 50 | static inline int next(json_pull *j) { 51 | if (j->buffer_head < j->buffer_tail) { 52 | return (unsigned char) j->buffer[j->buffer_head++]; 53 | } else { 54 | j->buffer_head = 0; 55 | j->buffer_tail = j->read(j, j->buffer, BUFFER); 56 | if (j->buffer_head >= j->buffer_tail) { 57 | return EOF; 58 | } 59 | return (unsigned char) j->buffer[j->buffer_head++]; 60 | } 61 | } 62 | 63 | static 
ssize_t read_file(json_pull *j, char *buffer, size_t n) { 64 | return fread(buffer, 1, n, j->source); 65 | } 66 | 67 | json_pull *json_begin_file(FILE *f) { 68 | return json_begin(read_file, f); 69 | } 70 | 71 | static ssize_t read_string(json_pull *j, char *buffer, size_t n) { 72 | char *cp = j->source; 73 | size_t out = 0; 74 | 75 | while (out < n && cp[out] != '\0') { 76 | buffer[out] = cp[out]; 77 | out++; 78 | } 79 | 80 | j->source = cp + out; 81 | return out; 82 | } 83 | 84 | json_pull *json_begin_string(char *s) { 85 | return json_begin(read_string, s); 86 | } 87 | 88 | void json_end(json_pull *p) { 89 | json_free(p->root); 90 | free(p->buffer); 91 | free(p); 92 | } 93 | 94 | static inline int read_wrap(json_pull *j) { 95 | int c = next(j); 96 | 97 | if (c == '\n') { 98 | j->line++; 99 | } 100 | 101 | return c; 102 | } 103 | 104 | #define SIZE_FOR(i, size) ((size_t)((((i) + 31) & ~31) * size)) 105 | 106 | static json_object *fabricate_object(json_pull *jp, json_object *parent, json_type type) { 107 | json_object *o = malloc(sizeof(struct json_object)); 108 | if (o == NULL) { 109 | perror("Out of memory"); 110 | exit(EXIT_FAILURE); 111 | } 112 | o->type = type; 113 | o->parent = parent; 114 | o->array = NULL; 115 | o->keys = NULL; 116 | o->values = NULL; 117 | o->length = 0; 118 | o->parser = jp; 119 | return o; 120 | } 121 | 122 | static json_object *add_object(json_pull *j, json_type type) { 123 | json_object *c = j->container; 124 | json_object *o = fabricate_object(j, c, type); 125 | 126 | if (c != NULL) { 127 | if (c->type == JSON_ARRAY) { 128 | if (c->expect == JSON_ITEM) { 129 | if (SIZE_FOR(c->length + 1, sizeof(json_object *)) != SIZE_FOR(c->length, sizeof(json_object *))) { 130 | if (SIZE_FOR(c->length + 1, sizeof(json_object *)) < SIZE_FOR(c->length, sizeof(json_object *))) { 131 | fprintf(stderr, "Array size overflow\n"); 132 | exit(EXIT_FAILURE); 133 | } 134 | c->array = realloc(c->array, SIZE_FOR(c->length + 1, sizeof(json_object *))); 135 | if 
(c->array == NULL) { 136 | perror("Out of memory"); 137 | exit(EXIT_FAILURE); 138 | } 139 | } 140 | 141 | c->array[c->length++] = o; 142 | c->expect = JSON_COMMA; 143 | } else { 144 | j->error = "Expected a comma, not a list item"; 145 | free(o); 146 | return NULL; 147 | } 148 | } else if (c->type == JSON_HASH) { 149 | if (c->expect == JSON_VALUE) { 150 | c->values[c->length - 1] = o; 151 | c->expect = JSON_COMMA; 152 | } else if (c->expect == JSON_KEY) { 153 | if (type != JSON_STRING) { 154 | j->error = "Hash key is not a string"; 155 | free(o); 156 | return NULL; 157 | } 158 | 159 | if (SIZE_FOR(c->length + 1, sizeof(json_object *)) != SIZE_FOR(c->length, sizeof(json_object *))) { 160 | if (SIZE_FOR(c->length + 1, sizeof(json_object *)) < SIZE_FOR(c->length, sizeof(json_object *))) { 161 | fprintf(stderr, "Hash size overflow\n"); 162 | exit(EXIT_FAILURE); 163 | } 164 | c->keys = realloc(c->keys, SIZE_FOR(c->length + 1, sizeof(json_object *))); 165 | c->values = realloc(c->values, SIZE_FOR(c->length + 1, sizeof(json_object *))); 166 | if (c->keys == NULL || c->values == NULL) { 167 | perror("Out of memory"); 168 | exit(EXIT_FAILURE); 169 | } 170 | } 171 | 172 | c->keys[c->length] = o; 173 | c->values[c->length] = NULL; 174 | c->length++; 175 | c->expect = JSON_COLON; 176 | } else { 177 | j->error = "Expected a comma or colon"; 178 | free(o); 179 | return NULL; 180 | } 181 | } 182 | } else { 183 | if (j->root != NULL) { 184 | json_free(j->root); 185 | } 186 | 187 | j->root = o; 188 | } 189 | 190 | return o; 191 | } 192 | 193 | json_object *json_hash_get(json_object *o, const char *s) { 194 | if (o == NULL || o->type != JSON_HASH) { 195 | return NULL; 196 | } 197 | 198 | size_t i; 199 | for (i = 0; i < o->length; i++) { 200 | if (o->keys[i] != NULL && o->keys[i]->type == JSON_STRING) { 201 | if (strcmp(o->keys[i]->string, s) == 0) { 202 | return o->values[i]; 203 | } 204 | } 205 | } 206 | 207 | return NULL; 208 | } 209 | 210 | struct string { 211 | char *buf; 212 | 
/* Growable, always NUL-terminated character buffer. */
struct string {
	char *buf;	/* heap storage */
	size_t n;	/* characters stored, not counting the terminator */
	size_t nalloc;	/* bytes allocated for buf */
};

/*
 * Enlarges s->buf by `extra` bytes. Exits the process if the new size
 * wraps around or the reallocation fails.
 */
static void string_grow(struct string *s, size_t extra) {
	size_t before = s->nalloc;

	s->nalloc += extra;
	if (s->nalloc <= before) {
		fprintf(stderr, "String size overflowed\n");
		exit(EXIT_FAILURE);
	}

	s->buf = (char *) realloc(s->buf, s->nalloc);
	if (s->buf == NULL) {
		perror("Out of memory");
		exit(EXIT_FAILURE);
	}
}

/* Sets up `s` as an empty string with 500 bytes of initial storage. */
static void string_init(struct string *s) {
	s->n = 0;
	s->nalloc = 500;

	s->buf = (char *) malloc(s->nalloc);
	if (s->buf == NULL) {
		perror("Out of memory");
		exit(EXIT_FAILURE);
	}

	s->buf[0] = '\0';
}

/* Appends the single character `c`, growing the buffer in 500-byte steps. */
static void string_append(struct string *s, char c) {
	if (s->n + 2 >= s->nalloc) {
		string_grow(s, 500);
	}

	s->buf[s->n] = c;
	s->n++;
	s->buf[s->n] = '\0';
}

/* Appends the NUL-terminated string `add`. */
static void string_append_string(struct string *s, char *add) {
	size_t len = strlen(add);

	if (s->n + len + 1 >= s->nalloc) {
		string_grow(s, 500 + len);
	}

	while (*add != '\0') {
		s->buf[s->n] = *add;
		s->n++;
		add++;
	}

	s->buf[s->n] = '\0';
}

/* Releases the storage owned by `s`. */
static void string_free(struct string *s) {
	free(s->buf);
}
} 295 | 296 | return NULL; 297 | } 298 | } while (c == ' ' || c == '\t' || c == '\r' || c == '\n'); 299 | 300 | /////////////////////////// Arrays 301 | 302 | if (c == '[') { 303 | json_object *o = add_object(j, JSON_ARRAY); 304 | if (o == NULL) { 305 | return NULL; 306 | } 307 | j->container = o; 308 | j->container->expect = JSON_ITEM; 309 | 310 | if (cb != NULL) { 311 | cb(JSON_ARRAY, j, state); 312 | } 313 | 314 | goto again; 315 | } else if (c == ']') { 316 | if (j->container == NULL) { 317 | j->error = "Found ] at top level"; 318 | return NULL; 319 | } 320 | 321 | if (j->container->type != JSON_ARRAY) { 322 | j->error = "Found ] not in an array"; 323 | return NULL; 324 | } 325 | 326 | if (j->container->expect != JSON_COMMA) { 327 | if (!(j->container->expect == JSON_ITEM && j->container->length == 0)) { 328 | j->error = "Found ] without final element"; 329 | return NULL; 330 | } 331 | } 332 | 333 | json_object *ret = j->container; 334 | j->container = ret->parent; 335 | return ret; 336 | } 337 | 338 | /////////////////////////// Hashes 339 | 340 | if (c == '{') { 341 | json_object *o = add_object(j, JSON_HASH); 342 | if (o == NULL) { 343 | return NULL; 344 | } 345 | j->container = o; 346 | j->container->expect = JSON_KEY; 347 | 348 | if (cb != NULL) { 349 | cb(JSON_HASH, j, state); 350 | } 351 | 352 | goto again; 353 | } else if (c == '}') { 354 | if (j->container == NULL) { 355 | j->error = "Found } at top level"; 356 | return NULL; 357 | } 358 | 359 | if (j->container->type != JSON_HASH) { 360 | j->error = "Found } not in a hash"; 361 | return NULL; 362 | } 363 | 364 | if (j->container->expect != JSON_COMMA) { 365 | if (!(j->container->expect == JSON_KEY && j->container->length == 0)) { 366 | j->error = "Found } without final element"; 367 | return NULL; 368 | } 369 | } 370 | 371 | json_object *ret = j->container; 372 | j->container = ret->parent; 373 | return ret; 374 | } 375 | 376 | /////////////////////////// Null 377 | 378 | if (c == 'n') { 379 | if 
(read_wrap(j) != 'u' || read_wrap(j) != 'l' || read_wrap(j) != 'l') { 380 | j->error = "Found misspelling of null"; 381 | return NULL; 382 | } 383 | 384 | return add_object(j, JSON_NULL); 385 | } 386 | 387 | /////////////////////////// True 388 | 389 | if (c == 't') { 390 | if (read_wrap(j) != 'r' || read_wrap(j) != 'u' || read_wrap(j) != 'e') { 391 | j->error = "Found misspelling of true"; 392 | return NULL; 393 | } 394 | 395 | return add_object(j, JSON_TRUE); 396 | } 397 | 398 | /////////////////////////// False 399 | 400 | if (c == 'f') { 401 | if (read_wrap(j) != 'a' || read_wrap(j) != 'l' || read_wrap(j) != 's' || read_wrap(j) != 'e') { 402 | j->error = "Found misspelling of false"; 403 | return NULL; 404 | } 405 | 406 | return add_object(j, JSON_FALSE); 407 | } 408 | 409 | /////////////////////////// Comma 410 | 411 | if (c == ',') { 412 | if (j->container != NULL) { 413 | if (j->container->expect != JSON_COMMA) { 414 | j->error = "Found unexpected comma"; 415 | return NULL; 416 | } 417 | 418 | if (j->container->type == JSON_HASH) { 419 | j->container->expect = JSON_KEY; 420 | } else { 421 | j->container->expect = JSON_ITEM; 422 | } 423 | } 424 | 425 | if (cb != NULL) { 426 | cb(JSON_COMMA, j, state); 427 | } 428 | 429 | goto again; 430 | } 431 | 432 | /////////////////////////// Colon 433 | 434 | if (c == ':') { 435 | if (j->container == NULL) { 436 | j->error = "Found colon at top level"; 437 | return NULL; 438 | } 439 | 440 | if (j->container->expect != JSON_COLON) { 441 | j->error = "Found unexpected colon"; 442 | return NULL; 443 | } 444 | 445 | j->container->expect = JSON_VALUE; 446 | 447 | if (cb != NULL) { 448 | cb(JSON_COLON, j, state); 449 | } 450 | 451 | goto again; 452 | } 453 | 454 | /////////////////////////// Numbers 455 | 456 | if (c == '-' || (c >= '0' && c <= '9')) { 457 | struct string val; 458 | string_init(&val); 459 | 460 | if (c == '-') { 461 | string_append(&val, c); 462 | c = read_wrap(j); 463 | } 464 | 465 | if (c == '0') { 466 | 
string_append(&val, c); 467 | } else if (c >= '1' && c <= '9') { 468 | string_append(&val, c); 469 | c = peek(j); 470 | 471 | while (c >= '0' && c <= '9') { 472 | string_append(&val, read_wrap(j)); 473 | c = peek(j); 474 | } 475 | } 476 | 477 | if (peek(j) == '.') { 478 | string_append(&val, read_wrap(j)); 479 | 480 | c = peek(j); 481 | if (c < '0' || c > '9') { 482 | j->error = "Decimal point without digits"; 483 | string_free(&val); 484 | return NULL; 485 | } 486 | while (c >= '0' && c <= '9') { 487 | string_append(&val, read_wrap(j)); 488 | c = peek(j); 489 | } 490 | } 491 | 492 | c = peek(j); 493 | if (c == 'e' || c == 'E') { 494 | string_append(&val, read_wrap(j)); 495 | 496 | c = peek(j); 497 | if (c == '+' || c == '-') { 498 | string_append(&val, read_wrap(j)); 499 | } 500 | 501 | c = peek(j); 502 | if (c < '0' || c > '9') { 503 | j->error = "Exponent without digits"; 504 | string_free(&val); 505 | return NULL; 506 | } 507 | while (c >= '0' && c <= '9') { 508 | string_append(&val, read_wrap(j)); 509 | c = peek(j); 510 | } 511 | } 512 | 513 | json_object *n = add_object(j, JSON_NUMBER); 514 | if (n != NULL) { 515 | n->number = atof(val.buf); 516 | n->string = val.buf; 517 | n->length = val.n; 518 | } else { 519 | string_free(&val); 520 | } 521 | return n; 522 | } 523 | 524 | /////////////////////////// Strings 525 | 526 | if (c == '"') { 527 | struct string val; 528 | string_init(&val); 529 | 530 | while ((c = read_wrap(j)) != EOF) { 531 | if (c == '"') { 532 | break; 533 | } else if (c == '\\') { 534 | c = read_wrap(j); 535 | 536 | if (c == '"') { 537 | string_append(&val, '"'); 538 | } else if (c == '\\') { 539 | string_append(&val, '\\'); 540 | } else if (c == '/') { 541 | string_append(&val, '/'); 542 | } else if (c == 'b') { 543 | string_append(&val, '\b'); 544 | } else if (c == 'f') { 545 | string_append(&val, '\f'); 546 | } else if (c == 'n') { 547 | string_append(&val, '\n'); 548 | } else if (c == 'r') { 549 | string_append(&val, '\r'); 550 | } else 
if (c == 't') { 551 | string_append(&val, '\t'); 552 | } else if (c == 'u') { 553 | char hex[5] = "aaaa"; 554 | int i; 555 | for (i = 0; i < 4; i++) { 556 | hex[i] = read_wrap(j); 557 | if (hex[i] < '0' || (hex[i] > '9' && hex[i] < 'A') || (hex[i] > 'F' && hex[i] < 'a') || hex[i] > 'f') { 558 | j->error = "Invalid \\u hex character"; 559 | string_free(&val); 560 | return NULL; 561 | } 562 | } 563 | unsigned long ch = strtoul(hex, NULL, 16); 564 | if (ch <= 0x7F) { 565 | string_append(&val, ch); 566 | } else if (ch <= 0x7FF) { 567 | string_append(&val, 0xC0 | (ch >> 6)); 568 | string_append(&val, 0x80 | (ch & 0x3F)); 569 | } else { 570 | string_append(&val, 0xE0 | (ch >> 12)); 571 | string_append(&val, 0x80 | ((ch >> 6) & 0x3F)); 572 | string_append(&val, 0x80 | (ch & 0x3F)); 573 | } 574 | } else { 575 | j->error = "Found backslash followed by unknown character"; 576 | string_free(&val); 577 | return NULL; 578 | } 579 | } else if (c < ' ') { 580 | j->error = "Found control character in string"; 581 | string_free(&val); 582 | return NULL; 583 | } else { 584 | string_append(&val, c); 585 | } 586 | } 587 | if (c == EOF) { 588 | j->error = "String without closing quote mark"; 589 | string_free(&val); 590 | return NULL; 591 | } 592 | 593 | json_object *s = add_object(j, JSON_STRING); 594 | if (s != NULL) { 595 | s->string = val.buf; 596 | s->length = val.n; 597 | } else { 598 | string_free(&val); 599 | } 600 | return s; 601 | } 602 | 603 | j->error = "Found unexpected character"; 604 | return NULL; 605 | } 606 | 607 | json_object *json_read(json_pull *j) { 608 | return json_read_separators(j, NULL, NULL); 609 | } 610 | 611 | json_object *json_read_tree(json_pull *p) { 612 | json_object *j; 613 | 614 | while ((j = json_read(p)) != NULL) { 615 | if (j->parent == NULL) { 616 | return j; 617 | } 618 | } 619 | 620 | return NULL; 621 | } 622 | 623 | void json_free(json_object *o) { 624 | size_t i; 625 | 626 | if (o == NULL) { 627 | return; 628 | } 629 | 630 | // Free any data 
linked from here 631 | 632 | if (o->type == JSON_ARRAY) { 633 | json_object **a = o->array; 634 | size_t n = o->length; 635 | 636 | o->array = NULL; 637 | o->length = 0; 638 | 639 | for (i = 0; i < n; i++) { 640 | json_free(a[i]); 641 | } 642 | 643 | free(a); 644 | } else if (o->type == JSON_HASH) { 645 | json_object **k = o->keys; 646 | json_object **v = o->values; 647 | size_t n = o->length; 648 | 649 | o->keys = NULL; 650 | o->values = NULL; 651 | o->length = 0; 652 | 653 | for (i = 0; i < n; i++) { 654 | json_free(k[i]); 655 | json_free(v[i]); 656 | } 657 | 658 | free(k); 659 | free(v); 660 | } else if (o->type == JSON_STRING || o->type == JSON_NUMBER) { 661 | free(o->string); 662 | } 663 | 664 | json_disconnect(o); 665 | 666 | free(o); 667 | } 668 | 669 | void json_disconnect(json_object *o) { 670 | // Expunge references to this as an array element 671 | // or a hash key or value. 672 | 673 | if (o->parent != NULL) { 674 | if (o->parent->type == JSON_ARRAY) { 675 | size_t i; 676 | 677 | for (i = 0; i < o->parent->length; i++) { 678 | if (o->parent->array[i] == o) { 679 | break; 680 | } 681 | } 682 | 683 | if (i < o->parent->length) { 684 | memmove(o->parent->array + i, o->parent->array + i + 1, o->parent->length - i - 1); 685 | o->parent->length--; 686 | } 687 | } 688 | 689 | if (o->parent->type == JSON_HASH) { 690 | size_t i; 691 | 692 | for (i = 0; i < o->parent->length; i++) { 693 | if (o->parent->keys[i] == o) { 694 | o->parent->keys[i] = fabricate_object(o->parser, o->parent, JSON_NULL); 695 | break; 696 | } 697 | if (o->parent->values[i] == o) { 698 | o->parent->values[i] = fabricate_object(o->parser, o->parent, JSON_NULL); 699 | break; 700 | } 701 | } 702 | 703 | if (i < o->parent->length) { 704 | if (o->parent->keys[i] != NULL && o->parent->keys[i]->type == JSON_NULL) { 705 | if (o->parent->values[i] != NULL && o->parent->values[i]->type == JSON_NULL) { 706 | free(o->parent->keys[i]); 707 | free(o->parent->values[i]); 708 | 709 | 
memmove(o->parent->keys + i, o->parent->keys + i + 1, o->parent->length - i - 1); 710 | memmove(o->parent->values + i, o->parent->values + i + 1, o->parent->length - i - 1); 711 | o->parent->length--; 712 | } 713 | } 714 | } 715 | } 716 | } 717 | 718 | if (o->parser != NULL && o->parser->root == o) { 719 | o->parser->root = NULL; 720 | } 721 | 722 | o->parent = NULL; 723 | } 724 | 725 | static void json_print_one(struct string *val, json_object *o) { 726 | if (o == NULL) { 727 | string_append_string(val, "NULL"); 728 | } else if (o->type == JSON_STRING) { 729 | string_append(val, '\"'); 730 | 731 | char *cp; 732 | for (cp = o->string; *cp != '\0'; cp++) { 733 | if (*cp == '\\' || *cp == '"') { 734 | string_append(val, '\\'); 735 | string_append(val, *cp); 736 | } else if (*cp >= 0 && *cp < ' ') { 737 | char *s; 738 | if (asprintf(&s, "\\u%04x", *cp) >= 0) { 739 | string_append_string(val, s); 740 | free(s); 741 | } 742 | } else { 743 | string_append(val, *cp); 744 | } 745 | } 746 | 747 | string_append(val, '\"'); 748 | } else if (o->type == JSON_NUMBER) { 749 | string_append_string(val, o->string); 750 | } else if (o->type == JSON_NULL) { 751 | string_append_string(val, "null"); 752 | } else if (o->type == JSON_TRUE) { 753 | string_append_string(val, "true"); 754 | } else if (o->type == JSON_FALSE) { 755 | string_append_string(val, "false"); 756 | } else if (o->type == JSON_HASH) { 757 | string_append(val, '}'); 758 | } else if (o->type == JSON_ARRAY) { 759 | string_append(val, ']'); 760 | } 761 | } 762 | 763 | static void json_print(struct string *val, json_object *o) { 764 | if (o == NULL) { 765 | // Hash value in incompletely read hash 766 | string_append_string(val, "NULL"); 767 | } else if (o->type == JSON_HASH) { 768 | string_append(val, '{'); 769 | 770 | size_t i; 771 | for (i = 0; i < o->length; i++) { 772 | json_print(val, o->keys[i]); 773 | string_append(val, ':'); 774 | json_print(val, o->values[i]); 775 | if (i + 1 < o->length) { 776 | 
string_append(val, ','); 777 | } 778 | } 779 | string_append(val, '}'); 780 | } else if (o->type == JSON_ARRAY) { 781 | string_append(val, '['); 782 | size_t i; 783 | for (i = 0; i < o->length; i++) { 784 | json_print(val, o->array[i]); 785 | if (i + 1 < o->length) { 786 | string_append(val, ','); 787 | } 788 | } 789 | string_append(val, ']'); 790 | } else { 791 | json_print_one(val, o); 792 | } 793 | } 794 | 795 | char *json_stringify(json_object *o) { 796 | struct string val; 797 | string_init(&val); 798 | json_print(&val, o); 799 | 800 | return val.buf; 801 | } 802 | -------------------------------------------------------------------------------- /jsonpull/jsonpull.h: -------------------------------------------------------------------------------- 1 | typedef enum json_type { 2 | // These types can be returned by json_read() 3 | JSON_HASH, 4 | JSON_ARRAY, 5 | JSON_NUMBER, 6 | JSON_STRING, 7 | JSON_TRUE, 8 | JSON_FALSE, 9 | JSON_NULL, 10 | 11 | // These and JSON_HASH and JSON_ARRAY can be called back by json_read_with_separators() 12 | JSON_COMMA, 13 | JSON_COLON, 14 | 15 | // These are only used internally as expectations of what comes next 16 | JSON_ITEM, 17 | JSON_KEY, 18 | JSON_VALUE, 19 | } json_type; 20 | 21 | typedef struct json_object { 22 | json_type type; 23 | struct json_object *parent; 24 | struct json_pull *parser; 25 | 26 | char *string; 27 | double number; 28 | 29 | struct json_object **array; 30 | struct json_object **keys; 31 | struct json_object **values; 32 | size_t length; 33 | 34 | int expect; 35 | } json_object; 36 | 37 | typedef struct json_pull { 38 | char *error; 39 | int line; 40 | 41 | ssize_t (*read)(struct json_pull *, char *buf, size_t n); 42 | void *source; 43 | char *buffer; 44 | ssize_t buffer_tail; 45 | ssize_t buffer_head; 46 | 47 | json_object *container; 48 | json_object *root; 49 | } json_pull; 50 | 51 | json_pull *json_begin_file(FILE *f); 52 | json_pull *json_begin_string(char *s); 53 | 54 | json_pull *json_begin(ssize_t 
(*read)(struct json_pull *, char *buffer, size_t n), void *source); 55 | void json_end(json_pull *p); 56 | 57 | typedef void (*json_separator_callback)(json_type type, json_pull *j, void *state); 58 | 59 | json_object *json_read_tree(json_pull *j); 60 | json_object *json_read(json_pull *j); 61 | json_object *json_read_separators(json_pull *j, json_separator_callback cb, void *state); 62 | void json_free(json_object *j); 63 | void json_disconnect(json_object *j); 64 | 65 | json_object *json_hash_get(json_object *o, const char *s); 66 | 67 | char *json_stringify(json_object *o); 68 | -------------------------------------------------------------------------------- /merge.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "merge.hpp" 12 | #include "header.hpp" 13 | #include "serial.hpp" 14 | #include "algorithm_mod.hpp" 15 | 16 | struct merger { 17 | unsigned char *start; 18 | unsigned char *end; 19 | 20 | bool operator<(const merger &m) const { 21 | // > 0 so that lowest quadkey comes first 22 | return memcmp(start, m.start, INDEX_BYTES) > 0; 23 | } 24 | }; 25 | 26 | unsigned char *do_merge1(std::vector &merges, size_t nmerges, unsigned char *f, int bytes, long long nrec, int zoom, bool quiet, std::atomic *progress, size_t shard, size_t nshards, size_t also_todo, size_t also_did) { 27 | std::priority_queue q; 28 | 29 | unsigned long long mask = 0; 30 | if (zoom != 0) { 31 | mask = 0xFFFFFFFFFFFFFFFFULL << (64 - 2 * zoom); 32 | } 33 | 34 | long long along = 0; 35 | long long reported = -1; 36 | 37 | for (size_t i = 0; i < nmerges; i++) { 38 | if (merges[i].start < merges[i].end) { 39 | q.push(merges[i]); 40 | } 41 | } 42 | 43 | unsigned long long current_index = 0; 44 | unsigned long long current_count = 0; 45 | 46 | while (q.size() != 0) { 47 | merger head = q.top(); 48 | q.pop(); 49 | 50 | 
unsigned long long new_index = read64(head.start) & mask; 51 | unsigned long long count = read32(head.start + INDEX_BYTES); 52 | 53 | if (new_index < current_index) { 54 | fprintf(stderr, "Internal error: file out of order: %llx vs %llx\n", read64(head.start), current_index); 55 | exit(EXIT_FAILURE); 56 | } 57 | 58 | if (new_index != current_index || current_count + count > MAX_COUNT) { 59 | if (current_count != 0) { 60 | write64(&f, current_index); 61 | write32(&f, current_count); 62 | } 63 | 64 | current_index = new_index; 65 | current_count = 0; 66 | } 67 | current_count += count; 68 | 69 | head.start += bytes; 70 | if (head.start < head.end) { 71 | q.push(head); 72 | } 73 | 74 | along++; 75 | long long report = 100 * (along + also_did) / (nrec + also_todo); 76 | if (report != reported) { 77 | progress[shard] = report; 78 | int sum = 0; 79 | for (size_t i = 0; i < nshards; i++) { 80 | sum += progress[i]; 81 | } 82 | sum /= nshards; 83 | 84 | if (!quiet) { 85 | fprintf(stderr, "Merging: %d%% \r", sum); 86 | } 87 | reported = report; 88 | } 89 | } 90 | 91 | if (current_count != 0) { 92 | write64(&f, current_index); 93 | write32(&f, current_count); 94 | } 95 | 96 | return f; 97 | } 98 | 99 | struct merge_arg { 100 | std::vector mergers; 101 | size_t off; 102 | size_t outlen; 103 | size_t len; 104 | unsigned char *out; 105 | int zoom; 106 | bool quiet; 107 | size_t also_todo; 108 | size_t also_did; 109 | 110 | std::atomic *progress; 111 | size_t shard; 112 | size_t nshards; 113 | }; 114 | 115 | struct finder { 116 | unsigned char data[RECORD_BYTES]; 117 | 118 | bool operator<(const finder &f) const { 119 | return memcmp(data, f.data, INDEX_BYTES) < 0; 120 | } 121 | }; 122 | 123 | void *run_merge(void *va) { 124 | merge_arg *a = (merge_arg *) va; 125 | 126 | size_t nrec = 0; 127 | for (size_t i = 0; i < a->mergers.size(); i++) { 128 | nrec += (a->mergers[i].end - a->mergers[i].start) / RECORD_BYTES; 129 | } 130 | 131 | unsigned char *end = do_merge1(a->mergers, 
a->mergers.size(), a->out + a->off, RECORD_BYTES, nrec, a->zoom, a->quiet, a->progress, a->shard, a->nshards, a->also_todo, a->also_did); 132 | a->outlen = end - (a->out + a->off); 133 | 134 | return NULL; 135 | } 136 | 137 | void do_merge(struct merge *merges, size_t nmerges, int f, int bytes, long long nrec, int zoom, bool quiet, size_t cpus, size_t also_todo, size_t also_did) { 138 | unsigned long long mask = 0; 139 | if (zoom != 0) { 140 | mask = 0xFFFFFFFFFFFFFFFFULL << (64 - 2 * zoom); 141 | } 142 | 143 | unsigned long long beginning[cpus]; 144 | 145 | struct val { 146 | unsigned long long index; 147 | size_t weight; 148 | 149 | val(long long i, size_t w) { 150 | index = i; 151 | weight = w; 152 | } 153 | 154 | bool operator<(val const &v) const { 155 | return index < v.index; 156 | }; 157 | }; 158 | 159 | std::vector vals; 160 | size_t total_weight = 0; 161 | for (size_t j = 0; j < nmerges; j++) { 162 | size_t merge_nrec = (merges[j].end - merges[j].start) / bytes; 163 | if (merge_nrec != 0) { 164 | for (size_t i = 0; i < cpus; i++) { 165 | size_t rec = merge_nrec * i / cpus; 166 | 167 | // fprintf(stderr, "%zu: %zu of %zu: %llx\n", j, rec, merge_nrec, read64(merges[j].map + merges[j].start + bytes * rec)); 168 | 169 | vals.push_back(val(read64(merges[j].map + merges[j].start + bytes * rec), merge_nrec)); 170 | total_weight += merge_nrec; 171 | } 172 | } 173 | } 174 | 175 | std::sort(vals.begin(), vals.end()); 176 | 177 | size_t weight = 0; 178 | size_t n = 0; 179 | for (size_t i = 0; i < vals.size(); i++) { 180 | weight += vals[i].weight; 181 | if (weight >= total_weight * n / cpus) { 182 | beginning[n] = vals[i].index; 183 | n++; 184 | 185 | if (n >= cpus) { 186 | break; 187 | } 188 | } 189 | } 190 | for (; n < cpus; n++) { 191 | if (n == 0) { 192 | beginning[n] = 0; 193 | } else { 194 | beginning[n] = beginning[n - 1]; 195 | } 196 | } 197 | 198 | std::atomic progress[cpus]; 199 | std::vector args; 200 | 201 | for (size_t i = 0; i < cpus; i++) { 202 | 
merge_arg ma; 203 | 204 | progress[i] = 0; 205 | ma.progress = progress; 206 | ma.shard = i; 207 | ma.nshards = cpus; 208 | ma.also_todo = also_todo; 209 | ma.also_did = also_did; 210 | 211 | for (size_t j = 0; j < nmerges; j++) { 212 | if ((merges[j].end - merges[j].start) % sizeof(finder) != 0) { 213 | fprintf(stderr, "File size is not a multiple of the count unit\n"); 214 | exit(EXIT_FAILURE); 215 | } 216 | 217 | finder *fs = (finder *) (merges[j].map + merges[j].start); 218 | finder *fe = (finder *) (merges[j].map + merges[j].end); 219 | 220 | if (fs > fe) { 221 | fprintf(stderr, "Region being merged ends before it begins: %p to %p\n", fs, fe); 222 | exit(EXIT_FAILURE); 223 | } 224 | 225 | finder look; 226 | unsigned char *p = look.data; 227 | write64(&p, beginning[i] & mask); 228 | 229 | finder *l = lower_bound1(fs, fe, look); 230 | 231 | merger m; 232 | m.start = (unsigned char *) l; 233 | if (i == cpus - 1) { 234 | m.end = (unsigned char *) fe; 235 | } 236 | if (i > 0) { 237 | args[i - 1].mergers[j].end = m.start; 238 | } 239 | 240 | ma.mergers.push_back(m); 241 | } 242 | 243 | ma.quiet = quiet; 244 | args.push_back(ma); 245 | } 246 | 247 | size_t off = HEADER_LEN; 248 | for (size_t i = 0; i < cpus; i++) { 249 | args[i].off = off; 250 | 251 | for (size_t j = 0; j < nmerges; j++) { 252 | // printf("range: %zu: %zu\n", j, (args[i].mergers[j].end - args[i].mergers[j].start)); 253 | off += args[i].mergers[j].end - args[i].mergers[j].start; 254 | } 255 | 256 | args[i].len = off - args[i].off; 257 | } 258 | 259 | if (off != (size_t)(nrec * bytes + HEADER_LEN)) { 260 | fprintf(stderr, "Internal error: Wrong total size: %zu vs %lld * %d == %lld\n", off, nrec, bytes, nrec * bytes); 261 | exit(EXIT_FAILURE); 262 | } 263 | 264 | if (ftruncate(f, off) != 0) { 265 | perror("resize output file"); 266 | exit(EXIT_FAILURE); 267 | } 268 | 269 | void *map = mmap(NULL, off, PROT_READ | PROT_WRITE, MAP_SHARED, f, 0); 270 | if (map == MAP_FAILED) { 271 | perror("mmap output 
file"); 272 | exit(EXIT_FAILURE); 273 | } 274 | 275 | memcpy(map, header_text, HEADER_LEN); 276 | 277 | pthread_t threads[cpus]; 278 | for (size_t i = 0; i < cpus; i++) { 279 | args[i].out = (unsigned char *) map; 280 | args[i].zoom = zoom; 281 | 282 | if (pthread_create(&threads[i], NULL, run_merge, &args[i]) != 0) { 283 | perror("pthread_create"); 284 | exit(EXIT_FAILURE); 285 | } 286 | } 287 | 288 | size_t outpos = HEADER_LEN; 289 | size_t inpos = HEADER_LEN; 290 | 291 | for (size_t i = 0; i < cpus; i++) { 292 | void *ret; 293 | if (pthread_join(threads[i], &ret) != 0) { 294 | perror("pthread_join"); 295 | exit(EXIT_FAILURE); 296 | } 297 | 298 | if (inpos != outpos) { 299 | memmove((unsigned char *) map + outpos, (unsigned char *) map + inpos, args[i].outlen); 300 | } 301 | outpos += args[i].outlen; 302 | inpos += args[i].len; 303 | } 304 | 305 | if (munmap(map, off) != 0) { 306 | perror("munmap"); 307 | exit(EXIT_FAILURE); 308 | } 309 | 310 | if (ftruncate(f, outpos) != 0) { 311 | perror("shrink output file"); 312 | exit(EXIT_FAILURE); 313 | } 314 | } 315 | -------------------------------------------------------------------------------- /merge.hpp: -------------------------------------------------------------------------------- 1 | struct merge { 2 | long long start; 3 | long long end; 4 | unsigned char *map; // used for merge 5 | int fd; // used for sort 6 | }; 7 | 8 | void do_merge(struct merge *merges, size_t nmerges, int f, int bytes, long long nrec, int zoom, bool quiet, size_t cpus, size_t also_todo, size_t also_did); 9 | -------------------------------------------------------------------------------- /mergetool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "header.hpp" 11 | #include "serial.hpp" 12 | #include "merge.hpp" 13 | 14 | void submerge(std::vector fnames, int out, const char 
*argv0, int zoom, int cpus, size_t *also_todo, size_t *also_did); 15 | 16 | bool quiet = false; 17 | 18 | void usage(char **argv) { 19 | fprintf(stderr, "Usage: %s -o merged.count file.count ...\n", argv[0]); 20 | } 21 | 22 | void trim(char *s) { 23 | for (; *s != '\0'; s++) { 24 | if (*s == '\n') { 25 | *s = '\0'; 26 | break; 27 | } 28 | } 29 | } 30 | 31 | void addfiles(std::vector &list) { 32 | char s[2000]; 33 | while (fgets(s, 2000, stdin)) { 34 | trim(s); 35 | list.push_back(s); 36 | } 37 | } 38 | 39 | int main(int argc, char **argv) { 40 | extern int optind; 41 | extern char *optarg; 42 | 43 | char *outfile = NULL; 44 | int zoom = 32; 45 | size_t cpus = sysconf(_SC_NPROCESSORS_ONLN); 46 | bool readfiles = false; 47 | 48 | int i; 49 | while ((i = getopt(argc, argv, "o:s:qp:F")) != -1) { 50 | switch (i) { 51 | case 's': 52 | zoom = atoi(optarg); 53 | break; 54 | 55 | case 'o': 56 | outfile = optarg; 57 | break; 58 | 59 | case 'p': 60 | cpus = atoi(optarg); 61 | break; 62 | 63 | case 'q': 64 | quiet = true; 65 | break; 66 | 67 | case 'F': 68 | readfiles = true; 69 | break; 70 | 71 | default: 72 | usage(argv); 73 | exit(EXIT_FAILURE); 74 | } 75 | } 76 | 77 | if (zoom < 0 || zoom > 32) { 78 | fprintf(stderr, "%s: zoom (-s) must be in the range 0 to 32, not %d\n", argv[0], zoom); 79 | exit(EXIT_FAILURE); 80 | } 81 | 82 | std::vector fnames; 83 | for (i = optind; i < argc; i++) { 84 | fnames.push_back(argv[i]); 85 | } 86 | 87 | if (readfiles) { 88 | addfiles(fnames); 89 | } 90 | 91 | if (fnames.size() == 0) { 92 | usage(argv); 93 | exit(EXIT_FAILURE); 94 | } 95 | 96 | int out = open(outfile, O_CREAT | O_TRUNC | O_RDWR, 0777); 97 | if (out < 0) { 98 | perror(outfile); 99 | exit(EXIT_FAILURE); 100 | } 101 | 102 | size_t also_todo = 0, also_did = 0; 103 | submerge(fnames, out, argv[0], zoom, cpus, &also_todo, &also_did); 104 | 105 | return 0; 106 | } 107 | 108 | #define MAX_MERGE 50 109 | 110 | void submerge(std::vector fnames, int out, const char *argv0, int zoom, int 
cpus, size_t *also_todo, size_t *also_did) { 111 | std::vector todelete; 112 | 113 | if (fnames.size() > MAX_MERGE) { 114 | size_t subs = MAX_MERGE; 115 | 116 | std::vector temps; 117 | std::vector tempfds; 118 | std::vector> subfnames; 119 | for (size_t i = 0; i < subs; i++) { 120 | char s[2000] = "/tmp/count.XXXXXX"; 121 | int fd = mkstemp(s); 122 | if (fd < 0) { 123 | perror("mkstemp"); 124 | } 125 | 126 | temps.push_back(s); 127 | tempfds.push_back(fd); 128 | subfnames.push_back(std::vector()); 129 | } 130 | 131 | for (size_t i = 0; i < fnames.size(); i++) { 132 | subfnames[i % subs].push_back(fnames[i]); 133 | 134 | struct stat st; 135 | if (stat(fnames[i].c_str(), &st) == 0) { 136 | *also_todo += st.st_size / RECORD_BYTES; 137 | } 138 | 139 | if ((st.st_size - HEADER_LEN) % RECORD_BYTES != 0) { 140 | fprintf(stderr, "%s: file size not a multiple of record length\n", fnames[i].c_str()); 141 | exit(EXIT_FAILURE); 142 | } 143 | } 144 | 145 | for (size_t i = 0; i < subs; i++) { 146 | submerge(subfnames[i], tempfds[i], argv0, zoom, cpus, also_todo, also_did); 147 | // submerge will have closed the temp fds 148 | } 149 | 150 | fnames = temps; 151 | todelete = temps; 152 | } 153 | 154 | size_t nmerges = fnames.size(); 155 | struct merge merges[nmerges]; 156 | int fds[nmerges]; 157 | unsigned char *maps[nmerges]; 158 | long long to_sort = 0; 159 | 160 | for (size_t i = 0; i < nmerges; i++) { 161 | fds[i] = open(fnames[i].c_str(), O_RDONLY); 162 | if (fds[i] < 0) { 163 | perror(fnames[i].c_str()); 164 | exit(EXIT_FAILURE); 165 | } 166 | 167 | struct stat st; 168 | if (fstat(fds[i], &st) != 0) { 169 | perror("stat"); 170 | exit(EXIT_FAILURE); 171 | } 172 | 173 | maps[i] = (unsigned char *) mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fds[i], 0); 174 | if (maps[i] == MAP_FAILED) { 175 | perror(fnames[i].c_str()); 176 | exit(EXIT_FAILURE); 177 | } 178 | 179 | if (st.st_size < HEADER_LEN || memcmp(maps[i], header_text, HEADER_LEN) != 0) { 180 | fprintf(stderr, "%s:%s: 
Not a tile-count file\n", argv0, fnames[i].c_str()); 181 | exit(EXIT_FAILURE); 182 | } 183 | 184 | merges[i].start = HEADER_LEN; 185 | merges[i].end = st.st_size; 186 | merges[i].map = maps[i]; 187 | 188 | to_sort += st.st_size - HEADER_LEN; 189 | 190 | if (close(fds[i]) < 0) { 191 | perror("close"); 192 | exit(EXIT_FAILURE); 193 | } 194 | } 195 | 196 | if (write(out, header_text, HEADER_LEN) != HEADER_LEN) { 197 | perror("write header"); 198 | exit(EXIT_FAILURE); 199 | } 200 | 201 | do_merge(merges, nmerges, out, RECORD_BYTES, to_sort / RECORD_BYTES, zoom, quiet, cpus, *also_todo, *also_did); 202 | if (close(out) != 0) { 203 | perror("close"); 204 | exit(EXIT_FAILURE); 205 | } 206 | 207 | *also_did += to_sort / RECORD_BYTES; 208 | 209 | for (size_t i = 0; i < todelete.size(); i++) { 210 | if (unlink(todelete[i].c_str()) < 0) { 211 | perror(todelete[i].c_str()); 212 | exit(EXIT_FAILURE); 213 | } 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /milo/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2014 Milo Yip 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
// Builds a 64-bit constant from its high and low 32-bit halves.
#define UINT64_C2(h, l) ((static_cast<uint64_t>(h) << 32) | static_cast<uint64_t>(l))

// "Do it yourself" floating point: an unsigned 64-bit significand `f` and a
// binary exponent `e`, representing the value f * 2^e.
struct DiyFp {
    DiyFp() {}

    DiyFp(uint64_t ff, int ee) : f(ff), e(ee) {}

    // Decompose an IEEE-754 double. For normal numbers the hidden bit is made
    // explicit; subnormals (biased exponent 0) keep the raw significand and
    // use the minimum exponent.
    DiyFp(double d) {
        union {
            double d;
            uint64_t u64;
        } u = { d };

        int biased_e = (u.u64 & kDpExponentMask) >> kDpSignificandSize;
        uint64_t significand = (u.u64 & kDpSignificandMask);
        if (biased_e != 0) {
            f = significand + kDpHiddenBit;
            e = biased_e - kDpExponentBias;
        }
        else {
            f = significand;
            e = kDpMinExponent + 1;
        }
    }

    // Difference of two values that share an exponent; requires f >= rhs.f.
    DiyFp operator-(const DiyFp& rhs) const {
        assert(e == rhs.e);
        assert(f >= rhs.f);
        return DiyFp(f - rhs.f, e);
    }

    // Product keeping the upper 64 bits of the 128-bit significand product,
    // rounded on the highest discarded bit; the exponent is adjusted by 64.
    DiyFp operator*(const DiyFp& rhs) const {
#if defined(_MSC_VER) && defined(_M_AMD64)
        uint64_t h;
        uint64_t l = _umul128(f, rhs.f, &h);
        if (l & (uint64_t(1) << 63)) // rounding
            h++;
        return DiyFp(h, e + rhs.e + 64);
#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
        unsigned __int128 p = static_cast<unsigned __int128>(f) * static_cast<unsigned __int128>(rhs.f);
        uint64_t h = p >> 64;
        uint64_t l = static_cast<uint64_t>(p);
        if (l & (uint64_t(1) << 63)) // rounding
            h++;
        return DiyFp(h, e + rhs.e + 64);
#else
        // Portable path: schoolbook multiplication on 32-bit halves.
        const uint64_t M32 = 0xFFFFFFFF;
        const uint64_t a = f >> 32;
        const uint64_t b = f & M32;
        const uint64_t c = rhs.f >> 32;
        const uint64_t d = rhs.f & M32;
        const uint64_t ac = a * c;
        const uint64_t bc = b * c;
        const uint64_t ad = a * d;
        const uint64_t bd = b * d;
        uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
        tmp += 1U << 31;  /// mult_round
        return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
#endif
    }

    // Shift the significand left until its top bit is set, adjusting the
    // exponent to keep the value unchanged. `f` must be non-zero.
    DiyFp Normalize() const {
#if defined(_MSC_VER) && defined(_M_AMD64)
        unsigned long index;
        _BitScanReverse64(&index, f);
        return DiyFp(f << (63 - index), e - (63 - index));
#elif defined(__GNUC__)
        int s = __builtin_clzll(f);
        return DiyFp(f << s, e - s);
#else
        DiyFp res = *this;
        while (!(res.f & kDpHiddenBit)) {
            res.f <<= 1;
            res.e--;
        }
        res.f <<= (kDiySignificandSize - kDpSignificandSize - 1);
        res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 1);
        return res;
#endif
    }

    // Like Normalize(), for a boundary value whose significand may use one
    // bit more than a double's.
    DiyFp NormalizeBoundary() const {
#if defined(_MSC_VER) && defined(_M_AMD64)
        unsigned long index;
        _BitScanReverse64(&index, f);
        return DiyFp (f << (63 - index), e - (63 - index));
#else
        DiyFp res = *this;
        while (!(res.f & (kDpHiddenBit << 1))) {
            res.f <<= 1;
            res.e--;
        }
        res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
        res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
        return res;
#endif
    }

    // Compute the midpoints to the neighbouring values below (*minus) and
    // above (*plus), both expressed with the exponent of the normalized upper
    // one. The lower gap is half as wide when `f` is exactly the hidden bit.
    void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
        DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
        DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
        mi.f <<= mi.e - pl.e;
        mi.e = pl.e;
        *plus = pl;
        *minus = mi;
    }

    static const int kDiySignificandSize = 64;
    static const int kDpSignificandSize = 52;
    static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
    static const int kDpMinExponent = -kDpExponentBias;
    static const uint64_t kDpExponentMask = UINT64_C2(0x7FF00000, 0x00000000);
    static const uint64_t kDpSignificandMask = UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
    static const uint64_t kDpHiddenBit = UINT64_C2(0x00100000, 0x00000000);

    uint64_t f;
    int e;
};
UINT64_C2(0xdff97724, 0x70297ebd), 153 | UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), UINT64_C2(0xf8a95fcf, 0x88747d94), 154 | UINT64_C2(0xb9447093, 0x8fa89bcf), UINT64_C2(0x8a08f0f8, 0xbf0f156b), 155 | UINT64_C2(0xcdb02555, 0x653131b6), UINT64_C2(0x993fe2c6, 0xd07b7fac), 156 | UINT64_C2(0xe45c10c4, 0x2a2b3b06), UINT64_C2(0xaa242499, 0x697392d3), 157 | UINT64_C2(0xfd87b5f2, 0x8300ca0e), UINT64_C2(0xbce50864, 0x92111aeb), 158 | UINT64_C2(0x8cbccc09, 0x6f5088cc), UINT64_C2(0xd1b71758, 0xe219652c), 159 | UINT64_C2(0x9c400000, 0x00000000), UINT64_C2(0xe8d4a510, 0x00000000), 160 | UINT64_C2(0xad78ebc5, 0xac620000), UINT64_C2(0x813f3978, 0xf8940984), 161 | UINT64_C2(0xc097ce7b, 0xc90715b3), UINT64_C2(0x8f7e32ce, 0x7bea5c70), 162 | UINT64_C2(0xd5d238a4, 0xabe98068), UINT64_C2(0x9f4f2726, 0x179a2245), 163 | UINT64_C2(0xed63a231, 0xd4c4fb27), UINT64_C2(0xb0de6538, 0x8cc8ada8), 164 | UINT64_C2(0x83c7088e, 0x1aab65db), UINT64_C2(0xc45d1df9, 0x42711d9a), 165 | UINT64_C2(0x924d692c, 0xa61be758), UINT64_C2(0xda01ee64, 0x1a708dea), 166 | UINT64_C2(0xa26da399, 0x9aef774a), UINT64_C2(0xf209787b, 0xb47d6b85), 167 | UINT64_C2(0xb454e4a1, 0x79dd1877), UINT64_C2(0x865b8692, 0x5b9bc5c2), 168 | UINT64_C2(0xc83553c5, 0xc8965d3d), UINT64_C2(0x952ab45c, 0xfa97a0b3), 169 | UINT64_C2(0xde469fbd, 0x99a05fe3), UINT64_C2(0xa59bc234, 0xdb398c25), 170 | UINT64_C2(0xf6c69a72, 0xa3989f5c), UINT64_C2(0xb7dcbf53, 0x54e9bece), 171 | UINT64_C2(0x88fcf317, 0xf22241e2), UINT64_C2(0xcc20ce9b, 0xd35c78a5), 172 | UINT64_C2(0x98165af3, 0x7b2153df), UINT64_C2(0xe2a0b5dc, 0x971f303a), 173 | UINT64_C2(0xa8d9d153, 0x5ce3b396), UINT64_C2(0xfb9b7cd9, 0xa4a7443c), 174 | UINT64_C2(0xbb764c4c, 0xa7a44410), UINT64_C2(0x8bab8eef, 0xb6409c1a), 175 | UINT64_C2(0xd01fef10, 0xa657842c), UINT64_C2(0x9b10a4e5, 0xe9913129), 176 | UINT64_C2(0xe7109bfb, 0xa19c0c9d), UINT64_C2(0xac2820d9, 0x623bf429), 177 | UINT64_C2(0x80444b5e, 0x7aa7cf85), UINT64_C2(0xbf21e440, 0x03acdd2d), 178 | UINT64_C2(0x8e679c2f, 0x5e44ff8f), UINT64_C2(0xd433179d, 
// Adjust the last generated digit downwards while doing so moves the result
// closer to the exact value.
//
// `rest` is the current distance measure and grows by `ten_kappa` (the weight
// of one unit of the last digit) for every decrement. Decrementing continues
// while `rest` is below `wp_w`, the step still fits inside `delta`, and the
// step either stays below `wp_w` or lands nearer to it than `rest` is.
inline void GrisuRound(std::string &buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) {
    for (;;) {
        if (rest >= wp_w) {
            break;
        }
        if (delta - rest < ten_kappa) {
            break;
        }

        const uint64_t stepped = rest + ten_kappa;
        const bool still_below = stepped < wp_w;
        const bool closer = (wp_w - rest) > (stepped - wp_w);
        if (!still_below && !closer) {
            break;
        }

        --buffer[len - 1];
        rest = stepped;
    }
}
218 | if (n < 10) return 1; 219 | if (n < 100) return 2; 220 | if (n < 1000) return 3; 221 | if (n < 10000) return 4; 222 | if (n < 100000) return 5; 223 | if (n < 1000000) return 6; 224 | if (n < 10000000) return 7; 225 | if (n < 100000000) return 8; 226 | if (n < 1000000000) return 9; 227 | return 10; 228 | } 229 | 230 | inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, std::string &buffer, int* len, int* K) { 231 | static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 }; 232 | const DiyFp one(uint64_t(1) << -Mp.e, Mp.e); 233 | const DiyFp wp_w = Mp - W; 234 | uint32_t p1 = static_cast(Mp.f >> -one.e); 235 | uint64_t p2 = Mp.f & (one.f - 1); 236 | int kappa = static_cast(CountDecimalDigit32(p1)); 237 | *len = 0; 238 | 239 | while (kappa > 0) { 240 | uint32_t d; 241 | switch (kappa) { 242 | case 10: d = p1 / 1000000000; p1 %= 1000000000; break; 243 | case 9: d = p1 / 100000000; p1 %= 100000000; break; 244 | case 8: d = p1 / 10000000; p1 %= 10000000; break; 245 | case 7: d = p1 / 1000000; p1 %= 1000000; break; 246 | case 6: d = p1 / 100000; p1 %= 100000; break; 247 | case 5: d = p1 / 10000; p1 %= 10000; break; 248 | case 4: d = p1 / 1000; p1 %= 1000; break; 249 | case 3: d = p1 / 100; p1 %= 100; break; 250 | case 2: d = p1 / 10; p1 %= 10; break; 251 | case 1: d = p1; p1 = 0; break; 252 | default: 253 | #if defined(_MSC_VER) 254 | __assume(0); 255 | #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) 256 | __builtin_unreachable(); 257 | #else 258 | d = 0; 259 | #endif 260 | } 261 | if (d || *len) { 262 | buffer.push_back('0' + static_cast(d)); 263 | (*len)++; 264 | } 265 | kappa--; 266 | uint64_t tmp = (static_cast(p1) << -one.e) + p2; 267 | if (tmp <= delta) { 268 | *K += kappa; 269 | GrisuRound(buffer, *len, delta, tmp, static_cast(kPow10[kappa]) << -one.e, wp_w.f); 270 | return; 271 | } 272 | } 273 | 274 | // kappa = 0 275 | for (;;) { 276 | p2 *= 10; 277 | delta *= 10; 278 | char 
d = static_cast(p2 >> -one.e); 279 | if (d || *len) { 280 | buffer.push_back('0' + d); 281 | (*len)++; 282 | } 283 | p2 &= one.f - 1; 284 | kappa--; 285 | if (p2 < delta) { 286 | *K += kappa; 287 | int index = -static_cast(kappa); 288 | GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 9 ? kPow10[-static_cast(kappa)] : 0)); 289 | return; 290 | } 291 | } 292 | } 293 | 294 | inline void Grisu2(double value, std::string &buffer, int* length, int* K) { 295 | const DiyFp v(value); 296 | DiyFp w_m, w_p; 297 | v.NormalizedBoundaries(&w_m, &w_p); 298 | 299 | const DiyFp c_mk = GetCachedPower(w_p.e, K); 300 | const DiyFp W = v.Normalize() * c_mk; 301 | DiyFp Wp = w_p * c_mk; 302 | DiyFp Wm = w_m * c_mk; 303 | Wm.f++; 304 | Wp.f--; 305 | DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K); 306 | } 307 | 308 | inline const char* GetDigitsLut() { 309 | static const char cDigitsLut[200] = { 310 | '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', 311 | '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', 312 | '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', 313 | '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', 314 | '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', 315 | '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', 316 | '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', 317 | '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', 318 | '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', 319 | '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' 320 | }; 321 | return cDigitsLut; 322 | } 
// Append a signed decimal exponent to `buffer`: always a sign, then one, two
// or three digits depending on the magnitude of `K`.
inline void WriteExponent(int K, std::string &buffer) {
    const bool negative = K < 0;
    buffer.push_back(negative ? '-' : '+');
    if (negative) {
        K = -K;
    }

    if (K >= 100) {
        buffer.push_back(static_cast<char>('0' + K / 100));
        K %= 100;
        buffer.push_back(static_cast<char>('0' + K / 10));
        buffer.push_back(static_cast<char>('0' + K % 10));
        return;
    }

    if (K >= 10) {
        buffer.push_back(static_cast<char>('0' + K / 10));
    }
    buffer.push_back(static_cast<char>('0' + K % 10));
}

// Turn the `length` digits in `buffer`, which stand for digits * 10^k, into
// their final textual form in place: plain integer, decimal fraction, or
// exponent notation depending on where the decimal point falls.
inline void Prettify(std::string &buffer, int length, int k) {
    // Position of the decimal point relative to the first digit:
    // 10^(kk-1) <= v < 10^kk
    const int kk = length + k;

    if (length <= kk && kk <= 21) {
        // 1234e7 -> 12340000000
        buffer.append(static_cast<std::string::size_type>(kk - length), '0');
        return;
    }

    if (0 < kk && kk <= 21) {
        // 1234e-2 -> 12.34
        buffer.insert(static_cast<std::string::size_type>(kk), 1, '.');
        return;
    }

    if (-6 < kk && kk <= 0) {
        // 1234e-6 -> 0.001234
        buffer.insert(0, static_cast<std::string::size_type>(-kk), '0');
        buffer.insert(0, "0.");
        return;
    }

    // Exponent notation: 1e30, or 1234e30 -> 1.234e33
    if (length != 1) {
        buffer.insert(1, 1, '.');
    }
    buffer.push_back('e');
    WriteExponent(kk - 1, buffer);
}
/// Return @p value with the order of its four bytes reversed.
inline uint32_t byteswap_impl(uint32_t value) noexcept {
#ifdef PROTOZERO_USE_BUILTIN_BSWAP
    return __builtin_bswap32(value);
#else
    // Move each byte to its mirrored position, then mask it into place.
    const uint32_t b0 = value << 24;
    const uint32_t b1 = (value << 8) & 0x00ff0000;
    const uint32_t b2 = (value >> 8) & 0x0000ff00;
    const uint32_t b3 = value >> 24;
    return b0 | b1 | b2 | b3;
#endif
}

/// Return @p value with the order of its eight bytes reversed.
inline uint64_t byteswap_impl(uint64_t value) noexcept {
#ifdef PROTOZERO_USE_BUILTIN_BSWAP
    return __builtin_bswap64(value);
#else
    // Move each byte to its mirrored position, then mask it into place.
    const uint64_t b0 = value << 56;
    const uint64_t b1 = (value << 40) & 0x00ff000000000000ULL;
    const uint64_t b2 = (value << 24) & 0x0000ff0000000000ULL;
    const uint64_t b3 = (value << 8) & 0x000000ff00000000ULL;
    const uint64_t b4 = (value >> 8) & 0x00000000ff000000ULL;
    const uint64_t b5 = (value >> 24) & 0x0000000000ff0000ULL;
    const uint64_t b6 = (value >> 40) & 0x000000000000ff00ULL;
    const uint64_t b7 = value >> 56;
    return b0 | b1 | b2 | b3 | b4 | b5 | b6 | b7;
#endif
}
/// Reverse the byte order of the 32-bit value at @p ptr in place.
inline void byteswap_inplace(uint32_t* ptr) noexcept {
    *ptr = byteswap_impl(*ptr);
}

/// Reverse the byte order of the 64-bit value at @p ptr in place.
inline void byteswap_inplace(uint64_t* ptr) noexcept {
    *ptr = byteswap_impl(*ptr);
}

/// Reverse the byte order of the signed 32-bit value at @p ptr in place.
inline void byteswap_inplace(int32_t* ptr) noexcept {
    auto bptr = reinterpret_cast<uint32_t*>(ptr);
    *bptr = byteswap_impl(*bptr);
}

/// Reverse the byte order of the signed 64-bit value at @p ptr in place.
inline void byteswap_inplace(int64_t* ptr) noexcept {
    auto bptr = reinterpret_cast<uint64_t*>(ptr);
    *bptr = byteswap_impl(*bptr);
}

/// Reverse the byte order of the float at @p ptr in place.
// NOTE(review): reading a float through a uint32_t* is not covered by the
// aliasing exceptions of the language; this relies on a float being 32 bits
// wide and on the compiler tolerating the cast -- confirm that is acceptable.
inline void byteswap_inplace(float* ptr) noexcept {
    auto bptr = reinterpret_cast<uint32_t*>(ptr);
    *bptr = byteswap_impl(*bptr);
}

/// Reverse the byte order of the double at @p ptr in place.
// NOTE(review): same aliasing caveat as the float overload, with a 64-bit
// double.
inline void byteswap_inplace(double* ptr) noexcept {
    auto bptr = reinterpret_cast<uint64_t*>(ptr);
    *bptr = byteswap_impl(*bptr);
}
25 | /* If __BYTE_ORDER is defined but matches neither value below, PROTOZERO_BYTE_ORDER stays undefined. */ #if defined(__BYTE_ORDER) 26 | # if (__BYTE_ORDER == __LITTLE_ENDIAN) 27 | # define PROTOZERO_BYTE_ORDER PROTOZERO_LITTLE_ENDIAN 28 | # endif 29 | # if (__BYTE_ORDER == __BIG_ENDIAN) 30 | # define PROTOZERO_BYTE_ORDER PROTOZERO_BIG_ENDIAN 31 | # endif 32 | #else 33 | // This probably isn't a very good default, but might do until we figure 34 | // out something better. 35 | # define PROTOZERO_BYTE_ORDER PROTOZERO_LITTLE_ENDIAN 36 | #endif 37 | 38 | // Use the __builtin_bswap intrinsics when compiling with GCC or Clang 39 | #if defined(__GNUC__) || defined(__clang__) 40 | # define PROTOZERO_USE_BUILTIN_BSWAP 41 | #endif 42 | 43 | // Wrapper for assert() used for testing 44 | #ifndef protozero_assert 45 | # define protozero_assert(x) assert(x) 46 | #endif 47 | 48 | #endif // PROTOZERO_CONFIG_HPP 49 | -------------------------------------------------------------------------------- /protozero/exception.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PROTOZERO_EXCEPTION_HPP 2 | #define PROTOZERO_EXCEPTION_HPP 3 | 4 | /***************************************************************************** 5 | 6 | protozero - Minimalistic protocol buffer decoder and encoder in C++. 7 | 8 | This file is from https://github.com/mapbox/protozero where you can find more 9 | documentation. 10 | 11 | *****************************************************************************/ 12 | 13 | /** 14 | * @file exception.hpp 15 | * 16 | * @brief Contains the exceptions used in the protozero library. 17 | */ 18 | 19 | #include 20 | 21 | /** 22 | * @brief All parts of the protozero header-only library are in this namespace. 23 | */ 24 | namespace protozero { 25 | 26 | /** 27 | * All exceptions explicitly thrown by the functions of the protozero library 28 | * derive from this exception. 29 | */ 30 | struct exception : std::exception { 31 | /// Returns the explanatory string.
32 | const char* what() const noexcept override { return "pbf exception"; } 33 | }; 34 | 35 | /** 36 | * This exception is thrown when parsing a varint that is larger than allowed. 37 | * This should never happen unless the data is corrupted. 38 | */ 39 | struct varint_too_long_exception : exception { 40 | /// Returns the explanatory string. 41 | const char* what() const noexcept override { return "varint too long exception"; } 42 | }; 43 | 44 | /** 45 | * This exception is thrown when the wire type of a pbf field is unknown. 46 | * This should never happen unless the data is corrupted. 47 | */ 48 | struct unknown_pbf_wire_type_exception : exception { 49 | /// Returns the explanatory string. 50 | const char* what() const noexcept override { return "unknown pbf field type exception"; } 51 | }; 52 | 53 | /** 54 | * This exception is thrown when we are trying to read a field and there 55 | * are not enough bytes left in the buffer to read it. Almost all functions 56 | * of the pbf_reader class can throw this exception. 57 | * 58 | * This should never happen unless the data is corrupted or you have 59 | * initialized the pbf_reader object with incomplete data. 60 | */ 61 | struct end_of_buffer_exception : exception { 62 | /// Returns the explanatory string. 63 | const char* what() const noexcept override { return "end of buffer exception"; } 64 | }; 65 | 66 | } // end namespace protozero 67 | 68 | #endif // PROTOZERO_EXCEPTION_HPP 69 | -------------------------------------------------------------------------------- /protozero/iterators.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PROTOZERO_ITERATORS_HPP 2 | #define PROTOZERO_ITERATORS_HPP 3 | 4 | /***************************************************************************** 5 | 6 | protozero - Minimalistic protocol buffer decoder and encoder in C++. 7 | 8 | This file is from https://github.com/mapbox/protozero where you can find more 9 | documentation.
10 | 11 | *****************************************************************************/ 12 | 13 | /** 14 | * @file iterators.hpp 15 | * 16 | * @brief Contains the iterators for access to packed repeated fields. 17 | */ 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN 27 | # include 28 | #endif 29 | 30 | namespace protozero { 31 | 32 | /** 33 | * A range of iterators based on std::pair. Created from beginning and 34 | * end iterators. Used as a return type from some pbf_reader methods 35 | * that is easy to use with range-based for loops. 36 | */ 37 | template > 38 | class iterator_range : 39 | #ifdef PROTOZERO_STRICT_API 40 | protected 41 | #else 42 | public 43 | #endif 44 | P { 45 | 46 | public: 47 | 48 | /// The type of the iterators in this range. 49 | using iterator = T; 50 | 51 | /// The value type of the underlying iterator. 52 | using value_type = typename std::iterator_traits::value_type; 53 | 54 | /** 55 | * Default constructor. Create empty iterator_range. 56 | */ 57 | constexpr iterator_range() : 58 | P(iterator{}, iterator{}) { 59 | } 60 | 61 | /** 62 | * Create iterator range from two iterators. 63 | * 64 | * @param first_iterator Iterator to beginning of range. 65 | * @param last_iterator Iterator to end of range. 66 | */ 67 | constexpr iterator_range(iterator&& first_iterator, iterator&& last_iterator) : 68 | P(std::forward(first_iterator), 69 | std::forward(last_iterator)) { 70 | } 71 | 72 | /// Return iterator to beginning of range. 73 | constexpr iterator begin() const noexcept { 74 | return this->first; 75 | } 76 | 77 | /// Return iterator to end of range. 78 | constexpr iterator end() const noexcept { 79 | return this->second; 80 | } 81 | 82 | /// Return iterator to beginning of range. 83 | constexpr iterator cbegin() const noexcept { 84 | return this->first; 85 | } 86 | 87 | /// Return iterator to end of range.
88 | constexpr iterator cend() const noexcept { 89 | return this->second; 90 | } 91 | 92 | /// Return true if this range is empty. NOTE(review): declared to return std::size_t rather than bool, so the comparison result is converted to 1 or 0. 93 | constexpr std::size_t empty() const noexcept { 94 | return begin() == end(); 95 | } 96 | 97 | /** 98 | * Get element at the beginning of the range. 99 | * 100 | * @pre Range must not be empty. 101 | */ 102 | value_type front() const { 103 | protozero_assert(!empty()); 104 | return *(this->first); 105 | } 106 | 107 | /** 108 | * Advance beginning of range by one. 109 | * 110 | * @pre Range must not be empty. 111 | */ 112 | void drop_front() { 113 | protozero_assert(!empty()); 114 | ++this->first; 115 | } 116 | 117 | /** 118 | * Swap the contents of this range with the other. 119 | * 120 | * @param other Other range to swap data with. 121 | */ 122 | void swap(iterator_range& other) noexcept { 123 | using std::swap; 124 | swap(this->first, other.first); 125 | swap(this->second, other.second); 126 | } 127 | 128 | }; // class iterator_range 129 | 130 | /** 131 | * Swap two iterator_ranges. 132 | * 133 | * @param lhs First range. 134 | * @param rhs Second range. 135 | */ 136 | template 137 | inline void swap(iterator_range& lhs, iterator_range& rhs) noexcept { 138 | lhs.swap(rhs); 139 | } 140 | 141 | /** 142 | * A forward iterator used for accessing packed repeated fields of fixed 143 | * length (fixed32, sfixed32, float, double).
144 | */ 145 | template 146 | class const_fixed_iterator { 147 | 148 | /// Pointer to current iterator position 149 | const char* m_data; 150 | 151 | /// Pointer to end iterator position 152 | const char* m_end; 153 | 154 | public: 155 | 156 | using iterator_category = std::forward_iterator_tag; 157 | using value_type = T; 158 | using difference_type = std::ptrdiff_t; 159 | using pointer = value_type*; 160 | using reference = value_type&; 161 | 162 | const_fixed_iterator() noexcept : 163 | m_data(nullptr), 164 | m_end(nullptr) { 165 | } 166 | 167 | const_fixed_iterator(const char* data, const char* end) noexcept : 168 | m_data(data), 169 | m_end(end) { 170 | } 171 | 172 | const_fixed_iterator(const const_fixed_iterator&) noexcept = default; 173 | const_fixed_iterator(const_fixed_iterator&&) noexcept = default; 174 | 175 | const_fixed_iterator& operator=(const const_fixed_iterator&) noexcept = default; 176 | const_fixed_iterator& operator=(const_fixed_iterator&&) noexcept = default; 177 | 178 | ~const_fixed_iterator() noexcept = default; 179 | 180 | /* Copies sizeof(value_type) bytes starting at m_data into a value_type and returns it, byte-swapping first when PROTOZERO_BYTE_ORDER is not PROTOZERO_LITTLE_ENDIAN. m_end is not consulted here. */ value_type operator*() const { 181 | value_type result; 182 | std::memcpy(&result, m_data, sizeof(value_type)); 183 | #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN 184 | detail::byteswap_inplace(&result); 185 | #endif 186 | return result; 187 | } 188 | 189 | /* Advances the position by sizeof(value_type) bytes. */ const_fixed_iterator& operator++() { 190 | m_data += sizeof(value_type); 191 | return *this; 192 | } 193 | 194 | const_fixed_iterator operator++(int) { 195 | const const_fixed_iterator tmp(*this); 196 | ++(*this); 197 | return tmp; 198 | } 199 | 200 | /* Two iterators are equal only if both their position and end pointers match. */ bool operator==(const const_fixed_iterator& rhs) const noexcept { 201 | return m_data == rhs.m_data && m_end == rhs.m_end; 202 | } 203 | 204 | bool operator!=(const const_fixed_iterator& rhs) const noexcept { 205 | return !(*this == rhs); 206 | } 207 | 208 | }; // class const_fixed_iterator 209 | 210 | /** 211 | * A forward iterator used for accessing packed repeated varint fields 212 | * (int32, uint32, int64,
uint64, bool, enum). 213 | */ 214 | template 215 | class const_varint_iterator { 216 | 217 | protected: 218 | 219 | /// Pointer to current iterator position 220 | const char* m_data; 221 | 222 | /// Pointer to end iterator position 223 | const char* m_end; 224 | 225 | public: 226 | 227 | using iterator_category = std::forward_iterator_tag; 228 | using value_type = T; 229 | using difference_type = std::ptrdiff_t; 230 | using pointer = value_type*; 231 | using reference = value_type&; 232 | 233 | const_varint_iterator() noexcept : 234 | m_data(nullptr), 235 | m_end(nullptr) { 236 | } 237 | 238 | const_varint_iterator(const char* data, const char* end) noexcept : 239 | m_data(data), 240 | m_end(end) { 241 | } 242 | 243 | const_varint_iterator(const const_varint_iterator&) noexcept = default; 244 | const_varint_iterator(const_varint_iterator&&) noexcept = default; 245 | 246 | const_varint_iterator& operator=(const const_varint_iterator&) noexcept = default; 247 | const_varint_iterator& operator=(const_varint_iterator&&) noexcept = default; 248 | 249 | ~const_varint_iterator() noexcept = default; 250 | 251 | value_type operator*() const { 252 | const char* d = m_data; // local copy, so decoding does not move m_data 253 | return static_cast(decode_varint(&d, m_end)); 254 | } 255 | 256 | /* Moves m_data past the current varint via skip_varint(). */ const_varint_iterator& operator++() { 257 | skip_varint(&m_data, m_end); 258 | return *this; 259 | } 260 | 261 | const_varint_iterator operator++(int) { 262 | const const_varint_iterator tmp(*this); 263 | ++(*this); 264 | return tmp; 265 | } 266 | 267 | bool operator==(const const_varint_iterator& rhs) const noexcept { 268 | return m_data == rhs.m_data && m_end == rhs.m_end; 269 | } 270 | 271 | bool operator!=(const const_varint_iterator& rhs) const noexcept { 272 | return !(*this == rhs); 273 | } 274 | 275 | }; // class const_varint_iterator 276 | 277 | /** 278 | * A forward iterator used for accessing packed repeated svarint fields 279 | * (sint32, sint64).
280 | */ 281 | template 282 | class const_svarint_iterator : public const_varint_iterator { 283 | 284 | public: 285 | 286 | using iterator_category = std::forward_iterator_tag; 287 | using value_type = T; 288 | using difference_type = std::ptrdiff_t; 289 | using pointer = value_type*; 290 | using reference = value_type&; 291 | 292 | const_svarint_iterator() noexcept : 293 | const_varint_iterator() { 294 | } 295 | 296 | const_svarint_iterator(const char* data, const char* end) noexcept : 297 | const_varint_iterator(data, end) { 298 | } 299 | 300 | const_svarint_iterator(const const_svarint_iterator&) = default; 301 | const_svarint_iterator(const_svarint_iterator&&) = default; 302 | 303 | const_svarint_iterator& operator=(const const_svarint_iterator&) = default; 304 | const_svarint_iterator& operator=(const_svarint_iterator&&) = default; 305 | 306 | ~const_svarint_iterator() = default; 307 | 308 | /* Decodes the varint at the current position, passes it through decode_zigzag64() and casts the result; the iterator position is unchanged. */ value_type operator*() const { 309 | const char* d = this->m_data; // local copy, so decoding does not move m_data 310 | return static_cast(decode_zigzag64(decode_varint(&d, this->m_end))); 311 | } 312 | 313 | const_svarint_iterator& operator++() { 314 | skip_varint(&this->m_data, this->m_end); 315 | return *this; 316 | } 317 | 318 | const_svarint_iterator operator++(int) { 319 | const const_svarint_iterator tmp(*this); 320 | ++(*this); 321 | return tmp; 322 | } 323 | 324 | }; // class const_svarint_iterator 325 | 326 | } // end namespace protozero 327 | 328 | #endif // PROTOZERO_ITERATORS_HPP 329 | -------------------------------------------------------------------------------- /protozero/pbf_builder.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PROTOZERO_PBF_BUILDER_HPP 2 | #define PROTOZERO_PBF_BUILDER_HPP 3 | 4 | /***************************************************************************** 5 | 6 | protozero - Minimalistic protocol buffer decoder and encoder in C++.
7 | 8 | This file is from https://github.com/mapbox/protozero where you can find more 9 | documentation. 10 | 11 | *****************************************************************************/ 12 | 13 | /** 14 | * @file pbf_builder.hpp 15 | * 16 | * @brief Contains the pbf_builder template class. 17 | */ 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | namespace protozero { 25 | 26 | /** 27 | * The pbf_builder is used to write PBF formatted messages into a buffer. It 28 | * is based on the pbf_writer class and has all the same methods. The 29 | * difference is that while the pbf_writer class takes an integer tag, 30 | * this template class takes a tag of the template type T. The idea is that 31 | * T will be an enumeration value and this helps reduce the possibility of 32 | * programming errors. 33 | * 34 | * Almost all methods in this class can throw an std::bad_alloc exception if 35 | * the std::string used as a buffer wants to resize. 36 | * 37 | * Read the tutorial to understand how this class is used. 
38 | */ 39 | template 40 | class pbf_builder : public pbf_writer { 41 | 42 | static_assert(std::is_same::type>::value, 43 | "T must be enum with underlying type protozero::pbf_tag_type"); 44 | 45 | public: 46 | 47 | using enum_type = T; 48 | 49 | /* Create a builder that forwards to pbf_writer(data). */ explicit pbf_builder(std::string& data) noexcept : 50 | pbf_writer(data) { 51 | } 52 | 53 | /* Create a builder for a submessage of parent_writer; the tag is converted with pbf_tag_type(tag). NOTE(review): declared noexcept although it delegates to a pbf_writer constructor that is not noexcept - confirm this is intended. */ template 54 | pbf_builder(pbf_writer& parent_writer, P tag) noexcept : 55 | pbf_writer(parent_writer, pbf_tag_type(tag)) { 56 | } 57 | 58 | /// @cond INTERNAL 59 | #define PROTOZERO_WRITER_WRAP_ADD_SCALAR(name, type) \ 60 | void add_##name(T tag, type value) { \ 61 | pbf_writer::add_##name(pbf_tag_type(tag), value); \ 62 | } 63 | 64 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(bool, bool) 65 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(enum, int32_t) 66 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(int32, int32_t) 67 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(sint32, int32_t) 68 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(uint32, uint32_t) 69 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(int64, int64_t) 70 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(sint64, int64_t) 71 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(uint64, uint64_t) 72 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(fixed32, uint32_t) 73 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(sfixed32, int32_t) 74 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(fixed64, uint64_t) 75 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(sfixed64, int64_t) 76 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(float, float) 77 | PROTOZERO_WRITER_WRAP_ADD_SCALAR(double, double) 78 | 79 | #undef PROTOZERO_WRITER_WRAP_ADD_SCALAR 80 | /// @endcond 81 | 82 | /* The add_* methods from here on convert the tag with pbf_tag_type(tag) and forward to the pbf_writer method of the same name. */ void add_bytes(T tag, const char* value, std::size_t size) { 83 | pbf_writer::add_bytes(pbf_tag_type(tag), value, size); 84 | } 85 | 86 | void add_bytes(T tag, const data_view& value) { 87 | pbf_writer::add_bytes(pbf_tag_type(tag), value); 88 | } 89 | 90 | void add_bytes(T tag, const std::string& value) { 91 | pbf_writer::add_bytes(pbf_tag_type(tag), value); 92 | } 93 | 94 | void add_bytes(T tag, const char* value) { 95 | pbf_writer::add_bytes(pbf_tag_type(tag), value); 96 | }
97 | 98 | template 99 | void add_bytes_vectored(T tag, Ts&&... values) { 100 | pbf_writer::add_bytes_vectored(pbf_tag_type(tag), std::forward(values)...); 101 | } 102 | 103 | void add_string(T tag, const char* value, std::size_t size) { 104 | pbf_writer::add_string(pbf_tag_type(tag), value, size); 105 | } 106 | 107 | void add_string(T tag, const data_view& value) { 108 | pbf_writer::add_string(pbf_tag_type(tag), value); 109 | } 110 | 111 | void add_string(T tag, const std::string& value) { 112 | pbf_writer::add_string(pbf_tag_type(tag), value); 113 | } 114 | 115 | void add_string(T tag, const char* value) { 116 | pbf_writer::add_string(pbf_tag_type(tag), value); 117 | } 118 | 119 | void add_message(T tag, const char* value, std::size_t size) { 120 | pbf_writer::add_message(pbf_tag_type(tag), value, size); 121 | } 122 | 123 | void add_message(T tag, const data_view& value) { 124 | pbf_writer::add_message(pbf_tag_type(tag), value); 125 | } 126 | 127 | void add_message(T tag, const std::string& value) { 128 | pbf_writer::add_message(pbf_tag_type(tag), value); 129 | } 130 | 131 | /// @cond INTERNAL 132 | #define PROTOZERO_WRITER_WRAP_ADD_PACKED(name) \ 133 | template \ 134 | void add_packed_##name(T tag, InputIterator first, InputIterator last) { \ 135 | pbf_writer::add_packed_##name(pbf_tag_type(tag), first, last); \ 136 | } 137 | 138 | PROTOZERO_WRITER_WRAP_ADD_PACKED(bool) 139 | PROTOZERO_WRITER_WRAP_ADD_PACKED(enum) 140 | PROTOZERO_WRITER_WRAP_ADD_PACKED(int32) 141 | PROTOZERO_WRITER_WRAP_ADD_PACKED(sint32) 142 | PROTOZERO_WRITER_WRAP_ADD_PACKED(uint32) 143 | PROTOZERO_WRITER_WRAP_ADD_PACKED(int64) 144 | PROTOZERO_WRITER_WRAP_ADD_PACKED(sint64) 145 | PROTOZERO_WRITER_WRAP_ADD_PACKED(uint64) 146 | PROTOZERO_WRITER_WRAP_ADD_PACKED(fixed32) 147 | PROTOZERO_WRITER_WRAP_ADD_PACKED(sfixed32) 148 | PROTOZERO_WRITER_WRAP_ADD_PACKED(fixed64) 149 | PROTOZERO_WRITER_WRAP_ADD_PACKED(sfixed64) 150 | PROTOZERO_WRITER_WRAP_ADD_PACKED(float) 151 | 
PROTOZERO_WRITER_WRAP_ADD_PACKED(double) 152 | 153 | #undef PROTOZERO_WRITER_WRAP_ADD_PACKED 154 | /// @endcond 155 | 156 | }; // class pbf_builder 157 | 158 | } // end namespace protozero 159 | 160 | #endif // PROTOZERO_PBF_BUILDER_HPP 161 | -------------------------------------------------------------------------------- /protozero/pbf_message.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PROTOZERO_PBF_MESSAGE_HPP 2 | #define PROTOZERO_PBF_MESSAGE_HPP 3 | 4 | /***************************************************************************** 5 | 6 | protozero - Minimalistic protocol buffer decoder and encoder in C++. 7 | 8 | This file is from https://github.com/mapbox/protozero where you can find more 9 | documentation. 10 | 11 | *****************************************************************************/ 12 | 13 | /** 14 | * @file pbf_message.hpp 15 | * 16 | * @brief Contains the pbf_message template class. 17 | */ 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | namespace protozero { 25 | 26 | /** 27 | * This class represents a protobuf message. Either a top-level message or 28 | * a nested sub-message. Top-level messages can be created from any buffer 29 | * with a pointer and length: 30 | * 31 | * @code 32 | * enum class Message : protozero::pbf_tag_type { 33 | * ... 34 | * }; 35 | * 36 | * std::string buffer; 37 | * // fill buffer... 38 | * pbf_message message(buffer.data(), buffer.size()); 39 | * @endcode 40 | * 41 | * Sub-messages are created using get_message(): 42 | * 43 | * @code 44 | * enum class SubMessage : protozero::pbf_tag_type { 45 | * ...
46 | * }; 47 | * 48 | * pbf_message message(...); 49 | * message.next(); 50 | * pbf_message submessage = message.get_message(); 51 | * @endcode 52 | * 53 | * All methods of the pbf_message class except get_bytes() and get_string() 54 | * provide the strong exception guarantee, i.e. they either succeed or do not 55 | * change the pbf_message object they are called on. Use the get_data() method 56 | * instead of get_bytes() or get_string(), if you need this guarantee. 57 | * 58 | * This template class is based on the pbf_reader class and has all the same 59 | * methods. The difference is that wherever the pbf_reader class takes an 60 | * integer tag, this template class takes a tag of the template type T. 61 | * 62 | * Read the tutorial to understand how this class is used. 63 | */ 64 | template 65 | class pbf_message : public pbf_reader { 66 | 67 | static_assert(std::is_same::type>::value, "T must be enum with underlying type protozero::pbf_tag_type"); 68 | 69 | public: 70 | 71 | using enum_type = T; 72 | 73 | template 74 | pbf_message(Args&&... args) noexcept : 75 | pbf_reader(std::forward(args)...)
{ 76 | } 77 | 78 | /* The next() overloads forward to pbf_reader::next(), converting an enum tag with pbf_tag_type(next_tag). */ bool next() { 79 | return pbf_reader::next(); 80 | } 81 | 82 | bool next(T next_tag) { 83 | return pbf_reader::next(pbf_tag_type(next_tag)); 84 | } 85 | 86 | bool next(T next_tag, pbf_wire_type type) { 87 | return pbf_reader::next(pbf_tag_type(next_tag), type); 88 | } 89 | 90 | /* Returns pbf_reader::tag() converted to T. */ T tag() const noexcept { 91 | return T(pbf_reader::tag()); 92 | } 93 | 94 | }; // class pbf_message 95 | 96 | } // end namespace protozero 97 | 98 | #endif // PROTOZERO_PBF_MESSAGE_HPP 99 | -------------------------------------------------------------------------------- /protozero/pbf_writer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PROTOZERO_PBF_WRITER_HPP 2 | #define PROTOZERO_PBF_WRITER_HPP 3 | 4 | /***************************************************************************** 5 | 6 | protozero - Minimalistic protocol buffer decoder and encoder in C++. 7 | 8 | This file is from https://github.com/mapbox/protozero where you can find more 9 | documentation. 10 | 11 | *****************************************************************************/ 12 | 13 | /** 14 | * @file pbf_writer.hpp 15 | * 16 | * @brief Contains the pbf_writer class. 17 | */ 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN 32 | # include 33 | #endif 34 | 35 | namespace protozero { 36 | 37 | namespace detail { 38 | 39 | template class packed_field_varint; 40 | template class packed_field_svarint; 41 | template class packed_field_fixed; 42 | 43 | } // end namespace detail 44 | 45 | /** 46 | * The pbf_writer is used to write PBF formatted messages into a buffer. 47 | * 48 | * Almost all methods in this class can throw an std::bad_alloc exception if 49 | * the std::string used as a buffer wants to resize.
50 | */ 51 | class pbf_writer { 52 | 53 | // A pointer to a string buffer holding the data already written to the 54 | // PBF message. For default constructed writers or writers that have been 55 | // rolled back, this is a nullptr. 56 | std::string* m_data; 57 | 58 | // A pointer to a parent writer object if this is a submessage. If this 59 | // is a top-level writer, it is a nullptr. 60 | pbf_writer* m_parent_writer; 61 | 62 | // This is usually 0. If there is an open submessage, this is set in the 63 | // parent to the rollback position, ie. the last position before the 64 | // submessage was started. This is the position where the header of the 65 | // submessage starts. 66 | std::size_t m_rollback_pos = 0; 67 | 68 | // This is usually 0. If there is an open submessage, this is set in the 69 | // parent to the position where the data of the submessage is written to. 70 | std::size_t m_pos = 0; 71 | 72 | void add_varint(uint64_t value) { 73 | protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage"); 74 | protozero_assert(m_data); 75 | write_varint(std::back_inserter(*m_data), value); 76 | } 77 | 78 | void add_field(pbf_tag_type tag, pbf_wire_type type) { 79 | protozero_assert(((tag > 0 && tag < 19000) || (tag > 19999 && tag <= ((1 << 29) - 1))) && "tag out of range"); 80 | const uint32_t b = (tag << 3) | uint32_t(type); 81 | add_varint(b); 82 | } 83 | 84 | void add_tagged_varint(pbf_tag_type tag, uint64_t value) { 85 | add_field(tag, pbf_wire_type::varint); 86 | add_varint(value); 87 | } 88 | 89 | template 90 | void add_fixed(T value) { 91 | protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage"); 92 | protozero_assert(m_data); 93 | #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN 94 | detail::byteswap_inplace(&value); 95 | #endif 96 | m_data->append(reinterpret_cast(&value), sizeof(T)); 97 | } 98 | 99 | template 
100 | void add_packed_fixed(pbf_tag_type tag, It first, It last, std::input_iterator_tag) { 101 | if (first == last) { 102 | return; 103 | } 104 | 105 | pbf_writer sw(*this, tag); 106 | 107 | while (first != last) { 108 | sw.add_fixed(*first++); 109 | } 110 | } 111 | 112 | template 113 | void add_packed_fixed(pbf_tag_type tag, It first, It last, std::forward_iterator_tag) { 114 | if (first == last) { 115 | return; 116 | } 117 | 118 | const auto length = std::distance(first, last); 119 | add_length_varint(tag, sizeof(T) * pbf_length_type(length)); 120 | reserve(sizeof(T) * std::size_t(length)); 121 | 122 | while (first != last) { 123 | add_fixed(*first++); 124 | } 125 | } 126 | 127 | template 128 | void add_packed_varint(pbf_tag_type tag, It first, It last) { 129 | if (first == last) { 130 | return; 131 | } 132 | 133 | pbf_writer sw(*this, tag); 134 | 135 | while (first != last) { 136 | sw.add_varint(uint64_t(*first++)); 137 | } 138 | } 139 | 140 | template 141 | void add_packed_svarint(pbf_tag_type tag, It first, It last) { 142 | if (first == last) { 143 | return; 144 | } 145 | 146 | pbf_writer sw(*this, tag); 147 | 148 | while (first != last) { 149 | sw.add_varint(encode_zigzag64(*first++)); 150 | } 151 | } 152 | 153 | // The number of bytes to reserve for the varint holding the length of 154 | // a length-delimited field. The length has to fit into pbf_length_type, 155 | // and a varint needs 8 bit for every 7 bit. 156 | enum constant_reserve_bytes : int { 157 | reserve_bytes = sizeof(pbf_length_type) * 8 / 7 + 1 158 | }; 159 | 160 | // If m_rollback_pos is set to this special value, it means that when 161 | // the submessage is closed, nothing needs to be done, because the length 162 | // of the submessage has already been written correctly.
163 | enum constant_size_is_known : std::size_t { 164 | size_is_known = std::numeric_limits::max() 165 | }; 166 | 167 | void open_submessage(pbf_tag_type tag, std::size_t size) { 168 | protozero_assert(m_pos == 0); 169 | protozero_assert(m_data); 170 | if (size == 0) { 171 | m_rollback_pos = m_data->size(); 172 | add_field(tag, pbf_wire_type::length_delimited); 173 | m_data->append(std::size_t(reserve_bytes), '\0'); 174 | } else { 175 | m_rollback_pos = size_is_known; 176 | add_length_varint(tag, pbf_length_type(size)); 177 | reserve(size); 178 | } 179 | m_pos = m_data->size(); 180 | } 181 | 182 | void rollback_submessage() { 183 | protozero_assert(m_pos != 0); 184 | protozero_assert(m_rollback_pos != size_is_known); 185 | protozero_assert(m_data); 186 | m_data->resize(m_rollback_pos); 187 | m_pos = 0; 188 | } 189 | 190 | void commit_submessage() { 191 | protozero_assert(m_pos != 0); 192 | protozero_assert(m_rollback_pos != size_is_known); 193 | protozero_assert(m_data); 194 | const auto length = pbf_length_type(m_data->size() - m_pos); 195 | 196 | protozero_assert(m_data->size() >= m_pos - reserve_bytes); 197 | const auto n = write_varint(m_data->begin() + long(m_pos) - reserve_bytes, length); 198 | 199 | m_data->erase(m_data->begin() + long(m_pos) - reserve_bytes + n, m_data->begin() + long(m_pos)); 200 | m_pos = 0; 201 | } 202 | 203 | void close_submessage() { 204 | protozero_assert(m_data); 205 | if (m_pos == 0 || m_rollback_pos == size_is_known) { 206 | return; 207 | } 208 | if (m_data->size() - m_pos == 0) { 209 | rollback_submessage(); 210 | } else { 211 | commit_submessage(); 212 | } 213 | } 214 | 215 | void add_length_varint(pbf_tag_type tag, pbf_length_type length) { 216 | add_field(tag, pbf_wire_type::length_delimited); 217 | add_varint(length); 218 | } 219 | 220 | public: 221 | 222 | /** 223 | * Create a writer using the given string as a data store. The pbf_writer 224 | * stores a reference to that string and adds all data to it. 
The string 225 | * doesn't have to be empty. The pbf_writer will just append data. 226 | */ 227 | explicit pbf_writer(std::string& data) noexcept : 228 | m_data(&data), 229 | m_parent_writer(nullptr) { 230 | } 231 | 232 | /** 233 | * Create a writer without a data store. In this form the writer can not 234 | * be used! 235 | */ 236 | pbf_writer() noexcept : 237 | m_data(nullptr), 238 | m_parent_writer(nullptr) { 239 | } 240 | 241 | /** 242 | * Construct a pbf_writer for a submessage from the pbf_writer of the 243 | * parent message. 244 | * 245 | * @param parent_writer The pbf_writer 246 | * @param tag Tag (field number) of the field that will be written 247 | * @param size Optional size of the submessage in bytes (use 0 for unknown). 248 | * Setting this allows some optimizations but is only possible in 249 | * a few very specific cases. 250 | */ 251 | pbf_writer(pbf_writer& parent_writer, pbf_tag_type tag, std::size_t size=0) : 252 | m_data(parent_writer.m_data), 253 | m_parent_writer(&parent_writer) { 254 | m_parent_writer->open_submessage(tag, size); 255 | } 256 | 257 | /// A pbf_writer object can be copied 258 | pbf_writer(const pbf_writer&) noexcept = default; 259 | 260 | /// A pbf_writer object can be copied 261 | pbf_writer& operator=(const pbf_writer&) noexcept = default; 262 | 263 | /// A pbf_writer object can be moved 264 | pbf_writer(pbf_writer&&) noexcept = default; 265 | 266 | /// A pbf_writer object can be moved 267 | pbf_writer& operator=(pbf_writer&&) noexcept = default; 268 | 269 | ~pbf_writer() { 270 | if (m_parent_writer) { 271 | m_parent_writer->close_submessage(); 272 | } 273 | } 274 | 275 | /** 276 | * Swap the contents of this object with the other. 277 | * 278 | * @param other Other object to swap data with. 
279 | */ 280 | void swap(pbf_writer& other) noexcept { 281 | using std::swap; 282 | swap(m_data, other.m_data); 283 | swap(m_parent_writer, other.m_parent_writer); 284 | swap(m_rollback_pos, other.m_rollback_pos); 285 | swap(m_pos, other.m_pos); 286 | } 287 | 288 | /** 289 | * Reserve size bytes in the underlying message store in addition to 290 | * whatever the message store already holds. So unlike 291 | * the `std::string::reserve()` method this is not an absolute size, 292 | * but additional memory that should be reserved. 293 | * 294 | * @param size Number of bytes to reserve in underlying message store. 295 | */ 296 | void reserve(std::size_t size) { 297 | protozero_assert(m_data); 298 | m_data->reserve(m_data->size() + size); 299 | } 300 | 301 | /** 302 | * Cancel writing of this submessage. The complete submessage will be 303 | * removed as if it was never created and no fields were added. 304 | * 305 | * @pre Must be a pbf_writer of a submessage, ie one opened with the 306 | * pbf_writer constructor taking a parent message. 307 | */ 308 | void rollback() { 309 | protozero_assert(m_parent_writer && "you can't call rollback() on a pbf_writer without a parent"); 310 | protozero_assert(m_pos == 0 && "you can't call rollback() on a pbf_writer that has an open nested submessage"); 311 | m_parent_writer->rollback_submessage(); 312 | m_data = nullptr; 313 | } 314 | 315 | ///@{ 316 | /** 317 | * @name Scalar field writer functions 318 | */ 319 | 320 | /** 321 | * Add "bool" field to data. 322 | * 323 | * @param tag Tag (field number) of the field 324 | * @param value Value to be written 325 | */ 326 | void add_bool(pbf_tag_type tag, bool value) { 327 | add_field(tag, pbf_wire_type::varint); 328 | protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage"); 329 | protozero_assert(m_data); 330 | m_data->append(1, value); 331 | } 332 | 333 | /** 334 | * Add "enum" field to data. 
335 | * 336 | * @param tag Tag (field number) of the field 337 | * @param value Value to be written 338 | */ 339 | void add_enum(pbf_tag_type tag, int32_t value) { 340 | add_tagged_varint(tag, uint64_t(value)); 341 | } 342 | 343 | /** 344 | * Add "int32" field to data. 345 | * 346 | * @param tag Tag (field number) of the field 347 | * @param value Value to be written 348 | */ 349 | void add_int32(pbf_tag_type tag, int32_t value) { 350 | add_tagged_varint(tag, uint64_t(value)); 351 | } 352 | 353 | /** 354 | * Add "sint32" field to data. 355 | * 356 | * @param tag Tag (field number) of the field 357 | * @param value Value to be written 358 | */ 359 | void add_sint32(pbf_tag_type tag, int32_t value) { 360 | add_tagged_varint(tag, encode_zigzag32(value)); 361 | } 362 | 363 | /** 364 | * Add "uint32" field to data. 365 | * 366 | * @param tag Tag (field number) of the field 367 | * @param value Value to be written 368 | */ 369 | void add_uint32(pbf_tag_type tag, uint32_t value) { 370 | add_tagged_varint(tag, value); 371 | } 372 | 373 | /** 374 | * Add "int64" field to data. 375 | * 376 | * @param tag Tag (field number) of the field 377 | * @param value Value to be written 378 | */ 379 | void add_int64(pbf_tag_type tag, int64_t value) { 380 | add_tagged_varint(tag, uint64_t(value)); 381 | } 382 | 383 | /** 384 | * Add "sint64" field to data. 385 | * 386 | * @param tag Tag (field number) of the field 387 | * @param value Value to be written 388 | */ 389 | void add_sint64(pbf_tag_type tag, int64_t value) { 390 | add_tagged_varint(tag, encode_zigzag64(value)); 391 | } 392 | 393 | /** 394 | * Add "uint64" field to data. 395 | * 396 | * @param tag Tag (field number) of the field 397 | * @param value Value to be written 398 | */ 399 | void add_uint64(pbf_tag_type tag, uint64_t value) { 400 | add_tagged_varint(tag, value); 401 | } 402 | 403 | /** 404 | * Add "fixed32" field to data. 
405 | * 406 | * @param tag Tag (field number) of the field 407 | * @param value Value to be written 408 | */ 409 | void add_fixed32(pbf_tag_type tag, uint32_t value) { 410 | add_field(tag, pbf_wire_type::fixed32); 411 | add_fixed(value); 412 | } 413 | 414 | /** 415 | * Add "sfixed32" field to data. 416 | * 417 | * @param tag Tag (field number) of the field 418 | * @param value Value to be written 419 | */ 420 | void add_sfixed32(pbf_tag_type tag, int32_t value) { 421 | add_field(tag, pbf_wire_type::fixed32); 422 | add_fixed(value); 423 | } 424 | 425 | /** 426 | * Add "fixed64" field to data. 427 | * 428 | * @param tag Tag (field number) of the field 429 | * @param value Value to be written 430 | */ 431 | void add_fixed64(pbf_tag_type tag, uint64_t value) { 432 | add_field(tag, pbf_wire_type::fixed64); 433 | add_fixed(value); 434 | } 435 | 436 | /** 437 | * Add "sfixed64" field to data. 438 | * 439 | * @param tag Tag (field number) of the field 440 | * @param value Value to be written 441 | */ 442 | void add_sfixed64(pbf_tag_type tag, int64_t value) { 443 | add_field(tag, pbf_wire_type::fixed64); 444 | add_fixed(value); 445 | } 446 | 447 | /** 448 | * Add "float" field to data. 449 | * 450 | * @param tag Tag (field number) of the field 451 | * @param value Value to be written 452 | */ 453 | void add_float(pbf_tag_type tag, float value) { 454 | add_field(tag, pbf_wire_type::fixed32); 455 | add_fixed(value); 456 | } 457 | 458 | /** 459 | * Add "double" field to data. 460 | * 461 | * @param tag Tag (field number) of the field 462 | * @param value Value to be written 463 | */ 464 | void add_double(pbf_tag_type tag, double value) { 465 | add_field(tag, pbf_wire_type::fixed64); 466 | add_fixed(value); 467 | } 468 | 469 | /** 470 | * Add "bytes" field to data. 
471 | * 472 | * @param tag Tag (field number) of the field 473 | * @param value Pointer to value to be written 474 | * @param size Number of bytes to be written 475 | */ 476 | void add_bytes(pbf_tag_type tag, const char* value, std::size_t size) { 477 | protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage"); 478 | protozero_assert(m_data); 479 | protozero_assert(size <= std::numeric_limits::max()); 480 | add_length_varint(tag, pbf_length_type(size)); 481 | m_data->append(value, size); 482 | } 483 | 484 | /** 485 | * Add "bytes" field to data. 486 | * 487 | * @param tag Tag (field number) of the field 488 | * @param value Value to be written 489 | */ 490 | void add_bytes(pbf_tag_type tag, const data_view& value) { 491 | add_bytes(tag, value.data(), value.size()); 492 | } 493 | 494 | /** 495 | * Add "bytes" field to data. 496 | * 497 | * @param tag Tag (field number) of the field 498 | * @param value Value to be written 499 | */ 500 | void add_bytes(pbf_tag_type tag, const std::string& value) { 501 | add_bytes(tag, value.data(), value.size()); 502 | } 503 | 504 | /** 505 | * Add "bytes" field to data. Bytes from the value are written until 506 | * a null byte is encountered. The null byte is not added. 507 | * 508 | * @param tag Tag (field number) of the field 509 | * @param value Pointer to zero-delimited value to be written 510 | */ 511 | void add_bytes(pbf_tag_type tag, const char* value) { 512 | add_bytes(tag, value, std::strlen(value)); 513 | } 514 | 515 | /** 516 | * Add "bytes" field to data using vectored input. All the data in the 517 | * 2nd and further arguments is "concatenated" with only a single copy 518 | * into the final buffer. 519 | * 520 | * This will work with objects of any type supporting the data() and 521 | * size() methods like std::string or protozero::data_view. 
522 | * 523 | * Example: 524 | * @code 525 | * std::string data1 = "abc"; 526 | * std::string data2 = "xyz"; 527 | * writer.add_bytes_vectored(1, data1, data2); 528 | * @endcode 529 | * 530 | * @tparam Ts List of types supporting data() and size() methods. 531 | * @param tag Tag (field number) of the field 532 | * @param values List of objects of types Ts with data to be appended. 533 | */ 534 | template 535 | void add_bytes_vectored(pbf_tag_type tag, Ts&&... values) { 536 | protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage"); 537 | protozero_assert(m_data); 538 | size_t sum_size = 0; 539 | (void)std::initializer_list{sum_size += values.size()...}; 540 | protozero_assert(sum_size <= std::numeric_limits::max()); 541 | add_length_varint(tag, pbf_length_type(sum_size)); 542 | m_data->reserve(m_data->size() + sum_size); 543 | (void)std::initializer_list{(m_data->append(values.data(), values.size()), 0)...}; 544 | } 545 | 546 | /** 547 | * Add "string" field to data. 548 | * 549 | * @param tag Tag (field number) of the field 550 | * @param value Pointer to value to be written 551 | * @param size Number of bytes to be written 552 | */ 553 | void add_string(pbf_tag_type tag, const char* value, std::size_t size) { 554 | add_bytes(tag, value, size); 555 | } 556 | 557 | /** 558 | * Add "string" field to data. 559 | * 560 | * @param tag Tag (field number) of the field 561 | * @param value Value to be written 562 | */ 563 | void add_string(pbf_tag_type tag, const data_view& value) { 564 | add_bytes(tag, value.data(), value.size()); 565 | } 566 | 567 | /** 568 | * Add "string" field to data. 569 | * 570 | * @param tag Tag (field number) of the field 571 | * @param value Value to be written 572 | */ 573 | void add_string(pbf_tag_type tag, const std::string& value) { 574 | add_bytes(tag, value.data(), value.size()); 575 | } 576 | 577 | /** 578 | * Add "string" field to data. 
Bytes from the value are written until 579 | * a null byte is encountered. The null byte is not added. 580 | * 581 | * @param tag Tag (field number) of the field 582 | * @param value Pointer to value to be written 583 | */ 584 | void add_string(pbf_tag_type tag, const char* value) { 585 | add_bytes(tag, value, std::strlen(value)); 586 | } 587 | 588 | /** 589 | * Add "message" field to data. 590 | * 591 | * @param tag Tag (field number) of the field 592 | * @param value Pointer to message to be written 593 | * @param size Length of the message 594 | */ 595 | void add_message(pbf_tag_type tag, const char* value, std::size_t size) { 596 | add_bytes(tag, value, size); 597 | } 598 | 599 | /** 600 | * Add "message" field to data. 601 | * 602 | * @param tag Tag (field number) of the field 603 | * @param value Value to be written. The value must be a complete message. 604 | */ 605 | void add_message(pbf_tag_type tag, const data_view& value) { 606 | add_bytes(tag, value.data(), value.size()); 607 | } 608 | 609 | /** 610 | * Add "message" field to data. 611 | * 612 | * @param tag Tag (field number) of the field 613 | * @param value Value to be written. The value must be a complete message. 614 | */ 615 | void add_message(pbf_tag_type tag, const std::string& value) { 616 | add_bytes(tag, value.data(), value.size()); 617 | } 618 | 619 | ///@} 620 | 621 | ///@{ 622 | /** 623 | * @name Repeated packed field writer functions 624 | */ 625 | 626 | /** 627 | * Add "repeated packed bool" field to data. 628 | * 629 | * @tparam InputIterator A type satisfying the InputIterator concept. 630 | * Dereferencing the iterator must yield a type assignable to bool. 
631 | * @param tag Tag (field number) of the field 632 | * @param first Iterator pointing to the beginning of the data 633 | * @param last Iterator pointing one past the end of data 634 | */ 635 | template 636 | void add_packed_bool(pbf_tag_type tag, InputIterator first, InputIterator last) { 637 | add_packed_varint(tag, first, last); 638 | } 639 | 640 | /** 641 | * Add "repeated packed enum" field to data. 642 | * 643 | * @tparam InputIterator A type satisfying the InputIterator concept. 644 | * Dereferencing the iterator must yield a type assignable to int32_t. 645 | * @param tag Tag (field number) of the field 646 | * @param first Iterator pointing to the beginning of the data 647 | * @param last Iterator pointing one past the end of data 648 | */ 649 | template 650 | void add_packed_enum(pbf_tag_type tag, InputIterator first, InputIterator last) { 651 | add_packed_varint(tag, first, last); 652 | } 653 | 654 | /** 655 | * Add "repeated packed int32" field to data. 656 | * 657 | * @tparam InputIterator A type satisfying the InputIterator concept. 658 | * Dereferencing the iterator must yield a type assignable to int32_t. 659 | * @param tag Tag (field number) of the field 660 | * @param first Iterator pointing to the beginning of the data 661 | * @param last Iterator pointing one past the end of data 662 | */ 663 | template 664 | void add_packed_int32(pbf_tag_type tag, InputIterator first, InputIterator last) { 665 | add_packed_varint(tag, first, last); 666 | } 667 | 668 | /** 669 | * Add "repeated packed sint32" field to data. 670 | * 671 | * @tparam InputIterator A type satisfying the InputIterator concept. 672 | * Dereferencing the iterator must yield a type assignable to int32_t. 
673 | * @param tag Tag (field number) of the field 674 | * @param first Iterator pointing to the beginning of the data 675 | * @param last Iterator pointing one past the end of data 676 | */ 677 | template 678 | void add_packed_sint32(pbf_tag_type tag, InputIterator first, InputIterator last) { 679 | add_packed_svarint(tag, first, last); 680 | } 681 | 682 | /** 683 | * Add "repeated packed uint32" field to data. 684 | * 685 | * @tparam InputIterator A type satisfying the InputIterator concept. 686 | * Dereferencing the iterator must yield a type assignable to uint32_t. 687 | * @param tag Tag (field number) of the field 688 | * @param first Iterator pointing to the beginning of the data 689 | * @param last Iterator pointing one past the end of data 690 | */ 691 | template 692 | void add_packed_uint32(pbf_tag_type tag, InputIterator first, InputIterator last) { 693 | add_packed_varint(tag, first, last); 694 | } 695 | 696 | /** 697 | * Add "repeated packed int64" field to data. 698 | * 699 | * @tparam InputIterator A type satisfying the InputIterator concept. 700 | * Dereferencing the iterator must yield a type assignable to int64_t. 701 | * @param tag Tag (field number) of the field 702 | * @param first Iterator pointing to the beginning of the data 703 | * @param last Iterator pointing one past the end of data 704 | */ 705 | template 706 | void add_packed_int64(pbf_tag_type tag, InputIterator first, InputIterator last) { 707 | add_packed_varint(tag, first, last); 708 | } 709 | 710 | /** 711 | * Add "repeated packed sint64" field to data. 712 | * 713 | * @tparam InputIterator A type satisfying the InputIterator concept. 714 | * Dereferencing the iterator must yield a type assignable to int64_t. 
715 | * @param tag Tag (field number) of the field 716 | * @param first Iterator pointing to the beginning of the data 717 | * @param last Iterator pointing one past the end of data 718 | */ 719 | template 720 | void add_packed_sint64(pbf_tag_type tag, InputIterator first, InputIterator last) { 721 | add_packed_svarint(tag, first, last); 722 | } 723 | 724 | /** 725 | * Add "repeated packed uint64" field to data. 726 | * 727 | * @tparam InputIterator A type satisfying the InputIterator concept. 728 | * Dereferencing the iterator must yield a type assignable to uint64_t. 729 | * @param tag Tag (field number) of the field 730 | * @param first Iterator pointing to the beginning of the data 731 | * @param last Iterator pointing one past the end of data 732 | */ 733 | template 734 | void add_packed_uint64(pbf_tag_type tag, InputIterator first, InputIterator last) { 735 | add_packed_varint(tag, first, last); 736 | } 737 | 738 | /** 739 | * Add "repeated packed fixed32" field to data. 740 | * 741 | * @tparam InputIterator A type satisfying the InputIterator concept. 742 | * Dereferencing the iterator must yield a type assignable to uint32_t. 743 | * @param tag Tag (field number) of the field 744 | * @param first Iterator pointing to the beginning of the data 745 | * @param last Iterator pointing one past the end of data 746 | */ 747 | template 748 | void add_packed_fixed32(pbf_tag_type tag, InputIterator first, InputIterator last) { 749 | add_packed_fixed(tag, first, last, 750 | typename std::iterator_traits::iterator_category()); 751 | } 752 | 753 | /** 754 | * Add "repeated packed sfixed32" field to data. 755 | * 756 | * @tparam InputIterator A type satisfying the InputIterator concept. 757 | * Dereferencing the iterator must yield a type assignable to int32_t. 
758 | * @param tag Tag (field number) of the field 759 | * @param first Iterator pointing to the beginning of the data 760 | * @param last Iterator pointing one past the end of data 761 | */ 762 | template 763 | void add_packed_sfixed32(pbf_tag_type tag, InputIterator first, InputIterator last) { 764 | add_packed_fixed(tag, first, last, 765 | typename std::iterator_traits::iterator_category()); 766 | } 767 | 768 | /** 769 | * Add "repeated packed fixed64" field to data. 770 | * 771 | * @tparam InputIterator A type satisfying the InputIterator concept. 772 | * Dereferencing the iterator must yield a type assignable to uint64_t. 773 | * @param tag Tag (field number) of the field 774 | * @param first Iterator pointing to the beginning of the data 775 | * @param last Iterator pointing one past the end of data 776 | */ 777 | template 778 | void add_packed_fixed64(pbf_tag_type tag, InputIterator first, InputIterator last) { 779 | add_packed_fixed(tag, first, last, 780 | typename std::iterator_traits::iterator_category()); 781 | } 782 | 783 | /** 784 | * Add "repeated packed sfixed64" field to data. 785 | * 786 | * @tparam InputIterator A type satisfying the InputIterator concept. 787 | * Dereferencing the iterator must yield a type assignable to int64_t. 788 | * @param tag Tag (field number) of the field 789 | * @param first Iterator pointing to the beginning of the data 790 | * @param last Iterator pointing one past the end of data 791 | */ 792 | template 793 | void add_packed_sfixed64(pbf_tag_type tag, InputIterator first, InputIterator last) { 794 | add_packed_fixed(tag, first, last, 795 | typename std::iterator_traits::iterator_category()); 796 | } 797 | 798 | /** 799 | * Add "repeated packed float" field to data. 800 | * 801 | * @tparam InputIterator A type satisfying the InputIterator concept. 802 | * Dereferencing the iterator must yield a type assignable to float. 
803 | * @param tag Tag (field number) of the field 804 | * @param first Iterator pointing to the beginning of the data 805 | * @param last Iterator pointing one past the end of data 806 | */ 807 | template 808 | void add_packed_float(pbf_tag_type tag, InputIterator first, InputIterator last) { 809 | add_packed_fixed(tag, first, last, 810 | typename std::iterator_traits::iterator_category()); 811 | } 812 | 813 | /** 814 | * Add "repeated packed double" field to data. 815 | * 816 | * @tparam InputIterator A type satisfying the InputIterator concept. 817 | * Dereferencing the iterator must yield a type assignable to double. 818 | * @param tag Tag (field number) of the field 819 | * @param first Iterator pointing to the beginning of the data 820 | * @param last Iterator pointing one past the end of data 821 | */ 822 | template 823 | void add_packed_double(pbf_tag_type tag, InputIterator first, InputIterator last) { 824 | add_packed_fixed(tag, first, last, 825 | typename std::iterator_traits::iterator_category()); 826 | } 827 | 828 | ///@} 829 | 830 | template friend class detail::packed_field_varint; 831 | template friend class detail::packed_field_svarint; 832 | template friend class detail::packed_field_fixed; 833 | 834 | }; // class pbf_writer 835 | 836 | /** 837 | * Swap two pbf_writer objects. 838 | * 839 | * @param lhs First object. 840 | * @param rhs Second object. 
841 | */ 842 | inline void swap(pbf_writer& lhs, pbf_writer& rhs) noexcept { 843 | lhs.swap(rhs); 844 | } 845 | 846 | namespace detail { 847 | 848 | class packed_field { 849 | 850 | protected: 851 | 852 | pbf_writer m_writer; 853 | 854 | public: 855 | 856 | packed_field(const packed_field&) = delete; 857 | packed_field& operator=(const packed_field&) = delete; 858 | 859 | packed_field(packed_field&&) = default; 860 | packed_field& operator=(packed_field&&) = default; 861 | 862 | packed_field(pbf_writer& parent_writer, pbf_tag_type tag) : 863 | m_writer(parent_writer, tag) { 864 | } 865 | 866 | packed_field(pbf_writer& parent_writer, pbf_tag_type tag, std::size_t size) : 867 | m_writer(parent_writer, tag, size) { 868 | } 869 | 870 | void rollback() { 871 | m_writer.rollback(); 872 | } 873 | 874 | }; // class packed_field 875 | 876 | template 877 | class packed_field_fixed : public packed_field { 878 | 879 | public: 880 | 881 | template 882 | packed_field_fixed(pbf_writer& parent_writer, P tag) : 883 | packed_field(parent_writer, static_cast(tag)) { 884 | } 885 | 886 | template 887 | packed_field_fixed(pbf_writer& parent_writer, P tag, std::size_t size) : 888 | packed_field(parent_writer, static_cast(tag), size * sizeof(T)) { 889 | } 890 | 891 | void add_element(T value) { 892 | m_writer.add_fixed(value); 893 | } 894 | 895 | }; // class packed_field_fixed 896 | 897 | template 898 | class packed_field_varint : public packed_field { 899 | 900 | public: 901 | 902 | template 903 | packed_field_varint(pbf_writer& parent_writer, P tag) : 904 | packed_field(parent_writer, static_cast(tag)) { 905 | } 906 | 907 | void add_element(T value) { 908 | m_writer.add_varint(uint64_t(value)); 909 | } 910 | 911 | }; // class packed_field_varint 912 | 913 | template 914 | class packed_field_svarint : public packed_field { 915 | 916 | public: 917 | 918 | template 919 | packed_field_svarint(pbf_writer& parent_writer, P tag) : 920 | packed_field(parent_writer, static_cast(tag)) { 921 | } 
922 | 923 | void add_element(T value) { 924 | m_writer.add_varint(encode_zigzag64(value)); 925 | } 926 | 927 | }; // class packed_field_svarint 928 | 929 | } // end namespace detail 930 | 931 | /// Class for generating packed repeated bool fields. 932 | using packed_field_bool = detail::packed_field_varint; 933 | 934 | /// Class for generating packed repeated enum fields. 935 | using packed_field_enum = detail::packed_field_varint; 936 | 937 | /// Class for generating packed repeated int32 fields. 938 | using packed_field_int32 = detail::packed_field_varint; 939 | 940 | /// Class for generating packed repeated sint32 fields. 941 | using packed_field_sint32 = detail::packed_field_svarint; 942 | 943 | /// Class for generating packed repeated uint32 fields. 944 | using packed_field_uint32 = detail::packed_field_varint; 945 | 946 | /// Class for generating packed repeated int64 fields. 947 | using packed_field_int64 = detail::packed_field_varint; 948 | 949 | /// Class for generating packed repeated sint64 fields. 950 | using packed_field_sint64 = detail::packed_field_svarint; 951 | 952 | /// Class for generating packed repeated uint64 fields. 953 | using packed_field_uint64 = detail::packed_field_varint; 954 | 955 | /// Class for generating packed repeated fixed32 fields. 956 | using packed_field_fixed32 = detail::packed_field_fixed; 957 | 958 | /// Class for generating packed repeated sfixed32 fields. 959 | using packed_field_sfixed32 = detail::packed_field_fixed; 960 | 961 | /// Class for generating packed repeated fixed64 fields. 962 | using packed_field_fixed64 = detail::packed_field_fixed; 963 | 964 | /// Class for generating packed repeated sfixed64 fields. 965 | using packed_field_sfixed64 = detail::packed_field_fixed; 966 | 967 | /// Class for generating packed repeated float fields. 968 | using packed_field_float = detail::packed_field_fixed; 969 | 970 | /// Class for generating packed repeated double fields. 
971 | using packed_field_double = detail::packed_field_fixed; 972 | 973 | } // end namespace protozero 974 | 975 | #endif // PROTOZERO_PBF_WRITER_HPP 976 | -------------------------------------------------------------------------------- /protozero/types.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PROTOZERO_TYPES_HPP 2 | #define PROTOZERO_TYPES_HPP 3 | 4 | /***************************************************************************** 5 | 6 | protozero - Minimalistic protocol buffer decoder and encoder in C++. 7 | 8 | This file is from https://github.com/mapbox/protozero where you can find more 9 | documentation. 10 | 11 | *****************************************************************************/ 12 | 13 | /** 14 | * @file types.hpp 15 | * 16 | * @brief Contains the declaration of low-level types used in the pbf format. 17 | */ 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | namespace protozero { 29 | 30 | /** 31 | * The type used for field tags (field numbers). 32 | */ 33 | using pbf_tag_type = uint32_t; 34 | 35 | /** 36 | * The type used to encode type information. 37 | * See the table on 38 | * https://developers.google.com/protocol-buffers/docs/encoding 39 | */ 40 | enum class pbf_wire_type : uint32_t { 41 | varint = 0, // int32/64, uint32/64, sint32/64, bool, enum 42 | fixed64 = 1, // fixed64, sfixed64, double 43 | length_delimited = 2, // string, bytes, embedded messages, 44 | // packed repeated fields 45 | fixed32 = 5, // fixed32, sfixed32, float 46 | unknown = 99 // used for default setting in this library 47 | }; 48 | 49 | /** 50 | * Get the tag and wire type of the current field in one integer suitable 51 | * for comparison with a switch statement. 52 | * 53 | * See pbf_reader.tag_and_type() for an example how to use this. 
54 | */ 55 | template 56 | constexpr inline uint32_t tag_and_type(T tag, pbf_wire_type wire_type) noexcept { 57 | return (static_cast(static_cast(tag)) << 3) | static_cast(wire_type); 58 | } 59 | 60 | /** 61 | * The type used for length values, such as the length of a field. 62 | */ 63 | using pbf_length_type = uint32_t; 64 | 65 | #ifdef PROTOZERO_USE_VIEW 66 | using data_view = PROTOZERO_USE_VIEW; 67 | #else 68 | 69 | /** 70 | * Holds a pointer to some data and a length. 71 | * 72 | * This class is supposed to be compatible with the std::string_view 73 | * that will be available in C++17. 74 | */ 75 | class data_view { 76 | 77 | const char* m_data; 78 | std::size_t m_size; 79 | 80 | public: 81 | 82 | /** 83 | * Default constructor. Construct an empty data_view. 84 | */ 85 | constexpr data_view() noexcept 86 | : m_data(nullptr), 87 | m_size(0) { 88 | } 89 | 90 | /** 91 | * Create data_view from pointer and size. 92 | * 93 | * @param ptr Pointer to the data. 94 | * @param length Length of the data. 95 | */ 96 | constexpr data_view(const char* ptr, std::size_t length) noexcept 97 | : m_data(ptr), 98 | m_size(length) { 99 | } 100 | 101 | /** 102 | * Create data_view from string. 103 | * 104 | * @param str String with the data. 105 | */ 106 | data_view(const std::string& str) noexcept 107 | : m_data(str.data()), 108 | m_size(str.size()) { 109 | } 110 | 111 | /** 112 | * Create data_view from zero-terminated string. 113 | * 114 | * @param ptr Pointer to the data. 115 | */ 116 | data_view(const char* ptr) noexcept 117 | : m_data(ptr), 118 | m_size(std::strlen(ptr)) { 119 | } 120 | 121 | /** 122 | * Swap the contents of this object with the other. 123 | * 124 | * @param other Other object to swap data with. 125 | */ 126 | void swap(data_view& other) noexcept { 127 | using std::swap; 128 | swap(m_data, other.m_data); 129 | swap(m_size, other.m_size); 130 | } 131 | 132 | /// Return pointer to data. 
133 | constexpr const char* data() const noexcept { 134 | return m_data; 135 | } 136 | 137 | /// Return length of data in bytes. 138 | constexpr std::size_t size() const noexcept { 139 | return m_size; 140 | } 141 | 142 | /// Returns true if size is 0. 143 | constexpr bool empty() const noexcept { 144 | return m_size == 0; 145 | } 146 | 147 | /** 148 | * Convert data view to string. 149 | * 150 | * @pre Must not be default constructed data_view. 151 | */ 152 | std::string to_string() const { 153 | protozero_assert(m_data); 154 | return std::string{m_data, m_size}; 155 | } 156 | 157 | /** 158 | * Convert data view to string. 159 | * 160 | * @pre Must not be default constructed data_view. 161 | */ 162 | explicit operator std::string() const { 163 | protozero_assert(m_data); 164 | return std::string{m_data, m_size}; 165 | } 166 | 167 | }; // class data_view 168 | 169 | /** 170 | * Swap two data_view objects. 171 | * 172 | * @param lhs First object. 173 | * @param rhs Second object. 174 | */ 175 | inline void swap(data_view& lhs, data_view& rhs) noexcept { 176 | lhs.swap(rhs); 177 | } 178 | 179 | /** 180 | * Two data_view instances are equal if they have the same size and the 181 | * same content. 182 | * 183 | * @param lhs First object. 184 | * @param rhs Second object. 185 | */ 186 | inline bool operator==(const data_view& lhs, const data_view& rhs) noexcept { 187 | return lhs.size() == rhs.size() && std::equal(lhs.data(), lhs.data() + lhs.size(), rhs.data()); 188 | } 189 | 190 | /** 191 | * Two data_view instances are not equal if they have different sizes or the 192 | * content differs. 193 | * 194 | * @param lhs First object. 195 | * @param rhs Second object. 
196 | */ 197 | inline bool operator!=(const data_view& lhs, const data_view& rhs) noexcept { 198 | return !(lhs == rhs); 199 | } 200 | 201 | #endif 202 | 203 | 204 | } // end namespace protozero 205 | 206 | #endif // PROTOZERO_TYPES_HPP 207 | -------------------------------------------------------------------------------- /protozero/varint.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PROTOZERO_VARINT_HPP 2 | #define PROTOZERO_VARINT_HPP 3 | 4 | /***************************************************************************** 5 | 6 | protozero - Minimalistic protocol buffer decoder and encoder in C++. 7 | 8 | This file is from https://github.com/mapbox/protozero where you can find more 9 | documentation. 10 | 11 | *****************************************************************************/ 12 | 13 | /** 14 | * @file varint.hpp 15 | * 16 | * @brief Contains low-level varint and zigzag encoding and decoding functions. 17 | */ 18 | 19 | #include 20 | 21 | #include 22 | 23 | namespace protozero { 24 | 25 | /** 26 | * The maximum length of a 64 bit varint. 
27 | */ 28 | constexpr const int8_t max_varint_length = sizeof(uint64_t) * 8 / 7 + 1; 29 | 30 | namespace detail { 31 | 32 | // from https://github.com/facebook/folly/blob/master/folly/Varint.h 33 | inline uint64_t decode_varint_impl(const char** data, const char* end) { 34 | const int8_t* begin = reinterpret_cast<const int8_t*>(*data); 35 | const int8_t* iend = reinterpret_cast<const int8_t*>(end); 36 | const int8_t* p = begin; 37 | uint64_t val = 0; 38 | 39 | if (iend - begin >= max_varint_length) { // fast path: at least max_varint_length bytes are readable, so no per-byte end check is needed 40 | do { 41 | int64_t b; // sign-extended byte, negative while the continuation bit is set; masked and shifted as uint64_t because a signed << 63 overflows int64_t 42 | b = *p++; val = (uint64_t(b) & 0x7f); if (b >= 0) break; 43 | b = *p++; val |= (uint64_t(b) & 0x7f) << 7; if (b >= 0) break; 44 | b = *p++; val |= (uint64_t(b) & 0x7f) << 14; if (b >= 0) break; 45 | b = *p++; val |= (uint64_t(b) & 0x7f) << 21; if (b >= 0) break; 46 | b = *p++; val |= (uint64_t(b) & 0x7f) << 28; if (b >= 0) break; 47 | b = *p++; val |= (uint64_t(b) & 0x7f) << 35; if (b >= 0) break; 48 | b = *p++; val |= (uint64_t(b) & 0x7f) << 42; if (b >= 0) break; 49 | b = *p++; val |= (uint64_t(b) & 0x7f) << 49; if (b >= 0) break; 50 | b = *p++; val |= (uint64_t(b) & 0x7f) << 56; if (b >= 0) break; 51 | b = *p++; val |= (uint64_t(b) & 0x7f) << 63; if (b >= 0) break; 52 | throw varint_too_long_exception(); 53 | } while (false); 54 | } else { // slow path: fewer than max_varint_length bytes remain, so test for the buffer end before every read 55 | int shift = 0; 56 | while (p != iend && *p < 0) { 57 | val |= uint64_t(*p++ & 0x7f) << shift; 58 | shift += 7; 59 | } 60 | if (p == iend) { 61 | throw end_of_buffer_exception(); 62 | } 63 | val |= uint64_t(*p++) << shift; 64 | } 65 | 66 | *data = reinterpret_cast<const char*>(p); 67 | return val; 68 | } 69 | 70 | } // end namespace detail 71 | 72 | /** 73 | * Decode a 64 bit varint. 74 | * 75 | * Strong exception guarantee: if there is an exception the data pointer will 76 | * not be changed. 77 | * 78 | * @param[in,out] data Pointer to pointer to the input data. After the function 79 | * returns this will point to the next data to be read. 80 | * @param[in] end Pointer one past the end of the input data.
81 | * @returns The decoded integer 82 | * @throws varint_too_long_exception if the varint is longer than the maximum 83 | * length that would fit in a 64 bit int. Usually this means your data 84 | * is corrupted or you are trying to read something as a varint that 85 | * isn't. 86 | * @throws end_of_buffer_exception if the *end* of the buffer was reached 87 | * before the end of the varint. 88 | */ 89 | inline uint64_t decode_varint(const char** data, const char* end) { 90 | // If this is a one-byte varint, decode it here. 91 | if (end != *data && ((**data & 0x80) == 0)) { 92 | uint64_t val = uint64_t(**data); 93 | ++(*data); 94 | return val; 95 | } 96 | // Empty buffer or a varint of more than one byte: defer to the complete implementation. 97 | return detail::decode_varint_impl(data, end); 98 | } 99 | 100 | /** 101 | * Skip over a varint. 102 | * 103 | * Strong exception guarantee: if there is an exception the data pointer will 104 | * not be changed. 105 | * 106 | * @param[in,out] data Pointer to pointer to the input data. After the function 107 | * returns this will point to the next data to be read. 108 | * @param[in] end Pointer one past the end of the input data. 109 | * @throws end_of_buffer_exception if the *end* of the buffer was reached 110 | * before the end of the varint. @throws varint_too_long_exception if max_varint_length or more bytes with the continuation bit set come before a terminating byte. 111 | */ 112 | inline void skip_varint(const char** data, const char* end) { 113 | const int8_t* begin = reinterpret_cast<const int8_t*>(*data); 114 | const int8_t* iend = reinterpret_cast<const int8_t*>(end); 115 | const int8_t* p = begin; 116 | 117 | while (p != iend && *p < 0) { 118 | ++p; 119 | } 120 | 121 | if (p >= begin + max_varint_length) { 122 | throw varint_too_long_exception(); 123 | } 124 | 125 | if (p == iend) { 126 | throw end_of_buffer_exception(); 127 | } 128 | 129 | ++p; 130 | 131 | *data = reinterpret_cast<const char*>(p); 132 | } 133 | 134 | /** 135 | * Varint encode a 64 bit integer. 136 | * 137 | * @tparam T An output iterator type.
/**
 * Varint encode a 64 bit integer.
 *
 * @tparam T An output iterator type.
 * @param data Output iterator the varint encoded value will be written to
 *             byte by byte.
 * @param value The integer that will be encoded.
 * @returns The number of bytes written (1 to 10).
 * @throws Any exception thrown by increment or dereference operator on data.
 */
template <typename T>
inline int write_varint(T data, uint64_t value) {
    int n = 1;

    // Emit seven payload bits per byte, least significant group first; the
    // high bit marks "more bytes follow".
    while (value >= 0x80U) {
        *data++ = static_cast<char>((value & 0x7fU) | 0x80U);
        value >>= 7;
        ++n;
    }
    *data++ = static_cast<char>(value);

    return n;
}

/**
 * ZigZag encodes a 32 bit integer.
 *
 * Maps 0, -1, 1, -2, ... to 0, 1, 2, 3, ... so that values of small
 * magnitude get short varints.
 */
inline constexpr uint32_t encode_zigzag32(int32_t value) noexcept {
    return (static_cast<uint32_t>(value) << 1) ^ (static_cast<uint32_t>(value >> 31));
}

/**
 * ZigZag encodes a 64 bit integer.
 */
inline constexpr uint64_t encode_zigzag64(int64_t value) noexcept {
    return (static_cast<uint64_t>(value) << 1) ^ (static_cast<uint64_t>(value >> 63));
}

/**
 * Decodes a 32 bit ZigZag-encoded integer.
 */
inline constexpr int32_t decode_zigzag32(uint32_t value) noexcept {
    return static_cast<int32_t>(value >> 1) ^ -static_cast<int32_t>(value & 1);
}

/**
 * Decodes a 64 bit ZigZag-encoded integer.
 */
inline constexpr int64_t decode_zigzag64(uint64_t value) noexcept {
    return static_cast<int64_t>(value >> 1) ^ -static_cast<int64_t>(value & 1);
}
// Write the low `bytes` bytes of `v` to `out`, most significant byte first.
// Prints a diagnostic and exits if the stream reports a write error.
static void put_be(FILE *out, unsigned long long v, int bytes) {
	for (int shift = (bytes - 1) * 8; shift >= 0; shift -= 8) {
		if (putc(static_cast<int>((v >> shift) & 0xFF), out) == EOF) {
			perror("Write data");
			exit(EXIT_FAILURE);
		}
	}
}

// Store the low `bytes` bytes of `v` at `*out`, most significant byte first,
// and advance `*out` past them. The caller provides the space.
static void put_be(unsigned char **out, unsigned long long v, int bytes) {
	for (int shift = (bytes - 1) * 8; shift >= 0; shift -= 8) {
		**out = static_cast<unsigned char>((v >> shift) & 0xFF);
		(*out)++;
	}
}

// Assemble `bytes` big-endian bytes starting at `c` into an integer.
static unsigned long long get_be(const unsigned char *c, int bytes) {
	unsigned long long out = 0;

	for (int i = 0; i < bytes; i++) {
		out = (out << 8) | c[i];
	}

	return out;
}

// Write `v` to `out` as 8 bytes.
// Big-endian so memcmp() sorts numerically
void write64(FILE *out, unsigned long long v) {
	put_be(out, v, 8);
}

// Store `v` at `*out` as 8 big-endian bytes and advance `*out` by 8.
void write64(unsigned char **out, unsigned long long v) {
	put_be(out, v, 8);
}

// Write the low 32 bits of `v` to `out` as 4 big-endian bytes; higher bits
// are dropped.
void write32(FILE *out, unsigned long long v) {
	put_be(out, v, 4);
}

// Store the low 32 bits of `v` at `*out` as 4 big-endian bytes and advance
// `*out` by 4; higher bits are dropped.
void write32(unsigned char **out, unsigned long long v) {
	put_be(out, v, 4);
}

// Read 8 big-endian bytes starting at `c`.
unsigned long long read64(unsigned char *c) {
	return get_be(c, 8);
}

// Read 4 big-endian bytes starting at `c`.
unsigned long long read32(unsigned char *c) {
	return get_be(c, 4);
}
for vasprintf() on Linux 2 | #ifndef _GNU_SOURCE 3 | #define _GNU_SOURCE 4 | #endif 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "mvt.hpp" 15 | #include "mbtiles.hpp" 16 | #include "text.hpp" 17 | #include "milo/dtoa_milo.h" 18 | 19 | sqlite3 *mbtiles_open(char *dbname, char **argv, int forcetable) { 20 | sqlite3 *outdb; 21 | 22 | if (sqlite3_open(dbname, &outdb) != SQLITE_OK) { 23 | fprintf(stderr, "%s: %s: %s\n", argv[0], dbname, sqlite3_errmsg(outdb)); 24 | exit(EXIT_FAILURE); 25 | } 26 | 27 | char *err = NULL; 28 | if (sqlite3_exec(outdb, "PRAGMA synchronous=0", NULL, NULL, &err) != SQLITE_OK) { 29 | fprintf(stderr, "%s: async: %s\n", argv[0], err); 30 | exit(EXIT_FAILURE); 31 | } 32 | if (sqlite3_exec(outdb, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, &err) != SQLITE_OK) { 33 | fprintf(stderr, "%s: async: %s\n", argv[0], err); 34 | exit(EXIT_FAILURE); 35 | } 36 | if (sqlite3_exec(outdb, "PRAGMA journal_mode=DELETE", NULL, NULL, &err) != SQLITE_OK) { 37 | fprintf(stderr, "%s: async: %s\n", argv[0], err); 38 | exit(EXIT_FAILURE); 39 | } 40 | if (sqlite3_exec(outdb, "CREATE TABLE metadata (name text, value text);", NULL, NULL, &err) != SQLITE_OK) { 41 | fprintf(stderr, "%s: create metadata table: %s\n", argv[0], err); 42 | if (!forcetable) { 43 | exit(EXIT_FAILURE); 44 | } 45 | } 46 | if (sqlite3_exec(outdb, "CREATE TABLE tiles (zoom_level integer, tile_column integer, tile_row integer, tile_data blob);", NULL, NULL, &err) != SQLITE_OK) { 47 | fprintf(stderr, "%s: create tiles table: %s\n", argv[0], err); 48 | if (!forcetable) { 49 | exit(EXIT_FAILURE); 50 | } 51 | } 52 | if (sqlite3_exec(outdb, "create unique index name on metadata (name);", NULL, NULL, &err) != SQLITE_OK) { 53 | fprintf(stderr, "%s: index metadata: %s\n", argv[0], err); 54 | if (!forcetable) { 55 | exit(EXIT_FAILURE); 56 | } 57 | } 58 | if (sqlite3_exec(outdb, "create unique index tile_index on tiles (zoom_level, 
tile_column, tile_row);", NULL, NULL, &err) != SQLITE_OK) { 59 | fprintf(stderr, "%s: index tiles: %s\n", argv[0], err); 60 | if (!forcetable) { 61 | exit(EXIT_FAILURE); 62 | } 63 | } 64 | 65 | return outdb; 66 | } 67 | 68 | void mbtiles_write_tile(sqlite3 *outdb, int z, int tx, int ty, const char *data, int size) { 69 | sqlite3_stmt *stmt; 70 | const char *query = "insert into tiles (zoom_level, tile_column, tile_row, tile_data) values (?, ?, ?, ?)"; 71 | if (sqlite3_prepare_v2(outdb, query, -1, &stmt, NULL) != SQLITE_OK) { 72 | fprintf(stderr, "sqlite3 insert prep failed\n"); 73 | exit(EXIT_FAILURE); 74 | } 75 | 76 | sqlite3_bind_int(stmt, 1, z); 77 | sqlite3_bind_int(stmt, 2, tx); 78 | sqlite3_bind_int(stmt, 3, (1 << z) - 1 - ty); 79 | sqlite3_bind_blob(stmt, 4, data, size, NULL); 80 | 81 | if (sqlite3_step(stmt) != SQLITE_DONE) { 82 | fprintf(stderr, "sqlite3 insert failed: %s\n", sqlite3_errmsg(outdb)); 83 | } 84 | if (sqlite3_finalize(stmt) != SQLITE_OK) { 85 | fprintf(stderr, "sqlite3 finalize failed: %s\n", sqlite3_errmsg(outdb)); 86 | } 87 | } 88 | 89 | static void quote(std::string &buf, std::string const &s) { 90 | for (size_t i = 0; i < s.size(); i++) { 91 | unsigned char ch = s[i]; 92 | 93 | if (ch == '\\' || ch == '\"') { 94 | buf.push_back('\\'); 95 | buf.push_back(ch); 96 | } else if (ch < ' ') { 97 | char tmp[7]; 98 | sprintf(tmp, "\\u%04x", ch); 99 | buf.append(std::string(tmp)); 100 | } else { 101 | buf.push_back(ch); 102 | } 103 | } 104 | } 105 | 106 | void aprintf(std::string *buf, const char *format, ...) 
{ 107 | va_list ap; 108 | char *tmp; 109 | 110 | va_start(ap, format); 111 | if (vasprintf(&tmp, format, ap) < 0) { 112 | fprintf(stderr, "memory allocation failure\n"); 113 | exit(EXIT_FAILURE); 114 | } 115 | va_end(ap); 116 | 117 | buf->append(tmp, strlen(tmp)); 118 | free(tmp); 119 | } 120 | 121 | bool type_and_string::operator<(const type_and_string &o) const { 122 | if (string < o.string) { 123 | return true; 124 | } 125 | if (string == o.string && type < o.type) { 126 | return true; 127 | } 128 | return false; 129 | } 130 | 131 | bool type_and_string::operator!=(const type_and_string &o) const { 132 | if (type != o.type) { 133 | return true; 134 | } 135 | if (string != o.string) { 136 | return true; 137 | } 138 | return false; 139 | } 140 | 141 | std::string tilestats(std::map const &layermap1, size_t elements) { 142 | // Consolidate layers/attributes whose names are truncated 143 | std::vector> lmv; 144 | lmv.push_back(layermap1); 145 | std::map layermap = merge_layermaps(lmv, true); 146 | 147 | std::string out = "{\n"; 148 | 149 | out.append("\t\"layerCount\": "); 150 | out.append(std::to_string(layermap.size())); 151 | out.append(",\n"); 152 | 153 | out.append("\t\"layers\": [\n"); 154 | 155 | bool first = true; 156 | for (auto layer : layermap) { 157 | if (!first) { 158 | out.append(",\n"); 159 | } 160 | first = false; 161 | 162 | out.append("\t\t{\n"); 163 | 164 | out.append("\t\t\t\"layer\": \""); 165 | quote(out, layer.first.c_str()); 166 | out.append("\",\n"); 167 | 168 | out.append("\t\t\t\"count\": "); 169 | out.append(std::to_string(layer.second.points + layer.second.lines + layer.second.polygons)); 170 | out.append(",\n"); 171 | 172 | std::string geomtype = "Polygon"; 173 | if (layer.second.points >= layer.second.lines && layer.second.points >= layer.second.polygons) { 174 | geomtype = "Point"; 175 | } else if (layer.second.lines >= layer.second.polygons && layer.second.lines >= layer.second.points) { 176 | geomtype = "LineString"; 177 | } 178 | 
179 | out.append("\t\t\t\"geometry\": \""); 180 | quote(out, geomtype.c_str()); 181 | out.append("\",\n"); 182 | 183 | size_t attrib_count = layer.second.file_keys.size(); 184 | if (attrib_count > 1000) { 185 | attrib_count = 1000; 186 | } 187 | 188 | out.append("\t\t\t\"attributeCount\": "); 189 | out.append(std::to_string(attrib_count)); 190 | out.append(",\n"); 191 | 192 | out.append("\t\t\t\"attributes\": [\n"); 193 | 194 | size_t attrs = 0; 195 | for (auto attribute : layer.second.file_keys) { 196 | if (attrs == elements) { 197 | break; 198 | } 199 | if (attrs != 0) { 200 | out.append(",\n"); 201 | } 202 | attrs++; 203 | 204 | out.append("\t\t\t\t{\n"); 205 | 206 | out.append("\t\t\t\t\t\"attribute\": \""); 207 | quote(out, attribute.first.c_str()); 208 | out.append("\",\n"); 209 | 210 | size_t val_count = attribute.second.sample_values.size(); 211 | if (val_count > 1000) { 212 | val_count = 1000; 213 | } 214 | 215 | out.append("\t\t\t\t\t\"count\": "); 216 | out.append(std::to_string(val_count)); 217 | out.append(",\n"); 218 | 219 | int type = 0; 220 | for (auto s : attribute.second.sample_values) { 221 | type |= (1 << s.type); 222 | } 223 | 224 | std::string type_str; 225 | // No "null" because null attributes are dropped 226 | if (type == (1 << mvt_double)) { 227 | type_str = "number"; 228 | } else if (type == (1 << mvt_bool)) { 229 | type_str = "boolean"; 230 | } else if (type == (1 << mvt_string)) { 231 | type_str = "string"; 232 | } else { 233 | type_str = "mixed"; 234 | } 235 | 236 | out.append("\t\t\t\t\t\"type\": \""); 237 | quote(out, type_str.c_str()); 238 | out.append("\",\n"); 239 | 240 | out.append("\t\t\t\t\t\"values\": [\n"); 241 | 242 | size_t vals = 0; 243 | for (auto value : attribute.second.sample_values) { 244 | if (vals == elements) { 245 | break; 246 | } 247 | 248 | if (value.type == mvt_double || value.type == mvt_bool) { 249 | if (vals != 0) { 250 | out.append(",\n"); 251 | } 252 | vals++; 253 | 254 | out.append("\t\t\t\t\t\t"); 255 | 
out.append(value.string); 256 | } else { 257 | std::string trunc = truncate16(value.string, 256); 258 | 259 | if (trunc.size() == value.string.size()) { 260 | if (vals != 0) { 261 | out.append(",\n"); 262 | } 263 | vals++; 264 | 265 | out.append("\t\t\t\t\t\t\""); 266 | quote(out, value.string.c_str()); 267 | out.append("\""); 268 | } 269 | } 270 | } 271 | 272 | out.append("\n"); 273 | out.append("\t\t\t\t\t]"); 274 | 275 | if ((type & (1 << mvt_double)) != 0) { 276 | out.append(",\n"); 277 | 278 | out.append("\t\t\t\t\t\"min\": "); 279 | out.append(milo::dtoa_milo(attribute.second.min)); 280 | out.append(",\n"); 281 | 282 | out.append("\t\t\t\t\t\"max\": "); 283 | out.append(milo::dtoa_milo(attribute.second.max)); 284 | } 285 | 286 | out.append("\n"); 287 | out.append("\t\t\t\t}"); 288 | } 289 | 290 | out.append("\n\t\t\t]\n"); 291 | out.append("\t\t}"); 292 | } 293 | 294 | out.append("\n"); 295 | out.append("\t]\n"); 296 | out.append("}"); 297 | 298 | std::string out2; 299 | 300 | for (size_t i = 0; i < out.size(); i++) { 301 | if (out[i] != '\t' && out[i] != '\n') { 302 | out2.push_back(out[i]); 303 | } 304 | } 305 | 306 | return out2; 307 | } 308 | 309 | void mbtiles_write_metadata(sqlite3 *outdb, const char *outdir, const char *fname, int minzoom, int maxzoom, double minlat, double minlon, double maxlat, double maxlon, double midlat, double midlon, int forcetable, const char *attribution, std::map const &layermap, bool vector, const char *description, bool do_tilestats) { 310 | char *sql, *err; 311 | 312 | sqlite3 *db = outdb; 313 | if (outdb == NULL) { 314 | if (sqlite3_open("", &db) != SQLITE_OK) { 315 | fprintf(stderr, "Temporary db: %s\n", sqlite3_errmsg(db)); 316 | exit(EXIT_FAILURE); 317 | } 318 | if (sqlite3_exec(db, "CREATE TABLE metadata (name text, value text);", NULL, NULL, &err) != SQLITE_OK) { 319 | fprintf(stderr, "Create metadata table: %s\n", err); 320 | exit(EXIT_FAILURE); 321 | } 322 | } 323 | 324 | sql = sqlite3_mprintf("INSERT INTO metadata 
(name, value) VALUES ('name', %Q);", fname); 325 | if (sqlite3_exec(db, sql, NULL, NULL, &err) != SQLITE_OK) { 326 | fprintf(stderr, "set name in metadata: %s\n", err); 327 | if (!forcetable) { 328 | exit(EXIT_FAILURE); 329 | } 330 | } 331 | sqlite3_free(sql); 332 | 333 | sql = sqlite3_mprintf("INSERT INTO metadata (name, value) VALUES ('description', %Q);", description != NULL ? description : fname); 334 | if (sqlite3_exec(db, sql, NULL, NULL, &err) != SQLITE_OK) { 335 | fprintf(stderr, "set description in metadata: %s\n", err); 336 | if (!forcetable) { 337 | exit(EXIT_FAILURE); 338 | } 339 | } 340 | sqlite3_free(sql); 341 | 342 | sql = sqlite3_mprintf("INSERT INTO metadata (name, value) VALUES ('version', %d);", 2); 343 | if (sqlite3_exec(db, sql, NULL, NULL, &err) != SQLITE_OK) { 344 | fprintf(stderr, "set version : %s\n", err); 345 | if (!forcetable) { 346 | exit(EXIT_FAILURE); 347 | } 348 | } 349 | sqlite3_free(sql); 350 | 351 | sql = sqlite3_mprintf("INSERT INTO metadata (name, value) VALUES ('minzoom', %d);", minzoom); 352 | if (sqlite3_exec(db, sql, NULL, NULL, &err) != SQLITE_OK) { 353 | fprintf(stderr, "set minzoom: %s\n", err); 354 | if (!forcetable) { 355 | exit(EXIT_FAILURE); 356 | } 357 | } 358 | sqlite3_free(sql); 359 | 360 | sql = sqlite3_mprintf("INSERT INTO metadata (name, value) VALUES ('maxzoom', %d);", maxzoom); 361 | if (sqlite3_exec(db, sql, NULL, NULL, &err) != SQLITE_OK) { 362 | fprintf(stderr, "set maxzoom: %s\n", err); 363 | if (!forcetable) { 364 | exit(EXIT_FAILURE); 365 | } 366 | } 367 | sqlite3_free(sql); 368 | 369 | sql = sqlite3_mprintf("INSERT INTO metadata (name, value) VALUES ('center', '%f,%f,%d');", midlon, midlat, maxzoom); 370 | if (sqlite3_exec(db, sql, NULL, NULL, &err) != SQLITE_OK) { 371 | fprintf(stderr, "set center: %s\n", err); 372 | if (!forcetable) { 373 | exit(EXIT_FAILURE); 374 | } 375 | } 376 | sqlite3_free(sql); 377 | 378 | sql = sqlite3_mprintf("INSERT INTO metadata (name, value) VALUES ('bounds', 
'%f,%f,%f,%f');", minlon, minlat, maxlon, maxlat); 379 | if (sqlite3_exec(db, sql, NULL, NULL, &err) != SQLITE_OK) { 380 | fprintf(stderr, "set bounds: %s\n", err); 381 | if (!forcetable) { 382 | exit(EXIT_FAILURE); 383 | } 384 | } 385 | sqlite3_free(sql); 386 | 387 | sql = sqlite3_mprintf("INSERT INTO metadata (name, value) VALUES ('type', %Q);", "overlay"); 388 | if (sqlite3_exec(db, sql, NULL, NULL, &err) != SQLITE_OK) { 389 | fprintf(stderr, "set type: %s\n", err); 390 | if (!forcetable) { 391 | exit(EXIT_FAILURE); 392 | } 393 | } 394 | sqlite3_free(sql); 395 | 396 | if (attribution != NULL) { 397 | sql = sqlite3_mprintf("INSERT INTO metadata (name, value) VALUES ('attribution', %Q);", attribution); 398 | if (sqlite3_exec(db, sql, NULL, NULL, &err) != SQLITE_OK) { 399 | fprintf(stderr, "set type: %s\n", err); 400 | if (!forcetable) { 401 | exit(EXIT_FAILURE); 402 | } 403 | } 404 | sqlite3_free(sql); 405 | } 406 | 407 | sql = sqlite3_mprintf("INSERT INTO metadata (name, value) VALUES ('format', %Q);", vector ? 
"pbf" : "png"); 408 | if (sqlite3_exec(db, sql, NULL, NULL, &err) != SQLITE_OK) { 409 | fprintf(stderr, "set format: %s\n", err); 410 | if (!forcetable) { 411 | exit(EXIT_FAILURE); 412 | } 413 | } 414 | sqlite3_free(sql); 415 | 416 | if (vector) { 417 | size_t elements = 100; 418 | std::string buf; 419 | 420 | { 421 | buf = "{"; 422 | aprintf(&buf, "\"vector_layers\": [ "); 423 | 424 | std::vector lnames; 425 | for (auto ai = layermap.begin(); ai != layermap.end(); ++ai) { 426 | lnames.push_back(ai->first); 427 | } 428 | 429 | for (size_t i = 0; i < lnames.size(); i++) { 430 | if (i != 0) { 431 | aprintf(&buf, ", "); 432 | } 433 | 434 | auto fk = layermap.find(lnames[i]); 435 | aprintf(&buf, "{ \"id\": \""); 436 | quote(buf, lnames[i]); 437 | aprintf(&buf, "\", \"description\": \"\", \"minzoom\": %d, \"maxzoom\": %d, \"fields\": {", fk->second.minzoom, fk->second.maxzoom); 438 | 439 | bool first = true; 440 | for (auto j = fk->second.file_keys.begin(); j != fk->second.file_keys.end(); ++j) { 441 | if (first) { 442 | first = false; 443 | } else { 444 | aprintf(&buf, ", "); 445 | } 446 | 447 | aprintf(&buf, "\""); 448 | quote(buf, j->first.c_str()); 449 | 450 | int type = 0; 451 | for (auto s : j->second.sample_values) { 452 | type |= (1 << s.type); 453 | } 454 | 455 | if (type == (1 << mvt_double)) { 456 | aprintf(&buf, "\": \"Number\""); 457 | } else if (type == (1 << mvt_bool)) { 458 | aprintf(&buf, "\": \"Boolean\""); 459 | } else if (type == (1 << mvt_string)) { 460 | aprintf(&buf, "\": \"String\""); 461 | } else { 462 | aprintf(&buf, "\": \"Mixed\""); 463 | } 464 | } 465 | 466 | aprintf(&buf, "} }"); 467 | } 468 | 469 | aprintf(&buf, " ]"); 470 | 471 | if (do_tilestats && elements > 0) { 472 | aprintf(&buf, ",\"tilestats\": %s", tilestats(layermap, elements).c_str()); 473 | } 474 | 475 | aprintf(&buf, "}"); 476 | } 477 | 478 | sql = sqlite3_mprintf("INSERT INTO metadata (name, value) VALUES ('json', %Q);", buf.c_str()); 479 | if (sqlite3_exec(db, sql, NULL, 
NULL, &err) != SQLITE_OK) { 480 | fprintf(stderr, "set json: %s\n", err); 481 | if (!forcetable) { 482 | exit(EXIT_FAILURE); 483 | } 484 | } 485 | sqlite3_free(sql); 486 | } 487 | 488 | if (outdir != NULL) { 489 | std::string metadata = std::string(outdir) + "/metadata.json"; 490 | FILE *fp = fopen(metadata.c_str(), "w"); 491 | if (fp == NULL) { 492 | perror(metadata.c_str()); 493 | exit(EXIT_FAILURE); 494 | } 495 | 496 | fprintf(fp, "{\n"); 497 | 498 | sqlite3_stmt *stmt; 499 | bool first = true; 500 | if (sqlite3_prepare_v2(db, "SELECT name, value from metadata;", -1, &stmt, NULL) == SQLITE_OK) { 501 | while (sqlite3_step(stmt) == SQLITE_ROW) { 502 | std::string key, value; 503 | 504 | quote(key, (const char *) sqlite3_column_text(stmt, 0)); 505 | quote(value, (const char *) sqlite3_column_text(stmt, 1)); 506 | 507 | if (!first) { 508 | fprintf(fp, ",\n"); 509 | } 510 | fprintf(fp, " \"%s\": \"%s\"", key.c_str(), value.c_str()); 511 | first = false; 512 | } 513 | sqlite3_finalize(stmt); 514 | } 515 | 516 | fprintf(fp, "\n}\n"); 517 | fclose(fp); 518 | } 519 | 520 | if (outdb == NULL) { 521 | if (sqlite3_close(db) != SQLITE_OK) { 522 | fprintf(stderr, "Could not close temp database: %s\n", sqlite3_errmsg(db)); 523 | exit(EXIT_FAILURE); 524 | } 525 | } 526 | } 527 | 528 | void mbtiles_close(sqlite3 *outdb, const char *pgm) { 529 | char *err; 530 | 531 | if (sqlite3_exec(outdb, "ANALYZE;", NULL, NULL, &err) != SQLITE_OK) { 532 | fprintf(stderr, "%s: ANALYZE failed: %s\n", pgm, err); 533 | exit(EXIT_FAILURE); 534 | } 535 | if (sqlite3_close(outdb) != SQLITE_OK) { 536 | fprintf(stderr, "%s: could not close database: %s\n", pgm, sqlite3_errmsg(outdb)); 537 | exit(EXIT_FAILURE); 538 | } 539 | } 540 | 541 | std::map merge_layermaps(std::vector> const &maps) { 542 | return merge_layermaps(maps, false); 543 | } 544 | 545 | std::map merge_layermaps(std::vector> const &maps, bool trunc) { 546 | std::map out; 547 | 548 | for (size_t i = 0; i < maps.size(); i++) { 549 | for 
(auto map = maps[i].begin(); map != maps[i].end(); ++map) { 550 | if (map->second.points + map->second.lines + map->second.polygons == 0) { 551 | continue; 552 | } 553 | 554 | std::string layername = map->first; 555 | if (trunc) { 556 | layername = truncate16(layername, 256); 557 | } 558 | 559 | if (out.count(layername) == 0) { 560 | out.insert(std::pair(layername, layermap_entry(out.size()))); 561 | auto out_entry = out.find(layername); 562 | out_entry->second.minzoom = map->second.minzoom; 563 | out_entry->second.maxzoom = map->second.maxzoom; 564 | } 565 | 566 | auto out_entry = out.find(layername); 567 | if (out_entry == out.end()) { 568 | fprintf(stderr, "Internal error merging layers\n"); 569 | exit(EXIT_FAILURE); 570 | } 571 | 572 | for (auto fk = map->second.file_keys.begin(); fk != map->second.file_keys.end(); ++fk) { 573 | std::string attribname = fk->first; 574 | if (trunc) { 575 | attribname = truncate16(attribname, 256); 576 | } 577 | 578 | auto fk2 = out_entry->second.file_keys.find(attribname); 579 | 580 | if (fk2 == out_entry->second.file_keys.end()) { 581 | out_entry->second.file_keys.insert(std::pair(attribname, fk->second)); 582 | } else { 583 | for (auto val : fk->second.sample_values) { 584 | auto pt = std::lower_bound(fk2->second.sample_values.begin(), fk2->second.sample_values.end(), val); 585 | if (pt == fk2->second.sample_values.end() || *pt != val) { // not found 586 | fk2->second.sample_values.insert(pt, val); 587 | 588 | if (fk2->second.sample_values.size() > 1000) { 589 | fk2->second.sample_values.pop_back(); 590 | } 591 | } 592 | } 593 | 594 | fk2->second.type |= fk->second.type; 595 | 596 | if (fk->second.min < fk2->second.min) { 597 | fk2->second.min = fk->second.min; 598 | } 599 | if (fk->second.max > fk2->second.max) { 600 | fk2->second.max = fk->second.max; 601 | } 602 | } 603 | } 604 | 605 | if (map->second.minzoom < out_entry->second.minzoom) { 606 | out_entry->second.minzoom = map->second.minzoom; 607 | } 608 | if 
(map->second.maxzoom > out_entry->second.maxzoom) { 609 | out_entry->second.maxzoom = map->second.maxzoom; 610 | } 611 | 612 | out_entry->second.points += map->second.points; 613 | out_entry->second.lines += map->second.lines; 614 | out_entry->second.polygons += map->second.polygons; 615 | } 616 | } 617 | 618 | return out; 619 | } 620 | 621 | void add_to_file_keys(std::map &file_keys, std::string const &attrib, type_and_string const &val) { 622 | auto fka = file_keys.find(attrib); 623 | if (fka == file_keys.end()) { 624 | file_keys.insert(std::pair(attrib, type_and_string_stats())); 625 | fka = file_keys.find(attrib); 626 | } 627 | 628 | if (fka == file_keys.end()) { 629 | fprintf(stderr, "Can't happen (tilestats)\n"); 630 | exit(EXIT_FAILURE); 631 | } 632 | 633 | if (val.type == mvt_double) { 634 | double d = atof(val.string.c_str()); 635 | 636 | if (d < fka->second.min) { 637 | fka->second.min = d; 638 | } 639 | if (d > fka->second.max) { 640 | fka->second.max = d; 641 | } 642 | } 643 | 644 | auto pt = std::lower_bound(fka->second.sample_values.begin(), fka->second.sample_values.end(), val); 645 | if (pt == fka->second.sample_values.end() || *pt != val) { // not found 646 | fka->second.sample_values.insert(pt, val); 647 | 648 | if (fka->second.sample_values.size() > 1000) { 649 | fka->second.sample_values.pop_back(); 650 | } 651 | } 652 | 653 | fka->second.type |= (1 << val.type); 654 | } 655 | -------------------------------------------------------------------------------- /tippecanoe/mbtiles.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MBTILES_HPP 2 | #define MBTILES_HPP 3 | 4 | #include 5 | #include 6 | #include "mvt.hpp" 7 | 8 | struct type_and_string { 9 | int type; 10 | std::string string; 11 | 12 | bool operator<(const type_and_string &o) const; 13 | bool operator!=(const type_and_string &o) const; 14 | }; 15 | 16 | struct type_and_string_stats { 17 | std::vector sample_values; // sorted 18 | double min 
= INFINITY; 19 | double max = -INFINITY; 20 | int type = 0; 21 | }; 22 | 23 | struct layermap_entry { 24 | size_t id; 25 | std::map file_keys; 26 | int minzoom; 27 | int maxzoom; 28 | 29 | size_t points = 0; 30 | size_t lines = 0; 31 | size_t polygons = 0; 32 | 33 | layermap_entry(size_t _id) { 34 | id = _id; 35 | } 36 | }; 37 | 38 | sqlite3 *mbtiles_open(char *dbname, char **argv, int forcetable); 39 | 40 | void mbtiles_write_tile(sqlite3 *outdb, int z, int tx, int ty, const char *data, int size); 41 | 42 | void mbtiles_write_metadata(sqlite3 *outdb, const char *outdir, const char *fname, int minzoom, int maxzoom, double minlat, double minlon, double maxlat, double maxlon, double midlat, double midlon, int forcetable, const char *attribution, std::map const &layermap, bool vector, const char *description, bool do_tilestats); 43 | 44 | void mbtiles_close(sqlite3 *outdb, const char *pgm); 45 | 46 | void aprintf(std::string *buf, const char *format, ...); 47 | 48 | std::map merge_layermaps(std::vector > const &maps); 49 | std::map merge_layermaps(std::vector > const &maps, bool trunc); 50 | 51 | void add_to_file_keys(std::map &file_keys, std::string const &layername, type_and_string const &val); 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /tippecanoe/mvt.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "mvt.hpp" 10 | #include "protozero/varint.hpp" 11 | #include "protozero/pbf_reader.hpp" 12 | #include "protozero/pbf_writer.hpp" 13 | #include "milo/dtoa_milo.h" 14 | 15 | mvt_geometry::mvt_geometry(int nop, long long nx, long long ny) { 16 | this->op = nop; 17 | this->x = nx; 18 | this->y = ny; 19 | } 20 | 21 | // https://github.com/mapbox/mapnik-vector-tile/blob/master/src/vector_tile_compression.hpp 22 | bool is_compressed(std::string const &data) { 23 | 
return data.size() > 2 && (((uint8_t) data[0] == 0x78 && (uint8_t) data[1] == 0x9C) || ((uint8_t) data[0] == 0x1F && (uint8_t) data[1] == 0x8B)); 24 | } 25 | 26 | // https://github.com/mapbox/mapnik-vector-tile/blob/master/src/vector_tile_compression.hpp 27 | int decompress(std::string const &input, std::string &output) { 28 | z_stream inflate_s; 29 | inflate_s.zalloc = Z_NULL; 30 | inflate_s.zfree = Z_NULL; 31 | inflate_s.opaque = Z_NULL; 32 | inflate_s.avail_in = 0; 33 | inflate_s.next_in = Z_NULL; 34 | if (inflateInit2(&inflate_s, 32 + 15) != Z_OK) { 35 | fprintf(stderr, "error: %s\n", inflate_s.msg); 36 | } 37 | inflate_s.next_in = (Bytef *) input.data(); 38 | inflate_s.avail_in = input.size(); 39 | size_t length = 0; 40 | do { 41 | output.resize(length + 2 * input.size()); 42 | inflate_s.avail_out = 2 * input.size(); 43 | inflate_s.next_out = (Bytef *) (output.data() + length); 44 | int ret = inflate(&inflate_s, Z_FINISH); 45 | if (ret != Z_STREAM_END && ret != Z_OK && ret != Z_BUF_ERROR) { 46 | fprintf(stderr, "error: %s\n", inflate_s.msg); 47 | return 0; 48 | } 49 | 50 | length += (2 * input.size() - inflate_s.avail_out); 51 | } while (inflate_s.avail_out == 0); 52 | inflateEnd(&inflate_s); 53 | output.resize(length); 54 | return 1; 55 | } 56 | 57 | // https://github.com/mapbox/mapnik-vector-tile/blob/master/src/vector_tile_compression.hpp 58 | int compress(std::string const &input, std::string &output) { 59 | z_stream deflate_s; 60 | deflate_s.zalloc = Z_NULL; 61 | deflate_s.zfree = Z_NULL; 62 | deflate_s.opaque = Z_NULL; 63 | deflate_s.avail_in = 0; 64 | deflate_s.next_in = Z_NULL; 65 | deflateInit2(&deflate_s, Z_BEST_COMPRESSION, Z_DEFLATED, 31, 8, Z_DEFAULT_STRATEGY); 66 | deflate_s.next_in = (Bytef *) input.data(); 67 | deflate_s.avail_in = input.size(); 68 | size_t length = 0; 69 | do { 70 | size_t increase = input.size() / 2 + 1024; 71 | output.resize(length + increase); 72 | deflate_s.avail_out = increase; 73 | deflate_s.next_out = (Bytef *) 
(output.data() + length); 74 | int ret = deflate(&deflate_s, Z_FINISH); 75 | if (ret != Z_STREAM_END && ret != Z_OK && ret != Z_BUF_ERROR) { 76 | return -1; 77 | } 78 | length += (increase - deflate_s.avail_out); 79 | } while (deflate_s.avail_out == 0); 80 | deflateEnd(&deflate_s); 81 | output.resize(length); 82 | return 0; 83 | } 84 | 85 | bool mvt_tile::decode(std::string &message, bool &was_compressed) { 86 | layers.clear(); 87 | std::string src; 88 | 89 | if (is_compressed(message)) { 90 | std::string uncompressed; 91 | decompress(message, uncompressed); 92 | src = uncompressed; 93 | was_compressed = true; 94 | } else { 95 | src = message; 96 | was_compressed = false; 97 | } 98 | 99 | protozero::pbf_reader reader(src); 100 | 101 | while (reader.next()) { 102 | switch (reader.tag()) { 103 | case 3: /* layer */ 104 | { 105 | protozero::pbf_reader layer_reader(reader.get_message()); 106 | mvt_layer layer; 107 | 108 | while (layer_reader.next()) { 109 | switch (layer_reader.tag()) { 110 | case 1: /* name */ 111 | layer.name = layer_reader.get_string(); 112 | break; 113 | 114 | case 3: /* key */ 115 | layer.keys.push_back(layer_reader.get_string()); 116 | break; 117 | 118 | case 4: /* value */ 119 | { 120 | protozero::pbf_reader value_reader(layer_reader.get_message()); 121 | mvt_value value; 122 | 123 | while (value_reader.next()) { 124 | switch (value_reader.tag()) { 125 | case 1: /* string */ 126 | value.type = mvt_string; 127 | value.string_value = value_reader.get_string(); 128 | break; 129 | 130 | case 2: /* float */ 131 | value.type = mvt_float; 132 | value.numeric_value.float_value = value_reader.get_float(); 133 | break; 134 | 135 | case 3: /* double */ 136 | value.type = mvt_double; 137 | value.numeric_value.double_value = value_reader.get_double(); 138 | break; 139 | 140 | case 4: /* int */ 141 | value.type = mvt_int; 142 | value.numeric_value.int_value = value_reader.get_int64(); 143 | break; 144 | 145 | case 5: /* uint */ 146 | value.type = mvt_uint; 
147 | value.numeric_value.uint_value = value_reader.get_uint64(); 148 | break; 149 | 150 | case 6: /* sint */ 151 | value.type = mvt_sint; 152 | value.numeric_value.sint_value = value_reader.get_sint64(); 153 | break; 154 | 155 | case 7: /* bool */ 156 | value.type = mvt_bool; 157 | value.numeric_value.bool_value = value_reader.get_bool(); 158 | break; 159 | 160 | default: 161 | value_reader.skip(); 162 | break; 163 | } 164 | } 165 | 166 | layer.values.push_back(value); 167 | break; 168 | } 169 | 170 | case 5: /* extent */ 171 | layer.extent = layer_reader.get_uint32(); 172 | break; 173 | 174 | case 15: /* version */ 175 | layer.version = layer_reader.get_uint32(); 176 | break; 177 | 178 | case 2: /* feature */ 179 | { 180 | protozero::pbf_reader feature_reader(layer_reader.get_message()); 181 | mvt_feature feature; 182 | std::vector geoms; 183 | 184 | while (feature_reader.next()) { 185 | switch (feature_reader.tag()) { 186 | case 1: /* id */ 187 | feature.id = feature_reader.get_uint64(); 188 | feature.has_id = true; 189 | break; 190 | 191 | case 2: /* tag */ 192 | { 193 | auto pi = feature_reader.get_packed_uint32(); 194 | for (auto it = pi.first; it != pi.second; ++it) { 195 | feature.tags.push_back(*it); 196 | } 197 | break; 198 | } 199 | 200 | case 3: /* feature type */ 201 | feature.type = feature_reader.get_enum(); 202 | break; 203 | 204 | case 4: /* geometry */ 205 | { 206 | auto pi = feature_reader.get_packed_uint32(); 207 | for (auto it = pi.first; it != pi.second; ++it) { 208 | geoms.push_back(*it); 209 | } 210 | break; 211 | } 212 | 213 | default: 214 | feature_reader.skip(); 215 | break; 216 | } 217 | } 218 | 219 | long long px = 0, py = 0; 220 | for (size_t g = 0; g < geoms.size(); g++) { 221 | uint32_t geom = geoms[g]; 222 | uint32_t op = geom & 7; 223 | uint32_t count = geom >> 3; 224 | 225 | if (op == mvt_moveto || op == mvt_lineto) { 226 | for (size_t k = 0; k < count && g + 2 < geoms.size(); k++) { 227 | px += protozero::decode_zigzag32(geoms[g 
+ 1]); 228 | py += protozero::decode_zigzag32(geoms[g + 2]); 229 | g += 2; 230 | 231 | feature.geometry.push_back(mvt_geometry(op, px, py)); 232 | } 233 | } else { 234 | feature.geometry.push_back(mvt_geometry(op, 0, 0)); 235 | } 236 | } 237 | 238 | layer.features.push_back(feature); 239 | break; 240 | } 241 | 242 | default: 243 | layer_reader.skip(); 244 | break; 245 | } 246 | } 247 | 248 | for (size_t i = 0; i < layer.keys.size(); i++) { 249 | layer.key_map.insert(std::pair(layer.keys[i], i)); 250 | } 251 | for (size_t i = 0; i < layer.values.size(); i++) { 252 | layer.value_map.insert(std::pair(layer.values[i], i)); 253 | } 254 | 255 | layers.push_back(layer); 256 | break; 257 | } 258 | 259 | default: 260 | reader.skip(); 261 | break; 262 | } 263 | } 264 | 265 | return true; 266 | } 267 | 268 | std::string mvt_tile::encode() { 269 | std::string data; 270 | 271 | protozero::pbf_writer writer(data); 272 | 273 | for (size_t i = 0; i < layers.size(); i++) { 274 | std::string layer_string; 275 | protozero::pbf_writer layer_writer(layer_string); 276 | 277 | layer_writer.add_uint32(15, layers[i].version); /* version */ 278 | layer_writer.add_string(1, layers[i].name); /* name */ 279 | layer_writer.add_uint32(5, layers[i].extent); /* extent */ 280 | 281 | for (size_t j = 0; j < layers[i].keys.size(); j++) { 282 | layer_writer.add_string(3, layers[i].keys[j]); /* key */ 283 | } 284 | 285 | for (size_t v = 0; v < layers[i].values.size(); v++) { 286 | std::string value_string; 287 | protozero::pbf_writer value_writer(value_string); 288 | mvt_value &pbv = layers[i].values[v]; 289 | 290 | if (pbv.type == mvt_string) { 291 | value_writer.add_string(1, pbv.string_value); 292 | } else if (pbv.type == mvt_float) { 293 | value_writer.add_float(2, pbv.numeric_value.float_value); 294 | } else if (pbv.type == mvt_double) { 295 | value_writer.add_double(3, pbv.numeric_value.double_value); 296 | } else if (pbv.type == mvt_int) { 297 | value_writer.add_int64(4, 
pbv.numeric_value.int_value); 298 | } else if (pbv.type == mvt_uint) { 299 | value_writer.add_uint64(5, pbv.numeric_value.uint_value); 300 | } else if (pbv.type == mvt_sint) { 301 | value_writer.add_sint64(6, pbv.numeric_value.sint_value); 302 | } else if (pbv.type == mvt_bool) { 303 | value_writer.add_bool(7, pbv.numeric_value.bool_value); 304 | } 305 | 306 | layer_writer.add_message(4, value_string); 307 | } 308 | 309 | for (size_t f = 0; f < layers[i].features.size(); f++) { 310 | std::string feature_string; 311 | protozero::pbf_writer feature_writer(feature_string); 312 | 313 | feature_writer.add_enum(3, layers[i].features[f].type); 314 | feature_writer.add_packed_uint32(2, std::begin(layers[i].features[f].tags), std::end(layers[i].features[f].tags)); 315 | 316 | if (layers[i].features[f].has_id) { 317 | feature_writer.add_uint64(1, layers[i].features[f].id); 318 | } 319 | 320 | std::vector geometry; 321 | 322 | int px = 0, py = 0; 323 | int cmd_idx = -1; 324 | int cmd = -1; 325 | int length = 0; 326 | 327 | std::vector &geom = layers[i].features[f].geometry; 328 | 329 | for (size_t g = 0; g < geom.size(); g++) { 330 | int op = geom[g].op; 331 | 332 | if (op != cmd) { 333 | if (cmd_idx >= 0) { 334 | geometry[cmd_idx] = (length << 3) | (cmd & ((1 << 3) - 1)); 335 | } 336 | 337 | cmd = op; 338 | length = 0; 339 | cmd_idx = geometry.size(); 340 | geometry.push_back(0); 341 | } 342 | 343 | if (op == mvt_moveto || op == mvt_lineto) { 344 | long long wwx = geom[g].x; 345 | long long wwy = geom[g].y; 346 | 347 | int dx = wwx - px; 348 | int dy = wwy - py; 349 | 350 | geometry.push_back(protozero::encode_zigzag32(dx)); 351 | geometry.push_back(protozero::encode_zigzag32(dy)); 352 | 353 | px = wwx; 354 | py = wwy; 355 | length++; 356 | } else if (op == mvt_closepath) { 357 | length++; 358 | } else { 359 | fprintf(stderr, "\nInternal error: corrupted geometry\n"); 360 | exit(EXIT_FAILURE); 361 | } 362 | } 363 | 364 | if (cmd_idx >= 0) { 365 | geometry[cmd_idx] = (length 
// Escape `s` for use inside a double-quoted string literal.
//
// A backslash or double quote is prefixed with a backslash, every byte below
// 0x20 is written as \u00XX (lowercase hex), and all other bytes are copied
// unchanged. The enclosing quote characters themselves are not added.
static std::string quote(std::string const &s) {
    std::string escaped;

    for (char raw : s) {
        // Compare as unsigned so that bytes >= 0x80 are never taken for
        // control characters where plain char is signed.
        const unsigned char byte = raw;

        if (byte < ' ') {
            char hex[7];  // "\uXXXX" plus the terminating NUL
            sprintf(hex, "\\u%04x", byte);
            escaped += hex;
            continue;
        }

        if (byte == '\\' || byte == '\"') {
            escaped += '\\';
        }
        escaped += raw;
    }

    return escaped;
}
// Decide whether the text `s` denotes an integer that fits in a long long.
//
// s: NUL-terminated text to examine.
// v: receives the result of strtoll(); it is written even when 0 is returned.
//
// Returns 1 if `s` is an integer, optionally followed by a '.' and nothing
// but '0' digits (so "3", "3." and "3.000" all qualify). Returns 0 if no
// digits could be parsed, the value is out of range, or anything else
// trails the number.
//
// NOTE(review): base 0 means strtoll also accepts "0x" hexadecimal and
// leading-0 octal spellings; confirm that callers want that.
static int is_integer(const char *s, long long *v) {
    errno = 0;
    char *endptr;

    *v = strtoll(s, &endptr, 0);

    // strtoll leaves endptr at the start when it consumed no digits. Without
    // this check "", "." and ".000" would pass as the integer 0, because the
    // fractional-zeros rule below only inspects what follows endptr.
    if (endptr == s) {
        return 0;
    }

    if (*v == 0 && errno != 0) {
        return 0;
    }
    if ((*v == LLONG_MIN || *v == LLONG_MAX) && (errno == ERANGE)) {
        return 0;
    }

    if (*endptr == '\0') {
        return 1;
    }

    // Special case: an integer followed by .0000 or similar is still an
    // integer.
    if (*endptr != '.') {
        return 0;
    }
    for (endptr++; *endptr != '\0'; endptr++) {
        if (*endptr != '0') {
            return 0;
        }
    }

    return 1;
}
// One drawing step of a feature's geometry: an operation and the point it
// applies to.
struct mvt_geometry {
    long long x;
    long long y;
    int /* mvt_operation */ op;

    mvt_geometry(int op, long long x, long long y);

    // Orders by y first and by x among equal y; `op` plays no part.
    bool operator<(mvt_geometry const &other) const {
        if (y != other.y) {
            return y < other.y;
        }
        return x < other.x;
    }

    // Two steps are equal when their coordinates match; `op` is not compared.
    bool operator==(mvt_geometry const &other) const {
        return x == other.x && y == other.y;
    }
};
56 | }; 57 | 58 | enum mvt_value_type { 59 | mvt_string, 60 | mvt_float, 61 | mvt_double, 62 | mvt_int, 63 | mvt_uint, 64 | mvt_sint, 65 | mvt_bool, 66 | mvt_null, 67 | }; 68 | 69 | struct mvt_value { 70 | mvt_value_type type; 71 | std::string string_value; 72 | union { 73 | float float_value; 74 | double double_value; 75 | long long int_value; 76 | unsigned long long uint_value; 77 | long long sint_value; 78 | bool bool_value; 79 | } numeric_value; 80 | 81 | bool operator<(const mvt_value &o) const; 82 | std::string toString(); 83 | }; 84 | 85 | struct mvt_layer { 86 | int version; 87 | std::string name; 88 | std::vector features; 89 | std::vector keys; 90 | std::vector values; 91 | long long extent; 92 | 93 | // Add a key-value pair to a feature, using this layer's constant pool 94 | void tag(mvt_feature &feature, std::string key, mvt_value value); 95 | 96 | // For tracking the key-value constants already used in this layer 97 | std::map key_map; 98 | std::map value_map; 99 | }; 100 | 101 | struct mvt_tile { 102 | std::vector layers; 103 | 104 | std::string encode(); 105 | bool decode(std::string &message, bool &was_compressed); 106 | }; 107 | 108 | bool is_compressed(std::string const &data); 109 | int decompress(std::string const &input, std::string &output); 110 | int compress(std::string const &input, std::string &output); 111 | int dezig(unsigned n); 112 | 113 | mvt_value stringified_to_mvt_value(int type, const char *s); 114 | #endif 115 | -------------------------------------------------------------------------------- /tippecanoe/projection.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "projection.hpp" 7 | 8 | struct projection projections[] = { 9 | {"EPSG:4326", lonlat2tile, tile2lonlat, "urn:ogc:def:crs:OGC:1.3:CRS84"}, 10 | {"EPSG:3857", epsg3857totile, tiletoepsg3857, "urn:ogc:def:crs:EPSG::3857"}, 11 | {NULL, NULL, NULL, NULL}, 12 | }; 
// Convert a longitude/latitude pair in degrees to tile coordinates at the
// given zoom level, writing the result to *x and *y.
// http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames
void lonlat2tile(double lon, double lat, int zoom, long long *x, long long *y) {
    // Infinite and NaN coordinates are parked off the edge of the
    // Mercator plane.
    switch (fpclassify(lat)) {
    case FP_INFINITE:
    case FP_NAN:
        lat = 89.9;
        break;
    default:
        break;
    }
    switch (fpclassify(lon)) {
    case FP_INFINITE:
    case FP_NAN:
        lon = 360;
        break;
    default:
        break;
    }

    // Latitude has to be limited somewhere to prevent overflow. 89.9 degrees
    // is 0.621 worlds beyond the edge of the flat earth, hopefully far enough
    // out that there are few expectations about the shape.
    if (lat < -89.9) {
        lat = -89.9;
    } else if (lat > 89.9) {
        lat = 89.9;
    }

    if (lon < -360) {
        lon = -360;
    } else if (lon > 360) {
        lon = 360;
    }

    const double lat_rad = lat * M_PI / 180;
    const unsigned long long tiles = 1LL << zoom;  // tiles per axis at this zoom

    // The assignments truncate the floating-point results toward zero
    *x = tiles * ((lon + 180) / 360);
    *y = tiles * (1 - (log(tan(lat_rad) + 1 / cos(lat_rad)) / M_PI)) / 2;
}
// Interleave the bits of two 32-bit coordinates into one 64-bit index.
// Bit b of wx lands at bit 2*b + 1 of the result and bit b of wy at bit
// 2*b, so the most significant coordinate bits end up most significant in
// the index.
unsigned long long encode(unsigned int wx, unsigned int wy) {
    unsigned long long interleaved = 0;

    for (int bit = 31; bit >= 0; bit--) {
        const unsigned long long xbit = (wx >> bit) & 1;
        const unsigned long long ybit = (wy >> bit) & 1;

        interleaved |= ((xbit << 1) | ybit) << (2 * bit);
    }

    return interleaved;
}
// One entry of the table of supported input projections.
struct projection {
    // Primary identifier matched against the user's choice, e.g. "EPSG:4326";
    // a NULL name marks the end of the table.
    const char *name;
    // Converts a source coordinate pair to tile coordinates at `zoom`,
    // writing the result through ox/oy.
    void (*project)(double ix, double iy, int zoom, long long *ox, long long *oy);
    // Inverse of `project`: converts tile coordinates at `zoom` back to
    // source coordinates, writing the result through ox/oy.
    void (*unproject)(long long ix, long long iy, int zoom, double *ox, double *oy);
    // Alternative identifier accepted in place of `name`, e.g. the URN form
    // "urn:ogc:def:crs:EPSG::3857".
    const char *alias;
};
// Check that `s` is structurally valid UTF-8.
//
// Returns an empty string when every lead byte is followed by the number of
// continuation bytes it announces. Otherwise returns a message that quotes
// `s` and lists, in hex, the bytes of the first offending sequence. Only the
// lead/continuation structure is examined.
std::string check_utf8(std::string s) {
    size_t i = 0;

    while (i < s.size()) {
        const unsigned char lead = s[i] & 0xFF;

        // Sequence length announced by the lead byte; 0 marks a byte that
        // cannot start a sequence.
        size_t seq_len;
        if ((lead & 0x80) == 0) {
            seq_len = 1;
        } else if ((lead & 0xE0) == 0xC0) {
            seq_len = 2;
        } else if ((lead & 0xF0) == 0xE0) {
            seq_len = 3;
        } else if ((lead & 0xF8) == 0xF0) {
            seq_len = 4;
        } else {
            seq_len = 0;
        }

        // The whole sequence must fit in the string, and every byte after
        // the lead must be a continuation byte (10xxxxxx).
        bool valid = seq_len != 0 && i + seq_len <= s.size();
        for (size_t k = 1; valid && k < seq_len; k++) {
            valid = (s[i + k] & 0xC0) == 0x80;
        }

        if (!valid) {
            // Report as many bytes as the sequence claimed to have (one for
            // a bad lead byte), limited to what is left in the string.
            const size_t shown = (seq_len == 0) ? 1 : seq_len;

            std::string message = "\"" + s + "\" is not valid UTF-8 (";
            for (size_t j = 0; j < shown && i + j < s.size(); j++) {
                if (j != 0) {
                    message += " ";
                }
                char hex[6];
                sprintf(hex, "0x%02X", s[i + j] & 0xFF);
                message += hex;
            }
            message += ")";
            return message;
        }

        i += seq_len;
    }

    return "";
}
0x7F) << 12) | ((long) (s[2] & 0x7F) << 6) | ((long) (s[3] & 0x7F)); 88 | s += 4; 89 | } 90 | } else { 91 | *c = 0xFFFD; 92 | s++; 93 | } 94 | } else { 95 | *c = s[0]; 96 | s++; 97 | } 98 | 99 | return s; 100 | } 101 | 102 | std::string truncate16(std::string const &s, size_t runes) { 103 | const char *cp = s.c_str(); 104 | const char *start = cp; 105 | const char *lastgood = cp; 106 | size_t len = 0; 107 | long c; 108 | 109 | while ((cp = utf8_next(cp, &c)) != NULL) { 110 | if (c <= 0xFFFF) { 111 | len++; 112 | } else { 113 | len += 2; 114 | } 115 | 116 | if (len <= runes) { 117 | lastgood = cp; 118 | } else { 119 | break; 120 | } 121 | } 122 | 123 | return std::string(s, 0, lastgood - start); 124 | } 125 | -------------------------------------------------------------------------------- /tippecanoe/text.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TEXT_HPP 2 | #define TEXT_HPP 3 | 4 | #include 5 | 6 | std::string check_utf8(std::string text); 7 | const char *utf8_next(const char *s, long *c); 8 | std::string truncate16(std::string const &s, size_t runes); 9 | 10 | #endif 11 | --------------------------------------------------------------------------------