├── .gitignore ├── AUTHORS ├── COPYING ├── ChangeLog ├── INSTALL ├── Makefile.am ├── NEWS ├── README ├── README.md ├── aclocal ├── ax_boost_base.m4 ├── ax_boost_date_time.m4 ├── ax_boost_filesystem.m4 ├── ax_boost_iostreams.m4 ├── ax_boost_program_options.m4 ├── ax_boost_system.m4 └── ax_boost_thread.m4 ├── autogen.sh ├── configure.ac ├── include ├── changeset_filter.hpp ├── changeset_map.hpp ├── copy_elements.hpp ├── dump_archive.hpp ├── dump_reader.hpp ├── extract_kv.hpp ├── history_filter.hpp ├── insert_kv.hpp ├── output_writer.hpp ├── pbf_writer.hpp ├── table_extractor.hpp ├── time_epoch.hpp ├── types.hpp ├── unescape_copy_row.hpp ├── vendor │ └── boost │ │ └── iostreams │ │ └── filter │ │ └── gzip.hpp ├── writer_common.hpp └── xml_writer.hpp ├── m4 └── ax_cxx_compile_stdcxx.m4 ├── script ├── emacs-format-file.el └── fmt.sh ├── src ├── Makefile.am ├── changeset_filter.cpp ├── changeset_map.cpp ├── copy_elements.cpp ├── dump_archive.cpp ├── dump_reader.cpp ├── extract_kv.cpp ├── history_filter.cpp ├── insert_kv.cpp ├── output_writer.cpp ├── pbf_writer.cpp ├── planet-dump.cpp ├── time_epoch.cpp ├── types.cpp └── xml_writer.cpp └── test ├── bad-character.dmp ├── changesets-badchar.xml.case ├── changesets.osm.bz2 └── cmd.sh ├── changesets-empty.xml.case ├── changesets.osm.bz2 └── cmd.sh ├── changesets.xml.case ├── changesets-no-userinfo.osm.bz2 ├── changesets.osm.bz2 └── cmd.sh ├── discussions-badchar.xml.case ├── cmd.sh └── discussions.osm.bz2 ├── discussions-long-comment.xml.case ├── cmd.sh └── discussions.osm.bz2 ├── discussions.xml.case ├── cmd.sh ├── discussions-no-userinfo.osm.bz2 └── discussions.osm.bz2 ├── empty.dmp ├── history.pbf.case ├── cmd.sh ├── history-no-userinfo.osm.pbf └── history.osm.pbf ├── history.xml.case ├── cmd.sh ├── history-no-userinfo.osm.bz2 └── history.osm.bz2 ├── liechtenstein-2013-08-03.dmp ├── long-changeset-comment.dmp ├── planet.pbf.case ├── cmd.sh ├── planet-no-userinfo.osm.pbf └── planet.osm.pbf ├── planet.xml.case ├── cmd.sh ├── 
planet-no-userinfo.osm.bz2 └── planet.osm.bz2 └── test-case-runner.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *~ 3 | .dirstamp 4 | Makefile 5 | Makefile.in 6 | aclocal.m4 7 | autom4te.cache 8 | config.guess 9 | config.log 10 | config.status 11 | config.sub 12 | configure 13 | depcomp 14 | include/config.h 15 | include/config.h.in 16 | include/stamp-h1 17 | install-sh 18 | missing 19 | test-driver 20 | planet-dump-ng 21 | src/.deps 22 | src/Makefile 23 | src/Makefile.in 24 | compile 25 | test/*.trs 26 | test/*.log 27 | test-suite.log 28 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Matt Amos 2 | Frederik Ramm 3 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, Matt Amos 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the 14 | distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 20 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2014-02-06 Matt Amos 2 | 3 | * src/pbf_writer.cpp (pimpl::add_dense_node): Frederik (@woodpeck) 4 | added dense nodes support to the PBF writer. 5 | 6 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | Installation Instructions 2 | ************************* 3 | 4 | Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation, 5 | Inc. 6 | 7 | Copying and distribution of this file, with or without modification, 8 | are permitted in any medium without royalty provided the copyright 9 | notice and this notice are preserved. This file is offered as-is, 10 | without warranty of any kind. 11 | 12 | Basic Installation 13 | ================== 14 | 15 | Briefly, the shell commands `./configure; make; make install' should 16 | configure, build, and install this package. The following 17 | more-detailed instructions are generic; see the `README' file for 18 | instructions specific to this package. Some packages provide this 19 | `INSTALL' file but do not implement all of the features documented 20 | below. The lack of an optional feature in a given package is not 21 | necessarily a bug. 
More recommendations for GNU packages can be found 22 | in *note Makefile Conventions: (standards)Makefile Conventions. 23 | 24 | The `configure' shell script attempts to guess correct values for 25 | various system-dependent variables used during compilation. It uses 26 | those values to create a `Makefile' in each directory of the package. 27 | It may also create one or more `.h' files containing system-dependent 28 | definitions. Finally, it creates a shell script `config.status' that 29 | you can run in the future to recreate the current configuration, and a 30 | file `config.log' containing compiler output (useful mainly for 31 | debugging `configure'). 32 | 33 | It can also use an optional file (typically called `config.cache' 34 | and enabled with `--cache-file=config.cache' or simply `-C') that saves 35 | the results of its tests to speed up reconfiguring. Caching is 36 | disabled by default to prevent problems with accidental use of stale 37 | cache files. 38 | 39 | If you need to do unusual things to compile the package, please try 40 | to figure out how `configure' could check whether to do them, and mail 41 | diffs or instructions to the address given in the `README' so they can 42 | be considered for the next release. If you are using the cache, and at 43 | some point `config.cache' contains results you don't want to keep, you 44 | may remove or edit it. 45 | 46 | The file `configure.ac' (or `configure.in') is used to create 47 | `configure' by a program called `autoconf'. You need `configure.ac' if 48 | you want to change it or regenerate `configure' using a newer version 49 | of `autoconf'. 50 | 51 | The simplest way to compile this package is: 52 | 53 | 1. `cd' to the directory containing the package's source code and type 54 | `./configure' to configure the package for your system. 55 | 56 | Running `configure' might take a while. While running, it prints 57 | some messages telling which features it is checking for. 58 | 59 | 2. 
Type `make' to compile the package. 60 | 61 | 3. Optionally, type `make check' to run any self-tests that come with 62 | the package, generally using the just-built uninstalled binaries. 63 | 64 | 4. Type `make install' to install the programs and any data files and 65 | documentation. When installing into a prefix owned by root, it is 66 | recommended that the package be configured and built as a regular 67 | user, and only the `make install' phase executed with root 68 | privileges. 69 | 70 | 5. Optionally, type `make installcheck' to repeat any self-tests, but 71 | this time using the binaries in their final installed location. 72 | This target does not install anything. Running this target as a 73 | regular user, particularly if the prior `make install' required 74 | root privileges, verifies that the installation completed 75 | correctly. 76 | 77 | 6. You can remove the program binaries and object files from the 78 | source code directory by typing `make clean'. To also remove the 79 | files that `configure' created (so you can compile the package for 80 | a different kind of computer), type `make distclean'. There is 81 | also a `make maintainer-clean' target, but that is intended mainly 82 | for the package's developers. If you use it, you may have to get 83 | all sorts of other programs in order to regenerate files that came 84 | with the distribution. 85 | 86 | 7. Often, you can also type `make uninstall' to remove the installed 87 | files again. In practice, not all packages have tested that 88 | uninstallation works correctly, even though it is required by the 89 | GNU Coding Standards. 90 | 91 | 8. Some packages, particularly those that use Automake, provide `make 92 | distcheck', which can be used by developers to test that all other 93 | targets like `make install' and `make uninstall' work correctly. 94 | This target is generally not run by end users. 
95 | 96 | Compilers and Options 97 | ===================== 98 | 99 | Some systems require unusual options for compilation or linking that 100 | the `configure' script does not know about. Run `./configure --help' 101 | for details on some of the pertinent environment variables. 102 | 103 | You can give `configure' initial values for configuration parameters 104 | by setting variables in the command line or in the environment. Here 105 | is an example: 106 | 107 | ./configure CC=c99 CFLAGS=-g LIBS=-lposix 108 | 109 | *Note Defining Variables::, for more details. 110 | 111 | Compiling For Multiple Architectures 112 | ==================================== 113 | 114 | You can compile the package for more than one kind of computer at the 115 | same time, by placing the object files for each architecture in their 116 | own directory. To do this, you can use GNU `make'. `cd' to the 117 | directory where you want the object files and executables to go and run 118 | the `configure' script. `configure' automatically checks for the 119 | source code in the directory that `configure' is in and in `..'. This 120 | is known as a "VPATH" build. 121 | 122 | With a non-GNU `make', it is safer to compile the package for one 123 | architecture at a time in the source code directory. After you have 124 | installed the package for one architecture, use `make distclean' before 125 | reconfiguring for another architecture. 126 | 127 | On MacOS X 10.5 and later systems, you can create libraries and 128 | executables that work on multiple system types--known as "fat" or 129 | "universal" binaries--by specifying multiple `-arch' options to the 130 | compiler but only a single `-arch' option to the preprocessor. 
Like 131 | this: 132 | 133 | ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ 134 | CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ 135 | CPP="gcc -E" CXXCPP="g++ -E" 136 | 137 | This is not guaranteed to produce working output in all cases, you 138 | may have to build one architecture at a time and combine the results 139 | using the `lipo' tool if you have problems. 140 | 141 | Installation Names 142 | ================== 143 | 144 | By default, `make install' installs the package's commands under 145 | `/usr/local/bin', include files under `/usr/local/include', etc. You 146 | can specify an installation prefix other than `/usr/local' by giving 147 | `configure' the option `--prefix=PREFIX', where PREFIX must be an 148 | absolute file name. 149 | 150 | You can specify separate installation prefixes for 151 | architecture-specific files and architecture-independent files. If you 152 | pass the option `--exec-prefix=PREFIX' to `configure', the package uses 153 | PREFIX as the prefix for installing programs and libraries. 154 | Documentation and other data files still use the regular prefix. 155 | 156 | In addition, if you use an unusual directory layout you can give 157 | options like `--bindir=DIR' to specify different values for particular 158 | kinds of files. Run `configure --help' for a list of the directories 159 | you can set and what kinds of files go in them. In general, the 160 | default for these options is expressed in terms of `${prefix}', so that 161 | specifying just `--prefix' will affect all of the other directory 162 | specifications that were not explicitly provided. 163 | 164 | The most portable way to affect installation locations is to pass the 165 | correct locations to `configure'; however, many packages provide one or 166 | both of the following shortcuts of passing variable assignments to the 167 | `make install' command line to change installation locations without 168 | having to reconfigure or recompile. 
169 | 170 | The first method involves providing an override variable for each 171 | affected directory. For example, `make install 172 | prefix=/alternate/directory' will choose an alternate location for all 173 | directory configuration variables that were expressed in terms of 174 | `${prefix}'. Any directories that were specified during `configure', 175 | but not in terms of `${prefix}', must each be overridden at install 176 | time for the entire installation to be relocated. The approach of 177 | makefile variable overrides for each directory variable is required by 178 | the GNU Coding Standards, and ideally causes no recompilation. 179 | However, some platforms have known limitations with the semantics of 180 | shared libraries that end up requiring recompilation when using this 181 | method, particularly noticeable in packages that use GNU Libtool. 182 | 183 | The second method involves providing the `DESTDIR' variable. For 184 | example, `make install DESTDIR=/alternate/directory' will prepend 185 | `/alternate/directory' before all installation names. The approach of 186 | `DESTDIR' overrides is not required by the GNU Coding Standards, and 187 | does not work on platforms that have drive letters. On the other hand, 188 | it does better at avoiding recompilation issues, and works well even 189 | when some directory options were not specified in terms of `${prefix}' 190 | at `configure' time. 191 | 192 | Optional Features 193 | ================= 194 | 195 | If the package supports it, you can cause programs to be installed 196 | with an extra prefix or suffix on their names by giving `configure' the 197 | option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. 198 | 199 | Some packages pay attention to `--enable-FEATURE' options to 200 | `configure', where FEATURE indicates an optional part of the package. 201 | They may also pay attention to `--with-PACKAGE' options, where PACKAGE 202 | is something like `gnu-as' or `x' (for the X Window System). 
The 203 | `README' should mention any `--enable-' and `--with-' options that the 204 | package recognizes. 205 | 206 | For packages that use the X Window System, `configure' can usually 207 | find the X include and library files automatically, but if it doesn't, 208 | you can use the `configure' options `--x-includes=DIR' and 209 | `--x-libraries=DIR' to specify their locations. 210 | 211 | Some packages offer the ability to configure how verbose the 212 | execution of `make' will be. For these packages, running `./configure 213 | --enable-silent-rules' sets the default to minimal output, which can be 214 | overridden with `make V=1'; while running `./configure 215 | --disable-silent-rules' sets the default to verbose, which can be 216 | overridden with `make V=0'. 217 | 218 | Particular systems 219 | ================== 220 | 221 | On HP-UX, the default C compiler is not ANSI C compatible. If GNU 222 | CC is not installed, it is recommended to use the following options in 223 | order to use an ANSI C compiler: 224 | 225 | ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" 226 | 227 | and if that doesn't work, install pre-built binaries of GCC for HP-UX. 228 | 229 | HP-UX `make' updates targets which have the same time stamps as 230 | their prerequisites, which makes it generally unusable when shipped 231 | generated files such as `configure' are involved. Use GNU `make' 232 | instead. 233 | 234 | On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot 235 | parse its `<wchar.h>' header file. The option `-nodtk' can be used as 236 | a workaround. If GNU CC is not installed, it is therefore recommended 237 | to try 238 | 239 | ./configure CC="cc" 240 | 241 | and if that doesn't work, try 242 | 243 | ./configure CC="cc -nodtk" 244 | 245 | On Solaris, don't put `/usr/ucb' early in your `PATH'. This 246 | directory contains several dysfunctional programs; working variants of 247 | these programs are available in `/usr/bin'. 
So, if you need `/usr/ucb' 248 | in your `PATH', put it _after_ `/usr/bin'. 249 | 250 | On Haiku, software installed for all users goes in `/boot/common', 251 | not `/usr/local'. It is recommended to use the following options: 252 | 253 | ./configure --prefix=/boot/common 254 | 255 | Specifying the System Type 256 | ========================== 257 | 258 | There may be some features `configure' cannot figure out 259 | automatically, but needs to determine by the type of machine the package 260 | will run on. Usually, assuming the package is built to be run on the 261 | _same_ architectures, `configure' can figure that out, but if it prints 262 | a message saying it cannot guess the machine type, give it the 263 | `--build=TYPE' option. TYPE can either be a short name for the system 264 | type, such as `sun4', or a canonical name which has the form: 265 | 266 | CPU-COMPANY-SYSTEM 267 | 268 | where SYSTEM can have one of these forms: 269 | 270 | OS 271 | KERNEL-OS 272 | 273 | See the file `config.sub' for the possible values of each field. If 274 | `config.sub' isn't included in this package, then this package doesn't 275 | need to know the machine type. 276 | 277 | If you are _building_ compiler tools for cross-compiling, you should 278 | use the option `--target=TYPE' to select the type of system they will 279 | produce code for. 280 | 281 | If you want to _use_ a cross compiler, that generates code for a 282 | platform different from the build platform, you should specify the 283 | "host" platform (i.e., that on which the generated programs will 284 | eventually be run) with `--host=TYPE'. 285 | 286 | Sharing Defaults 287 | ================ 288 | 289 | If you want to set default values for `configure' scripts to share, 290 | you can create a site shell script called `config.site' that gives 291 | default values for variables like `CC', `cache_file', and `prefix'. 
292 | `configure' looks for `PREFIX/share/config.site' if it exists, then 293 | `PREFIX/etc/config.site' if it exists. Or, you can set the 294 | `CONFIG_SITE' environment variable to the location of the site script. 295 | A warning: not all `configure' scripts look for a site script. 296 | 297 | Defining Variables 298 | ================== 299 | 300 | Variables not defined in a site shell script can be set in the 301 | environment passed to `configure'. However, some packages may run 302 | configure again during the build, and the customized values of these 303 | variables may be lost. In order to avoid this problem, you should set 304 | them in the `configure' command line, using `VAR=value'. For example: 305 | 306 | ./configure CC=/usr/local2/bin/gcc 307 | 308 | causes the specified `gcc' to be used as the C compiler (unless it is 309 | overridden in the site shell script). 310 | 311 | Unfortunately, this technique does not work for `CONFIG_SHELL' due to 312 | an Autoconf bug. Until the bug is fixed you can use this workaround: 313 | 314 | CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash 315 | 316 | `configure' Invocation 317 | ====================== 318 | 319 | `configure' recognizes the following options to control how it 320 | operates. 321 | 322 | `--help' 323 | `-h' 324 | Print a summary of all of the options to `configure', and exit. 325 | 326 | `--help=short' 327 | `--help=recursive' 328 | Print a summary of the options unique to this package's 329 | `configure', and exit. The `short' variant lists options used 330 | only in the top level, while the `recursive' variant lists options 331 | also present in any nested packages. 332 | 333 | `--version' 334 | `-V' 335 | Print the version of Autoconf used to generate the `configure' 336 | script, and exit. 337 | 338 | `--cache-file=FILE' 339 | Enable the cache: use and save the results of the tests in FILE, 340 | traditionally `config.cache'. 
FILE defaults to `/dev/null' to 341 | disable caching. 342 | 343 | `--config-cache' 344 | `-C' 345 | Alias for `--cache-file=config.cache'. 346 | 347 | `--quiet' 348 | `--silent' 349 | `-q' 350 | Do not print messages saying which checks are being made. To 351 | suppress all normal output, redirect it to `/dev/null' (any error 352 | messages will still be shown). 353 | 354 | `--srcdir=DIR' 355 | Look for the package's source code in directory DIR. Usually 356 | `configure' can determine that directory automatically. 357 | 358 | `--prefix=DIR' 359 | Use DIR as the installation prefix. *note Installation Names:: 360 | for more details, including other options available for fine-tuning 361 | the installation locations. 362 | 363 | `--no-create' 364 | `-n' 365 | Run the configure checks, but stop before creating any output 366 | files. 367 | 368 | `configure' also accepts some other, not widely useful, options. Run 369 | `configure --help' for more details. 370 | 371 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS=src 2 | 3 | TESTS = \ 4 | test/planet.xml.case \ 5 | test/history.xml.case \ 6 | test/planet.pbf.case \ 7 | test/history.pbf.case \ 8 | test/changesets.xml.case \ 9 | test/changesets-badchar.xml.case \ 10 | test/changesets-empty.xml.case \ 11 | test/discussions.xml.case \ 12 | test/discussions-badchar.xml.case \ 13 | test/discussions-long-comment.xml.case 14 | TEST_EXTENSIONS = .case 15 | CASE_LOG_COMPILER = test/test-case-runner.sh 16 | 17 | fmt: script/fmt.sh script/emacs-format-file.el 18 | @for file in `find -name "*.[ch]pp"`; do \ 19 | script/fmt.sh $$file; \ 20 | done 21 | 22 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/NEWS -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | README.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Planet Dump (Next Generation) 2 | ============================= 3 | 4 | Tool for converting an OpenStreetMap database dump into planet files. 5 | 6 | Because it operates on the database dump rather than a running server, the 7 | extraction from PostgreSQL dump file to planet file(s) is completely 8 | independent of the database server, and can be done on a disconnected machine 9 | without putting any load on any database. 10 | 11 | The previous version of this tool required the database server to keep a 12 | consistent transaction context open for the duration of the dump, which would 13 | usually be several days. This created problems as the long-running transaction 14 | could get cancelled, meaning the planet dump would have to be started again 15 | from scratch. 
16 | 17 | Building 18 | -------- 19 | 20 | Before building the code, you will need: 21 | 22 | * A C++ build system (GCC 4.7 recommended), 23 | * libxml2 (version 2.6.31 recommended), 24 | * The Boost libraries (version 1.49 recommended), 25 | * libosmpbf (version 1.3.0 recommended), 26 | * libprotobuf and libprotobuf-lite (version 2.4.1 recommended) 27 | 28 | To install these on Ubuntu, you can just type: 29 | 30 | sudo apt-get install build-essential automake autoconf \ 31 | libxml2-dev libboost-dev libboost-program-options-dev \ 32 | libboost-date-time-dev libboost-filesystem-dev \ 33 | libboost-thread-dev libboost-iostreams-dev \ 34 | libosmpbf-dev osmpbf-bin libprotobuf-dev pkg-config 35 | 36 | After that, it should just be a matter of running: 37 | 38 | ./autogen.sh 39 | ./configure 40 | make 41 | 42 | If you run into any issues with this, please file a bug on the github 43 | issues page for this project, giving as much detail as you can about 44 | the error and the environment it occurred in. 45 | 46 | Running 47 | ------- 48 | 49 | The planet dump program has a decent built-in usage description, which 50 | you can read by running: 51 | 52 | planet-dump-ng --help 53 | 54 | One thing to note is that the program will create on-disk databases in 55 | the current working directory, so it is wise to run the program 56 | somewhere with plenty of fast disk space. Existing files may interfere 57 | with the operation of the program, so it's best to run it in its own, 58 | clean directory. 59 | 60 | All files can be created in a default version (includes "uid" and 61 | "user" fields), and a "no-userinfo" version (without these fields). 62 | 63 | Architecture 64 | ------------ 65 | 66 | This started out with the aim of being easy to change in response to 67 | schema changes in the API. However, somehow the templates escaped and 68 | began to multiply. 
Sadly, the code is now much less readable than I 69 | would like, but on the bright side is a contender for the Most 70 | Egregiously Templated Code award. 71 | 72 | Simplifying, the code consists of two basic parts; the bit which reads 73 | the PostgreSQL dump, and the part which writes XML and/or PBF. 74 | 75 | The part which reads the PostgreSQL dump operates by launching 76 | "pg_restore" as a sub-process and parsing its output (in quite a naive 77 | way) to get the row data. The part which writes the XML and/or PBF then 78 | does a join between the top level elements like nodes, ways and 79 | relations and their "inners" - things like tags, way nodes and relation 80 | members. 81 | 82 | In order that the system can output a planet file or a history planet file in 83 | the same run, both are generated from the history tables. The history planet 84 | file contains all these versions, but the planet file without history data 85 | ("current") does not. This requires a minor adjustment to how the non-history 86 | planet is written, with a filter which only keeps the most recent version of 87 | each element and does not output any elements which are flagged as deleted. 88 | 89 | History 90 | ------- 91 | 92 | This evolved, by a somewhat roundabout route, from an attempt to 93 | create a new planet dump which read the absolute minimum from the 94 | database; that is changesets, changeset tags and just the IDs and 95 | versions of the current tables for nodes, ways and relations. The 96 | remaining information could be filled in at any time from the history 97 | tables because, with the minor exception of redactions, the nodes, 98 | ways and relations tables are append-only. 99 | 100 | Dumping the IDs and versions would still take time, so it seemed worth 101 | looking at "pg_dump" to see how it would best be done 102 | efficiently. 
While looking at "pg_dump", it became clear that what was 103 | really needed was just the dump itself - a dump which is produced 104 | regularly for backup purposes anyway. 105 | 106 | Tag sorting 107 | ----------- 108 | 109 | Starting with version 1.2.0, `planet-dump-ng` sorts tags by the UTF-8 encoded 110 | bytes of the keys. Most keys use only ASCII characters, so this corresponds to 111 | an alphabetic ordering for those keys. Other keys will depend on how the 112 | codepoint is expressed in UTF-8. Values do not contribute to ordering. 113 | -------------------------------------------------------------------------------- /aclocal/ax_boost_base.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # http://www.gnu.org/software/autoconf-archive/ax_boost_base.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_BOOST_BASE([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) 8 | # 9 | # DESCRIPTION 10 | # 11 | # Test for the Boost C++ libraries of a particular version (or newer) 12 | # 13 | # If no path to the installed boost library is given the macro searches 14 | # under /usr, /usr/local, /opt and /opt/local and evaluates the 15 | # $BOOST_ROOT environment variable. Further documentation is available at 16 | # <http://randspringer.de/boost/index.html>. 17 | # 18 | # This macro calls: 19 | # 20 | # AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS) 21 | # 22 | # And sets: 23 | # 24 | # HAVE_BOOST 25 | # 26 | # LICENSE 27 | # 28 | # Copyright (c) 2008 Thomas Porschberg 29 | # Copyright (c) 2009 Peter Adolphs 30 | # 31 | # Copying and distribution of this file, with or without modification, are 32 | # permitted in any medium without royalty provided the copyright notice 33 | # and this notice are preserved. This file is offered as-is, without any 34 | # warranty. 
#serial 26

dnl AX_BOOST_BASE(MINIMUM-VERSION, ACTION-IF-FOUND, ACTION-IF-NOT-FOUND)
dnl
dnl Locates the Boost headers and library directory and checks that the
dnl BOOST_VERSION macro is at least MINIMUM-VERSION (1.20.0 when omitted).
dnl Detection runs in two passes: first a system layout (include/boost under
dnl the given or default prefixes), then versioned include/boost-X_Y
dnl directories and a staged tree under BOOST_ROOT. On success it substitutes
dnl BOOST_CPPFLAGS and BOOST_LDFLAGS, defines HAVE_BOOST and runs
dnl ACTION-IF-FOUND; otherwise it prints a notice and runs ACTION-IF-NOT-FOUND.
dnl CPPFLAGS and LDFLAGS are restored to their saved values before returning.
AC_DEFUN([AX_BOOST_BASE],
[
AC_ARG_WITH([boost],
  [AS_HELP_STRING([--with-boost@<:@=ARG@:>@],
    [use Boost library from a standard location (ARG=yes),
     from the specified location (ARG=<path>),
     or disable it (ARG=no)
     @<:@ARG=yes@:>@ ])],
    [
    if test "$withval" = "no"; then
        want_boost="no"
    elif test "$withval" = "yes"; then
        want_boost="yes"
        ac_boost_path=""
    else
        want_boost="yes"
        ac_boost_path="$withval"
    fi
    ],
    [want_boost="yes"])


AC_ARG_WITH([boost-libdir],
        AS_HELP_STRING([--with-boost-libdir=LIB_DIR],
        [Force given directory for boost libraries. Note that this will override library path detection, so use this parameter only if default library detection fails and you know exactly where your boost libraries are located.]),
        [
        if test -d "$withval"
        then
                ac_boost_lib_path="$withval"
        else
                AC_MSG_ERROR(--with-boost-libdir expected directory name)
        fi
        ],
        [ac_boost_lib_path=""]
)

if test "x$want_boost" = "xyes"; then
    dnl Split the requested version into major/minor/sub-minor and encode it
    dnl the same way BOOST_VERSION is encoded: major*100000 + minor*100 + sub.
    boost_lib_version_req=ifelse([$1], ,1.20.0,$1)
    boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'`
    boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'`
    boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'`
    boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'`
    if test "x$boost_lib_version_req_sub_minor" = "x" ; then
        boost_lib_version_req_sub_minor="0"
    fi
    WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+  $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor`
    AC_MSG_CHECKING(for boostlib >= $boost_lib_version_req)
    succeeded=no

    dnl On 64-bit systems check for system libraries in both lib64 and lib.
    dnl The former is specified by FHS, but e.g. Debian does not adhere to
    dnl this (as it raises problems for generic multi-arch support).
    dnl The last entry in the list is chosen by default when no libraries
    dnl are found, e.g. when only header-only libraries are installed!
    libsubdirs="lib"
    ax_arch=`uname -m`
    case $ax_arch in
      x86_64)
        libsubdirs="lib64 libx32 lib lib64"
        ;;
      ppc64|s390x|sparc64|aarch64|ppc64le)
        libsubdirs="lib64 lib lib64 ppc64le"
        ;;
    esac

    dnl allow for real multi-arch paths e.g. /usr/lib/x86_64-linux-gnu. Give
    dnl them priority over the other paths since, if libs are found there, they
    dnl are almost assuredly the ones desired.
    AC_REQUIRE([AC_CANONICAL_HOST])
    libsubdirs="lib/${host_cpu}-${host_os} $libsubdirs"

    case ${host_cpu} in
      i?86)
        libsubdirs="lib/i386-${host_os} $libsubdirs"
        ;;
    esac

    dnl first we check the system location for boost libraries
    dnl this location is chosen if boost libraries are installed with the --layout=system option
    dnl or if you install boost with RPM
    if test "$ac_boost_path" != ""; then
        BOOST_CPPFLAGS="-I$ac_boost_path/include"
        for ac_boost_path_tmp in $libsubdirs; do
                if test -d "$ac_boost_path"/"$ac_boost_path_tmp" ; then
                        BOOST_LDFLAGS="-L$ac_boost_path/$ac_boost_path_tmp"
                        break
                fi
        done
    elif test "$cross_compiling" != yes; then
        for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do
            if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then
                for libsubdir in $libsubdirs ; do
                    if ls "$ac_boost_path_tmp/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
                done
                BOOST_LDFLAGS="-L$ac_boost_path_tmp/$libsubdir"
                BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include"
                break;
            fi
        done
    fi

    dnl overwrite ld flags if we have required special directory with
    dnl --with-boost-libdir parameter
    if test "$ac_boost_lib_path" != ""; then
       BOOST_LDFLAGS="-L$ac_boost_lib_path"
    fi

    CPPFLAGS_SAVED="$CPPFLAGS"
    CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
    export CPPFLAGS

    LDFLAGS_SAVED="$LDFLAGS"
    LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
    export LDFLAGS

    dnl First probe: compile a program that fails unless BOOST_VERSION, which
    dnl is provided by boost/version.hpp, meets the requested minimum.
    AC_REQUIRE([AC_PROG_CXX])
    AC_LANG_PUSH(C++)
        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
    @%:@include <boost/version.hpp>
    ]], [[
    #if BOOST_VERSION >= $WANT_BOOST_VERSION
    // Everything is okay
    #else
    #  error Boost version is too old
    #endif
    ]])],[
        AC_MSG_RESULT(yes)
    succeeded=yes
    found_system=yes
        ],[
        ])
    AC_LANG_POP([C++])



    dnl if we found no boost with system layout we search for boost libraries
    dnl built and installed without the --layout=system option or for a staged(not installed) version
    if test "x$succeeded" != "xyes"; then
        CPPFLAGS="$CPPFLAGS_SAVED"
        LDFLAGS="$LDFLAGS_SAVED"
        BOOST_CPPFLAGS=
        BOOST_LDFLAGS=
        _version=0
        if test "$ac_boost_path" != ""; then
            if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
                for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
                    _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
                    V_CHECK=`expr $_version_tmp \> $_version`
                    if test "$V_CHECK" = "1" ; then
                        _version=$_version_tmp
                    fi
                    VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
                    BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE"
                done
                dnl if nothing found search for layout used in Windows distributions
                if test -z "$BOOST_CPPFLAGS"; then
                    if test -d "$ac_boost_path/boost" && test -r "$ac_boost_path/boost"; then
                        BOOST_CPPFLAGS="-I$ac_boost_path"
                    fi
                fi
            fi
        else
            if test "$cross_compiling" != yes; then
                dnl Pick the highest versioned include/boost-X_Y directory
                dnl across the default prefixes and remember where it lives.
                for ac_boost_path in /usr /usr/local /opt /opt/local ; do
                    if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
                        for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
                            _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
                            V_CHECK=`expr $_version_tmp \> $_version`
                            if test "$V_CHECK" = "1" ; then
                                _version=$_version_tmp
                                best_path=$ac_boost_path
                            fi
                        done
                    fi
                done

                VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
                BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE"
                if test "$ac_boost_lib_path" = ""; then
                    for libsubdir in $libsubdirs ; do
                        if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
                    done
                    BOOST_LDFLAGS="-L$best_path/$libsubdir"
                fi
            fi

            dnl A staged (built but not installed) tree under BOOST_ROOT wins
            dnl when its version is at least the best installed one and no
            dnl explicit library directory was forced.
            if test "x$BOOST_ROOT" != "x"; then
                for libsubdir in $libsubdirs ; do
                    if ls "$BOOST_ROOT/stage/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
                done
                if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/$libsubdir" && test -r "$BOOST_ROOT/stage/$libsubdir"; then
                    version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'`
                    stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'`
                        stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'`
                    V_CHECK=`expr $stage_version_shorten \>\= $_version`
                    if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then
                        AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT)
                        BOOST_CPPFLAGS="-I$BOOST_ROOT"
                        BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir"
                    fi
                fi
            fi
        fi

        CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
        export CPPFLAGS
        LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
        export LDFLAGS

        dnl Second probe: same version test against the flags found above.
        AC_LANG_PUSH(C++)
            AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
        @%:@include <boost/version.hpp>
        ]], [[
        #if BOOST_VERSION >= $WANT_BOOST_VERSION
        // Everything is okay
        #else
        #  error Boost version is too old
        #endif
        ]])],[
            AC_MSG_RESULT(yes)
        succeeded=yes
        found_system=yes
            ],[
            ])
        AC_LANG_POP([C++])
    fi

    if test "$succeeded" != "yes" ; then
        if test "$_version" = "0" ; then
            AC_MSG_NOTICE([[We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option.  If you are sure you have boost installed, then check your version number looking in <boost/version.hpp>. See http://randspringer.de/boost for more documentation.]])
        else
            AC_MSG_NOTICE([Your boost libraries seems to old (version $_version).])
        fi
        # execute ACTION-IF-NOT-FOUND (if present):
        ifelse([$3], , :, [$3])
    else
        AC_SUBST(BOOST_CPPFLAGS)
        AC_SUBST(BOOST_LDFLAGS)
        AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available])
        # execute ACTION-IF-FOUND (if present):
        ifelse([$2], , :, [$2])
    fi

    CPPFLAGS="$CPPFLAGS_SAVED"
    LDFLAGS="$LDFLAGS_SAVED"
fi

])

--------------------------------------------------------------------------------
/aclocal/ax_boost_date_time.m4:
--------------------------------------------------------------------------------
# ===========================================================================
#    http://www.gnu.org/software/autoconf-archive/ax_boost_date_time.html
# ===========================================================================
#
# SYNOPSIS
#
#   AX_BOOST_DATE_TIME
#
# DESCRIPTION
#
#   Test for Date_Time library from the Boost C++ libraries. The macro
#   requires a preceding call to AX_BOOST_BASE. Further documentation is
#   available at <http://randspringer.de/boost/index.html>.
#
#   This macro calls:
#
#     AC_SUBST(BOOST_DATE_TIME_LIB)
#
#   And sets:
#
#     HAVE_BOOST_DATE_TIME
#
# LICENSE
#
#   Copyright (c) 2008 Thomas Porschberg
#   Copyright (c) 2008 Michael Tindal
#
#   Copying and distribution of this file, with or without modification, are
#   permitted in any medium without royalty provided the copyright notice
#   and this notice are preserved. This file is offered as-is, without any
#   warranty.

#serial 21

dnl AX_BOOST_DATE_TIME
dnl
dnl Compiles a small boost::gregorian program to confirm the Date_Time headers
dnl are usable, then looks in the directory named by BOOST_LDFLAGS for a
dnl matching library and link-tests each candidate. On success it defines
dnl HAVE_BOOST_DATE_TIME and substitutes BOOST_DATE_TIME_LIB; it aborts
dnl configure when no candidate is found or none links.
AC_DEFUN([AX_BOOST_DATE_TIME],
[
    AC_ARG_WITH([boost-date-time],
    AS_HELP_STRING([--with-boost-date-time@<:@=special-lib@:>@],
                   [use the Date_Time library from boost - it is possible to specify a certain library for the linker
                        e.g. --with-boost-date-time=boost_date_time-gcc-mt-d-1_33_1 ]),
        [
        if test "$withval" = "no"; then
            want_boost="no"
        elif test "$withval" = "yes"; then
            want_boost="yes"
            ax_boost_user_date_time_lib=""
        else
            want_boost="yes"
            ax_boost_user_date_time_lib="$withval"
        fi
        ],
        [want_boost="yes"]
    )

    if test "x$want_boost" = "xyes"; then
        AC_REQUIRE([AC_PROG_CC])
        CPPFLAGS_SAVED="$CPPFLAGS"
        CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
        export CPPFLAGS

        LDFLAGS_SAVED="$LDFLAGS"
        LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
        export LDFLAGS

        AC_CACHE_CHECK(whether the Boost::Date_Time library is available,
                       ax_cv_boost_date_time,
        [AC_LANG_PUSH([C++])
         AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/date_time/gregorian/gregorian_types.hpp>]],
                                   [[using namespace boost::gregorian; date d(2002,Jan,10);
                                     return 0;
                                   ]])],
         ax_cv_boost_date_time=yes, ax_cv_boost_date_time=no)
         AC_LANG_POP([C++])
        ])
        dnl Reset so a value left over from an earlier library macro cannot
        dnl hide the not-found case below.
        ax_lib=""
        if test "x$ax_cv_boost_date_time" = "xyes"; then
            AC_DEFINE(HAVE_BOOST_DATE_TIME,,[define if the Boost::Date_Time library is available])
            dnl Strip everything before the first slash, i.e. the -L prefix.
            BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
            if test "x$ax_boost_user_date_time_lib" = "x"; then
                for libextension in `ls $BOOSTLIBDIR/libboost_date_time*.so* $BOOSTLIBDIR/libboost_date_time*.dylib* $BOOSTLIBDIR/libboost_date_time*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_date_time.*\)\.so.*$;\1;' -e 's;^lib\(boost_date_time.*\)\.dylib.*$;\1;' -e 's;^lib\(boost_date_time.*\)\.a.*$;\1;'` ; do
                    ax_lib=${libextension}
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_DATE_TIME_LIB="-l$ax_lib"; AC_SUBST(BOOST_DATE_TIME_LIB) link_date_time="yes"; break],
                                 [link_date_time="no"])
                done
                dnl Fall back to names without the lib prefix.
                if test "x$link_date_time" != "xyes"; then
                for libextension in `ls $BOOSTLIBDIR/boost_date_time*.dll* $BOOSTLIBDIR/boost_date_time*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_date_time.*\)\.dll.*$;\1;' -e 's;^\(boost_date_time.*\)\.a.*$;\1;'` ; do
                    ax_lib=${libextension}
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_DATE_TIME_LIB="-l$ax_lib"; AC_SUBST(BOOST_DATE_TIME_LIB) link_date_time="yes"; break],
                                 [link_date_time="no"])
                done
                fi

            else
                for ax_lib in $ax_boost_user_date_time_lib boost_date_time-$ax_boost_user_date_time_lib; do
                    AC_CHECK_LIB($ax_lib, main,
                                 [BOOST_DATE_TIME_LIB="-l$ax_lib"; AC_SUBST(BOOST_DATE_TIME_LIB) link_date_time="yes"; break],
                                 [link_date_time="no"])
                done

            fi
            if test "x$ax_lib" = "x"; then
                AC_MSG_ERROR(Could not find a version of the library!)
            fi
            if test "x$link_date_time" != "xyes"; then
                AC_MSG_ERROR(Could not link against $ax_lib !)
            fi
        fi

        CPPFLAGS="$CPPFLAGS_SAVED"
        LDFLAGS="$LDFLAGS_SAVED"
    fi
])

--------------------------------------------------------------------------------
/aclocal/ax_boost_filesystem.m4:
--------------------------------------------------------------------------------
# ===========================================================================
#    http://www.gnu.org/software/autoconf-archive/ax_boost_filesystem.html
# ===========================================================================
#
# SYNOPSIS
#
#   AX_BOOST_FILESYSTEM
#
# DESCRIPTION
#
#   Test for Filesystem library from the Boost C++ libraries. The macro
#   requires a preceding call to AX_BOOST_BASE. Further documentation is
#   available at <http://randspringer.de/boost/index.html>.
#
#   This macro calls:
#
#     AC_SUBST(BOOST_FILESYSTEM_LIB)
#
#   And sets:
#
#     HAVE_BOOST_FILESYSTEM
#
# LICENSE
#
#   Copyright (c) 2009 Thomas Porschberg
#   Copyright (c) 2009 Michael Tindal
#   Copyright (c) 2009 Roman Rybalko
#
#   Copying and distribution of this file, with or without modification, are
#   permitted in any medium without royalty provided the copyright notice
#   and this notice are preserved. This file is offered as-is, without any
#   warranty.

#serial 26

dnl AX_BOOST_FILESYSTEM
dnl
dnl Compiles a small boost::filesystem::path program to confirm the headers
dnl are usable, then link-tests candidate libraries found in the directory
dnl named by BOOST_LDFLAGS. BOOST_SYSTEM_LIB is appended to LIBS for the
dnl duration of the checks. On success it defines HAVE_BOOST_FILESYSTEM and
dnl substitutes BOOST_FILESYSTEM_LIB; otherwise it aborts configure.
AC_DEFUN([AX_BOOST_FILESYSTEM],
[
    AC_ARG_WITH([boost-filesystem],
    AS_HELP_STRING([--with-boost-filesystem@<:@=special-lib@:>@],
                   [use the Filesystem library from boost - it is possible to specify a certain library for the linker
                        e.g. --with-boost-filesystem=boost_filesystem-gcc-mt ]),
        [
        if test "$withval" = "no"; then
            want_boost="no"
        elif test "$withval" = "yes"; then
            want_boost="yes"
            ax_boost_user_filesystem_lib=""
        else
            want_boost="yes"
            ax_boost_user_filesystem_lib="$withval"
        fi
        ],
        [want_boost="yes"]
    )

    if test "x$want_boost" = "xyes"; then
        AC_REQUIRE([AC_PROG_CC])
        CPPFLAGS_SAVED="$CPPFLAGS"
        CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
        export CPPFLAGS

        LDFLAGS_SAVED="$LDFLAGS"
        LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
        export LDFLAGS

        LIBS_SAVED=$LIBS
        LIBS="$LIBS $BOOST_SYSTEM_LIB"
        export LIBS

        AC_CACHE_CHECK(whether the Boost::Filesystem library is available,
                       ax_cv_boost_filesystem,
        [AC_LANG_PUSH([C++])
         AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/filesystem/path.hpp>]],
                                   [[using namespace boost::filesystem;
                                   path my_path( "foo/bar/data.txt" );
                                   return 0;]])],
                           ax_cv_boost_filesystem=yes, ax_cv_boost_filesystem=no)
         AC_LANG_POP([C++])
        ])
        dnl Reset so a value left over from an earlier library macro cannot
        dnl hide the not-found case below.
        ax_lib=""
        if test "x$ax_cv_boost_filesystem" = "xyes"; then
            AC_DEFINE(HAVE_BOOST_FILESYSTEM,,[define if the Boost::Filesystem library is available])
            dnl Strip everything before the first slash, i.e. the -L prefix.
            BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
            if test "x$ax_boost_user_filesystem_lib" = "x"; then
                for libextension in `ls -r $BOOSTLIBDIR/libboost_filesystem* 2>/dev/null | sed 's,.*/lib,,' | sed 's,\..*,,'` ; do
                    ax_lib=${libextension}
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break],
                                 [link_filesystem="no"])
                done
                dnl Fall back to names without the lib prefix.
                if test "x$link_filesystem" != "xyes"; then
                for libextension in `ls -r $BOOSTLIBDIR/boost_filesystem* 2>/dev/null | sed 's,.*/,,' | sed -e 's,\..*,,'` ; do
                    ax_lib=${libextension}
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break],
                                 [link_filesystem="no"])
                done
                fi
            else
                for ax_lib in $ax_boost_user_filesystem_lib boost_filesystem-$ax_boost_user_filesystem_lib; do
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break],
                                 [link_filesystem="no"])
                done

            fi
            if test "x$ax_lib" = "x"; then
                AC_MSG_ERROR(Could not find a version of the library!)
            fi
            if test "x$link_filesystem" != "xyes"; then
                AC_MSG_ERROR(Could not link against $ax_lib !)
            fi
        fi

        CPPFLAGS="$CPPFLAGS_SAVED"
        LDFLAGS="$LDFLAGS_SAVED"
        LIBS="$LIBS_SAVED"
    fi
])

--------------------------------------------------------------------------------
/aclocal/ax_boost_iostreams.m4:
--------------------------------------------------------------------------------
# ===========================================================================
#    http://www.gnu.org/software/autoconf-archive/ax_boost_iostreams.html
# ===========================================================================
#
# SYNOPSIS
#
#   AX_BOOST_IOSTREAMS
#
# DESCRIPTION
#
#   Test for IOStreams library from the Boost C++ libraries. The macro
#   requires a preceding call to AX_BOOST_BASE. Further documentation is
#   available at <http://randspringer.de/boost/index.html>.
#
#   This macro calls:
#
#     AC_SUBST(BOOST_IOSTREAMS_LIB)
#
#   And sets:
#
#     HAVE_BOOST_IOSTREAMS
#
# LICENSE
#
#   Copyright (c) 2008 Thomas Porschberg
#
#   Copying and distribution of this file, with or without modification, are
#   permitted in any medium without royalty provided the copyright notice
#   and this notice are preserved. This file is offered as-is, without any
#   warranty.

#serial 20

dnl AX_BOOST_IOSTREAMS
dnl
dnl Compiles a small boost::iostreams::filtering_istream program to confirm
dnl the headers are usable, then link-tests candidate libraries found in the
dnl directory named by BOOST_LDFLAGS. On success it defines
dnl HAVE_BOOST_IOSTREAMS and substitutes BOOST_IOSTREAMS_LIB; otherwise it
dnl aborts configure.
AC_DEFUN([AX_BOOST_IOSTREAMS],
[
    AC_ARG_WITH([boost-iostreams],
    AS_HELP_STRING([--with-boost-iostreams@<:@=special-lib@:>@],
                   [use the IOStreams library from boost - it is possible to specify a certain library for the linker
                        e.g. --with-boost-iostreams=boost_iostreams-gcc-mt-d-1_33_1 ]),
        [
        if test "$withval" = "no"; then
            want_boost="no"
        elif test "$withval" = "yes"; then
            want_boost="yes"
            ax_boost_user_iostreams_lib=""
        else
            want_boost="yes"
            ax_boost_user_iostreams_lib="$withval"
        fi
        ],
        [want_boost="yes"]
    )

    if test "x$want_boost" = "xyes"; then
        AC_REQUIRE([AC_PROG_CC])
        CPPFLAGS_SAVED="$CPPFLAGS"
        CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
        export CPPFLAGS

        LDFLAGS_SAVED="$LDFLAGS"
        LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
        export LDFLAGS

        AC_CACHE_CHECK(whether the Boost::IOStreams library is available,
                       ax_cv_boost_iostreams,
        [AC_LANG_PUSH([C++])
         AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/iostreams/filtering_stream.hpp>
                                              @%:@include <boost/range/iterator_range.hpp>
                                            ]],
                                   [[std::string input = "Hello World!";
                                     namespace io = boost::iostreams;
                                     io::filtering_istream in(boost::make_iterator_range(input));
                                     return 0;
                                   ]])],
                           ax_cv_boost_iostreams=yes, ax_cv_boost_iostreams=no)
         AC_LANG_POP([C++])
        ])
        ax_lib=""
        if test "x$ax_cv_boost_iostreams" = "xyes"; then
            AC_DEFINE(HAVE_BOOST_IOSTREAMS,,[define if the Boost::IOStreams library is available])
            dnl Strip everything before the first slash, i.e. the -L prefix.
            BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
            if test "x$ax_boost_user_iostreams_lib" = "x"; then
                for libextension in `ls $BOOSTLIBDIR/libboost_iostreams*.so* $BOOSTLIBDIR/libboost_iostreams*.dylib* $BOOSTLIBDIR/libboost_iostreams*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_iostreams.*\)\.so.*$;\1;' -e 's;^lib\(boost_iostreams.*\)\.dylib.*$;\1;' -e 's;^lib\(boost_iostreams.*\)\.a.*$;\1;'` ; do
                    ax_lib=${libextension}
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) link_iostreams="yes"; break],
                                 [link_iostreams="no"])
                done
                dnl Fall back to names without the lib prefix.
                if test "x$link_iostreams" != "xyes"; then
                for libextension in `ls $BOOSTLIBDIR/boost_iostreams*.dll* $BOOSTLIBDIR/boost_iostreams*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_iostreams.*\)\.dll.*$;\1;' -e 's;^\(boost_iostreams.*\)\.a.*$;\1;'` ; do
                    ax_lib=${libextension}
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) link_iostreams="yes"; break],
                                 [link_iostreams="no"])
                done
                fi

            else
                for ax_lib in $ax_boost_user_iostreams_lib boost_iostreams-$ax_boost_user_iostreams_lib; do
                    AC_CHECK_LIB($ax_lib, main,
                                 [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) link_iostreams="yes"; break],
                                 [link_iostreams="no"])
                done

            fi
            if test "x$ax_lib" = "x"; then
                AC_MSG_ERROR(Could not find a version of the library!)
            fi
            if test "x$link_iostreams" != "xyes"; then
                AC_MSG_ERROR(Could not link against $ax_lib !)
            fi
        fi

        CPPFLAGS="$CPPFLAGS_SAVED"
        LDFLAGS="$LDFLAGS_SAVED"
    fi
])

--------------------------------------------------------------------------------
/aclocal/ax_boost_program_options.m4:
--------------------------------------------------------------------------------
# ============================================================================
#  http://www.gnu.org/software/autoconf-archive/ax_boost_program_options.html
# ============================================================================
#
# SYNOPSIS
#
#   AX_BOOST_PROGRAM_OPTIONS
#
# DESCRIPTION
#
#   Test for program options library from the Boost C++ libraries. The macro
#   requires a preceding call to AX_BOOST_BASE. Further documentation is
#   available at <http://randspringer.de/boost/index.html>.
#
#   This macro calls:
#
#     AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB)
#
#   And sets:
#
#     HAVE_BOOST_PROGRAM_OPTIONS
#
# LICENSE
#
#   Copyright (c) 2009 Thomas Porschberg
#
#   Copying and distribution of this file, with or without modification, are
#   permitted in any medium without royalty provided the copyright notice
#   and this notice are preserved. This file is offered as-is, without any
#   warranty.

#serial 24

dnl AX_BOOST_PROGRAM_OPTIONS
dnl
dnl Compiles a small boost::program_options::error program to confirm the
dnl headers are usable, then link-tests candidate libraries found in the
dnl directory named by BOOST_LDFLAGS. On success it defines
dnl HAVE_BOOST_PROGRAM_OPTIONS and substitutes BOOST_PROGRAM_OPTIONS_LIB;
dnl otherwise it aborts configure.
AC_DEFUN([AX_BOOST_PROGRAM_OPTIONS],
[
    AC_ARG_WITH([boost-program-options],
        AS_HELP_STRING([--with-boost-program-options@<:@=special-lib@:>@],
                       [use the program options library from boost - it is possible to specify a certain library for the linker
                        e.g. --with-boost-program-options=boost_program_options-gcc-mt-1_33_1 ]),
        [
        if test "$withval" = "no"; then
            want_boost="no"
        elif test "$withval" = "yes"; then
            want_boost="yes"
            ax_boost_user_program_options_lib=""
        else
            want_boost="yes"
            ax_boost_user_program_options_lib="$withval"
        fi
        ],
        [want_boost="yes"]
    )

    if test "x$want_boost" = "xyes"; then
        AC_REQUIRE([AC_PROG_CC])
        export want_boost
        CPPFLAGS_SAVED="$CPPFLAGS"
        CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
        export CPPFLAGS
        LDFLAGS_SAVED="$LDFLAGS"
        LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
        export LDFLAGS
        AC_CACHE_CHECK([whether the Boost::Program_Options library is available],
                       ax_cv_boost_program_options,
                       [AC_LANG_PUSH(C++)
                        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/program_options/errors.hpp>
                                                          ]],
                                  [[boost::program_options::error err("Error message");
                                   return 0;]])],
                           ax_cv_boost_program_options=yes, ax_cv_boost_program_options=no)
                        AC_LANG_POP([C++])
        ])
        dnl Reset so a value left over from an earlier library macro cannot
        dnl hide the not-found case below.
        ax_lib=""
        if test "$ax_cv_boost_program_options" = yes; then
            AC_DEFINE(HAVE_BOOST_PROGRAM_OPTIONS,,[define if the Boost::PROGRAM_OPTIONS library is available])
            dnl Strip everything before the first slash, i.e. the -L prefix.
            BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
            if test "x$ax_boost_user_program_options_lib" = "x"; then
                for libextension in `ls $BOOSTLIBDIR/libboost_program_options*.so* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.so.*$;\1;'` `ls $BOOSTLIBDIR/libboost_program_options*.dylib* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.dylib.*$;\1;'` `ls $BOOSTLIBDIR/libboost_program_options*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.a.*$;\1;'` ; do
                    ax_lib=${libextension}
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break],
                                 [link_program_options="no"])
                done
                dnl Fall back to names without the lib prefix.
                if test "x$link_program_options" != "xyes"; then
                for libextension in `ls $BOOSTLIBDIR/boost_program_options*.dll* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_program_options.*\)\.dll.*$;\1;'` `ls $BOOSTLIBDIR/boost_program_options*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_program_options.*\)\.a.*$;\1;'` ; do
                    ax_lib=${libextension}
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break],
                                 [link_program_options="no"])
                done
                fi
            else
                for ax_lib in $ax_boost_user_program_options_lib boost_program_options-$ax_boost_user_program_options_lib; do
                    AC_CHECK_LIB($ax_lib, main,
                                 [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break],
                                 [link_program_options="no"])
                done
            fi
            if test "x$ax_lib" = "x"; then
                AC_MSG_ERROR(Could not find a version of the library!)
            fi
            if test "x$link_program_options" != "xyes"; then
                AC_MSG_ERROR([Could not link against [$ax_lib] !])
            fi
        fi
        CPPFLAGS="$CPPFLAGS_SAVED"
        LDFLAGS="$LDFLAGS_SAVED"
    fi
])

--------------------------------------------------------------------------------
/aclocal/ax_boost_system.m4:
--------------------------------------------------------------------------------
# ===========================================================================
#      http://www.gnu.org/software/autoconf-archive/ax_boost_system.html
# ===========================================================================
#
# SYNOPSIS
#
#   AX_BOOST_SYSTEM
#
# DESCRIPTION
#
#   Test for System library from the Boost C++ libraries. The macro requires
#   a preceding call to AX_BOOST_BASE. Further documentation is available at
#   <http://randspringer.de/boost/index.html>.
#
#   This macro calls:
#
#     AC_SUBST(BOOST_SYSTEM_LIB)
#
#   And sets:
#
#     HAVE_BOOST_SYSTEM
#
# LICENSE
#
#   Copyright (c) 2008 Thomas Porschberg
#   Copyright (c) 2008 Michael Tindal
#   Copyright (c) 2008 Daniel Casimiro
#
#   Copying and distribution of this file, with or without modification, are
#   permitted in any medium without royalty provided the copyright notice
#   and this notice are preserved. This file is offered as-is, without any
#   warranty.

#serial 18

dnl AX_BOOST_SYSTEM
dnl
dnl Compiles a small boost::system::error_category program, with CXXFLAGS
dnl temporarily emptied, to confirm the headers are usable, then link-tests
dnl candidate libraries found in the directory named by BOOST_LDFLAGS. On
dnl success it defines HAVE_BOOST_SYSTEM and substitutes BOOST_CPPFLAGS and
dnl BOOST_SYSTEM_LIB.
AC_DEFUN([AX_BOOST_SYSTEM],
[
    AC_ARG_WITH([boost-system],
    AS_HELP_STRING([--with-boost-system@<:@=special-lib@:>@],
                   [use the System library from boost - it is possible to specify a certain library for the linker
                        e.g. --with-boost-system=boost_system-gcc-mt ]),
        [
        if test "$withval" = "no"; then
            want_boost="no"
        elif test "$withval" = "yes"; then
            want_boost="yes"
            ax_boost_user_system_lib=""
        else
            want_boost="yes"
            ax_boost_user_system_lib="$withval"
        fi
        ],
        [want_boost="yes"]
    )

    if test "x$want_boost" = "xyes"; then
        AC_REQUIRE([AC_PROG_CC])
        AC_REQUIRE([AC_CANONICAL_BUILD])
        CPPFLAGS_SAVED="$CPPFLAGS"
        CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
        export CPPFLAGS

        LDFLAGS_SAVED="$LDFLAGS"
        LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
        export LDFLAGS

        AC_CACHE_CHECK(whether the Boost::System library is available,
                       ax_cv_boost_system,
        [AC_LANG_PUSH([C++])
             CXXFLAGS_SAVE=$CXXFLAGS
             CXXFLAGS=

             AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/system/error_code.hpp>]],
                                   [[boost::system::error_category *a = 0;]])],
                   ax_cv_boost_system=yes, ax_cv_boost_system=no)
             CXXFLAGS=$CXXFLAGS_SAVE
             AC_LANG_POP([C++])
        ])
        if test "x$ax_cv_boost_system" = "xyes"; then
            AC_SUBST(BOOST_CPPFLAGS)

            AC_DEFINE(HAVE_BOOST_SYSTEM,,[define if the Boost::System library is available])
            dnl Strip everything before the first slash, i.e. the -L prefix.
            BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`

            LDFLAGS_SAVE=$LDFLAGS
            if test "x$ax_boost_user_system_lib" = "x"; then
                for libextension in `ls -r $BOOSTLIBDIR/libboost_system* 2>/dev/null | sed 's,.*/lib,,' | sed 's,\..*,,'` ; do
                    ax_lib=${libextension}
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break],
                                 [link_system="no"])
                done
                dnl Fall back to names without the lib prefix.
                if test "x$link_system" != "xyes"; then
                for libextension in `ls -r $BOOSTLIBDIR/boost_system* 2>/dev/null | sed 's,.*/,,' | sed -e 's,\..*,,'` ; do
                    ax_lib=${libextension}
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break],
                                 [link_system="no"])
                done
                fi

            else
                for ax_lib in $ax_boost_user_system_lib boost_system-$ax_boost_user_system_lib; do
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break],
                                 [link_system="no"])
                done

            fi
            dnl NOTE(review): ax_lib is not reset in this macro and the link
            dnl check below only fires on an explicit no, so when no candidate
            dnl is found the outcome depends on whether an earlier macro left
            dnl ax_lib set. Left unchanged to avoid breaking existing builds.
            if test "x$ax_lib" = "x"; then
                AC_MSG_ERROR(Could not find a version of the library!)
            fi
            if test "x$link_system" = "xno"; then
                AC_MSG_ERROR(Could not link against $ax_lib !)
            fi
        fi

        CPPFLAGS="$CPPFLAGS_SAVED"
        LDFLAGS="$LDFLAGS_SAVED"
    fi
])

--------------------------------------------------------------------------------
/aclocal/ax_boost_thread.m4:
--------------------------------------------------------------------------------
# ===========================================================================
#      http://www.gnu.org/software/autoconf-archive/ax_boost_thread.html
# ===========================================================================
#
# SYNOPSIS
#
#   AX_BOOST_THREAD
#
# DESCRIPTION
#
#   Test for Thread library from the Boost C++ libraries. The macro requires
#   a preceding call to AX_BOOST_BASE. Further documentation is available at
#   <http://randspringer.de/boost/index.html>.
#
#   This macro calls:
#
#     AC_SUBST(BOOST_THREAD_LIB)
#
#   And sets:
#
#     HAVE_BOOST_THREAD
#
# LICENSE
#
#   Copyright (c) 2009 Thomas Porschberg
#   Copyright (c) 2009 Michael Tindal
#
#   Copying and distribution of this file, with or without modification, are
#   permitted in any medium without royalty provided the copyright notice
#   and this notice are preserved. This file is offered as-is, without any
#   warranty.
32 | 33 | #serial 27 34 | 35 | AC_DEFUN([AX_BOOST_THREAD], 36 | [ 37 | AC_ARG_WITH([boost-thread], 38 | AS_HELP_STRING([--with-boost-thread@<:@=special-lib@:>@], 39 | [use the Thread library from boost - it is possible to specify a certain library for the linker 40 | e.g. --with-boost-thread=boost_thread-gcc-mt ]), 41 | [ 42 | if test "$withval" = "no"; then 43 | want_boost="no" 44 | elif test "$withval" = "yes"; then 45 | want_boost="yes" 46 | ax_boost_user_thread_lib="" 47 | else 48 | want_boost="yes" 49 | ax_boost_user_thread_lib="$withval" 50 | fi 51 | ], 52 | [want_boost="yes"] 53 | ) 54 | 55 | if test "x$want_boost" = "xyes"; then 56 | AC_REQUIRE([AC_PROG_CC]) 57 | AC_REQUIRE([AC_CANONICAL_BUILD]) 58 | CPPFLAGS_SAVED="$CPPFLAGS" 59 | CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" 60 | export CPPFLAGS 61 | 62 | LDFLAGS_SAVED="$LDFLAGS" 63 | LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" 64 | export LDFLAGS 65 | 66 | AC_CACHE_CHECK(whether the Boost::Thread library is available, 67 | ax_cv_boost_thread, 68 | [AC_LANG_PUSH([C++]) 69 | CXXFLAGS_SAVE=$CXXFLAGS 70 | 71 | if test "x$host_os" = "xsolaris" ; then 72 | CXXFLAGS="-pthreads $CXXFLAGS" 73 | elif test "x$host_os" = "xmingw32" ; then 74 | CXXFLAGS="-mthreads $CXXFLAGS" 75 | else 76 | CXXFLAGS="-pthread $CXXFLAGS" 77 | fi 78 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include ]], 79 | [[boost::thread_group thrds; 80 | return 0;]])], 81 | ax_cv_boost_thread=yes, ax_cv_boost_thread=no) 82 | CXXFLAGS=$CXXFLAGS_SAVE 83 | AC_LANG_POP([C++]) 84 | ]) 85 | ax_lib="" 86 | if test "x$ax_cv_boost_thread" = "xyes"; then 87 | if test "x$host_os" = "xsolaris" ; then 88 | BOOST_CPPFLAGS="-pthreads $BOOST_CPPFLAGS" 89 | elif test "x$host_os" = "xmingw32" ; then 90 | BOOST_CPPFLAGS="-mthreads $BOOST_CPPFLAGS" 91 | else 92 | BOOST_CPPFLAGS="-pthread $BOOST_CPPFLAGS" 93 | fi 94 | 95 | AC_SUBST(BOOST_CPPFLAGS) 96 | 97 | AC_DEFINE(HAVE_BOOST_THREAD,,[define if the Boost::Thread library is available]) 98 | BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 
's/@<:@^\/@:>@*//'` 99 | 100 | LDFLAGS_SAVE=$LDFLAGS 101 | case "x$host_os" in 102 | *bsd* ) 103 | LDFLAGS="-pthread $LDFLAGS" 104 | break; 105 | ;; 106 | esac 107 | if test "x$ax_boost_user_thread_lib" = "x"; then 108 | for libextension in `ls -r $BOOSTLIBDIR/libboost_thread* 2>/dev/null | sed 's,.*/lib,,' | sed 's,\..*,,'`; do 109 | ax_lib=${libextension} 110 | AC_CHECK_LIB($ax_lib, exit, 111 | [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], 112 | [link_thread="no"]) 113 | done 114 | if test "x$link_thread" != "xyes"; then 115 | for libextension in `ls -r $BOOSTLIBDIR/boost_thread* 2>/dev/null | sed 's,.*/,,' | sed 's,\..*,,'`; do 116 | ax_lib=${libextension} 117 | AC_CHECK_LIB($ax_lib, exit, 118 | [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], 119 | [link_thread="no"]) 120 | done 121 | fi 122 | 123 | else 124 | for ax_lib in $ax_boost_user_thread_lib boost_thread-$ax_boost_user_thread_lib; do 125 | AC_CHECK_LIB($ax_lib, exit, 126 | [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], 127 | [link_thread="no"]) 128 | done 129 | 130 | fi 131 | if test "x$ax_lib" = "x"; then 132 | AC_MSG_ERROR(Could not find a version of the library!) 133 | fi 134 | if test "x$link_thread" = "xno"; then 135 | AC_MSG_ERROR(Could not link against $ax_lib !) 
dnl Autoconf input for planet-dump-ng.
dnl
dnl Checks for a C++11 compiler, libxml2, the Boost components used by the
dnl project, protobuf / protobuf-lite and the osmpbf headers, then generates
dnl Makefile and src/Makefile.
AC_INIT([planet-dump-ng],
        [1.2.8],
        [https://github.com/zerebubuth/planet-dump-ng/issues],
        [planet-dump-ng-1.2.8],
        [https://github.com/zerebubuth/planet-dump-ng])
AM_INIT_AUTOMAKE([subdir-objects parallel-tests])
AM_SILENT_RULES([yes])
AC_CONFIG_HEADERS([include/config.h])

AC_PROG_CXX
AC_PROG_INSTALL
AC_PROG_MAKE_SET

AC_HEADER_STDC
AC_LANG_CPLUSPLUS

AC_CONFIG_MACRO_DIR([m4])
AX_CXX_COMPILE_STDCXX(11, noext, mandatory)

PKG_CHECK_MODULES(LIBXML, libxml-2.0 >= 2.6.31)
AC_SUBST(LIBXML_CFLAGS)
AC_SUBST(LIBXML_LIBS)

AX_BOOST_BASE([1.43], , [AC_MSG_ERROR([Boost libraries are required for building planet-dump-ng. Please install libboost-dev.])])
AX_BOOST_SYSTEM
AX_BOOST_FILESYSTEM
AX_BOOST_PROGRAM_OPTIONS
AX_BOOST_DATE_TIME
AX_BOOST_THREAD
AX_BOOST_IOSTREAMS

PKG_CHECK_MODULES([PROTOBUF_LITE], "protobuf-lite")
AC_SUBST([PROTOBUF_LITE_CFLAGS])
AC_SUBST([PROTOBUF_LITE_LIBS])

PKG_CHECK_MODULES([PROTOBUF], "protobuf")
AC_SUBST([PROTOBUF_CFLAGS])
AC_SUBST([PROTOBUF_LIBS])

AC_CHECK_HEADER([osmpbf/osmpbf.h],[],[AC_MSG_ERROR([Unable to find the osmpbf headers, you might need to install libosmpbf-dev.])])

dnl --enable-old-osmpbf selects the code path for libosmpbf-dev < 1.3.0.
dnl
dnl The action-if-given argument of AC_ARG_ENABLE also runs for
dnl --disable-old-osmpbf and --enable-old-osmpbf=no (with enableval=no), so
dnl enableval has to be inspected; otherwise an explicit "disable" would
dnl switch the legacy path on. Any value other than "no" still enables it,
dnl which keeps the previous behaviour for --enable-old-osmpbf[=anything].
AC_MSG_CHECKING([whether you have an ancient version of osmpbf.])
AC_ARG_ENABLE([old-osmpbf],
  [AS_HELP_STRING([--enable-old-osmpbf],
    [Use this flag if you have a version of libosmpbf-dev before 1.3.0.])],
  [AS_IF([test "x$enableval" = xno],
         [with_old_osmpbf="no"],
         [with_old_osmpbf="yes"])],
  [with_old_osmpbf="no"])
AC_MSG_RESULT($with_old_osmpbf)
dnl POSIX test(1) only defines "=" for string equality; "==" is a bash
dnl extension and fails when /bin/sh is e.g. dash.
AS_IF([test "x$with_old_osmpbf" = xyes],
  [AC_DEFINE([WITH_OLD_OSMPBF], [1], [Define when libosmpbf version is ancient.])])
AM_CONDITIONAL([WITH_OLD_OSMPBF], [test "x$with_old_osmpbf" = xyes])

AC_CONFIG_FILES([
	Makefile
	src/Makefile])

AC_OUTPUT
14 | */ 15 | template 16 | struct changeset_filter : public output_writer { 17 | changeset_filter(const std::string &, const boost::program_options::variables_map &, 18 | const user_map_t &, const boost::posix_time::ptime &, user_info_level, historical_versions, changeset_discussions); 19 | virtual ~changeset_filter(); 20 | 21 | void changesets(const std::vector &, 22 | const std::vector &, 23 | const std::vector &); 24 | void nodes(const std::vector &, const std::vector &); 25 | void ways(const std::vector &, const std::vector &, const std::vector &); 26 | void relations(const std::vector &, const std::vector &, const std::vector &); 27 | void finish(); 28 | 29 | private: 30 | boost::scoped_ptr m_writer; 31 | }; 32 | 33 | #endif /* CHANGESET_FILTER_HPP */ 34 | -------------------------------------------------------------------------------- /include/changeset_map.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CHANGESET_MAP_HPP 2 | #define CHANGESET_MAP_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | struct changeset_map { 9 | typedef int64_t * iterator; 10 | typedef const int64_t * const_iterator; 11 | typedef std::pair value_type; 12 | 13 | void insert(const value_type &); 14 | 15 | const_iterator find(int64_t) const; 16 | const_iterator end() const; 17 | 18 | private: 19 | std::vector > > m_data; 20 | }; 21 | 22 | #endif /* CHANGESET_MAP_HPP */ 23 | -------------------------------------------------------------------------------- /include/copy_elements.hpp: -------------------------------------------------------------------------------- 1 | #ifndef COPY_ELEMENTS_HPP 2 | #define COPY_ELEMENTS_HPP 3 | 4 | #include "output_writer.hpp" 5 | #include 6 | #include 7 | #include 8 | 9 | /** 10 | * Read the disk database for users, and extract all the public data 11 | * ones into a map of user ID to display name. 
12 | */ 13 | void extract_users(std::map &display_name_map); 14 | 15 | /** 16 | * Copy the elements (and associated tags, way nodes, etc...) for 17 | * some type T, and write them in parallel threads to all of the 18 | * writers. 19 | */ 20 | template 21 | void run_threads(std::vector > writers); 22 | 23 | #endif /* COPY_ELEMENTS_HPP */ 24 | -------------------------------------------------------------------------------- /include/dump_archive.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DUMP_ARCHIVE_HPP 2 | #define DUMP_ARCHIVE_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "stdint.h" 10 | 11 | struct base_thread { 12 | virtual ~base_thread(); 13 | virtual boost::posix_time::ptime join() = 0; 14 | }; 15 | 16 | template 17 | struct run_thread : public base_thread { 18 | boost::posix_time::ptime timestamp; 19 | boost::exception_ptr error; 20 | boost::thread thr; 21 | std::string table_name; 22 | 23 | run_thread(std::string table_name_, std::string dump_file, bool resume, unsigned int max_concurrency); 24 | ~run_thread(); 25 | boost::posix_time::ptime join(); 26 | }; 27 | 28 | #endif /* DUMP_ARCHIVE_HPP */ 29 | -------------------------------------------------------------------------------- /include/dump_reader.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DUMP_READER_HPP 2 | #define DUMP_READER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | struct dump_reader 10 | : public boost::noncopyable { 11 | dump_reader(const std::string &table_name, 12 | const std::string &dump_file, 13 | unsigned int max_concurrency); 14 | 15 | ~dump_reader(); 16 | 17 | const std::vector &column_names() const; 18 | size_t read(std::string &); 19 | void put(const std::string &, const std::string &); 20 | void finish(); 21 | 22 | private: 23 | struct pimpl; 24 | boost::scoped_ptr m_impl; 25 | }; 26 | 27 | #endif /* DUMP_READER_HPP 
*/ 28 | -------------------------------------------------------------------------------- /include/extract_kv.hpp: -------------------------------------------------------------------------------- 1 | #ifndef EXTRACT_KV_HPP 2 | #define EXTRACT_KV_HPP 3 | 4 | #include 5 | #include 6 | 7 | template 8 | struct extract_kv { 9 | void operator()(T &t, std::string &key, std::string &val); 10 | private: 11 | std::ostringstream out; 12 | }; 13 | 14 | #endif /* EXTRACT_KV_HPP */ 15 | -------------------------------------------------------------------------------- /include/history_filter.hpp: -------------------------------------------------------------------------------- 1 | #ifndef HISTORY_FILTER_HPP 2 | #define HISTORY_FILTER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "output_writer.hpp" 9 | 10 | /** 11 | * acts as an output_writer filter, removing all the deleted elements 12 | * and elements whose version number is not a maximum for their ID. 13 | */ 14 | template 15 | struct history_filter : public output_writer { 16 | history_filter(const std::string &, const boost::program_options::variables_map &, const user_map_t &, const boost::posix_time::ptime &, user_info_level, historical_versions, changeset_discussions); 17 | virtual ~history_filter(); 18 | 19 | void changesets(const std::vector &, 20 | const std::vector &, 21 | const std::vector &); 22 | void nodes(const std::vector &, const std::vector &); 23 | void ways(const std::vector &, const std::vector &, const std::vector &); 24 | void relations(const std::vector &, const std::vector &, const std::vector &); 25 | void finish(); 26 | 27 | private: 28 | boost::scoped_ptr m_writer; 29 | 30 | // when filtering the history and we reach the end of a block of nodes 31 | // ways or relations, we don't know whether the final element in the 32 | // block is a maximum-version element until we've seen the next block. 
33 | // so we need to store the last-seen element in a block until we either 34 | // get a new block, or finish() is called. 35 | struct left_over_nodes { 36 | node n; 37 | std::vector tags; 38 | }; 39 | struct left_over_ways { 40 | way w; 41 | std::vector nodes; 42 | std::vector tags; 43 | }; 44 | struct left_over_relations { 45 | relation r; 46 | std::vector members; 47 | std::vector tags; 48 | }; 49 | 50 | boost::optional m_left_over_nodes; 51 | boost::optional m_left_over_ways; 52 | boost::optional m_left_over_relations; 53 | }; 54 | 55 | #endif /* HISTORY_FILTER_HPP */ 56 | -------------------------------------------------------------------------------- /include/insert_kv.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INSERT_KV_HPP 2 | #define INSERT_KV_HPP 3 | 4 | #include "config.h" 5 | 6 | #include 7 | typedef std::string slice_t; 8 | 9 | template 10 | void insert_kv(T &t, const slice_t &key, const slice_t &val); 11 | 12 | #endif /* INSERT_KV_HPP */ 13 | -------------------------------------------------------------------------------- /include/output_writer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef OUTPUT_WRITER_HPP 2 | #define OUTPUT_WRITER_HPP 3 | 4 | #include 5 | #include 6 | #include "types.hpp" 7 | 8 | /** 9 | * generic output sink for OSM element types. 10 | * 11 | * this interface is designed to be able to reasonably efficiently dump out 12 | * chunks of OSM element types, while still being generic enough to handle 13 | * output to XML, PBF and any other file types which we would want to write. 14 | */ 15 | struct output_writer : private boost::noncopyable { 16 | typedef std::map user_map_t; 17 | 18 | virtual ~output_writer(); 19 | 20 | // dump a chunk of elements. included are the associated tags and other 21 | // inner types for that element. 
the chunk will be already ordered and 22 | // the inner types ordered by the (id, version) of their element. 23 | virtual void changesets(const std::vector &, 24 | const std::vector &, 25 | const std::vector &) = 0; 26 | virtual void nodes(const std::vector &, const std::vector &) = 0; 27 | virtual void ways(const std::vector &, const std::vector &, const std::vector &) = 0; 28 | virtual void relations(const std::vector &, const std::vector &, const std::vector &) = 0; 29 | 30 | // called once, at the end of the writing process. at this point the 31 | // output writer should write any remaining data, flush the output 32 | // file and close it. anything which could throw should be in here, 33 | // not in the destructor. 34 | virtual void finish() = 0; 35 | }; 36 | 37 | #endif /* OUTPUT_WRITER */ 38 | -------------------------------------------------------------------------------- /include/pbf_writer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PBF_WRITER_HPP 2 | #define PBF_WRITER_HPP 3 | 4 | #include "output_writer.hpp" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | class pbf_writer : public output_writer { 13 | public: 14 | pbf_writer(const std::string &, const boost::program_options::variables_map &, const user_map_t &, const boost::posix_time::ptime &, user_info_level, historical_versions, changeset_discussions); 15 | virtual ~pbf_writer(); 16 | 17 | void changesets(const std::vector &, 18 | const std::vector &, 19 | const std::vector &); 20 | void nodes(const std::vector &, const std::vector &); 21 | void ways(const std::vector &, const std::vector &, const std::vector &); 22 | void relations(const std::vector &, const std::vector &, const std::vector &); 23 | void finish(); 24 | 25 | struct pimpl; 26 | 27 | private: 28 | boost::scoped_ptr m_impl; 29 | }; 30 | 31 | #endif /* PBF_WRITER_HPP */ 32 | 
-------------------------------------------------------------------------------- /include/table_extractor.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TABLE_EXTRACTOR_HPP 2 | #define TABLE_EXTRACTOR_HPP 3 | 4 | #include 5 | #include 6 | #include "dump_reader.hpp" 7 | #include "extract_kv.hpp" 8 | #include "unescape_copy_row.hpp" 9 | 10 | template 11 | boost::posix_time::ptime timestamp_of(const T &) { 12 | return boost::posix_time::ptime(boost::posix_time::neg_infin); 13 | } 14 | 15 | template <> boost::posix_time::ptime timestamp_of(const changeset &cs) { return cs.created_at; } 16 | template <> boost::posix_time::ptime timestamp_of(const node &n) { return n.timestamp; } 17 | template <> boost::posix_time::ptime timestamp_of(const way &w) { return w.timestamp; } 18 | template <> boost::posix_time::ptime timestamp_of(const relation &r) { return r.timestamp; } 19 | template <> boost::posix_time::ptime timestamp_of(const changeset_comment &cc) { return cc.created_at; } 20 | 21 | template 22 | struct table_extractor_with_timestamp { 23 | typedef R row_type; 24 | 25 | table_extractor_with_timestamp(const std::string &table_name, 26 | const std::string &dump_file, 27 | unsigned int max_concurrency) 28 | : m_reader(table_name, dump_file, max_concurrency) { 29 | } 30 | 31 | boost::posix_time::ptime read() { 32 | boost::posix_time::ptime timestamp(boost::posix_time::neg_infin); 33 | size_t bytes = 0; 34 | row_type row; 35 | unescape_copy_row filter(m_reader); 36 | extract_kv extract; 37 | while ((bytes = filter.read(row)) > 0) { 38 | std::string key, val; 39 | extract(row, key, val); 40 | m_reader.put(key, val); 41 | if (timestamp_of(row) > timestamp) { 42 | timestamp = timestamp_of(row); 43 | } 44 | } 45 | m_reader.finish(); 46 | return timestamp; 47 | } 48 | 49 | private: 50 | dump_reader m_reader; 51 | }; 52 | 53 | #endif /* TABLE_EXTRACTOR_HPP */ 54 | 
-------------------------------------------------------------------------------- /include/time_epoch.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TIME_EPOCH_HPP 2 | #define TIME_EPOCH_HPP 3 | 4 | #include 5 | 6 | extern const boost::posix_time::ptime time_epoch; 7 | 8 | #endif /* TIME_EPOCH_HPP */ 9 | -------------------------------------------------------------------------------- /include/types.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TYPES_HPP 2 | #define TYPES_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | enum user_status_enum { 10 | user_status_pending, 11 | user_status_active, 12 | user_status_confirmed, 13 | user_status_suspended, 14 | user_status_deleted 15 | }; 16 | 17 | enum format_enum { 18 | format_html, 19 | format_markdown, 20 | format_text 21 | }; 22 | 23 | enum nwr_enum { 24 | nwr_node, 25 | nwr_way, 26 | nwr_relation 27 | }; 28 | 29 | struct user { 30 | static const int num_keys = 1; 31 | static const std::vector &column_names(); 32 | 33 | int64_t id; 34 | std::string display_name; 35 | bool data_public; 36 | }; 37 | 38 | BOOST_FUSION_ADAPT_STRUCT( 39 | user, 40 | (int64_t, id) 41 | (std::string, display_name) 42 | (bool, data_public) 43 | ) 44 | 45 | struct current_tag { 46 | static const int num_keys = 2; 47 | static const std::vector &column_names(); 48 | 49 | int64_t element_id; 50 | std::string key, value; 51 | }; 52 | 53 | BOOST_FUSION_ADAPT_STRUCT( 54 | current_tag, 55 | (int64_t, element_id) 56 | (std::string, key) 57 | (std::string, value) 58 | ) 59 | 60 | struct changeset_comment { 61 | static const int num_keys = 2; 62 | static const std::vector &column_names(); 63 | 64 | int64_t changeset_id, author_id; 65 | std::string body; 66 | boost::posix_time::ptime created_at; 67 | bool visible; 68 | }; 69 | 70 | BOOST_FUSION_ADAPT_STRUCT( 71 | changeset_comment, 72 | (int64_t, changeset_id) 73 | 
(boost::posix_time::ptime, created_at) 74 | (int64_t, author_id) 75 | (std::string, body) 76 | (bool, visible) 77 | ) 78 | 79 | struct changeset { 80 | static const int num_keys = 1; 81 | static const std::vector &column_names(); 82 | static const std::string table_name(); 83 | static const std::string tag_table_name(); 84 | static const std::string inner_table_name(); 85 | 86 | typedef current_tag tag_type; 87 | typedef changeset_comment inner_type; 88 | 89 | int64_t id; 90 | int32_t uid; 91 | boost::posix_time::ptime created_at; 92 | boost::optional min_lat, max_lat, min_lon, max_lon; 93 | boost::posix_time::ptime closed_at; 94 | int32_t num_changes; 95 | }; 96 | 97 | BOOST_FUSION_ADAPT_STRUCT( 98 | changeset, 99 | (int64_t, id) 100 | (int32_t, uid) 101 | (boost::posix_time::ptime, created_at) 102 | (boost::optional, min_lat) 103 | (boost::optional, max_lat) 104 | (boost::optional, min_lon) 105 | (boost::optional, max_lon) 106 | (boost::posix_time::ptime, closed_at) 107 | (int32_t, num_changes) 108 | ) 109 | 110 | struct old_tag { 111 | static const int num_keys = 3; 112 | static const std::vector &column_names(); 113 | 114 | int64_t element_id, version; 115 | std::string key, value; 116 | }; 117 | 118 | BOOST_FUSION_ADAPT_STRUCT( 119 | old_tag, 120 | (int64_t, element_id) 121 | (int64_t, version) 122 | (std::string, key) 123 | (std::string, value) 124 | ) 125 | 126 | struct node { 127 | static const int num_keys = 2; 128 | static const std::vector &column_names(); 129 | static const std::string table_name(); 130 | static const std::string tag_table_name(); 131 | static const std::string inner_table_name(); 132 | 133 | typedef old_tag tag_type; 134 | typedef int inner_type; 135 | 136 | int64_t id, version, changeset_id; 137 | bool visible; 138 | boost::posix_time::ptime timestamp; 139 | boost::optional redaction_id; 140 | int32_t latitude, longitude; 141 | }; 142 | 143 | BOOST_FUSION_ADAPT_STRUCT( 144 | node, 145 | (int64_t, id) 146 | (int64_t, version) 147 | 
(int64_t, changeset_id) 148 | (bool, visible) 149 | (boost::posix_time::ptime, timestamp) 150 | (boost::optional, redaction_id) 151 | (int32_t, latitude) 152 | (int32_t, longitude) 153 | ) 154 | 155 | struct way_node { 156 | static const int num_keys = 3; 157 | static const std::vector &column_names(); 158 | 159 | int64_t way_id, version, sequence_id, node_id; 160 | }; 161 | 162 | BOOST_FUSION_ADAPT_STRUCT( 163 | way_node, 164 | (int64_t, way_id) 165 | (int64_t, version) 166 | (int64_t, sequence_id) 167 | (int64_t, node_id) 168 | ) 169 | 170 | struct way { 171 | static const int num_keys = 2; 172 | static const std::vector &column_names(); 173 | static const std::string table_name(); 174 | static const std::string tag_table_name(); 175 | static const std::string inner_table_name(); 176 | 177 | typedef old_tag tag_type; 178 | typedef way_node inner_type; 179 | 180 | int64_t id, version, changeset_id; 181 | bool visible; 182 | boost::posix_time::ptime timestamp; 183 | boost::optional redaction_id; 184 | }; 185 | 186 | BOOST_FUSION_ADAPT_STRUCT( 187 | way, 188 | (int64_t, id) 189 | (int64_t, version) 190 | (int64_t, changeset_id) 191 | (bool, visible) 192 | (boost::posix_time::ptime, timestamp) 193 | (boost::optional, redaction_id) 194 | ) 195 | 196 | struct relation_member { 197 | static const int num_keys = 3; 198 | static const std::vector &column_names(); 199 | 200 | int64_t relation_id, version, sequence_id; 201 | nwr_enum member_type; 202 | int64_t member_id; 203 | std::string member_role; 204 | }; 205 | 206 | BOOST_FUSION_ADAPT_STRUCT( 207 | relation_member, 208 | (int64_t, relation_id) 209 | (int64_t, version) 210 | (int64_t, sequence_id) 211 | (nwr_enum, member_type) 212 | (int64_t, member_id) 213 | (std::string, member_role) 214 | ) 215 | 216 | struct relation { 217 | static const int num_keys = 2; 218 | static const std::vector &column_names(); 219 | static const std::string table_name(); 220 | static const std::string tag_table_name(); 221 | static const 
std::string inner_table_name(); 222 | 223 | typedef old_tag tag_type; 224 | typedef relation_member inner_type; 225 | 226 | int64_t id, version, changeset_id; 227 | bool visible; 228 | boost::posix_time::ptime timestamp; 229 | boost::optional redaction_id; 230 | }; 231 | 232 | BOOST_FUSION_ADAPT_STRUCT( 233 | relation, 234 | (int64_t, id) 235 | (int64_t, version) 236 | (int64_t, changeset_id) 237 | (bool, visible) 238 | (boost::posix_time::ptime, timestamp) 239 | (boost::optional, redaction_id) 240 | ) 241 | 242 | enum class user_info_level { 243 | FULL, 244 | ANON 245 | }; 246 | 247 | enum class historical_versions { 248 | NONE, 249 | FULL 250 | }; 251 | 252 | enum class changeset_discussions { 253 | NONE, 254 | FULL 255 | }; 256 | 257 | #endif /* TYPES_HPP */ 258 | -------------------------------------------------------------------------------- /include/unescape_copy_row.hpp: -------------------------------------------------------------------------------- 1 | #ifndef UNESCAPE_COPY_ROW_HPP 2 | #define UNESCAPE_COPY_ROW_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "types.hpp" 13 | 14 | template 15 | struct unescape_copy_row 16 | : public boost::noncopyable { 17 | static const size_t s_num_columns = boost::fusion::result_of::size::value; 18 | 19 | explicit unescape_copy_row(S &source) 20 | : m_source(source), 21 | m_reorder(calculate_reorder(m_source.column_names())) { 22 | } 23 | 24 | ~unescape_copy_row() { 25 | } 26 | 27 | size_t read(T &row) { 28 | std::string line; 29 | size_t num = m_source.read(line); 30 | if (num > 0) { 31 | unpack(line, row); 32 | } 33 | return num; 34 | } 35 | 36 | private: 37 | void unpack(std::string &line, T &row) { 38 | const size_t sz = s_num_columns; 39 | std::vector > columns, old_columns; 40 | { 41 | char *prev_ptr = &line[0]; 42 | char * const end_ptr = &line[line.size()]; 43 | char *ptr = &line[0]; 44 | for (; ptr != end_ptr; ++ptr) { 45 | if (*ptr == '\t') 
{ 46 | *ptr = '\0'; 47 | old_columns.push_back(std::make_pair(prev_ptr, std::distance(prev_ptr, ptr))); 48 | prev_ptr = ptr + 1; 49 | } 50 | } 51 | old_columns.push_back(std::make_pair(prev_ptr, std::distance(prev_ptr, ptr))); 52 | } 53 | 54 | columns.reserve(sz); 55 | for (size_t i = 0; i < sz; ++i) { 56 | if (i >= m_reorder.size()) { 57 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Index %1% exceeds m_reorder.size() %2%, this is a bug.") 58 | % i % m_reorder.size()).str())); 59 | } 60 | size_t j = m_reorder[i]; 61 | if (j >= old_columns.size()) { 62 | BOOST_THROW_EXCEPTION(std::runtime_error("Reordered index exceeds old_columns.size(), this is a bug.")); 63 | } 64 | columns.push_back(old_columns[j]); 65 | } 66 | 67 | if (columns.size() != sz) { 68 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Wrong number of columns: expecting %1%, got %2% in line `%3%'.") 69 | % sz % columns.size() % line).str())); 70 | } 71 | try { 72 | set_values(row, columns); 73 | } catch (const std::exception &e) { 74 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("%1%: in line `%2%'.") % e.what() % line).str())); 75 | } 76 | } 77 | 78 | inline void set_values(T &t, std::vector > &vs) { 79 | boost::fusion::for_each(t, set_value(vs.begin())); 80 | } 81 | 82 | struct set_value { 83 | explicit set_value(std::vector >::iterator i) : itr(i) {} 84 | 85 | void operator()(bool &b) const { 86 | std::pair str = *itr++; 87 | switch (str.first[0]) { 88 | case 't': 89 | b = true; 90 | break; 91 | case 'f': 92 | b = false; 93 | break; 94 | default: 95 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Unrecognised value for bool: `%1%'") % str.first).str())); 96 | } 97 | } 98 | 99 | void operator()(int16_t &i) const { 100 | std::pair str = *itr++; 101 | unescape(str); 102 | i = int16_t(strtol(str.first, NULL, 10)); 103 | } 104 | 105 | void operator()(int32_t &i) const { 106 | std::pair str = *itr++; 107 | unescape(str); 108 | i = int32_t(strtol(str.first, 
NULL, 10)); 109 | } 110 | 111 | void operator()(int64_t &i) const { 112 | std::pair str = *itr++; 113 | unescape(str); 114 | i = int64_t(strtoll(str.first, NULL, 10)); 115 | } 116 | 117 | void operator()(double &d) const { 118 | std::pair str = *itr++; 119 | unescape(str); 120 | d = strtod(str.first, NULL); 121 | } 122 | 123 | void operator()(std::string &v) const { 124 | std::pair str = *itr++; 125 | unescape(str); 126 | v.assign(str.first, str.second); 127 | } 128 | 129 | void operator()(boost::posix_time::ptime &t) const { 130 | std::pair str = *itr++; 131 | unescape(str); 132 | // 11111111112 133 | // 12345678901234567890 134 | // format is 2013-09-11 13:39:52.742365 135 | if (str.second < 19) { 136 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Unexpected format for timestamp: `%1%'.") 137 | % str.first).str())); 138 | } 139 | int year = ((str.first[0] - '0') * 1000 + 140 | (str.first[1] - '0') * 100 + 141 | (str.first[2] - '0') * 10 + 142 | (str.first[3] - '0')); 143 | int month = ((str.first[5] - '0') * 10 + (str.first[6] - '0')); 144 | int day = ((str.first[8] - '0') * 10 + (str.first[9] - '0')); 145 | int hour = ((str.first[11] - '0') * 10 + (str.first[12] - '0')); 146 | int min = ((str.first[14] - '0') * 10 + (str.first[15] - '0')); 147 | int sec = ((str.first[17] - '0') * 10 + (str.first[18] - '0')); 148 | t = boost::posix_time::ptime(boost::gregorian::date(year, month, day), 149 | boost::posix_time::time_duration(hour, min, sec)); 150 | } 151 | 152 | template 153 | void operator()(boost::optional &o) const { 154 | std::pair s = *itr; 155 | if (strncmp(s.first, "\\N", s.second) == 0) { 156 | o = boost::none; 157 | ++itr; 158 | } else { 159 | V v; 160 | operator()(v); 161 | o = v; 162 | } 163 | } 164 | 165 | void operator()(user_status_enum &e) const { 166 | std::pair str = *itr++; 167 | unescape(str); 168 | if (strncmp(str.first, "pending", str.second) == 0) { 169 | e = user_status_pending; 170 | } else if (strncmp(str.first, "active", 
str.second) == 0) { 171 | e = user_status_active; 172 | } else if (strncmp(str.first, "confirmed", str.second) == 0) { 173 | e = user_status_confirmed; 174 | } else if (strncmp(str.first, "suspended", str.second) == 0) { 175 | e = user_status_suspended; 176 | } else if (strncmp(str.first, "deleted", str.second) == 0) { 177 | e = user_status_deleted; 178 | } else { 179 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Unrecognised value for user_status_enum: `%1%'.") % str.first).str())); 180 | } 181 | } 182 | 183 | void operator()(format_enum &e) const { 184 | std::pair str = *itr++; 185 | unescape(str); 186 | if (strncmp(str.first, "html", str.second) == 0) { 187 | e = format_html; 188 | } else if (strncmp(str.first, "markdown", str.second) == 0) { 189 | e = format_markdown; 190 | } else if (strncmp(str.first, "text", str.second) == 0) { 191 | e = format_text; 192 | } else { 193 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Unrecognised value for format_enum: `%1%'.") % str.first).str())); 194 | } 195 | } 196 | 197 | void operator()(nwr_enum &e) const { 198 | std::pair str = *itr++; 199 | unescape(str); 200 | if (strncmp(str.first, "Node", str.second) == 0) { 201 | e = nwr_node; 202 | } else if (strncmp(str.first, "Way", str.second) == 0) { 203 | e = nwr_way; 204 | } else if (strncmp(str.first, "Relation", str.second) == 0) { 205 | e = nwr_relation; 206 | } else { 207 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Unrecognised value for nwr_enum: `%1%'.") % str.first).str())); 208 | } 209 | } 210 | 211 | inline int hex2digit(char ch) const { 212 | switch (ch) { 213 | case '0': 214 | case '1': 215 | case '2': 216 | case '3': 217 | case '4': 218 | case '5': 219 | case '6': 220 | case '7': 221 | case '8': 222 | case '9': 223 | return int(ch - '0'); 224 | 225 | case 'a': 226 | case 'b': 227 | case 'c': 228 | case 'd': 229 | case 'e': 230 | case 'f': 231 | return 10 + int(ch - 'a'); 232 | 233 | case 'A': 234 | case 'B': 235 | case 'C': 
236 | case 'D': 237 | case 'E': 238 | case 'F': 239 | return 10 + int(ch - 'A'); 240 | 241 | default: 242 | BOOST_THROW_EXCEPTION(std::runtime_error("Invalid hex digit.")); 243 | } 244 | } 245 | 246 | /* Returns the numeric value (0-7) of one octal digit character; any other character throws std::runtime_error. */ inline int oct2digit(char ch) const { 247 | if ((ch >= '0') && (ch <= '7')) { 248 | return int(ch - '0'); 249 | } else { 250 | BOOST_THROW_EXCEPTION(std::runtime_error("Invalid octal digit.")); 251 | } 252 | } 253 | 254 | /* Decodes backslash escapes in the buffer s.first (length s.second) in place. Handled: \b \f \n \r \t \v, \x followed by exactly two hex digits, and a backslash followed by exactly three octal digits; any other escaped character is copied through unchanged. Output index j never overtakes input index i, so the result is written over the same buffer, then NUL-terminated, and s.second is set to the decoded length. Throws std::runtime_error when an escape runs past the end of the input or contains an invalid digit. NOTE(review): when nothing is shortened the terminator is written at str[s.second], so the buffer presumably has one spare byte - confirm with the caller. */ void unescape(std::pair &s) const { 255 | const size_t end = s.second; 256 | char *str = s.first; 257 | size_t j = 0; 258 | 259 | for (size_t i = 0; i < end; ++i) { 260 | switch (str[i]) { 261 | case '\\': 262 | ++i; 263 | if (i < end) { 264 | switch (str[i]) { 265 | case 'b': 266 | str[j] = '\b'; 267 | break; 268 | 269 | case 'f': 270 | str[j] = '\f'; 271 | break; 272 | 273 | case 'n': 274 | str[j] = '\n'; 275 | break; 276 | 277 | case 'r': 278 | str[j] = '\r'; 279 | break; 280 | 281 | case 't': 282 | str[j] = '\t'; 283 | break; 284 | 285 | case 'v': 286 | str[j] = '\v'; 287 | break; 288 | 289 | case 'x': 290 | i += 2; 291 | if (i < end) { 292 | /* both digits are in bounds: i-1 is the high nibble, i the low nibble */ str[j] = char(hex2digit(str[i-1]) * 16 + hex2digit(str[i])); 293 | } else { 294 | BOOST_THROW_EXCEPTION(std::runtime_error("Unterminated hex escape sequence.")); 295 | } 296 | break; 297 | 298 | case '0': 299 | case '1': 300 | case '2': 301 | case '3': 302 | case '4': 303 | case '5': 304 | case '6': 305 | case '7': 306 | i += 2; 307 | if (i < end) { 308 | str[j] = char(oct2digit(str[i-2]) * 64 + oct2digit(str[i-1]) * 8 + oct2digit(str[i])); 309 | } else { 310 | BOOST_THROW_EXCEPTION(std::runtime_error("Unterminated octal escape sequence.")); 311 | } 312 | break; 313 | 314 | default: 315 | // an unnecessary escape 316 | str[j] = str[i]; 317 | } 318 | 319 | } else { 320 | BOOST_THROW_EXCEPTION(std::runtime_error("Unterminated escape sequence.")); 321 | } 322 | break; 323 | 324 | default: 325 | if (i != j) { 326 | str[j] = str[i]; 327 | } 328 | } 329 | 330 | ++j; 331 | } 332 | 333 | str[j] = '\0'; 334 |
s.second = j; 335 | } 336 | 337 | mutable std::vector >::iterator itr; 338 | }; 339 | 340 | /* Builds the column index mapping: for each entry of T::column_names() the result holds the position of that name within names. A wanted name equal to a single asterisk keeps its own position i instead of being looked up. Throws std::runtime_error listing the available names when a wanted column is missing. */ static std::vector calculate_reorder(const std::vector &names) { 341 | std::vector indexes; 342 | const std::vector &wanted_names = T::column_names(); 343 | 344 | const size_t num_columns = wanted_names.size(); 345 | indexes.reserve(num_columns); 346 | for (size_t i = 0; i < num_columns; ++i) { 347 | const std::string &wanted_name = wanted_names[i]; 348 | size_t j = i; 349 | 350 | if (wanted_name != "*") { 351 | std::vector::const_iterator itr = std::find(names.begin(), names.end(), wanted_name); 352 | if (itr == names.end()) { 353 | std::ostringstream ostr; 354 | ostr << "Unable to find wanted column name \"" << wanted_name << "\" in available names: "; 355 | for (std::vector::const_iterator jtr = names.begin(); jtr != names.end(); ++jtr) { 356 | ostr << "\"" << *jtr << "\", "; 357 | } 358 | BOOST_THROW_EXCEPTION(std::runtime_error(ostr.str())); 359 | } 360 | j = std::distance(names.begin(), itr); 361 | } 362 | 363 | indexes.push_back(j); 364 | } 365 | 366 | return indexes; 367 | } 368 | 369 | S &m_source; 370 | std::vector m_reorder; 371 | }; 372 | 373 | #endif /* UNESCAPE_COPY_ROW_HPP */ 374 | -------------------------------------------------------------------------------- /include/writer_common.hpp: -------------------------------------------------------------------------------- 1 | #ifndef WRITER_COMMON_HPP 2 | #define WRITER_COMMON_HPP 3 | 4 | #define OSM_LICENSE_TEXT "http://opendatacommons.org/licenses/odbl/1-0/" 5 | #define OSM_COPYRIGHT_TEXT "OpenStreetMap and contributors" 6 | #define OSM_VERSION_TEXT "0.6" 7 | #define OSM_ATTRIBUTION_TEXT "http://www.openstreetmap.org/copyright" 8 | #define OSM_API_ORIGIN "http://www.openstreetmap.org/api/0.6" 9 | 10 | #endif /* WRITER_COMMON_HPP */ 11 | -------------------------------------------------------------------------------- /include/xml_writer.hpp:
-------------------------------------------------------------------------------- 1 | #ifndef XML_WRITER_HPP 2 | #define XML_WRITER_HPP 3 | 4 | #include "output_writer.hpp" 5 | #include "changeset_map.hpp" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | /* Declares xml_writer, an output_writer subclass. Only the interface is visible here: the implementation state lives behind the forward-declared pimpl struct held in m_impl, and the remaining members keep the settings and header strings passed to or derived by the constructor. */ class xml_writer : public output_writer { 14 | public: 15 | typedef changeset_map changeset_map_t; 16 | 17 | xml_writer(const std::string &, const boost::program_options::variables_map &, const user_map_t &, 18 | const boost::posix_time::ptime &max_time, 19 | user_info_level, historical_versions, changeset_discussions); 20 | virtual ~xml_writer(); 21 | 22 | void changesets(const std::vector &, 23 | const std::vector &, 24 | const std::vector &); 25 | void nodes(const std::vector &, const std::vector &); 26 | void ways(const std::vector &, const std::vector &, const std::vector &); 27 | void relations(const std::vector &, const std::vector &, const std::vector &); 28 | void finish(); 29 | 30 | struct pimpl; 31 | 32 | private: 33 | boost::scoped_ptr m_impl; 34 | const user_map_t &m_users; 35 | changeset_discussions m_changeset_discussions; 36 | user_info_level m_user_info_level; 37 | std::string m_generator_name; 38 | std::string m_author_name; 39 | std::string m_source_name; 40 | std::string m_copyleft_name; 41 | std::string m_attribution_name; 42 | changeset_map_t m_changesets; 43 | }; 44 | 45 | #endif /* XML_WRITER_HPP */ 46 | -------------------------------------------------------------------------------- /script/emacs-format-file.el: -------------------------------------------------------------------------------- 1 | ;;; File: emacs-format-file 2 | ;;; Original author: 3 | ;;; Stan Warford 4 | ;;; 17 May 2006 5 | ;;; Adapted from: http://www.cslab.pepperdine.edu/warford/BatchIndentationEmacs.html 6 | 7 | (c-add-style "mystyle" 8 | '((fill-column . 80) 9 | (c++-indent-level . 2) 10 | (c-basic-offset . 2) 11 | (indent-tabs-mode . nil) 12 | (c-hanging-colons-alist .
((case-label) 13 | (label after) 14 | (access-label after) 15 | (member-init-intro before) 16 | (inher-intro))) 17 | (c-offsets-alist . ((statement-block-intro . +) 18 | (substatement-open . 0) 19 | (substatement-label . 0) 20 | (label . 0) 21 | (statement-cont . +) 22 | (innamespace 0) 23 | (member-init-intro . +) 24 | (inher-intro . +))))) 25 | 26 | (setq default-tab-width 2) 27 | 28 | (defun emacs-format-function () 29 | "Format the whole buffer." 30 | (c-set-style "mystyle") 31 | (indent-region (point-min) (point-max) nil) 32 | (untabify (point-min) (point-max)) 33 | (save-buffer) 34 | ) -------------------------------------------------------------------------------- /script/fmt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # File: fmt.sh 3 | # Opens a set of files in emacs and executes the emacs-format-function. 4 | # Assumes the function named emacs-format-function is defined in the 5 | # file named emacs-format-file. 6 | # Adapted from: http://www.cslab.pepperdine.edu/warford/BatchIndentationEmacs.html 7 | 8 | dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 9 | format_file=${dir}/emacs-format-file.el 10 | 11 | if [ $# -eq 0 ] 12 | then 13 | echo "my-indent requires at least one argument." 1>&2 14 | echo "Usage: my-indent files-to-indent" 1>&2 15 | exit 1 16 | fi 17 | while [ $# -ge 1 ] 18 | do 19 | if [ -d $1 ] 20 | then 21 | echo "Argument of my-indent $1 cannot be a directory." 1>&2 22 | exit 1 23 | fi 24 | # Check for existence of file: 25 | ls $1 2> /dev/null | grep $1 > /dev/null 26 | if [ $? != 0 ] 27 | then 28 | echo "my-indent: $1 not found."
1>&2 29 | exit 1 30 | fi 31 | echo "Indenting $1 with emacs in batch mode" 32 | emacs -batch $1 -l $format_file -f emacs-format-function 33 | echo 34 | shift 1 35 | done 36 | exit 0 37 | -------------------------------------------------------------------------------- /src/Makefile.am: -------------------------------------------------------------------------------- 1 | LDADD=@LIBXML_LIBS@ @BOOST_FILESYSTEM_LIB@ @BOOST_PROGRAM_OPTIONS_LIB@ @BOOST_DATE_TIME_LIB@ @BOOST_SYSTEM_LIB@ @BOOST_THREAD_LIB@ @BOOST_IOSTREAMS_LIB@ @PROTOBUF_LITE_LIBS@ @PROTOBUF_LIBS@ -losmpbf -lpthread 2 | 3 | AM_LDFLAGS=@BOOST_LDFLAGS@ 4 | AM_CPPFLAGS=-I../include @LIBXML_CFLAGS@ @BOOST_CPPFLAGS@ @PROTOBUF_LITE_CFLAGS@ @PROTOBUF_CFLAGS@ 5 | 6 | bin_PROGRAMS=../planet-dump-ng 7 | ################################################################################ 8 | ___planet_dump_ng_SOURCES=\ 9 | changeset_filter.cpp \ 10 | changeset_map.cpp \ 11 | copy_elements.cpp \ 12 | dump_archive.cpp \ 13 | dump_reader.cpp \ 14 | extract_kv.cpp \ 15 | history_filter.cpp \ 16 | insert_kv.cpp \ 17 | output_writer.cpp \ 18 | pbf_writer.cpp \ 19 | planet-dump.cpp \ 20 | time_epoch.cpp \ 21 | types.cpp \ 22 | xml_writer.cpp 23 | -------------------------------------------------------------------------------- /src/changeset_filter.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "changeset_filter.hpp" 3 | #include 4 | 5 | #include "xml_writer.hpp" 6 | 7 | /* Constructs the wrapped writer T that receives the changeset output. The hv argument is accepted but not forwarded: the writer is always built with historical_versions::NONE. The methods below pass changesets and finish() through to that writer and ignore nodes, ways and relations. */ template 8 | changeset_filter::changeset_filter(const std::string &option_name, const boost::program_options::variables_map &options, 9 | const user_map_t &user_map, const boost::posix_time::ptime &max_time, user_info_level uil, 10 | historical_versions hv, changeset_discussions cd) 11 | : m_writer(new T(option_name, options, user_map, max_time, uil, historical_versions::NONE, cd)) { 12 | } 13 | 14 | template 15 | changeset_filter::~changeset_filter() { 16 | } 17 | 18 | template 19 | void
changeset_filter::changesets(const std::vector &cs, 20 | const std::vector &ts, 21 | const std::vector &ccs) { 22 | // no filtering for changesets - we want them. 23 | m_writer->changesets(cs, ts, ccs); 24 | } 25 | 26 | template 27 | void changeset_filter::nodes(const std::vector &ns, const std::vector &ts) { 28 | // do nothing - we don't want nodes in the changeset output 29 | } 30 | 31 | template 32 | void changeset_filter::ways(const std::vector &ws, const std::vector &wns, const std::vector &ts) { 33 | // do nothing - we don't want ways in the changeset output 34 | } 35 | 36 | template 37 | void changeset_filter::relations(const std::vector &rs, const std::vector &rms, const std::vector &ts) { 38 | // do nothing - we don't want relations in the changeset output 39 | } 40 | 41 | template 42 | void changeset_filter::finish() { 43 | // finish the underlying output writer 44 | m_writer->finish(); 45 | } 46 | 47 | // note that a changeset_filter on pbf_writer is, at present, 48 | // somewhat useless due to the lack of changeset implementation 49 | // in PBF format.
50 | template struct changeset_filter; 51 | -------------------------------------------------------------------------------- /src/changeset_map.cpp: -------------------------------------------------------------------------------- 1 | #include "changeset_map.hpp" 2 | #include 3 | 4 | /* Storage layout used below: keys are split into a block number (key >> BLOCK_BITS) and a slot within the block (key & BLOCK_MASK); each block holds BLOCK_SIZE = 2^17 slots and is allocated on first use. */ #define BLOCK_BITS 17 5 | #define BLOCK_SIZE (1L << BLOCK_BITS) 6 | #define BLOCK_MASK (BLOCK_SIZE - 1) 7 | 8 | /* Stores kv.second under key kv.first. The key must be positive and the value non-negative (both asserted); m_data is grown to reach the block, and a new block is filled with -1, which find() treats as an empty slot. */ void changeset_map::insert(const changeset_map::value_type &kv) { 9 | assert(kv.first > 0); 10 | assert(kv.second >= 0); 11 | 12 | const size_t block_id = kv.first >> BLOCK_BITS; 13 | const size_t offset = kv.first & BLOCK_MASK; 14 | 15 | if (block_id >= m_data.size()) { 16 | m_data.resize(block_id + 1); 17 | } 18 | 19 | boost::shared_ptr > ptr = m_data[block_id]; 20 | if (!ptr) { 21 | ptr = boost::make_shared >(BLOCK_SIZE, int64_t(-1)); 22 | m_data[block_id] = ptr; 23 | } 24 | 25 | std::vector &vec = *ptr; 26 | vec[offset] = kv.second; 27 | } 28 | 29 | /* Looks up k and returns a pointer to the stored value, or NULL (the same value end() returns) when k is below 1, the block is beyond m_data or unallocated, or the slot holds a negative value. */ changeset_map::const_iterator changeset_map::find(int64_t k) const { 30 | if (k < 1) { return NULL; } 31 | 32 | const size_t block_id = k >> BLOCK_BITS; 33 | const size_t offset = k & BLOCK_MASK; 34 | 35 | if (block_id >= m_data.size()) { 36 | return NULL; 37 | } 38 | 39 | boost::shared_ptr > ptr = m_data[block_id]; 40 | if (!ptr) { 41 | return NULL; 42 | } 43 | 44 | std::vector &vec = *ptr; 45 | if (vec[offset] < 0) { 46 | return NULL; 47 | } else { 48 | return &vec[offset]; 49 | } 50 | } 51 | 52 | /* The not-found sentinel: always NULL, so callers can compare a find() result against end(). */ changeset_map::const_iterator changeset_map::end() const { 53 | return NULL; 54 | } 55 | -------------------------------------------------------------------------------- /src/copy_elements.cpp: -------------------------------------------------------------------------------- 1 | #include "copy_elements.hpp" 2 | #include "insert_kv.hpp" 3 | #include "types.hpp" 4 | #include "config.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 |
#include 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | // include vendored later header to deal with https://svn.boost.org/trac/boost/ticket/5237 24 | // #include 25 | #include "vendor/boost/iostreams/filter/gzip.hpp" 26 | #include 27 | #include 28 | #include 29 | 30 | namespace bio = boost::iostreams; 31 | namespace fs = boost::filesystem; 32 | 33 | namespace { 34 | 35 | /* State shared by the reader thread and the writer threads for one element type: two barriers sized for num_threads, one status flag per thread (set under thread_finished_mutex and announced via thread_finished_cond), and the vectors holding the block currently handed to the writers. */ template 36 | struct control_block { 37 | typedef typename T::tag_type tag_type; 38 | typedef typename T::inner_type inner_type; 39 | 40 | control_block(unsigned int num_threads) 41 | : pre_swap_barrier(num_threads), 42 | post_swap_barrier(num_threads), 43 | thread_status(num_threads, 0) { 44 | } 45 | 46 | boost::barrier pre_swap_barrier, post_swap_barrier; 47 | 48 | std::vector thread_status; 49 | boost::mutex thread_finished_mutex; 50 | boost::condition_variable thread_finished_cond; 51 | 52 | std::vector elements; 53 | std::vector tags; 54 | std::vector inners; 55 | std::vector comments; 56 | }; 57 | 58 | /* Reader-side handle on a control_block. write() waits on pre_swap_barrier, swaps the caller's three vectors with the ones in the block, then waits on post_swap_barrier; writer_thread waits on the same two barriers, so the swap happens while the writers are parked between them. */ template 59 | struct thread_writer { 60 | typedef typename T::tag_type tag_type; 61 | typedef typename T::inner_type inner_type; 62 | 63 | boost::shared_ptr > blk; 64 | 65 | thread_writer(boost::shared_ptr > b) : blk(b) {} 66 | 67 | void write(std::vector &els, std::vector &inners, std::vector &tags) { 68 | blk->pre_swap_barrier.wait(); 69 | std::swap(els, blk->elements); 70 | std::swap(inners, blk->inners); 71 | std::swap(tags, blk->tags); 72 | blk->post_swap_barrier.wait(); 73 | } 74 | }; 75 | 76 | /* Sequential reader over the gzip-compressed file final_00000000.data inside subdir (the block number in the name is fixed at 0). The constructor throws std::runtime_error when the file is missing, cannot be opened or is not in a good state. */ template 77 | struct db_reader { 78 | explicit db_reader(const std::string &subdir) : m_end(false) { 79 | m_file_name = (boost::format("%1$s/final_%2$08x.data") % subdir % 0).str(); 80 | if (!fs::exists(m_file_name)) { 81 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("File '%1%' does not exist.") % m_file_name).str())); 82 | } 83 | m_file.open(m_file_name.c_str()); 84 | if (!m_file.is_open()) { 85 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Unable to
open '%1%'.") % m_file_name).str())); 86 | } 87 | if (!m_file.good()) { 88 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("File '%1%' is open, but not good.") % m_file_name).str())); 89 | } 90 | 91 | m_stream.push(bio::gzip_decompressor()); 92 | m_stream.push(m_file); 93 | } 94 | 95 | ~db_reader() { 96 | bio::close(m_stream); 97 | m_file.close(); 98 | } 99 | 100 | /* Reads the next record into t and returns true, or returns false (and keeps returning false) once any read comes up short. Record layout as read here: a 16-bit key size, followed by a 64-bit extended size only when the 16-bit field equals max_uint16_t; the value size encoded the same way; then the key bytes and the value bytes, which are handed to insert_kv. Sizes are read as raw native integers. */ bool operator()(T &t) { 101 | static const uint16_t max_uint16_t = std::numeric_limits::max(); 102 | if (m_end) { return false; } 103 | uint16_t ksz = 0, vsz = 0; 104 | uint64_t kextsz = 0, vextsz = 0; 105 | 106 | if (bio::read(m_stream, (char *)&ksz, sizeof(uint16_t)) != sizeof(uint16_t)) { m_end = true; return false; } 107 | if (ksz == max_uint16_t) { 108 | if (bio::read(m_stream, (char *)&kextsz, sizeof(uint64_t)) != sizeof(uint64_t)) { m_end = true; return false; } 109 | } 110 | if (bio::read(m_stream, (char *)&vsz, sizeof(uint16_t)) != sizeof(uint16_t)) { m_end = true; return false; } 111 | if (vsz == max_uint16_t) { 112 | if (bio::read(m_stream, (char *)&vextsz, sizeof(uint64_t)) != sizeof(uint64_t)) { m_end = true; return false; } 113 | } 114 | 115 | size_t key_size = (ksz == max_uint16_t) ? size_t(kextsz) : size_t(ksz); 116 | size_t val_size = (vsz == max_uint16_t) ?
size_t(vextsz) : size_t(vsz); 117 | std::string k, v; 118 | k.resize(key_size); 119 | if (bio::read(m_stream, &k[0], key_size) != key_size) { m_end = true; return false; } 120 | v.resize(val_size); 121 | if (bio::read(m_stream, &v[0], val_size) != val_size) { m_end = true; return false; } 122 | 123 | insert_kv(t, k, v); 124 | 125 | return true; 126 | } 127 | 128 | private: 129 | bool m_end; 130 | std::string m_file_name; 131 | std::ifstream m_file; 132 | bio::filtering_streambuf m_stream; 133 | }; 134 | 135 | /* Specialisation whose constructor ignores its argument and opens nothing; it is paired with the no-op fetch_associated overload further down. */ template <> 136 | struct db_reader { 137 | db_reader(const std::string &) {} 138 | }; 139 | 140 | /* Number of elements per block handed to the writers: 1048576 by default, 65536 for the specialised type. */ template struct block_size_trait { static const size_t value = 1048576; }; 141 | template <> struct block_size_trait { static const size_t value = 65536; }; 142 | 143 | /* zero_init sets the id field of a look-ahead row to -1 (no-op for int); id_of returns that same id field. fetch_associated uses them to compare a pending row against the current element. */ template void zero_init(T &); 144 | template int64_t id_of(const T &); 145 | 146 | template <> inline void zero_init(current_tag &t) { t.element_id = -1; } 147 | template <> inline void zero_init(old_tag &t) { t.element_id = -1; } 148 | template <> inline void zero_init(way_node &wn) { wn.way_id = -1; } 149 | template <> inline void zero_init(relation_member &rm) { rm.relation_id = -1; } 150 | template <> inline void zero_init(changeset_comment &cc) { cc.changeset_id = -1; } 151 | template <> inline void zero_init(int &) { } 152 | 153 | template <> inline int64_t id_of(const current_tag &t) { return t.element_id; } 154 | template <> inline int64_t id_of(const old_tag &t) { return t.element_id; } 155 | template <> inline int64_t id_of(const way_node &wn) { return wn.way_id; } 156 | template <> inline int64_t id_of(const relation_member &rm) { return rm.relation_id; } 157 | template <> inline int64_t id_of(const changeset_comment &cc) { return cc.changeset_id; } 158 | 159 | /* version_of returns t.version by default; the specialisations for changeset, current_tag and changeset_comment return 0. */ template 160 | inline int64_t version_of(const T &t) { return t.version; } 161 | 162 | template <> inline int64_t version_of(const changeset &) { return 0; } 163 | template <> inline int64_t version_of(const current_tag &t) {
return 0; } 164 | template <> inline int64_t version_of(const changeset_comment &) { return 0; } 165 | 166 | /* Collects into vec every row from reader whose (id, version) equals the one given. t is the look-ahead row carried between calls: rows are consumed while t sorts at or before (id, version), and the loop stops on the first later row or when the reader is exhausted. Relies on the reader yielding rows in (id, version) order - presumably guaranteed by how the data files were written; verify. */ template 167 | inline void fetch_associated(T &t, int64_t id, int64_t version, db_reader &reader, std::vector &vec) { 168 | while ((id_of(t) < id) || ((id_of(t) == id) && (version_of(t) <= version))) { 169 | if ((id_of(t) == id) && (version_of(t) == version)) { 170 | vec.push_back(t); 171 | } 172 | if (!reader(t)) { 173 | break; 174 | } 175 | } 176 | } 177 | 178 | /* No-op overload for the int placeholder type: nothing is read or appended. */ template <> 179 | inline void fetch_associated(int &, int64_t, int64_t, db_reader &, std::vector &) { 180 | } 181 | 182 | /* True when t.redaction_id converts to true; changesets are never treated as redacted. */ template 183 | inline bool is_redacted(const T &t) { return bool(t.redaction_id); } 184 | 185 | template <> inline bool is_redacted(const changeset &) { return false; } 186 | 187 | /* Reader loop for one element type: reads elements into a block of block_size slots, skipping redacted and negative-id elements (the slot is reused), gathers the matching inner rows and tags for each kept element, and hands every full block to writer.write(). After the input ends the remaining elements, possibly none, are written as a final short block. */ template 188 | void extract_element(thread_writer &writer) { 189 | typedef typename T::tag_type tag_type; 190 | typedef typename T::inner_type inner_type; 191 | 192 | const size_t block_size = block_size_trait::value; 193 | 194 | db_reader element_reader(T::table_name()); 195 | db_reader tag_reader(T::tag_table_name()); 196 | db_reader inner_reader(T::inner_table_name()); 197 | 198 | std::vector elements; 199 | std::vector tags; 200 | std::vector inners; 201 | 202 | elements.resize(block_size); 203 | size_t i = 0; 204 | 205 | tag_type current_tag; 206 | inner_type current_inner; 207 | 208 | zero_init(current_tag); 209 | zero_init(current_inner); 210 | 211 | while (element_reader(elements[i])) { 212 | // skip all redacted elements - they don't appear in the output 213 | // at all. 214 | if (is_redacted(elements[i])) { continue; } 215 | 216 | // skip all negative ID elements - these shouldn't appear in the 217 | // database at all.
218 | if (elements[i].id < 0) { continue; } 219 | 220 | fetch_associated(current_inner, elements[i].id, version_of(elements[i]), inner_reader, inners); 221 | fetch_associated(current_tag, elements[i].id, version_of(elements[i]), tag_reader, tags); 222 | 223 | ++i; 224 | if (i == block_size) { 225 | writer.write(elements, inners, tags); 226 | inners.clear(); 227 | tags.clear(); 228 | i = 0; 229 | if (elements.size() != block_size) { elements.resize(block_size); } 230 | } 231 | } 232 | 233 | elements.resize(i); 234 | writer.write(elements, inners, tags); 235 | } 236 | 237 | /* write_elements forwards the block currently held in blk to the output_writer method for the element type: changesets(elements, tags, inners), nodes(elements, tags), ways(elements, inners, tags) or relations(elements, inners, tags). */ template void write_elements(output_writer &writer, control_block &blk); 238 | 239 | template <> inline void write_elements(output_writer &writer, control_block &blk) { 240 | writer.changesets(blk.elements, blk.tags, blk.inners); 241 | } 242 | template <> inline void write_elements(output_writer &writer, control_block &blk) { 243 | writer.nodes(blk.elements, blk.tags); 244 | } 245 | template <> inline void write_elements(output_writer &writer, control_block &blk) { 246 | writer.ways(blk.elements, blk.inners, blk.tags); 247 | } 248 | template <> inline void write_elements(output_writer &writer, control_block &blk) { 249 | writer.relations(blk.elements, blk.inners, blk.tags); 250 | } 251 | 252 | /* Writer-side loop for one output_writer. Each round waits at both barriers while the reader swaps a block into blk, then passes the block to write_elements; the loop ends after the first block shorter than block_size. A failure at the barriers aborts the process; a failure in write_elements is logged and later blocks are skipped, but the barrier rounds continue so the other threads are not left waiting. On exit the thread sets thread_status[thread_index] and signals thread_finished_cond. NOTE(review): exc is a by-value parameter, so the assignments to it only change this call's copy, and run_threads binds exceptions[i] without boost::ref - it looks like a stored exception can never be seen by run_threads; confirm whether a reference was intended. */ template 253 | void writer_thread(int thread_index, 254 | boost::exception_ptr exc, 255 | boost::shared_ptr writer, 256 | boost::shared_ptr > blk) { 257 | const size_t block_size = block_size_trait::value; 258 | 259 | do { 260 | try { 261 | blk->pre_swap_barrier.wait(); 262 | blk->post_swap_barrier.wait(); 263 | } catch (...) { 264 | exc = boost::current_exception(); 265 | std::cerr << "EXCEPTION: writer_thread(" << thread_index << "): " 266 | << boost::diagnostic_information(exc) << std::endl; 267 | // not sure we can recover from an error in synchronisation here, as we 268 | // have no way of figuring out which state this thread or the other 269 | // threads are in. so just explode.
270 | abort(); 271 | } 272 | 273 | try { 274 | // if write_elements previously threw an exception, then don't call it 275 | // again. but we need to continue going through the barrier loops, or all 276 | // the other threads will lock up waiting for this thread. 277 | if (exc == boost::exception_ptr()) { 278 | write_elements(*writer, *blk); 279 | } 280 | 281 | } catch (...) { 282 | exc = boost::current_exception(); 283 | std::cerr << "EXCEPTION: writer_thread(" << thread_index << "): " 284 | << boost::diagnostic_information(exc) 285 | << ". Trying to continue..." 286 | << std::endl; 287 | } 288 | } while (blk->elements.size() == block_size); 289 | 290 | try { 291 | boost::lock_guard lock(blk->thread_finished_mutex); 292 | blk->thread_status[thread_index] = 1; 293 | blk->thread_finished_cond.notify_one(); 294 | 295 | } catch (...) { 296 | // this is a difficult case to handle - it's possible for locking 297 | // to fail, but unless we signal the condition variable then the 298 | // program would hang. instead, treat this as a fatal error. 299 | std::cerr << "Thread " << thread_index << " failed to lock mutex!" 300 | << std::endl; 301 | abort(); 302 | } 303 | } 304 | 305 | /* Joins every thread except index i. Each pass gives a joinable thread one second to finish; a thread that has not finished is interrupted and the passes repeat until no thread times out. */ void join_all_but(size_t i, std::vector > &threads) { 306 | bool still_running = true; 307 | 308 | while (still_running) { 309 | still_running = false; 310 | 311 | for (size_t j = 0; j < threads.size(); ++j) { 312 | if ((j != i) && threads[j]->joinable()) { 313 | // if the thread isn't ready to join for a second, then it is probably blocked 314 | // on something - this is the exceptional path, so the likely case is that some 315 | // thread has thrown an exception and the rest are waiting for it at the 316 | // barrier.
317 | if (!threads[j]->timed_join(boost::posix_time::time_duration(0, 0, 1))) { 318 | still_running = true; 319 | threads[j]->interrupt(); 320 | } 321 | } 322 | } 323 | } 324 | } 325 | 326 | } // anonymous namespace 327 | 328 | /* Clears display_name_map and refills it with id -> display_name for every row read from the users data whose data_public flag is set. */ void extract_users(std::map &display_name_map) { 329 | db_reader reader("users"); 330 | user u; 331 | display_name_map.clear(); 332 | while (reader(u)) { 333 | if (u.data_public) { 334 | display_name_map.insert(std::make_pair(u.id, u.display_name)); 335 | } 336 | } 337 | } 338 | 339 | /* Reader thread entry point: runs extract_element through a thread_writer on blk. Any exception is logged and the process is aborted, because the writers cannot continue without the reader. On success the thread sets thread_status[thread_index] and signals thread_finished_cond. NOTE(review): exc is taken by value here as well, so the assignment before abort() is only visible inside this call. */ template 340 | void reader_thread(int thread_index, 341 | boost::exception_ptr exc, 342 | boost::shared_ptr > blk) { 343 | try { 344 | thread_writer writer(blk); 345 | extract_element(writer); 346 | 347 | } catch (...) { 348 | exc = boost::current_exception(); 349 | std::cerr << "EXCEPTION: reader_thread(" << thread_index << "): " 350 | << boost::diagnostic_information(exc) << std::endl; 351 | // if the reader thread failed, we can't make any progress, and it's 352 | // unlikely that the writer threads can recover from this safely, so 353 | // just explode. 354 | abort(); 355 | } 356 | 357 | try { 358 | boost::lock_guard lock(blk->thread_finished_mutex); 359 | blk->thread_status[thread_index] = 1; 360 | blk->thread_finished_cond.notify_one(); 361 | 362 | } catch (...) { 363 | // this is a difficult case to handle - it's possible for locking 364 | // to fail, but unless we signal the condition variable then the 365 | // program would hang. instead, treat this as a fatal error. 366 | std::cerr << "Thread " << thread_index << " failed to lock mutex!"
367 | << std::endl; 368 | abort(); 369 | } 370 | } 371 | 372 | /* Starts one reader thread (index 0) plus one writer thread per entry of writers, all sharing a control_block sized for writers.size() + 1 participants, then waits on thread_finished_cond and joins each thread whose status flag is set. If exceptions[idx] is set for a finished thread, the remaining threads are interrupted and joined and that exception is rethrown. NOTE(review): exceptions[i] is passed to boost::bind by value, so the thread functions receive copies - see the notes on writer_thread and reader_thread. NOTE(review): the loop waits on the condition variable before it first inspects thread_status; confirm that a notification sent before the lock is taken cannot be missed. */ template 373 | void run_threads(std::vector > writers) { 374 | std::vector > threads; 375 | std::vector exceptions; 376 | const int num_threads = writers.size() + 1; 377 | int i = 0, num_running_threads = num_threads; 378 | 379 | exceptions.resize(num_threads); 380 | boost::shared_ptr > blk = boost::make_shared >(writers.size() + 1); 381 | 382 | threads.push_back(boost::make_shared(boost::bind(&reader_thread, i, exceptions[i], blk))); 383 | 384 | BOOST_FOREACH(boost::shared_ptr writer, writers) { 385 | ++i; 386 | threads.push_back(boost::make_shared(boost::bind(&writer_thread, i, exceptions[i], writer, blk))); 387 | } 388 | 389 | { 390 | boost::unique_lock lock(blk->thread_finished_mutex); 391 | while (num_running_threads > 0) { 392 | blk->thread_finished_cond.wait(lock); 393 | 394 | for (int idx = 0; idx < num_threads; ++idx) { 395 | if (blk->thread_status[idx] != 0) { 396 | 397 | blk->thread_status[idx] = 0; 398 | 399 | boost::shared_ptr thread = threads[idx]; 400 | thread->join(); 401 | --num_running_threads; 402 | 403 | if (exceptions[idx]) { 404 | lock.unlock(); 405 | // interrupt all other threads and join them 406 | join_all_but(idx, threads); 407 | lock.lock(); 408 | // re-throw the exception 409 | boost::rethrow_exception(exceptions[idx]); 410 | } 411 | } 412 | } 413 | } 414 | } 415 | } 416 | 417 | template void run_threads(std::vector >); 418 | template void run_threads(std::vector >); 419 | template void run_threads(std::vector >); 420 | template void run_threads(std::vector >); 421 | -------------------------------------------------------------------------------- /src/dump_archive.cpp: -------------------------------------------------------------------------------- 1 | #include "dump_archive.hpp" 2 | #include "table_extractor.hpp" 3 | #include "types.hpp" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 |
#include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace bt = boost::posix_time; 21 | namespace fs = boost::filesystem; 22 | 23 | namespace { 24 | 25 | struct tag_table_name; 26 | typedef boost::error_info errinfo_table_name; 27 | 28 | template 29 | bt::ptime extract_table_with_timestamp(const std::string &table_name, 30 | const std::string &dump_file, 31 | bool resume, 32 | unsigned int max_concurrency) { 33 | typedef R row_type; 34 | fs::path base_dir(table_name); 35 | boost::optional timestamp; 36 | 37 | if (fs::exists(base_dir)) { 38 | if (fs::is_directory(base_dir) && fs::exists(base_dir / ".complete") && resume) { 39 | std::string timestamp_str; 40 | fs::ifstream in(base_dir / ".complete"); 41 | std::getline(in, timestamp_str); 42 | if (timestamp_str == "-infinity") { 43 | timestamp = bt::ptime(bt::neg_infin); 44 | } else { 45 | timestamp = bt::time_from_string(timestamp_str); 46 | } 47 | 48 | } else { 49 | fs::remove_all(base_dir); 50 | } 51 | } 52 | 53 | if (timestamp) { 54 | return timestamp.get(); 55 | 56 | } else { 57 | table_extractor_with_timestamp extractor(table_name, dump_file, max_concurrency); 58 | timestamp = extractor.read(); 59 | fs::ofstream out(base_dir / ".complete"); 60 | out << bt::to_simple_string(timestamp.get()) << "\n"; 61 | return timestamp.get(); 62 | } 63 | } 64 | 65 | template 66 | void thread_extract_with_timestamp(bt::ptime ×tamp, 67 | boost::exception_ptr &error, 68 | std::string table_name, 69 | std::string dump_file, 70 | bool resume, 71 | unsigned int max_concurrency) { 72 | try { 73 | bt::ptime ts = extract_table_with_timestamp(table_name, dump_file, resume, max_concurrency); 74 | timestamp = ts; 75 | 76 | } catch (const boost::exception &e) { 77 | error = boost::current_exception(); 78 | 79 | } catch (const std::exception &e) { 80 | error = boost::current_exception(); 81 | 82 | } catch (...) 
{ 83 | std::cerr << "Unexpected exception of unknown type in " 84 | << "thread_extract_with_timestamp(" << table_name 85 | << ", " << dump_file << ")!" << std::endl; 86 | abort(); 87 | } 88 | } 89 | 90 | } // anonymous namespace 91 | 92 | base_thread::~base_thread() {} 93 | 94 | template 95 | run_thread::run_thread(std::string table_name_, std::string dump_file, bool resume, unsigned int max_concurrency) 96 | : timestamp(), error(), 97 | thr(&thread_extract_with_timestamp, 98 | boost::ref(timestamp), boost::ref(error), 99 | table_name_, dump_file, resume, max_concurrency), table_name(table_name_) { 100 | } 101 | 102 | template 103 | run_thread::~run_thread() { 104 | try { 105 | thr.join(); 106 | } catch (...) { 107 | } 108 | } 109 | 110 | template 111 | bt::ptime run_thread::join() { 112 | thr.join(); 113 | if (error) { 114 | boost::throw_exception(boost::enable_error_info(std::runtime_error("Error during archive dump to disk database.")) 115 | << boost::errinfo_nested_exception(error) 116 | << errinfo_table_name(table_name)); 117 | } 118 | return timestamp; 119 | } 120 | 121 | template struct run_thread; 122 | template struct run_thread; 123 | template struct run_thread; 124 | template struct run_thread; 125 | template struct run_thread; 126 | template struct run_thread; 127 | template struct run_thread; 128 | template struct run_thread; 129 | template struct run_thread; 130 | template struct run_thread; 131 | -------------------------------------------------------------------------------- /src/dump_reader.cpp: -------------------------------------------------------------------------------- 1 | #include "dump_reader.hpp" 2 | #include "config.h" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | // include vendored later header to deal with https://svn.boost.org/trac/boost/ticket/5237 15 | // #include 16 | #include "vendor/boost/iostreams/filter/gzip.hpp" 17 | #include 18 | #include 
19 | #include 20 | #include 21 | #include 22 | //#include 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include 32 | 33 | #define BATCH_SIZE (10240) 34 | #define MAX_MERGESORT_BLOCK_SIZE (67108864) 35 | 36 | namespace { 37 | 38 | namespace qi = boost::spirit::qi; 39 | namespace bio = boost::iostreams; 40 | namespace fs = boost::filesystem; 41 | 42 | struct tag_copy_header; 43 | 44 | typedef boost::error_info copy_header; 45 | 46 | struct popen_error : public boost::exception, std::exception {}; 47 | struct fread_error : public boost::exception, std::exception {}; 48 | struct early_termination_error : public boost::exception, std::exception {}; 49 | struct copy_header_parse_error : public boost::exception, std::exception {}; 50 | 51 | typedef boost::shared_ptr pipe_ptr; 52 | 53 | /* Deleter for pipe_ptr: closes a non-NULL popen handle with pclose and aborts the process if pclose reports -1. */ static void pipe_closer(FILE *fh) { 54 | if (fh != NULL) { 55 | if (pclose(fh) == -1) { 56 | std::cerr << "ERROR while closing popen." << std::endl; 57 | abort(); 58 | } 59 | } 60 | } 61 | 62 | /* Runs cmd through popen in read mode and exposes its output via read(). The constructor throws popen_error when popen fails; read() returns the number of bytes fread delivered and throws fread_error when the stream's error flag is set. The pipe is closed by pipe_closer when m_fh is released. */ struct process 63 | : public boost::noncopyable { 64 | explicit process(const std::string &cmd) 65 | : m_fh(popen(cmd.c_str(), "r"), &pipe_closer) { 66 | if (!m_fh) { 67 | BOOST_THROW_EXCEPTION(popen_error() << boost::errinfo_file_name(cmd)); 68 | } 69 | } 70 | 71 | ~process() { 72 | } 73 | 74 | size_t read(char *buf, size_t len) { 75 | size_t n = fread(buf, 1, len, m_fh.get()); 76 | if (ferror(m_fh.get()) != 0) { 77 | boost::weak_ptr fh(m_fh); 78 | BOOST_THROW_EXCEPTION(fread_error() << boost::errinfo_file_handle(fh)); 79 | } 80 | return n; 81 | } 82 | 83 | private: 84 | pipe_ptr m_fh; 85 | }; 86 | 87 | /* Splits the byte stream of source into lines using an internal buffer of buffer_size bytes. read() stores the next line without its newline and returns 1; it returns 0 once the source is exhausted, in which case any trailing text that had no newline has been appended to line but is not reported as a line. */ template 88 | struct to_line_filter 89 | : public boost::noncopyable { 90 | to_line_filter(T &source, size_t buffer_size) 91 | : m_source(source), 92 | m_buffer(buffer_size, '\0'), 93 | m_buffer_pos(m_buffer.begin()), 94 | m_buffer_end(m_buffer_pos) { 95 | } 96 | 97 | ~to_line_filter() { 98 | } 99 | 100 | size_t read(std::string &line) { 101 |
line.clear(); 102 | std::string::iterator begin_pos = m_buffer_pos; 103 | char c = '\0'; 104 | 105 | do { 106 | if (m_buffer_pos == m_buffer_end) { 107 | line.append(begin_pos, m_buffer_pos); 108 | if (refill() == 0) { 109 | return 0; 110 | } 111 | begin_pos = m_buffer_pos; 112 | } 113 | 114 | c = *m_buffer_pos; 115 | if (c != '\n') { 116 | ++m_buffer_pos; 117 | } 118 | } while (c != '\n'); 119 | 120 | line.append(begin_pos, m_buffer_pos); 121 | ++m_buffer_pos; 122 | 123 | return 1; 124 | } 125 | 126 | private: 127 | /* Refills the buffer from the source, reading repeatedly until the buffer is full or the source returns 0 bytes; resets the position to the start and returns the number of bytes now buffered. */ size_t refill() { 128 | size_t bytes = 0; 129 | while (bytes < m_buffer.size()) { 130 | size_t len = m_source.read(&m_buffer[bytes], m_buffer.size() - bytes); 131 | if (len == 0) { 132 | break; 133 | } 134 | bytes += len; 135 | } 136 | m_buffer_pos = m_buffer.begin(); 137 | m_buffer_end = m_buffer.begin() + bytes; 138 | return bytes; 139 | } 140 | 141 | T &m_source; 142 | std::string m_buffer; 143 | std::string::iterator m_buffer_pos, m_buffer_end; 144 | }; 145 | 146 | // COPY current_nodes (id, latitude, longitude, changeset_id, visible, "timestamp", tile, version) FROM stdin; 147 | /* Spirit grammar for the COPY header of one table: the word COPY, an optional public. schema prefix, the table name, a parenthesised comma-separated list of identifiers (bare, or double-quoted without embedded quotes or backslashes), and the closing FROM stdin; text. The parsed identifiers are the column names. */ template 148 | struct copy_line 149 | : qi::grammar(), qi::space_type> { 150 | 151 | copy_line(const std::string &table_name) 152 | : copy_line::base_type(root) { 153 | using qi::char_; 154 | using qi::alpha; 155 | using qi::alnum; 156 | using qi::lexeme; 157 | using qi::lit; 158 | 159 | root = lit("COPY") >> -lit("public.") >> lit(table_name) >> lit("(") >> (ident % lit(',')) >> lit(") FROM stdin;"); 160 | ident = (alpha >> *(alnum | char_('_'))) | (lit("\"") >> *(char_ - '"' - '\\') >> lit("\"")); 161 | } 162 | 163 | qi::rule(), qi::space_type> root; 164 | qi::rule ident; 165 | }; 166 | 167 | /* Line filter that yields only the data rows of a COPY section. init() skips lines until one starts with the COPY prefix, parses it with the grammar for table_name and returns the column names; read() then returns rows one at a time, and after the end-of-data marker line it keeps consuming input until the source is exhausted and returns 0. */ template 168 | struct filter_copy_contents 169 | : public boost::noncopyable { 170 | explicit filter_copy_contents(T &source, const std::string &table_name) 171 | : m_source(source), 172 | m_in_copy(false), 173 | m_start_prefix("COPY "), 174 | m_end_line("\\."), 175 |
m_grammar(table_name), 176 | m_table_name(table_name) { 177 | } 178 | 179 | ~filter_copy_contents() { 180 | } 181 | 182 | /* Reads lines until the first one that begins with the COPY prefix and returns its column names. Throws early_termination_error if the input ends first or no column names were parsed, and copy_header_parse_error (carrying the offending line) if that line does not match the grammar. */ std::vector init() { 183 | std::vector column_names; 184 | std::string line; 185 | size_t got_data = 0; 186 | 187 | do { 188 | got_data = m_source.read(line); 189 | 190 | if (got_data == 0) { 191 | BOOST_THROW_EXCEPTION(early_termination_error()); 192 | } 193 | 194 | if (line.compare(0, m_start_prefix.size(), m_start_prefix) == 0) { 195 | std::string::iterator begin = line.begin(); 196 | std::string::iterator end = line.end(); 197 | bool result = qi::phrase_parse(begin, end, m_grammar, qi::space, column_names); 198 | if (!result) { 199 | BOOST_THROW_EXCEPTION(copy_header_parse_error() << copy_header(line)); 200 | } 201 | m_in_copy = true; 202 | break; 203 | } 204 | } while (true); 205 | 206 | if (!m_in_copy) { 207 | BOOST_THROW_EXCEPTION(early_termination_error()); 208 | } 209 | if (column_names.empty()) { 210 | BOOST_THROW_EXCEPTION(early_termination_error()); 211 | } 212 | 213 | return column_names; 214 | } 215 | 216 | /* Returns the next data row in line with a non-zero result. Once the end-of-data marker has been seen, m_in_copy is cleared and the loop drains the rest of the source, so the call returns 0. */ size_t read(std::string &line) { 217 | size_t got_data = 0; 218 | do { 219 | got_data = m_source.read(line); 220 | 221 | if (got_data == 0) { 222 | break; 223 | } 224 | 225 | if (m_in_copy && (line.compare(m_end_line) == 0)) { 226 | m_in_copy = false; 227 | } 228 | } while (!m_in_copy); 229 | 230 | return got_data; 231 | } 232 | 233 | private: 234 | T &m_source; 235 | bool m_in_copy; 236 | const std::string m_start_prefix, m_end_line; 237 | copy_line m_grammar; 238 | std::string m_table_name; 239 | }; 240 | 241 | typedef std::pair kv_pair_t; 242 | 243 | /* Reader over one gzip-compressed block file named from subdir, prefix and an 8-digit hexadecimal block counter. The constructor throws std::runtime_error when the file is missing, cannot be opened or is not good, and then calls next() once so that value() and at_end() are meaningful immediately. */ struct block_reader : public boost::noncopyable { 244 | block_reader(const std::string &subdir, const std::string &prefix, size_t block_counter) 245 | : m_file_name((boost::format("%1$s/%2$s_%3$08x.data") % subdir % prefix % block_counter).str()), 246 | m_end(false) { 247 | if (!fs::exists(m_file_name)) { 248 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("File '%1%'
does not exist.") % m_file_name).str())); 249 | } 250 | m_file.open(m_file_name.c_str()); 251 | if (!m_file.is_open()) { 252 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Unable to open '%1%'.") % m_file_name).str())); 253 | } 254 | if (!m_file.good()) { 255 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("File '%1%' is open, but not good.") % m_file_name).str())); 256 | } 257 | 258 | m_stream.push(bio::gzip_decompressor()); 259 | m_stream.push(m_file); 260 | 261 | next(); 262 | } 263 | 264 | ~block_reader() { 265 | bio::close(m_stream); 266 | m_file.close(); 267 | } 268 | 269 | bool at_end() { return m_end; } 270 | 271 | const kv_pair_t &value() { return m_current; } 272 | 273 | void next() { 274 | static const uint16_t max_uint16_t = std::numeric_limits::max(); 275 | uint16_t ksz = 0, vsz = 0; 276 | uint64_t kextsz = 0, vextsz = 0; 277 | 278 | if (bio::read(m_stream, (char *)&ksz, sizeof(uint16_t)) != sizeof(uint16_t)) { m_end = true; return; } 279 | if (ksz == max_uint16_t) { 280 | if (bio::read(m_stream, (char *)&kextsz, sizeof(uint64_t)) != sizeof(uint64_t)) { m_end = true; return; } 281 | } 282 | if (bio::read(m_stream, (char *)&vsz, sizeof(uint16_t)) != sizeof(uint16_t)) { m_end = true; return; } 283 | if (vsz == max_uint16_t) { 284 | if (bio::read(m_stream, (char *)&vextsz, sizeof(uint64_t)) != sizeof(uint64_t)) { m_end = true; return; } 285 | } 286 | 287 | size_t key_size = (ksz == max_uint16_t) ? size_t(kextsz) : size_t(ksz); 288 | size_t val_size = (vsz == max_uint16_t) ?
size_t(vextsz) : size_t(vsz); 289 | m_current.first.resize(key_size); 290 | if (bio::read(m_stream, &m_current.first[0], key_size) != key_size) { m_end = true; return; } 291 | m_current.second.resize(val_size); 292 | if (bio::read(m_stream, &m_current.second[0], val_size) != val_size) { m_end = true; return; } 293 | } 294 | 295 | const std::string &file_name() const { return m_file_name; } 296 | 297 | private: 298 | std::string m_file_name; 299 | bool m_end; 300 | std::ifstream m_file; 301 | bio::filtering_streambuf m_stream; 302 | kv_pair_t m_current; 303 | }; 304 | 305 | struct block_writer : public boost::noncopyable { 306 | block_writer(const std::string &subdir, const std::string &bit, size_t block_counter) 307 | : m_anything_written(false) { 308 | m_file_name = (boost::format("%1$s/%2$s_%3$08x.data") % subdir % bit % block_counter).str(); 309 | if (fs::exists(m_file_name)) { 310 | fs::remove(m_file_name); 311 | } 312 | m_out.open(m_file_name.c_str()); 313 | if (!m_out.is_open()) { 314 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Unable to open '%1%'.") % m_file_name).str())); 315 | } 316 | if (!m_out.good()) { 317 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("File '%1%' is open, but not good.") % m_file_name).str())); 318 | } 319 | 320 | m_stream.push(bio::gzip_compressor(1)); 321 | m_stream.push(m_out); 322 | 323 | // TODO: future optimisation 324 | // int fd = (m_out.rdbuf())->fd(); 325 | // int status = posix_fallocate(fd, 0, MAX_MERGESORT_BLOCK_SIZE); 326 | // if (status != 0) { 327 | // BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("posix_fallocate() on '%1%' failed. 
status=%2%.") % file_name % status).str())); 328 | // } 329 | } 330 | 331 | ~block_writer() { 332 | bio::flush(m_stream); 333 | bio::close(m_stream); 334 | m_out.close(); 335 | } 336 | 337 | inline void operator()(const kv_pair_t &kv) { 338 | static const size_t max_uint16_t = size_t(std::numeric_limits::max()); 339 | const std::string &k = kv.first; 340 | const std::string &v = kv.second; 341 | 342 | uint16_t key_size = 0, val_size = 0; 343 | uint64_t key_extra_size = 0, val_extra_size = 0; 344 | 345 | if (k.size() >= max_uint16_t) { 346 | key_size = std::numeric_limits::max(); 347 | key_extra_size = uint64_t(k.size()); 348 | } else { 349 | key_size = uint16_t(k.size()); 350 | } 351 | 352 | if (v.size() >= max_uint16_t) { 353 | val_size = std::numeric_limits::max(); 354 | val_extra_size = uint64_t(v.size()); 355 | } else { 356 | val_size = uint16_t(v.size()); 357 | } 358 | 359 | bio::write(m_stream, (const char *)(&key_size), sizeof(uint16_t)); 360 | if (key_extra_size > 0) { 361 | bio::write(m_stream, (const char *)(&key_extra_size), sizeof(uint64_t)); 362 | } 363 | bio::write(m_stream, (const char *)(&val_size), sizeof(uint16_t)); 364 | if (val_extra_size > 0) { 365 | bio::write(m_stream, (const char *)(&val_extra_size), sizeof(uint64_t)); 366 | } 367 | bio::write(m_stream, k.c_str(), k.size()); 368 | bio::write(m_stream, v.c_str(), v.size()); 369 | m_anything_written = true; 370 | } 371 | 372 | private: 373 | bool m_anything_written; 374 | std::string m_file_name; 375 | std::ofstream m_out; 376 | bio::filtering_streambuf m_stream; 377 | }; 378 | 379 | struct compare_first { 380 | bool operator()(const kv_pair_t &a, const kv_pair_t &b) const { 381 | const size_t end = std::min(a.first.size(), b.first.size()); 382 | for (size_t i = 0; i < end; ++i) { 383 | unsigned char ac = (unsigned char)a.first[i]; 384 | unsigned char bc = (unsigned char)b.first[i]; 385 | if (ac < bc) { return true; } 386 | if (ac > bc) { return false; } 387 | } 388 | return end == 
a.first.size(); 389 | } 390 | }; 391 | 392 | struct thread_control_block : public boost::noncopyable { 393 | sem_t *m_sem; 394 | std::string m_subdir, m_prefix; 395 | size_t m_block_number; 396 | std::vector m_strings; 397 | std::vector > m_waits; 398 | boost::shared_ptr m_thread; 399 | boost::exception_ptr m_error; 400 | 401 | thread_control_block(sem_t *sem, 402 | std::string subdir, std::string prefix, size_t block_number, 403 | std::vector &strings, 404 | std::vector > waits = 405 | std::vector >()) 406 | : m_sem(sem), m_subdir(subdir), m_prefix(prefix), m_block_number(block_number), m_strings(), m_waits(waits), 407 | m_thread(), m_error() { 408 | std::swap(m_strings, strings); 409 | strings.clear(); 410 | 411 | // lock the semaphore now, before starting the thread, so that we block the 412 | // dump reader thread's progress and prevent it spawning loads of threads. 413 | int status = sem_wait(m_sem); 414 | if (status != 0) { 415 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Failed to sem_wait, return = %1%.") % status).str())); 416 | } 417 | 418 | m_thread = boost::make_shared(boost::bind(&thread_control_block::run, boost::ref(*this))); 419 | } 420 | 421 | std::string file_name() const { 422 | return (boost::format("%1$s/%2$s_%3$08x.data") % m_subdir % m_prefix % m_block_number).str(); 423 | } 424 | 425 | static void run(thread_control_block &tcb) { 426 | std::size_t sum = 0; 427 | BOOST_FOREACH(const kv_pair_t &kv, tcb.m_strings) { 428 | sum += sizeof(kv_pair_t) + kv.first.size() + kv.second.size(); 429 | } 430 | sum += sizeof(m_strings); 431 | std::cerr << "Starting thread with " << sum << " bytes" << std::endl; 432 | try { 433 | if (tcb.m_waits.size() > 0) { 434 | tcb.run_merge(); 435 | 436 | } else { 437 | tcb.run_write(); 438 | } 439 | 440 | } catch (...) 
{ 441 | tcb.m_error = boost::current_exception(); 442 | } 443 | std::cerr << "Finishing thread with " << sum << " bytes" << std::endl; 444 | int status = sem_post(tcb.m_sem); 445 | if (status != 0) { 446 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Failed to sem_post, return = %1%.") % status).str())); 447 | } 448 | } 449 | 450 | void run_merge() { 451 | if (m_waits.size() == 1) { 452 | // wait for only thread to finish 453 | thread_control_block &tcb2 = *(m_waits[0]); 454 | tcb2.m_thread->join(); 455 | if (tcb2.m_error) { boost::rethrow_exception(tcb2.m_error); } 456 | 457 | // just move it into place. 458 | std::string part_file_name = tcb2.file_name(); 459 | std::string final_file_name = file_name(); 460 | fs::rename(part_file_name, final_file_name); 461 | return; 462 | } 463 | 464 | std::list readers; 465 | BOOST_FOREACH(boost::shared_ptr tcb2, m_waits) { 466 | tcb2->m_thread->join(); 467 | if (tcb2->m_error) { boost::rethrow_exception(tcb2->m_error); } 468 | readers.push_back(new block_reader(tcb2->m_subdir, tcb2->m_prefix, tcb2->m_block_number)); 469 | } 470 | m_waits.clear(); 471 | 472 | compare_first comp; 473 | block_writer writer(m_subdir, m_prefix, m_block_number); 474 | while (!readers.empty()) { 475 | std::list::iterator min_itr = readers.begin(); 476 | kv_pair_t min_pair = (*min_itr)->value(); 477 | 478 | std::list::iterator itr = readers.begin(); 479 | ++itr; 480 | while (itr != readers.end()) { 481 | const kv_pair_t &val = (*itr)->value(); 482 | if (comp(val, min_pair)) { 483 | min_pair = val; 484 | min_itr = itr; 485 | } 486 | ++itr; 487 | } 488 | 489 | writer(min_pair); 490 | 491 | (*min_itr)->next(); 492 | if ((*min_itr)->at_end()) { 493 | fs::remove((*min_itr)->file_name()); 494 | delete *min_itr; 495 | readers.erase(min_itr); 496 | } 497 | } 498 | } 499 | 500 | void run_write() { 501 | block_writer writer(m_subdir, m_prefix, m_block_number); 502 | compare_first comp; 503 | 504 | std::sort(m_strings.begin(), m_strings.end(), comp); 
505 | 506 | BOOST_FOREACH(const kv_pair_t &kv, m_strings) { 507 | writer(kv); 508 | } 509 | 510 | // actually want to make sure m_strings is deallocated here, because we're 511 | // done using it and this thread owns that memory until the thread is joined 512 | // and this TCB is deallocated - which might be significantly past this 513 | // point in time. (and std::vector<>::clear() doesn't / can't release 514 | // memory) 515 | std::vector().swap(m_strings); 516 | } 517 | }; 518 | 519 | struct db_writer : public boost::noncopyable { 520 | explicit db_writer(const std::string &table_name, unsigned int max_concurrency) 521 | : m_subdir(table_name), 522 | m_block_counter(0), 523 | m_bytes_this_block(0) { 524 | // TODO: configurable value? the memory usage should be *approximately* 525 | // 64MB (MAX_MERGESORT_BLOCK_SIZE) * the number of threads, controlled by 526 | // the semaphore below. 527 | int status = sem_init(&m_sem, 0, max_concurrency); 528 | if (status != 0) { 529 | BOOST_THROW_EXCEPTION(std::runtime_error((boost::format("Failed to sem_init, return = %1%.") % status).str())); 530 | } 531 | fs::create_directories(m_subdir); 532 | } 533 | 534 | ~db_writer() { 535 | BOOST_FOREACH(boost::shared_ptr tcb, m_blocks) { 536 | try { 537 | tcb->m_thread->join(); 538 | } catch (...) { 539 | std::cerr << "Caught exception on " << tcb->file_name() << " but already in destructor." << std::endl; 540 | } 541 | } 542 | BOOST_FOREACH(boost::shared_ptr tcb, m_blocks2) { 543 | try { 544 | tcb->m_thread->join(); 545 | } catch (...) { 546 | std::cerr << "Caught exception on " << tcb->file_name() << " but already in destructor." << std::endl; 547 | } 548 | } 549 | BOOST_FOREACH(boost::shared_ptr tcb, m_blocks3) { 550 | try { 551 | tcb->m_thread->join(); 552 | } catch (...) { 553 | std::cerr << "Caught exception on " << tcb->file_name() << " but already in destructor." 
<< std::endl; 554 | } 555 | } 556 | 557 | int status = sem_destroy(&m_sem); 558 | if (status != 0) { 559 | std::cerr << "ERROR: Failed to destroy semaphore: error " << status << std::endl; 560 | } 561 | } 562 | 563 | void finish() { 564 | if (m_strings.size() > 0) { 565 | flush_block(); 566 | } 567 | combine_blocks(); 568 | } 569 | 570 | void put(const std::string &k, const std::string &v) { 571 | static const size_t max_uint16_t = size_t(std::numeric_limits::max()); 572 | size_t extra_bytes = 0; 573 | if (k.size() >= max_uint16_t) { 574 | extra_bytes += sizeof(uint64_t); 575 | } 576 | if (v.size() >= max_uint16_t) { 577 | extra_bytes += sizeof(uint64_t); 578 | } 579 | size_t bytes = k.size() + v.size() + extra_bytes + 2 * sizeof(uint16_t); 580 | if ((m_bytes_this_block + bytes) > MAX_MERGESORT_BLOCK_SIZE) { 581 | flush_block(); 582 | } 583 | m_strings.push_back(make_pair(k, v)); 584 | m_bytes_this_block += bytes; 585 | } 586 | 587 | private: 588 | sem_t m_sem; 589 | std::string m_subdir; 590 | size_t m_block_counter; 591 | size_t m_bytes_this_block; 592 | std::vector m_strings; 593 | std::vector > m_blocks, m_blocks2, m_blocks3; 594 | 595 | void flush_block() { 596 | static const std::string part_1("part"), part_2("part2"), part_3("part3"); 597 | m_blocks.push_back(boost::make_shared(&m_sem, m_subdir, part_1, m_block_counter, boost::ref(m_strings))); 598 | m_strings.clear(); 599 | 600 | if (m_blocks.size() >= 16) { 601 | m_blocks2.push_back(boost::make_shared(&m_sem, m_subdir, part_2, m_block_counter, boost::ref(m_strings), m_blocks)); 602 | m_strings.clear(); 603 | m_blocks.clear(); 604 | 605 | if (m_blocks2.size() >= 16) { 606 | m_blocks3.push_back(boost::make_shared(&m_sem, m_subdir, part_3, m_block_counter, boost::ref(m_strings), m_blocks2)); 607 | m_strings.clear(); 608 | m_blocks2.clear(); 609 | } 610 | } 611 | m_bytes_this_block = 0; 612 | ++m_block_counter; 613 | } 614 | 615 | void combine_blocks() { 616 | if (m_blocks2.size() > 0) { 617 | 
m_blocks.insert(m_blocks.end(), m_blocks2.begin(), m_blocks2.end()); 618 | m_blocks2.clear(); 619 | } 620 | if (m_blocks3.size() > 0) { 621 | m_blocks.insert(m_blocks.end(), m_blocks3.begin(), m_blocks3.end()); 622 | m_blocks3.clear(); 623 | } 624 | thread_control_block tcb(&m_sem, m_subdir, "final", 0, m_strings, m_blocks); 625 | m_strings.clear(); 626 | tcb.m_thread->join(); 627 | if (tcb.m_error) { boost::rethrow_exception(tcb.m_error); } 628 | } 629 | }; 630 | 631 | } // anonymous namespace 632 | 633 | struct dump_reader::pimpl { 634 | pimpl(const std::string &cmd, const std::string &table_name, unsigned int max_concurrency) 635 | : m_proc(cmd), 636 | m_line_filter(m_proc, 1024 * 1024), 637 | m_cont_filter(m_line_filter, table_name), 638 | m_writer(table_name, max_concurrency) { 639 | 640 | // get the headers for the COPY data 641 | m_column_names = m_cont_filter.init(); 642 | } 643 | 644 | ~pimpl() { 645 | } 646 | 647 | process m_proc; 648 | to_line_filter m_line_filter; 649 | filter_copy_contents > m_cont_filter; 650 | 651 | db_writer m_writer; 652 | 653 | std::vector m_column_names; 654 | }; 655 | 656 | dump_reader::dump_reader(const std::string &table_name, 657 | const std::string &dump_file, 658 | unsigned int max_concurrency) 659 | : m_impl() { 660 | std::ostringstream cmd; 661 | cmd << "pg_restore -f - -a -t " << table_name << " " << dump_file; 662 | m_impl.reset(new pimpl(cmd.str(), table_name, max_concurrency)); 663 | } 664 | 665 | dump_reader::~dump_reader() { 666 | } 667 | 668 | const std::vector &dump_reader::column_names() const { 669 | return m_impl->m_column_names; 670 | } 671 | 672 | size_t dump_reader::read(std::string &line) { 673 | return m_impl->m_cont_filter.read(line); 674 | } 675 | 676 | void dump_reader::put(const std::string &k, const std::string &v) { 677 | m_impl->m_writer.put(k, v); 678 | } 679 | 680 | void dump_reader::finish() { 681 | m_impl->m_writer.finish(); 682 | } 683 | 
-------------------------------------------------------------------------------- /src/extract_kv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "extract_kv.hpp" 13 | #include "types.hpp" 14 | #include "time_epoch.hpp" 15 | 16 | namespace bt = boost::posix_time; 17 | namespace bf = boost::fusion; 18 | 19 | namespace { 20 | 21 | struct app_item { 22 | typedef int result_type; 23 | 24 | app_item(std::ostream &o) : out(o) {} 25 | 26 | int operator()(int, bool b) const { 27 | char c = b ? 1 : 0; 28 | out.write(&c, 1); 29 | return 0; 30 | } 31 | 32 | int operator()(int, int16_t i) const { 33 | uint16_t ii = htobe16(i); 34 | out.write((const char *)(&ii), sizeof(int16_t)); 35 | return 0; 36 | } 37 | 38 | int operator()(int, int32_t i) const { 39 | uint32_t ii = htobe32(i); 40 | out.write((const char *)(&ii), sizeof(int32_t)); 41 | return 0; 42 | } 43 | 44 | int operator()(int, int64_t i) const { 45 | uint64_t ii = htobe64(i); 46 | out.write((const char *)(&ii), sizeof(int64_t)); 47 | return 0; 48 | } 49 | 50 | int operator()(int, uint16_t i) const { 51 | uint16_t ii = htobe16(i); 52 | out.write((const char *)(&ii), sizeof(uint16_t)); 53 | return 0; 54 | } 55 | 56 | int operator()(int, uint32_t i) const { 57 | uint32_t ii = htobe32(i); 58 | out.write((const char *)(&ii), sizeof(uint32_t)); 59 | return 0; 60 | } 61 | 62 | int operator()(int, uint64_t i) const { 63 | uint64_t ii = htobe64(i); 64 | out.write((const char *)(&ii), sizeof(uint64_t)); 65 | return 0; 66 | } 67 | 68 | int operator()(int, double d) const { 69 | out.write((const char *)(&d), sizeof(double)); 70 | return 0; 71 | } 72 | 73 | int operator()(int, const std::string &s) const { 74 | uint32_t size = s.size(); 75 | if (size > size_t(std::numeric_limits::max())) { 76 | BOOST_THROW_EXCEPTION(std::runtime_error("String length 
too long.")); 77 | } 78 | 79 | // serialise as null-terminated UTF-8. this means the sorting order is 80 | // (sort of) meaningful, but means we have to stop on the first null 81 | // byte. these strings _shouldn't_ contain null bytes, but lots of things 82 | // that shouldn't happen still do. 83 | std::size_t len = s.find('\0'); 84 | if (len == std::string::npos) { 85 | len = s.size(); 86 | } 87 | 88 | out.write(s.data(), len); 89 | out.write("\0", 1); 90 | return 0; 91 | } 92 | 93 | int operator()(int, const bt::ptime &t) const { 94 | if (t < time_epoch) { 95 | BOOST_THROW_EXCEPTION(std::runtime_error("Time is before epoch.")); 96 | } 97 | bt::time_duration dt = t - time_epoch; 98 | long seconds = dt.total_seconds(); 99 | if (seconds > long(std::numeric_limits::max())) { 100 | BOOST_THROW_EXCEPTION(std::runtime_error("Time is too late after epoch.")); 101 | } 102 | operator()(0, uint32_t(seconds)); 103 | return 0; 104 | } 105 | 106 | template 107 | int operator()(int, const boost::optional &o) const { 108 | if (o) { 109 | out.put(0x01); 110 | operator()(0, o.get()); 111 | } else { 112 | out.put(0x00); 113 | } 114 | return 0; 115 | } 116 | 117 | int operator()(int, user_status_enum e) const { 118 | char c = char(e); 119 | out.put(c); 120 | return 0; 121 | } 122 | 123 | int operator()(int, format_enum e) const { 124 | char c = char(e); 125 | out.put(c); 126 | return 0; 127 | } 128 | 129 | int operator()(int, nwr_enum e) const { 130 | char c = char(e); 131 | out.put(c); 132 | return 0; 133 | } 134 | 135 | std::ostream &out; 136 | }; 137 | 138 | template 139 | std::string to_binary(std::ostringstream &out, const T &t) { 140 | out.clear(); 141 | out.seekp(0); 142 | bf::fold(t, 0, app_item(out)); 143 | // because out.str() gives us the contents of the string buffer, 144 | // not the contents written since clear() was called, we need to 145 | // chop off any remaining garbage at the end. 
146 | std::string rv = out.str(); 147 | std::streampos pos = out.tellp(); 148 | rv.resize(pos); 149 | return rv; 150 | } 151 | 152 | } // anonymous namespace 153 | 154 | template 155 | void extract_kv::operator()(T &t, std::string &key, std::string &val) { 156 | static const int num_keys = T::num_keys; 157 | typedef typename bf::result_of::begin::type it_begin; 158 | typedef typename bf::result_of::end::type it_end; 159 | typedef typename bf::result_of::advance_c::type it_key; 160 | 161 | it_begin v_begin(t, 0); 162 | it_key v_key(t, 0); 163 | it_end v_end(t, 0); 164 | 165 | key = to_binary(out, bf::iterator_range(v_begin, v_key)); 166 | val = to_binary(out, bf::iterator_range(v_key, v_end)); 167 | } 168 | 169 | template struct extract_kv; 170 | template struct extract_kv; 171 | template struct extract_kv; 172 | template struct extract_kv; 173 | template struct extract_kv; 174 | template struct extract_kv; 175 | template struct extract_kv; 176 | template struct extract_kv; 177 | template struct extract_kv; 178 | template struct extract_kv; 179 | -------------------------------------------------------------------------------- /src/history_filter.cpp: -------------------------------------------------------------------------------- 1 | #include "history_filter.hpp" 2 | #include 3 | 4 | #include "xml_writer.hpp" 5 | #include "pbf_writer.hpp" 6 | 7 | template 8 | history_filter::history_filter(const std::string &option_name, const boost::program_options::variables_map &options, 9 | const user_map_t &user_map, const boost::posix_time::ptime &max_time, user_info_level uil, historical_versions hv, changeset_discussions cd) 10 | : m_writer(new T(option_name, options, user_map, max_time, uil, historical_versions::NONE, cd)), 11 | m_left_over_nodes(boost::none), 12 | m_left_over_ways(boost::none), 13 | m_left_over_relations(boost::none) { 14 | } 15 | 16 | template 17 | history_filter::~history_filter() { 18 | } 19 | 20 | template 21 | void history_filter::changesets(const 
std::vector &cs, 22 | const std::vector &ts, 23 | const std::vector &ccs) { 24 | // no filtering for changesets - they are all "current", and all get passed 25 | // through to the backend. 26 | m_writer->changesets(cs, ts, ccs); 27 | } 28 | 29 | template 30 | void history_filter::nodes(const std::vector &ns, const std::vector &ts) { 31 | std::vector cn; 32 | std::vector ct; 33 | 34 | // handle a left over node, but only if its version list doesn't continue into 35 | // this block - if it does, then we can ignore the left over one. 36 | if (m_left_over_nodes && (ns.empty() || (ns[0].id > m_left_over_nodes->n.id))) { 37 | if (m_left_over_nodes->n.visible) { 38 | cn.push_back(m_left_over_nodes->n); 39 | std::swap(m_left_over_nodes->tags, ct); 40 | } 41 | } 42 | 43 | std::vector::const_iterator t_itr = ts.begin(); 44 | const std::vector::const_iterator t_end = ts.end(); 45 | 46 | for (size_t i = 1; i < ns.size(); ++i) { 47 | if (ns[i].id > ns[i-1].id) { 48 | const node &nn = ns[i-1]; 49 | // if the node is deleted, we don't want it in the non-history 50 | // file, so skip to the next item. 
51 | if (!nn.visible) { continue; } 52 | 53 | cn.push_back(nn); 54 | 55 | while ((t_itr != t_end) && (t_itr->element_id <= nn.id)) { 56 | if ((t_itr->version == nn.version) && (t_itr->element_id == nn.id)) { 57 | ct.push_back(*t_itr); 58 | } 59 | ++t_itr; 60 | } 61 | } 62 | } 63 | 64 | // push to the underlying writer 65 | m_writer->nodes(cn, ct); 66 | 67 | // and save the last node for next time 68 | if (!ns.empty()) { 69 | if (!m_left_over_nodes) { m_left_over_nodes = left_over_nodes(); } 70 | const node &nn = ns[ns.size()-1]; 71 | m_left_over_nodes->n = nn; 72 | m_left_over_nodes->tags.clear(); 73 | 74 | while ((t_itr != t_end) && (t_itr->element_id <= nn.id)) { 75 | if ((t_itr->version == nn.version) && (t_itr->element_id == nn.id)) { 76 | m_left_over_nodes->tags.push_back(*t_itr); 77 | } 78 | ++t_itr; 79 | } 80 | } else { 81 | m_left_over_nodes = boost::none; 82 | } 83 | } 84 | 85 | template 86 | void history_filter::ways(const std::vector &ws, const std::vector &wns, const std::vector &ts) { 87 | std::vector cw; 88 | std::vector cwn; 89 | std::vector ct; 90 | 91 | // if there are any left over nodes, finish them now 92 | if (m_left_over_nodes) { 93 | std::vector ns; std::vector nts; 94 | nodes(ns, nts); 95 | } 96 | 97 | // handle a left over way, but only if its version list doesn't continue into 98 | // this block - if it does, then we can ignore the left over one. 
99 | if (m_left_over_ways && (ws.empty() || (ws[0].id > m_left_over_ways->w.id))) { 100 | if (m_left_over_ways->w.visible) { 101 | cw.push_back(m_left_over_ways->w); 102 | std::swap(m_left_over_ways->nodes, cwn); 103 | std::swap(m_left_over_ways->tags, ct); 104 | } 105 | } 106 | 107 | std::vector::const_iterator n_itr = wns.begin(); 108 | const std::vector::const_iterator n_end = wns.end(); 109 | std::vector::const_iterator t_itr = ts.begin(); 110 | const std::vector::const_iterator t_end = ts.end(); 111 | 112 | for (size_t i = 1; i < ws.size(); ++i) { 113 | if (ws[i].id > ws[i-1].id) { 114 | const way &ww = ws[i-1]; 115 | // if the way is deleted, we don't want it in the non-history 116 | // file, so skip to the next item. 117 | if (!ww.visible) { continue; } 118 | 119 | cw.push_back(ww); 120 | 121 | while ((n_itr != n_end) && (n_itr->way_id <= ww.id)) { 122 | if ((n_itr->version == ww.version) && (n_itr->way_id == ww.id)) { 123 | cwn.push_back(*n_itr); 124 | } 125 | ++n_itr; 126 | } 127 | 128 | while ((t_itr != t_end) && (t_itr->element_id <= ww.id)) { 129 | if ((t_itr->version == ww.version) && (t_itr->element_id == ww.id)) { 130 | ct.push_back(*t_itr); 131 | } 132 | ++t_itr; 133 | } 134 | } 135 | } 136 | 137 | // push to the underlying writer 138 | m_writer->ways(cw, cwn, ct); 139 | 140 | // and save the last way for next time 141 | if (!ws.empty()) { 142 | if (!m_left_over_ways) { m_left_over_ways = left_over_ways(); } 143 | const way &ww = ws[ws.size()-1]; 144 | m_left_over_ways->w = ww; 145 | m_left_over_ways->nodes.clear(); 146 | m_left_over_ways->tags.clear(); 147 | 148 | while ((n_itr != n_end) && (n_itr->way_id <= ww.id)) { 149 | if ((n_itr->version == ww.version) && (n_itr->way_id == ww.id)) { 150 | m_left_over_ways->nodes.push_back(*n_itr); 151 | } 152 | ++n_itr; 153 | } 154 | 155 | while ((t_itr != t_end) && (t_itr->element_id <= ww.id)) { 156 | if ((t_itr->version == ww.version) && (t_itr->element_id == ww.id)) { 157 | 
m_left_over_ways->tags.push_back(*t_itr); 158 | } 159 | ++t_itr; 160 | } 161 | } else { 162 | m_left_over_ways = boost::none; 163 | } 164 | } 165 | 166 | template 167 | void history_filter::relations(const std::vector &rs, const std::vector &rms, const std::vector &ts) { 168 | std::vector cr; 169 | std::vector crm; 170 | std::vector ct; 171 | 172 | // if there are any ways left over, finish them now 173 | if (m_left_over_ways) { 174 | std::vector ws; std::vector wns; std::vector wts; 175 | ways(ws, wns, wts); 176 | } 177 | 178 | // handle a left over relation, but only if its version list doesn't continue into 179 | // this block - if it does, then we can ignore the left over one. 180 | if (m_left_over_relations && (rs.empty() || (rs[0].id > m_left_over_relations->r.id))) { 181 | if (m_left_over_relations->r.visible) { 182 | cr.push_back(m_left_over_relations->r); 183 | std::swap(m_left_over_relations->members, crm); 184 | std::swap(m_left_over_relations->tags, ct); 185 | } 186 | } 187 | 188 | std::vector::const_iterator m_itr = rms.begin(); 189 | const std::vector::const_iterator m_end = rms.end(); 190 | std::vector::const_iterator t_itr = ts.begin(); 191 | const std::vector::const_iterator t_end = ts.end(); 192 | 193 | for (size_t i = 1; i < rs.size(); ++i) { 194 | if (rs[i].id > rs[i-1].id) { 195 | const relation &rr = rs[i-1]; 196 | // if the relation is deleted, we don't want it in the non-history 197 | // file, so skip to the next item. 
198 | if (!rr.visible) { continue; } 199 | 200 | cr.push_back(rr); 201 | 202 | while ((m_itr != m_end) && (m_itr->relation_id <= rr.id)) { 203 | if ((m_itr->version == rr.version) && (m_itr->relation_id == rr.id)) { 204 | crm.push_back(*m_itr); 205 | } 206 | ++m_itr; 207 | } 208 | 209 | while ((t_itr != t_end) && (t_itr->element_id <= rr.id)) { 210 | if ((t_itr->version == rr.version) && (t_itr->element_id == rr.id)) { 211 | ct.push_back(*t_itr); 212 | } 213 | ++t_itr; 214 | } 215 | } 216 | } 217 | 218 | // push to the underlying writer 219 | m_writer->relations(cr, crm, ct); 220 | 221 | // and save the last relation for next time 222 | if (!rs.empty()) { 223 | if (!m_left_over_relations) { m_left_over_relations = left_over_relations(); } 224 | const relation &rr = rs[rs.size()-1]; 225 | m_left_over_relations->r = rr; 226 | m_left_over_relations->members.clear(); 227 | m_left_over_relations->tags.clear(); 228 | 229 | while ((m_itr != m_end) && (m_itr->relation_id <= rr.id)) { 230 | if ((m_itr->version == rr.version) && (m_itr->relation_id == rr.id)) { 231 | m_left_over_relations->members.push_back(*m_itr); 232 | } 233 | ++m_itr; 234 | } 235 | 236 | while ((t_itr != t_end) && (t_itr->element_id <= rr.id)) { 237 | if ((t_itr->version == rr.version) && (t_itr->element_id == rr.id)) { 238 | m_left_over_relations->tags.push_back(*t_itr); 239 | } 240 | ++t_itr; 241 | } 242 | } else { 243 | m_left_over_relations = boost::none; 244 | } 245 | } 246 | 247 | template 248 | void history_filter::finish() { 249 | // if there are any left over relations, finish them now. 
250 | if (m_left_over_relations) { 251 | std::vector rs; std::vector rms; std::vector rts; 252 | relations(rs, rms, rts); 253 | } 254 | 255 | // finish the underlying output writer 256 | m_writer->finish(); 257 | } 258 | 259 | template struct history_filter; 260 | template struct history_filter; 261 | -------------------------------------------------------------------------------- /src/insert_kv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "insert_kv.hpp" 13 | #include "types.hpp" 14 | #include "time_epoch.hpp" 15 | 16 | namespace bt = boost::posix_time; 17 | namespace bf = boost::fusion; 18 | 19 | namespace { 20 | 21 | struct unapp_item { 22 | typedef int result_type; 23 | 24 | unapp_item(std::istream &i) : in(i) {} 25 | 26 | int operator()(int, bool &b) const { 27 | char c; 28 | in.read(&c, 1); 29 | b = c != 0; 30 | return 0; 31 | } 32 | 33 | int operator()(int, int16_t &i) const { 34 | uint16_t ii; 35 | in.read((char *)(&ii), sizeof(int16_t)); 36 | i = be16toh(ii); 37 | return 0; 38 | } 39 | 40 | int operator()(int, int32_t &i) const { 41 | uint32_t ii; 42 | in.read((char *)(&ii), sizeof(int32_t)); 43 | i = be32toh(ii); 44 | return 0; 45 | } 46 | 47 | int operator()(int, int64_t &i) const { 48 | uint64_t ii; 49 | in.read((char *)(&ii), sizeof(int64_t)); 50 | i = be64toh(ii); 51 | return 0; 52 | } 53 | 54 | int operator()(int, uint16_t &i) const { 55 | uint16_t ii; 56 | in.read((char *)(&ii), sizeof(uint16_t)); 57 | i = be16toh(ii); 58 | return 0; 59 | } 60 | 61 | int operator()(int, uint32_t &i) const { 62 | uint32_t ii; 63 | in.read((char *)(&ii), sizeof(uint32_t)); 64 | i = be32toh(ii); 65 | return 0; 66 | } 67 | 68 | int operator()(int, uint64_t &i) const { 69 | uint64_t ii; 70 | in.read((char *)(&ii), sizeof(uint64_t)); 71 | i = be64toh(ii); 72 | return 0; 73 | } 74 | 75 
| int operator()(int, double &d) const { 76 | in.read((char *)(&d), sizeof(double)); 77 | return 0; 78 | } 79 | 80 | int operator()(int, std::string &s) const { 81 | s.clear(); 82 | unsigned char c = 0; 83 | 84 | for (in.read((char *)&c, 1); c != '\0'; in.read((char *)&c, 1)) { 85 | s.push_back(c); 86 | } 87 | 88 | return 0; 89 | } 90 | 91 | int operator()(int, bt::ptime &t) const { 92 | uint32_t dt; 93 | operator()(0, dt); 94 | t = time_epoch + bt::seconds(dt); 95 | return 0; 96 | } 97 | 98 | template 99 | int operator()(int, boost::optional &o) const { 100 | char c; 101 | in.get(c); 102 | if (c == 0) { 103 | o = boost::none; 104 | } else { 105 | T t; 106 | operator()(0, t); 107 | o = t; 108 | } 109 | return 0; 110 | } 111 | 112 | int operator()(int, user_status_enum &e) const { 113 | char c; 114 | in.read(&c, 1); 115 | e = user_status_enum(c); 116 | return 0; 117 | } 118 | 119 | int operator()(int, format_enum &e) const { 120 | char c; 121 | in.read(&c, 1); 122 | e = format_enum(c); 123 | return 0; 124 | } 125 | 126 | int operator()(int, nwr_enum &e) const { 127 | char c; 128 | in.read(&c, 1); 129 | e = nwr_enum(c); 130 | return 0; 131 | } 132 | 133 | std::istream ∈ 134 | }; 135 | 136 | template 137 | void from_binary(const slice_t &s, T &t) { 138 | std::istringstream in(s); 139 | bf::fold(t, 0, unapp_item(in)); 140 | } 141 | 142 | } // anonymous namespace 143 | 144 | template 145 | void insert_kv(T &t, const slice_t &key, const slice_t &val) { 146 | static const int num_keys = T::num_keys; 147 | typedef typename bf::result_of::begin::type it_begin; 148 | typedef typename bf::result_of::end::type it_end; 149 | typedef typename bf::result_of::advance_c::type it_key; 150 | 151 | it_begin v_begin(t, 0); 152 | it_key v_key(t, 0); 153 | it_end v_end(t, 0); 154 | 155 | bf::iterator_range key_range(v_begin, v_key); 156 | bf::iterator_range val_range(v_key, v_end); 157 | 158 | from_binary(key, key_range); 159 | from_binary(val, val_range); 160 | } 161 | 162 | template 
void insert_kv(user &, const slice_t &, const slice_t &); 163 | template void insert_kv(changeset &, const slice_t &, const slice_t &); 164 | template void insert_kv(current_tag &, const slice_t &, const slice_t &); 165 | template void insert_kv(old_tag &, const slice_t &, const slice_t &); 166 | template void insert_kv(node &, const slice_t &, const slice_t &); 167 | template void insert_kv(way &, const slice_t &, const slice_t &); 168 | template void insert_kv(way_node &, const slice_t &, const slice_t &); 169 | template void insert_kv(relation &, const slice_t &, const slice_t &); 170 | template void insert_kv(relation_member &, const slice_t &, const slice_t &); 171 | template void insert_kv(changeset_comment &, const slice_t &, const slice_t &); 172 | -------------------------------------------------------------------------------- /src/output_writer.cpp: -------------------------------------------------------------------------------- 1 | #include "output_writer.hpp" 2 | 3 | output_writer::~output_writer() { 4 | } 5 | 6 | 7 | -------------------------------------------------------------------------------- /src/planet-dump.cpp: -------------------------------------------------------------------------------- 1 | #include "copy_elements.hpp" 2 | #include "dump_archive.hpp" 3 | #include "output_writer.hpp" 4 | #include "xml_writer.hpp" 5 | #include "pbf_writer.hpp" 6 | #include "history_filter.hpp" 7 | #include "changeset_filter.hpp" 8 | #include "config.h" 9 | #include "writer_common.hpp" 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace bt = boost::posix_time; 22 | namespace po = boost::program_options; 23 | 24 | /** 25 | * get command line options, handle --help and usage, validate options. 
26 | */ 27 | static void get_options(int argc, char **argv, po::variables_map &vm) { 28 | std::string meta_file; 29 | 30 | po::options_description desc(PACKAGE_STRING ": Allowed options"); 31 | 32 | desc.add_options() 33 | ("help,h", "display help text and exit") 34 | ("compress-command,c", po::value()->default_value("bzip2 -c"), 35 | "program used to compress XML output, must read from stdin and write to stdout") 36 | ("xml,x", po::value(), "planet XML output file (without history)") 37 | ("history-xml,X", po::value(), "history XML output file") 38 | ("pbf,p", po::value(), "planet PBF output file (without history)") 39 | ("history-pbf,P", po::value(), "history PBF output file") 40 | ("changesets,C", po::value(), "changeset XML output file") 41 | ("changeset-discussions,D", po::value(), 42 | "changeset discussions XML output file") 43 | ("xml-no-userinfo", po::value(), "planet XML output file (without history or user data)") 44 | ("history-xml-no-userinfo", po::value(), "history XML output file (without user data)") 45 | ("pbf-no-userinfo", po::value(), "planet PBF output file (without history or user data)") 46 | ("history-pbf-no-userinfo", po::value(), "history PBF output file (without user data)") 47 | ("changesets-no-userinfo", po::value(), "changeset XML output file (without user data)") 48 | ("changeset-discussions-no-userinfo", po::value(), 49 | "changeset discussions XML output file (without user data)") 50 | ("dense-nodes,d", po::value()->default_value("true"), "use dense nodes for PBF output") 51 | ("dump-file,f", po::value(), "PostgreSQL table dump to read") 52 | ("generator", po::value()->default_value(PACKAGE_STRING), 53 | "Override the generator string used by the program. Used by the tests to " 54 | "ensure consistent output, probably shouldn't be used in normal usage.") 55 | ("resume", "If this argument is present, then planet-dump-ng will attempt " 56 | "to resume processing from partial data. 
If not present, then it will " 57 | "start from scratch.") 58 | ("max-concurrency", po::value()->default_value(16), 59 | "Maximum number of disk writing threads to run for *each* table.") 60 | ("meta-file,M", po::value(&meta_file), "data metainfo configuration file") 61 | ; 62 | 63 | po::options_description meta; 64 | 65 | meta.add_options() 66 | ("meta-author", po::value()->default_value(OSM_COPYRIGHT_TEXT), "author data metainfo") 67 | ("meta-source", po::value()->default_value(OSM_API_ORIGIN), "source data metainfo") 68 | ("meta-copyleft", po::value()->default_value(OSM_LICENSE_TEXT), "copyleft data metainfo") 69 | ("meta-attribution", po::value()->default_value(OSM_ATTRIBUTION_TEXT), "attribution data metainfo") 70 | ; 71 | 72 | po::options_description optns; 73 | optns.add(desc).add(meta); 74 | 75 | po::store(po::parse_command_line(argc, argv, optns), vm); 76 | po::notify(vm); 77 | 78 | if (vm.count("help")) { 79 | std::cout << desc << std::endl; 80 | exit(0); 81 | } 82 | 83 | if (vm.count("dump-file") == 0) { 84 | BOOST_THROW_EXCEPTION(std::runtime_error("A PostgreSQL table dump file (--dump-file) must be provided.")); 85 | } 86 | 87 | if ((vm.count("xml") + vm.count("history-xml") + 88 | vm.count("pbf") + vm.count("history-pbf") + 89 | vm.count("changesets") + vm.count("changeset-discussions") + 90 | vm.count("xml-no-userinfo") + vm.count("history-xml-no-userinfo") + 91 | vm.count("pbf-no-userinfo") + vm.count("history-pbf-no-userinfo") + 92 | vm.count("changesets-no-userinfo") + vm.count("changeset-discussions-no-userinfo")) == 0) { 93 | std::cerr << 94 | "No output file provided! 
You must provide one or more of " 95 | "--xml, --history-xml, --pbf, --history-pbf, --changesets, " 96 | "--changeset-discussions (or the respective -no-userinfo options) to get output.\n\n"; 97 | std::cerr << desc << std::endl; 98 | exit(1); 99 | } 100 | 101 | if (vm.count("meta-file")) { 102 | std::ifstream ifs(meta_file.c_str()); 103 | if (!ifs) 104 | { 105 | std::cerr << "Can not open metainfo file: " << meta_file << "\n"; // fatal diagnostic: report on stderr like the other errors in this function 106 | exit(1); 107 | } 108 | else 109 | { 110 | po::store(po::parse_config_file(ifs, optns), vm); 111 | po::notify(vm); 112 | } 113 | } 114 | } 115 | 116 | /** 117 | * read the dump file in parallel to get all of the elements into on-disk 118 | * databases. this is primarily so that the data is sorted, which is not 119 | * guaranteed in the PostgreSQL dump file. returns the maximum time seen 120 | * in a timestamp of any element in the dump file. 121 | */ 122 | bt::ptime setup_databases(const std::string &dump_file, bool resume, unsigned int max_concurrency) { 123 | std::list > threads; 124 | 125 | #define THREAD_RUN(type,table) threads.push_back(boost::make_shared >(table, dump_file, resume, max_concurrency)) 126 | 127 | THREAD_RUN(changeset, "changesets"); 128 | THREAD_RUN(node, "nodes"); 129 | THREAD_RUN(way, "ways"); 130 | THREAD_RUN(relation, "relations"); 131 | 132 | THREAD_RUN(current_tag, "changeset_tags"); 133 | THREAD_RUN(old_tag, "node_tags"); 134 | THREAD_RUN(old_tag, "way_tags"); 135 | THREAD_RUN(old_tag, "relation_tags"); 136 | THREAD_RUN(way_node, "way_nodes"); 137 | THREAD_RUN(relation_member, "relation_members"); 138 | 139 | THREAD_RUN(user, "users"); 140 | THREAD_RUN(changeset_comment, "changeset_comments"); 141 | 142 | #undef THREAD_RUN 143 | 144 | bt::ptime max_time(bt::neg_infin); 145 | BOOST_FOREACH(boost::shared_ptr &thr, threads) { 146 | max_time = std::max(max_time, thr->join()); 147 | thr.reset(); 148 | } 149 | threads.clear(); 150 | 151 | return max_time; 152 | } 153 | 154 | int main(int argc, char *argv[]) { 155 | 
try { 156 | po::variables_map options; 157 | get_options(argc, argv, options); 158 | 159 | #if BOOST_VERSION < 107300 160 | // workaround for https://svn.boost.org/trac/boost/ticket/5638 161 | boost::gregorian::greg_month::get_month_map_ptr(); 162 | #endif 163 | 164 | // extract data from the dump file for the "sorted" data tables, like nodes, 165 | // ways, relations, changesets and their associated tags, etc... 166 | const bool resume = options.count("resume") > 0; 167 | unsigned int max_concurrency = options["max-concurrency"].as(); 168 | const std::string dump_file(options["dump-file"].as()); 169 | const bt::ptime max_time = setup_databases(dump_file, resume, max_concurrency); 170 | 171 | // users aren't dumped directly to the files. we only use them to build up a map 172 | // of uid -> name where a missing uid indicates that the user doesn't have public 173 | // data. 174 | std::map display_name_map; 175 | extract_users(display_name_map); 176 | 177 | // build up a list of writers. these will be written to in parallel, which is 178 | // mildly wasteful if there's just one output type, but works great when all of 179 | // the output types are being used. 
180 | std::vector > writers; 181 | if (options.count("history-xml")) { 182 | std::string output_file = options["history-xml"].as(); 183 | writers.push_back(boost::shared_ptr(new xml_writer(output_file, options, 184 | display_name_map, max_time, user_info_level::FULL, historical_versions::FULL, changeset_discussions::NONE))); 185 | } 186 | if (options.count("history-xml-no-userinfo")) { 187 | std::string output_file = options["history-xml-no-userinfo"].as(); 188 | writers.push_back(boost::shared_ptr(new xml_writer(output_file, options, 189 | display_name_map, max_time, user_info_level::ANON, historical_versions::FULL, changeset_discussions::NONE))); 190 | } 191 | if (options.count("history-pbf")) { 192 | std::string output_file = options["history-pbf"].as(); 193 | writers.push_back(boost::shared_ptr(new pbf_writer(output_file, options, 194 | display_name_map, max_time, user_info_level::FULL, historical_versions::FULL, changeset_discussions::NONE))); 195 | } 196 | if (options.count("history-pbf-no-userinfo")) { 197 | std::string output_file = options["history-pbf-no-userinfo"].as(); 198 | writers.push_back(boost::shared_ptr(new pbf_writer(output_file, options, 199 | display_name_map, max_time, user_info_level::ANON, historical_versions::FULL, changeset_discussions::NONE))); 200 | } 201 | if (options.count("xml")) { 202 | std::string output_file = options["xml"].as(); 203 | writers.push_back(boost::shared_ptr(new history_filter(output_file, options, 204 | display_name_map, max_time, user_info_level::FULL, historical_versions::NONE, changeset_discussions::NONE))); 205 | } 206 | if (options.count("xml-no-userinfo")) { 207 | std::string output_file = options["xml-no-userinfo"].as(); 208 | writers.push_back(boost::shared_ptr(new history_filter(output_file, options, 209 | display_name_map, max_time, user_info_level::ANON, historical_versions::NONE, changeset_discussions::NONE))); 210 | } 211 | if (options.count("pbf")) { 212 | std::string output_file = options["pbf"].as(); 
213 | writers.push_back(boost::shared_ptr(new history_filter(output_file, options, 214 | display_name_map, max_time, user_info_level::FULL, historical_versions::NONE, changeset_discussions::NONE))); 215 | } 216 | if (options.count("pbf-no-userinfo")) { 217 | std::string output_file = options["pbf-no-userinfo"].as(); 218 | writers.push_back(boost::shared_ptr(new history_filter(output_file, options, 219 | display_name_map, max_time, user_info_level::ANON, historical_versions::NONE, changeset_discussions::NONE))); 220 | } 221 | if (options.count("changesets")) { 222 | std::string output_file = options["changesets"].as(); 223 | writers.push_back(boost::shared_ptr(new changeset_filter(output_file, options, 224 | display_name_map, max_time, user_info_level::FULL, historical_versions::NONE, changeset_discussions::NONE))); 225 | } 226 | if (options.count("changesets-no-userinfo")) { 227 | std::string output_file = options["changesets-no-userinfo"].as(); 228 | writers.push_back(boost::shared_ptr(new changeset_filter(output_file, options, 229 | display_name_map, max_time, user_info_level::ANON, historical_versions::NONE, changeset_discussions::NONE))); 230 | } 231 | if (options.count("changeset-discussions")) { 232 | std::string output_file = options["changeset-discussions"].as(); 233 | writers.push_back(boost::shared_ptr(new changeset_filter(output_file, options, 234 | display_name_map, max_time, user_info_level::FULL, historical_versions::NONE, changeset_discussions::FULL))); 235 | } 236 | if (options.count("changeset-discussions-no-userinfo")) { 237 | std::string output_file = options["changeset-discussions-no-userinfo"].as(); 238 | writers.push_back(boost::shared_ptr(new changeset_filter(output_file, options, 239 | display_name_map, max_time, user_info_level::ANON, historical_versions::NONE, changeset_discussions::FULL))); 240 | } 241 | 242 | std::cerr << "Writing changesets..." << std::endl; 243 | run_threads(writers); 244 | std::cerr << "Writing nodes..." 
<< std::endl; 245 | run_threads(writers); 246 | std::cerr << "Writing ways..." << std::endl; 247 | run_threads(writers); 248 | std::cerr << "Writing relations..." << std::endl; 249 | run_threads(writers); 250 | 251 | // tell writers to clean up - write finals, close files, that sort of thing 252 | BOOST_FOREACH(boost::shared_ptr writer, writers) { 253 | writer->finish(); 254 | } 255 | std::cerr << "Done" << std::endl; 256 | 257 | } catch (const boost::exception &e) { 258 | std::cerr << "EXCEPTION: " << boost::current_exception_diagnostic_information() << "\n"; 259 | return 1; 260 | 261 | } catch (const std::exception &e) { 262 | std::cerr << "EXCEPTION: " << boost::current_exception_diagnostic_information() << "\n"; 263 | return 1; 264 | 265 | } catch (...) { 266 | std::cerr << "UNEXPLAINED ERROR\n"; 267 | return 1; 268 | } 269 | 270 | return 0; 271 | } 272 | -------------------------------------------------------------------------------- /src/time_epoch.cpp: -------------------------------------------------------------------------------- 1 | #include "time_epoch.hpp" 2 | 3 | namespace bt = boost::posix_time; 4 | 5 | // set epoch as midnight Jan 1 2004 6 | const bt::ptime time_epoch(boost::gregorian::date(2004, 1, 1), bt::time_duration(0, 0, 0)); 7 | -------------------------------------------------------------------------------- /src/types.cpp: -------------------------------------------------------------------------------- 1 | #include "types.hpp" 2 | 3 | namespace { 4 | 5 | const char *user_column_names_[] = { "id", "display_name", "data_public" }; 6 | const char *changeset_column_names_[] = { "id", "user_id", "created_at", "min_lat", "max_lat", "min_lon", "max_lon", "closed_at", "num_changes" }; 7 | const char *current_tag_column_names_[] = { "*", "k", "v" }; 8 | const char *old_tag_column_names_[] = { "*", "version", "k", "v" }; 9 | const char *node_column_names_[] = { "node_id", "version", "changeset_id", "visible", "timestamp", "redaction_id", "latitude", 
"longitude" }; 10 | const char *way_column_names_[] = { "way_id", "version", "changeset_id", "visible", "timestamp", "redaction_id" }; 11 | const char *way_node_column_names_[] = { "way_id", "version", "sequence_id", "node_id" }; 12 | const char *relation_column_names_[] = { "relation_id", "version", "changeset_id", "visible", "timestamp", "redaction_id" }; 13 | const char *relation_member_column_names_[] = { "relation_id", "version", "sequence_id", "member_type", "member_id", "member_role" }; 14 | const char *changeset_comment_column_names_[] = { "changeset_id", "created_at", "author_id", "body", "visible" }; 15 | 16 | const std::vector user_column_names = std::vector(user_column_names_, user_column_names_ + sizeof(user_column_names_) / sizeof(*user_column_names_)); 17 | const std::vector changeset_column_names = std::vector(changeset_column_names_, changeset_column_names_ + sizeof(changeset_column_names_) / sizeof(*changeset_column_names_)); 18 | const std::vector current_tag_column_names = std::vector(current_tag_column_names_, current_tag_column_names_ + sizeof(current_tag_column_names_) / sizeof(*current_tag_column_names_)); 19 | const std::vector old_tag_column_names = std::vector(old_tag_column_names_, old_tag_column_names_ + sizeof(old_tag_column_names_) / sizeof(*old_tag_column_names_)); 20 | const std::vector node_column_names = std::vector(node_column_names_, node_column_names_ + sizeof(node_column_names_) / sizeof(*node_column_names_)); 21 | const std::vector way_column_names = std::vector(way_column_names_, way_column_names_ + sizeof(way_column_names_) / sizeof(*way_column_names_)); 22 | const std::vector way_node_column_names = std::vector(way_node_column_names_, way_node_column_names_ + sizeof(way_node_column_names_) / sizeof(*way_node_column_names_)); 23 | const std::vector relation_column_names = std::vector(relation_column_names_, relation_column_names_ + sizeof(relation_column_names_) / sizeof(*relation_column_names_)); 24 | const std::vector 
relation_member_column_names = std::vector(relation_member_column_names_, relation_member_column_names_ + sizeof(relation_member_column_names_) / sizeof(*relation_member_column_names_)); 25 | const std::vector changeset_comment_column_names = std::vector(changeset_comment_column_names_, changeset_comment_column_names_ + sizeof(changeset_comment_column_names_) / sizeof(*changeset_comment_column_names_)); 26 | 27 | } // anonymous namespace 28 | // accessors returning the column-name vectors defined in the anonymous namespace of this file 29 | const std::vector &user::column_names() { return user_column_names; } 30 | const std::vector &changeset::column_names() { return changeset_column_names; } 31 | const std::vector &current_tag::column_names() { return current_tag_column_names; } 32 | const std::vector &old_tag::column_names() { return old_tag_column_names; } 33 | const std::vector &node::column_names() { return node_column_names; } 34 | const std::vector &way::column_names() { return way_column_names; } 35 | const std::vector &way_node::column_names() { return way_node_column_names; } 36 | const std::vector &relation::column_names() { return relation_column_names; } 37 | const std::vector &relation_member::column_names() { return relation_member_column_names; } 38 | const std::vector &changeset_comment::column_names() { return changeset_comment_column_names; } 39 | 40 | const std::string changeset::table_name() { return "changesets"; } 41 | const std::string changeset::tag_table_name() { return "changeset_tags"; } 42 | const std::string changeset::inner_table_name() { return "changeset_comments"; } 43 | 44 | const std::string node::table_name() { return "nodes"; } 45 | const std::string node::tag_table_name() { return "node_tags"; } 46 | const std::string node::inner_table_name() { return ""; } 47 | 48 | const std::string way::table_name() { return "ways"; } 49 | const std::string way::tag_table_name() { return "way_tags"; } 50 | const std::string way::inner_table_name() { return "way_nodes"; } 51 | 52 | const std::string relation::table_name() { return 
"relations"; } 53 | const std::string relation::tag_table_name() { return "relation_tags"; } 54 | const std::string relation::inner_table_name() { return "relation_members"; } 55 | -------------------------------------------------------------------------------- /test/bad-character.dmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/bad-character.dmp -------------------------------------------------------------------------------- /test/changesets-badchar.xml.case/changesets.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/changesets-badchar.xml.case/changesets.osm.bz2 -------------------------------------------------------------------------------- /test/changesets-badchar.xml.case/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $1/planet-dump-ng --generator "planet-dump-ng test X.Y.Z" --changesets changesets.osm.bz2 --dump-file $1/test/bad-character.dmp 4 | -------------------------------------------------------------------------------- /test/changesets-empty.xml.case/changesets.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/changesets-empty.xml.case/changesets.osm.bz2 -------------------------------------------------------------------------------- /test/changesets-empty.xml.case/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $1/planet-dump-ng --generator "planet-dump-ng test X.Y.Z" --changesets changesets.osm.bz2 --dump-file $1/test/empty.dmp 4 | 
-------------------------------------------------------------------------------- /test/changesets.xml.case/changesets-no-userinfo.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/changesets.xml.case/changesets-no-userinfo.osm.bz2 -------------------------------------------------------------------------------- /test/changesets.xml.case/changesets.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/changesets.xml.case/changesets.osm.bz2 -------------------------------------------------------------------------------- /test/changesets.xml.case/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $1/planet-dump-ng --generator "planet-dump-ng test X.Y.Z" --changesets changesets.osm.bz2 --changesets-no-userinfo changesets-no-userinfo.osm.bz2 --dump-file $1/test/liechtenstein-2013-08-03.dmp 4 | -------------------------------------------------------------------------------- /test/discussions-badchar.xml.case/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $1/planet-dump-ng --generator "planet-dump-ng test X.Y.Z" --changeset-discussions discussions.osm.bz2 --dump-file $1/test/bad-character.dmp 4 | -------------------------------------------------------------------------------- /test/discussions-badchar.xml.case/discussions.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/discussions-badchar.xml.case/discussions.osm.bz2 -------------------------------------------------------------------------------- 
/test/discussions-long-comment.xml.case/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $1/planet-dump-ng --generator "planet-dump-ng test X.Y.Z" --changeset-discussions discussions.osm.bz2 --dump-file $1/test/long-changeset-comment.dmp 4 | -------------------------------------------------------------------------------- /test/discussions-long-comment.xml.case/discussions.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/discussions-long-comment.xml.case/discussions.osm.bz2 -------------------------------------------------------------------------------- /test/discussions.xml.case/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $1/planet-dump-ng --generator "planet-dump-ng test X.Y.Z" --changeset-discussions discussions.osm.bz2 --changeset-discussions-no-userinfo discussions-no-userinfo.osm.bz2 --dump-file $1/test/liechtenstein-2013-08-03.dmp 4 | -------------------------------------------------------------------------------- /test/discussions.xml.case/discussions-no-userinfo.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/discussions.xml.case/discussions-no-userinfo.osm.bz2 -------------------------------------------------------------------------------- /test/discussions.xml.case/discussions.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/discussions.xml.case/discussions.osm.bz2 -------------------------------------------------------------------------------- /test/empty.dmp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/empty.dmp -------------------------------------------------------------------------------- /test/history.pbf.case/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $1/planet-dump-ng --generator "planet-dump-ng test X.Y.Z" --history-pbf history.osm.pbf --dump-file $1/test/liechtenstein-2013-08-03.dmp 4 | -------------------------------------------------------------------------------- /test/history.pbf.case/history-no-userinfo.osm.pbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/history.pbf.case/history-no-userinfo.osm.pbf -------------------------------------------------------------------------------- /test/history.pbf.case/history.osm.pbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/history.pbf.case/history.osm.pbf -------------------------------------------------------------------------------- /test/history.xml.case/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $1/planet-dump-ng --generator "planet-dump-ng test X.Y.Z" --history-xml history.osm.bz2 --history-xml-no-userinfo history-no-userinfo.osm.bz2 --dump-file $1/test/liechtenstein-2013-08-03.dmp 4 | -------------------------------------------------------------------------------- /test/history.xml.case/history-no-userinfo.osm.bz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/history.xml.case/history-no-userinfo.osm.bz2 -------------------------------------------------------------------------------- /test/history.xml.case/history.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/history.xml.case/history.osm.bz2 -------------------------------------------------------------------------------- /test/liechtenstein-2013-08-03.dmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/liechtenstein-2013-08-03.dmp -------------------------------------------------------------------------------- /test/long-changeset-comment.dmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/long-changeset-comment.dmp -------------------------------------------------------------------------------- /test/planet.pbf.case/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $1/planet-dump-ng --generator "planet-dump-ng test X.Y.Z" --pbf planet.osm.pbf --dump-file $1/test/liechtenstein-2013-08-03.dmp 4 | -------------------------------------------------------------------------------- /test/planet.pbf.case/planet-no-userinfo.osm.pbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/planet.pbf.case/planet-no-userinfo.osm.pbf -------------------------------------------------------------------------------- /test/planet.pbf.case/planet.osm.pbf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/planet.pbf.case/planet.osm.pbf -------------------------------------------------------------------------------- /test/planet.xml.case/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $1/planet-dump-ng --generator "planet-dump-ng test X.Y.Z" --xml planet.osm.bz2 --xml-no-userinfo planet-no-userinfo.osm.bz2 --dump-file $1/test/liechtenstein-2013-08-03.dmp 4 | -------------------------------------------------------------------------------- /test/planet.xml.case/planet-no-userinfo.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/planet.xml.case/planet-no-userinfo.osm.bz2 -------------------------------------------------------------------------------- /test/planet.xml.case/planet.osm.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerebubuth/planet-dump-ng/fe191b9980daed8c6917251de95853d2c4b518fe/test/planet.xml.case/planet.osm.bz2 -------------------------------------------------------------------------------- /test/test-case-runner.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | TMPDIR=`mktemp -d 2>/dev/null || mktemp -d -t 'planet-dump-ng-tmp'` 6 | SRCDIR=$(pwd) 7 | 8 | function cleanup { 9 | cd $SRCDIR 10 | rm -rf $TMPDIR 11 | } 12 | 13 | trap cleanup EXIT 14 | 15 | test_case=$1 16 | if [ ! -d "$test_case" ]; then 17 | echo "Test case directory '$test_case' does not exist." 1>&2 18 | exit 1 19 | fi 20 | if [ ! -x "$test_case/cmd.sh" ]; then 21 | echo "Test case does not contain command to run." 
1>&2 22 | exit 1 23 | fi 24 | 25 | # switch to the temporary directory 26 | cd "$TMPDIR" 27 | 28 | # run the test case; it is tested inside `if` because `set -e` would otherwise abort the script before the message is printed 29 | if ! "$SRCDIR/$test_case/cmd.sh" "$SRCDIR"; then 30 | # diagnostics go to stderr 31 | echo "Failed to run test case." 1>&2 32 | exit 1 33 | fi 34 | 35 | # compare the outputs 36 | for name in *.bz2; do 37 | if [ -f "$name" ]; then 38 | bunzip2 -dc "$name" > actual 39 | bunzip2 -dc "$SRCDIR/$test_case/$name" > expected 40 | # compare inside `if` so a mismatch reaches the message instead of tripping `set -e` 41 | if ! cmp actual expected; then 42 | echo "Output '$name' does not match '$SRCDIR/$test_case/$name'" 43 | exit 1 44 | fi 45 | fi 46 | done 47 | for name in *.pbf; do 48 | if [ -f "$name" ]; then 49 | # compare inside `if` so a mismatch reaches the message instead of tripping `set -e` 50 | if ! cmp "$name" "$SRCDIR/$test_case/$name"; then 51 | echo "Output '$name' does not match '$SRCDIR/$test_case/$name'" 52 | exit 1 53 | fi 54 | fi 55 | done 56 | 57 | cd "$OLDPWD" 58 | exit 0 59 | --------------------------------------------------------------------------------