├── .circleci └── config.yml ├── .github ├── docker-compose.yml ├── pull_request_template.md └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.org ├── LICENSE ├── README.md ├── RELEASING.md ├── bench └── src │ └── main │ └── scala │ └── vectorpipe │ └── Bench.scala ├── build.sbt ├── data ├── 8shapedmultipolygon.osm ├── diomede.osm ├── india-pakistan.osm ├── linestring.mvt ├── onepoint.mvt ├── polygon.mvt ├── quarry-rock.osm └── roads.mvt ├── project ├── Dependencies.scala ├── Version.scala ├── assembly.sbt ├── build.properties └── plugins.sbt ├── sbt ├── scripts ├── cibuild ├── cipublish └── test └── src ├── main ├── resources │ ├── META-INF │ │ └── services │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ └── microsite │ │ └── data │ │ └── menu.yml ├── scala │ └── vectorpipe │ │ ├── OSM.scala │ │ ├── VectorPipe.scala │ │ ├── examples │ │ ├── AugmentedDiffProcessor.scala │ │ ├── AugmentedDiffStreamProcessor.scala │ │ ├── ChangeProcessor.scala │ │ ├── ChangeStreamProcessor.scala │ │ ├── ChangesetProcessor.scala │ │ └── ChangesetStreamProcessor.scala │ │ ├── functions │ │ ├── osm │ │ │ └── package.scala │ │ └── package.scala │ │ ├── internal │ │ └── package.scala │ │ ├── model │ │ ├── Actions.scala │ │ ├── AugmentedDiff.scala │ │ ├── Change.scala │ │ ├── Changeset.scala │ │ ├── ChangesetComment.scala │ │ ├── ElementWithSequence.scala │ │ ├── Member.scala │ │ └── Nd.scala │ │ ├── relations │ │ ├── MultiPolygons.scala │ │ ├── Routes.scala │ │ ├── package.scala │ │ └── utils │ │ │ ├── PartialCoordinateSequence.scala │ │ │ ├── ReversedCoordinateSequence.scala │ │ │ ├── VirtualCoordinateSequence.scala │ │ │ └── package.scala │ │ ├── sources │ │ ├── AugmentedDiffMicroBatchReader.scala │ │ ├── AugmentedDiffProvider.scala │ │ ├── AugmentedDiffReader.scala │ │ ├── AugmentedDiffSource.scala │ │ ├── ChangeMicroBatchReader.scala │ │ ├── ChangeProvider.scala │ │ ├── ChangeReader.scala │ │ ├── ChangeSource.scala │ │ ├── ChangesetMicroBatchReader.scala │ │ ├── ChangesetProvider.scala │ │ ├── ChangesetReader.scala │ │ ├── ChangesetSource.scala │ │ ├── ReplicationReader.scala │ │ ├── ReplicationStreamBatchReader.scala │ │ ├── ReplicationStreamMicroBatchReader.scala │ │ ├── SequenceOffset.scala │ │ └── Source.scala │ │ ├── util │ │ ├── Auth.scala │ │ ├── DBUtils.scala │ │ ├── Geocode.scala │ │ ├── Implicits.scala │ │ ├── JsonRobustFeatureCollection.scala │ │ ├── JsonRobustFeatureCollectionMap.scala │ │ ├── Resource.scala │ │ ├── RobustFeature.scala │ │ └── package.scala │ │ └── vectortile │ │ ├── Clipping.scala │ │ ├── Pipeline.scala │ │ ├── Simplify.scala │ │ ├── export │ │ └── package.scala │ │ └── package.scala └── tut │ ├── index.md │ ├── outputs.md │ ├── sources.md │ ├── usage.md │ └── usage │ ├── concepts.md │ ├── osm.md │ └── usage.md └── test ├── resources ├── .gitignore ├── isle-of-man-latest.osm.orc ├── log4j.properties ├── relation-110564.orc ├── relation-110564.wkt ├── relation-191199.orc ├── relation-191199.wkt ├── relation-191204.orc ├── relation-191204.wkt ├── relation-1949938.orc ├── relation-1949938.wkt ├── relation-2554903.orc ├── relation-2554903.wkt ├── relation-2580685.orc ├── relation-2580685.wkt ├── relation-3080946.orc ├── relation-3080946.wkt ├── relation-3105056.orc ├── relation-3105056.wkt ├── relation-333501.orc ├── relation-333501.wkt ├── relation-393502.orc ├── relation-393502.wkt ├── relation-5448156.orc ├── relation-5448156.wkt ├── relation-5448691.orc ├── relation-5448691.wkt ├── relation-5612959.orc ├── relation-5612959.wkt ├── 
relation-61315.orc ├── relation-61315.wkt ├── relation-6710544.orc ├── relation-6710544.wkt └── view │ ├── cluster-view.html │ └── layer-test.html └── scala └── vectorpipe ├── MultiPolygonRelationReconstructionSpec.scala ├── ProcessOSMTest.scala ├── TestEnvironment.scala ├── functions └── osm │ └── FunctionSpec.scala ├── sources └── AugmentedDiffSourceTest.scala └── vectortile ├── LayerTestPipeline.scala ├── PipelineSpec.scala ├── TestPipeline.scala └── WeightedCentroid.scala /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | aliases: 2 | - &restore_sbt_cache 3 | key: sbt-cache-{{ checksum "/tmp/scala_version" }} 4 | 5 | - &save_sbt_cache 6 | key: sbt-cache-{{ checksum "/tmp/scala_version" }}-{{ epoch }} 7 | paths: 8 | - "~/.ivy2/cache" 9 | - "~/.sbt" 10 | - "~/.cache/coursier" 11 | 12 | - &run_cibuild 13 | - checkout 14 | - run: echo "${SCALA_VERSION}" > /tmp/scala_version 15 | - restore_cache: *restore_sbt_cache 16 | - run: 17 | name: Executing cibuild 18 | command: ./scripts/cibuild 19 | - save_cache: *save_sbt_cache 20 | 21 | - &run_cipublish 22 | - checkout 23 | - run: echo "${SCALA_VERSION}" > /tmp/scala_version 24 | - restore_cache: *restore_sbt_cache 25 | - run: 26 | name: "Import signing key" 27 | command: | 28 | gpg --keyserver keyserver.ubuntu.com \ 29 | --recv-keys 0x13E9AA1D8153E95E && \ 30 | echo "${GPG_KEY}" | base64 -d > signing_key.asc && \ 31 | gpg --import signing_key.asc 32 | - run: 33 | name: Executing cipublish 34 | command: ./scripts/cipublish 35 | 36 | # Build environments 37 | - &machine-openjdk8-scala2_11_12-environment 38 | machine: 39 | image: ubuntu-1604:201903-01 40 | environment: 41 | SCALA_VERSION: 2.11.12 42 | 43 | - &openjdk8-scala2_11_12-environment 44 | docker: 45 | - image: circleci/openjdk:8-jdk 46 | environment: 47 | SCALA_VERSION: 2.11.12 48 | 49 | version: 2 50 | workflows: 51 | version: 2 52 | build: 53 | jobs: 54 | - "openjdk8-scala2.11.12": 55 | filters: # required since `openjdk8-scala2.11.12_deploy` has tag filters AND requires `openjdk8-scala2.11.12` 56 | tags: 57 | only: 58 | - /^(.*)$/ 59 | - "openjdk8-scala2.11.12_deploy": 60 | requires: 61 | - "openjdk8-scala2.11.12" 62 | filters: 63 | tags: 64 | only: 65 | - /^(.*)$/ 66 | 67 | jobs: 68 | # Execute cibuild in machine executor so we can use our existing 69 | # docker-compose test setup 70 | "openjdk8-scala2.11.12": 71 | <<: *machine-openjdk8-scala2_11_12-environment 72 | steps: *run_cibuild 73 | 74 | "openjdk8-scala2.11.12_deploy": 75 | <<: *openjdk8-scala2_11_12-environment 76 | steps: *run_cipublish 77 | -------------------------------------------------------------------------------- /.github/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | 3 | services: 4 | test: 5 | image: openjdk:8-jdk 6 | working_dir: /usr/local/src 7 | command: ./sbt ++$SCALA_VERSION test 8 | environment: 9 | - CI 10 | - SCALA_VERSION 11 | volumes: 12 | - ./../:/usr/local/src 13 | network_mode: host 14 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Brief description of what this PR does and why it's important 4 | 5 | ## Demo 6 | 7 | Optional. Screenshots, etc. 8 | 9 | ## Notes 10 | 11 | Optional. Extra context, ancillary topics, alternative strategies that didn't work out, etc. 
12 | 13 | ## Testing Instructions 14 | 15 | Optional. Include if there's more specifics than "CI tests should pass". 16 | 17 | ## Checklist 18 | 19 | - [ ] Add entry to CHANGELOG.md 20 | 21 | Closes #XXX 22 | 23 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | branches: ['**'] 6 | push: 7 | branches: ['master'] 8 | tags: [v*] 9 | # release: 10 | # types: [published] 11 | 12 | jobs: 13 | build: 14 | name: Build and Test 15 | strategy: 16 | matrix: 17 | scala: ["2.12.7"] 18 | runs-on: ubuntu-latest 19 | 20 | env: 21 | SCALA_VERSION: ${{ matrix.scala }} 22 | BUILD_NUMBER: ${{ github.run_id }} 23 | 24 | steps: 25 | - uses: actions/checkout@v2 26 | with: 27 | fetch-depth: 0 28 | 29 | - uses: coursier/cache-action@v6 30 | # - uses: olafurpg/setup-scala@v13 31 | # with: 32 | # java-version: adopt@1.8 33 | 34 | - name: run tests 35 | run: docker compose -f .github/docker-compose.yml up test --abort-on-container-exit --exit-code-from test 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /RUNNING_PID 2 | /logs/ 3 | /project/*-shim.sbt 4 | /project/project/ 5 | /project/target/ 6 | /target/ 7 | /data/*.osm 8 | /data/*.geojson 9 | /data/*.osm.json 10 | /data/*.osm.pbf 11 | /images/* 12 | .ensime 13 | .ensime_cache/* 14 | clipping/* 15 | osmosis/* 16 | .idea 17 | target 18 | .metals 19 | \#* 20 | .\#* 21 | 22 | derby.log 23 | metastore_db/* 24 | bench/target/ 25 | idea.sbt 26 | mainRunner/ 27 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | # Changelog 3 | 4 | All notable changes to this project will be documented in this file. 5 | 6 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 7 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 8 | 9 | ## [Unreleased] 10 | 11 | ### Added 12 | 13 | - GitHub actions config 14 | 15 | ### Changed 16 | 17 | ### Fixed 18 | 19 | ## [2.2.0] 20 | 21 | ### Added 22 | 23 | - Feature parsing in `AugmentedDiffSource` uses `vectorpipe.util.RobustFeature` to tolerate bad geometries in the stream [#148](https://github.com/geotrellis/vectorpipe/pull/148). 
24 | - Receive GPG key while publishing artifacts [#138](https://github.com/geotrellis/vectorpipe/pull/138) 25 | - `Pipeline#finalize(vectorTiles, zoom)` method to receive the final RDD of generated vector tiles for a zoom level 26 | - `Pipeline.Output` mixin trait that overrides `finalize` with default implementation using `saveVectorTiles(vectorTiles, zoom, pipeline.baseOutputURI)` 27 | 28 | ### Changed 29 | 30 | - `VectorPipe.Options` to support for any square layout level (not just from ZoomedLayoutScheme) 31 | - `Pipeline#baseOutputURI` moved to `Pipeline.Output#baseOutputURI` 32 | - Updated Geotrellis dependency to 3.5.1 33 | - Improve robustness of functions in `vectorpipe.sources.ChangesetSource` 34 | 35 | ### Fixed 36 | 37 | ## [2.1.3] - 2019-12-18 38 | 39 | ### Fixed 40 | 41 | - Catch 403 S3Exceptions when checking minutely diffs in AugmentedDiffSource 42 | 43 | ## [2.1.2] - 2019-12-17 44 | 45 | ### Fixed 46 | 47 | - Catch proper AWS SDK v2 NoSuchKeyException when checking minutely diffs in AugmentedDiffSource 48 | 49 | ## [2.1.1] - 2019-12-16 50 | 51 | ### Fixed 52 | 53 | - AugmentedDiffSource failed to properly decode from JSON sources 54 | - MicroBatchReader null pointer exception when reading baseURI from DataSourceOptions 55 | 56 | ## [2.1.0] - 2019-12-12 57 | 58 | ### Added 59 | 60 | - `vectorpipe.examples`: VectorPipe examples moved from https://github.com/azavea/osmesa 61 | - `VectorPipe.defaultSparkSessionWithJTS` method to construct a VectorPipe tailored `SparkSession`. Users with more complicated use cases will still want to manually construct their own session. 62 | 63 | ## [2.0.0] - 2019-11-29 64 | 65 | This is the first release to depend on GeoTrellis 3.0. 66 | 67 | ### Changed 68 | 69 | - Streaming sources now fallback to the current remote sequence if no database 70 | checkpoint or option can be found 71 | - Depend on Spark 2.4.4 72 | - Depend on GeoTrellis 3.1.0 73 | 74 | ## [1.1.0] - 2019-09-26 75 | 76 | ### Added 77 | 78 | - `useCaching` option to VectorPipe.Options allows for persisting to disk. 79 | Helps avoid repeated computations. 80 | - Functions for converting sequence numbers to timestamps and back for both 81 | changeset replications and augmented diff replications. See `ChangesetSource` 82 | and `AugmentedDiffSource` in `vectorpipe.sources`. 83 | 84 | ### Changed 85 | 86 | - Improved empty geometry handling in UDFs 87 | 88 | ### Fixed 89 | 90 | ## [1.0.0] - 2019-07-09 91 | 92 | ### Added 93 | 94 | - RELEASING.md - Instructions for releasing new versions of this project 95 | - Support for semicolon-delimited tag values in UDFs, e.g. `shop=bakery;dairy` 96 | - Support for `nds` in augmented diff GeoJSON (matching 97 | [`osm-replication-streams@^0.7.0`](https://github.com/mojodna/osm-replication-streams/tree/v0.7.0) 98 | output) 99 | - "Uninteresting" tags are dropped when processing OSM inputs; this will result 100 | in fewer point features being generated (as those nodes previously had tags 101 | applied). 102 | 103 | ### Changed 104 | 105 | - Sync with [id-area-keys@2.13.0](https://github.com/osmlab/id-area-keys/blob/v2.13.0/areaKeys.json) for determining area-ness of a way. 
106 | - Fetch gzipped augmented diff JSON (produced by [overpass-diff-publisher](https://github.com/mojodna/overpass-diff-publisher)) 107 | - Preserve the last-known coordinates of deleted nodes 108 | - Better handling of falsy boolean values in tag UDFs 109 | - Adds `riverbank`, `stream_end`, `dam`, `weir`, `waterfall`, and `pressurised` 110 | to the list of waterway features 111 | - Populates `nds` and `members` for deleted elements from the previous version 112 | 113 | ### Fixed 114 | 115 | - Resolve commons-io deprecation warnings 116 | - Convert coordinates to Doubles (expected by VP internals) when pre-processing 117 | 118 | ## [1.0.0-RC3] - 2019-04-24 119 | 120 | ### Fixed 121 | 122 | - Mark all logger vals and some UDF vals as @transient lazy to avoid Spark serialization issues 123 | - Properly strip leading and trailing slashes from S3 URIs when exporting vector tiles 124 | -------------------------------------------------------------------------------- /CONTRIBUTING.org: -------------------------------------------------------------------------------- 1 | #+TITLE: Contributing to VectorPipe 2 | #+AUTHOR: Colin 3 | #+HTML_HEAD: 4 | 5 | ** Prerequisite Knowledge 6 | 7 | *** GeoTrellis 8 | 9 | GeoTrellis sublibraries and types are used heavily throughout ~vectorpipe~, 10 | particularly its ~vector~ and ~vectortile~ packages. 11 | 12 | *** Apache Spark 13 | 14 | ~RDD~ usage is fairly prevalent, so knowledge of Spark internals may help 15 | you, depending on your task. 16 | 17 | *** Cats 18 | 19 | The Functional Programming library that adds certain necessities missing 20 | from vanilla Scala. This is not at all necessary for /using/ ~vectorpipe~, 21 | but is used here and there within its internal machinery. 22 | 23 | *** OpenStreetMap 24 | 25 | Knowledge of how OpenStreetMap data is formatted will help you immensely. Terms: 26 | 27 | - Element 28 | - Node 29 | - Way 30 | - Relation 31 | 32 | ** Development Dependencies 33 | 34 | - [[http://www.scala-sbt.org/][SBT]] 35 | - [[https://spark.apache.org/][Apache Spark]] (a local install on your machine) 36 | - [[https://jekyllrb.com/][Jekyll]] (if editing the microsite) 37 | 38 | Otherwise, all Scala dependencies (including compilers) will be 39 | automatically downloaded by sbt. 40 | 41 | ** Style Guide 42 | 43 | When contributing code changes to ~vectorpipe~, bear in mind that we make a 44 | few stylistic choices in order to minimize code complexity: 45 | 46 | *** Code and Directory Layout 47 | 48 | - Code mechanics relevant to the workings of the library but irrelevant to the 49 | user should be relegated to a module under ~vectorpipe.*.internal~, where 50 | the ~*~ is whatever parent module you're working in. 51 | 52 | - Type aliases live in *package objects*: 53 | 54 | #+BEGIN_SRC scala 55 | package vectorpipe 56 | 57 | package object foo { 58 | type Bar = Int 59 | } 60 | #+END_SRC 61 | 62 | - Typeclass instances live in the companion object of the class they're for: 63 | 64 | #+BEGIN_SRC scala 65 | import cats._ 66 | 67 | case class Foo[T](t: T) 68 | 69 | object Foo { 70 | implicit val fooFunctor: Functor[Foo] = new Functor[Foo] { 71 | def map[A, B](fa: Foo[A])(f: A => B): Foo[B] = ??? 72 | } 73 | } 74 | #+END_SRC 75 | 76 | This is to give immediate "visibility" of instances to their corresponding 77 | types. Just by importing ~Foo~, you have access to all its instances without 78 | having to think about them. This decreases ~import~ confusion. 
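For example, here is a small sketch of that "visibility" in action. It reuses the
hypothetical ~Foo~ and its ~Functor~ instance from above; the ~rename~ helper is
likewise an illustrative name, not part of the codebase (and since the example
instance's ~map~ is left as ~???~, this is for compile-time illustration only):

#+BEGIN_SRC scala
import cats.Functor

/* No separate "instances" import is needed: implicit resolution finds the
 * Functor[Foo] defined in Foo's companion object on its own. */
def rename[F[_]: Functor](fa: F[String]): F[String] =
  Functor[F].map(fa)(_.toUpperCase)

val shouted: Foo[String] = rename(Foo("bar"))
#+END_SRC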
79 | 80 | *** Scala Features to Avoid 81 | 82 | **** Method Overloading and Default Arguments 83 | 84 | We [[https://stackoverflow.com/a/2512001/643684][avoid method overloading]]: 85 | 86 | #+BEGIN_SRC scala 87 | case class Foo[T](t: T) { 88 | def bar(a: Int): Bar = ??? 89 | 90 | // avoid 91 | def bar(a: Int, b: Int): Bar = ??? 92 | } 93 | #+END_SRC 94 | 95 | We avoid default arguments: 96 | 97 | #+BEGIN_SRC scala 98 | case class Foo[T](t: T) { 99 | // avoid 100 | def bar(a: Int, b: Option[Int] = None): Bar = ??? 101 | } 102 | #+END_SRC 103 | 104 | Since this is method overloading in disguise. 105 | 106 | **** Exceptions 107 | 108 | We avoid throwing Exceptions: 109 | 110 | #+BEGIN_SRC scala 111 | /* Surely this function will obey its contract... */ 112 | def innocent(path: String): Foo 113 | 114 | sbt> innocent("/wrong/file/path/or/bad/data.txt") 115 | java.lang.YouCouldntHaveForeseenThisException 116 | #+END_SRC 117 | 118 | Exceptions were intentionally left out of new languages like [[https://golang.org/doc/faq#exceptions][Golang]], [[https://www.rust-lang.org/en-US/faq.html#error-handling][Rust]], and Elm. 119 | In Scala, we can use vanilla ~Try~ and ~Either~, or ~EitherT~ from [[http://typelevel.org/cats/][Cats]] or [[https://github.com/scalaz/scalaz][ScalaZ]] 120 | to model potential errors: 121 | 122 | #+BEGIN_SRC scala 123 | def innocent(path: String): Either[String, Foo] 124 | 125 | /* "Mixing Contexts", i.e. the ability to run concurrently and to fail safely */ 126 | def innocentIO(path: String): EitherT[Future, String, Foo] 127 | #+END_SRC 128 | 129 | **** Non-data Classes 130 | 131 | We [[https://www.youtube.com/watch?v=o9pEzgHorH0][avoid classes that don't represent data]]: 132 | 133 | #+BEGIN_SRC scala 134 | class Fooifizer(val bestArg: Type) { 135 | def work(arg: Type): Unit = { ??? } 136 | } 137 | #+END_SRC 138 | 139 | Instead, we call a spade a spade and write a stand-alone function: 140 | 141 | #+BEGIN_SRC scala 142 | /* Put this in an appropriate companion object, or the package object */ 143 | def fooifize(bestArg: Type, arg: Type): Unit = { ??? } 144 | #+END_SRC 145 | 146 | **** Miscellaneous 147 | 148 | We avoid ~.apply~ returning a type other than the parent object: 149 | 150 | #+BEGIN_SRC scala 151 | object Foo { 152 | // avoid 153 | def apply(...): Bar = ... 154 | } 155 | 156 | // Or else you can write code like: 157 | val x = Foo(...) // hard to know what x's type is. 158 | #+END_SRC 159 | 160 | We [[https://github.com/circe/circe/blame/master/DESIGN.md#L77][avoid implicit conversions]]: 161 | 162 | #+BEGIN_SRC scala 163 | case class Foo(...) 164 | 165 | case class Bar(...) { 166 | def bar: ??? = ... 167 | } 168 | 169 | object Foo { 170 | // avoid 171 | implicit def foo2Bar(foo: Foo): Bar = ... 172 | } 173 | 174 | // Or else you can write code like: 175 | val x = Foo(...).bar // where did `bar` come from? 176 | #+END_SRC 177 | 178 | Typeclasses should be implemented via the implicit-val-within-companion-object 179 | pattern. 180 | 181 | ** Updating the Microsite 182 | 183 | All content files can be found in ~src/main/tut/~. After making your desired 184 | changes, you can confirm them by running the following in sbt: 185 | 186 | #+BEGIN_EXAMPLE 187 | sbt> makeMicrosite 188 | #+END_EXAMPLE 189 | 190 | This will build the site as well as compile every Scala example. If 191 | something about the API has changed and the examples are no longer valid, 192 | these docs will fail to build. This is a good thing! Just make the 193 | appropriate extra changes and rebuild. 
194 | 195 | To view your built site locally, navigate to ~target/site/~ and run ~jekyll 196 | serve~. Be careful: The main content of the site will be visible at 197 | [[http://127.0.0.1:4000/vectorpipe/][127.0.0.1:4000/vectorpipe/]]. Without 198 | the ~vectorpipe~ on the end, you won't see anything. 199 | 200 | If you have write permission to the main VectorPipe repo on GitHub, then 201 | your updated microsite can be published to 202 | [[https://geotrellis.github.io/vectorpipe/]] via: 203 | 204 | #+BEGIN_EXAMPLE 205 | sbt> publishMicrosite 206 | #+END_EXAMPLE 207 | ** Publishing to Bintray 208 | 209 | Provided you have permissions to publish to [[https://bintray.com/azavea][Azavea's Bintray]], all that's necessary 210 | to proceed is: 211 | 212 | #+BEGIN_EXAMPLE 213 | sbt> publish 214 | #+END_EXAMPLE 215 | 216 | in your SBT shell. 217 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This software is licensed under the Apache 2 license, quoted below. 2 | 3 | Copyright 2011-2017 Azavea [http://www.azavea.com] 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); you may not 6 | use this file except in compliance with the License. You may obtain a copy of 7 | the License at 8 | 9 | [http://www.apache.org/licenses/LICENSE-2.0] 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 | WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 | License for the specific language governing permissions and limitations under 15 | the License. 16 | -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- 1 | # Publishing a release 2 | 3 | 1. Create a new release branch from up-to-date master named `release/x.y.z` 4 | 1. Review CHANGELOG.md. Move `[Unreleased]` header to empty section and replace with `[x.y.z]` header plus release date. 5 | 1. Update the version numbers in the build.sbt and spark-shell examples in the README's "Getting Started" section. 6 | 1. Commit these changes as a single commit, with the message "Release vx.y.z" 7 | 1. Push branch and make a PR on GitHub 8 | 1. Ensure CI succeeds 9 | 1. Ensure there are no new commits on master. If there are new commits, rebase this branch on master and start over at step 2 if you wish to include them. Otherwise, merge. 10 | 1. Tag the merge commit on the master branch: `git tag -a vx.y.z -m "Release x.y.z"` 11 | 1. Push the new tag: `git push --tags`; if you have multiple remotes, you may need to target the proper upstream repo: `git push <remote> --tags`. 12 | 1. Review the CircleCI build status to ensure that the tag was successfully published to Sonatype.
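For reference, the tagging and pushing steps above might look like the following for a hypothetical release `1.2.3`, assuming the main repository's remote is named `origin` (adjust the remote name to your setup):

```bash
# After the release PR has been merged, from an up-to-date master:
git checkout master && git pull
git tag -a v1.2.3 -m "Release 1.2.3"
git push origin --tags
```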
13 | -------------------------------------------------------------------------------- /bench/src/main/scala/vectorpipe/Bench.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | import geotrellis.vector.{Extent, Line, Point} 6 | import org.openjdk.jmh.annotations._ 7 | 8 | // --- // 9 | 10 | @BenchmarkMode(Array(Mode.AverageTime)) 11 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 12 | @State(Scope.Thread) 13 | class LineBench { 14 | val extent = Extent(0, 0, 5, 5) 15 | 16 | var line: Line = _ 17 | 18 | @Setup 19 | def setup: Unit = { 20 | line = Line( 21 | List.range(4, -100, -2).map(n => Point(n, 1)) ++ List(Point(-3,4), Point(-1,4), Point(2,4), Point(4,4)) 22 | ) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /data/linestring.mvt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/data/linestring.mvt -------------------------------------------------------------------------------- /data/onepoint.mvt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/data/onepoint.mvt -------------------------------------------------------------------------------- /data/polygon.mvt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/data/polygon.mvt -------------------------------------------------------------------------------- /data/roads.mvt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/data/roads.mvt -------------------------------------------------------------------------------- /project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | 3 | object Dependencies { 4 | val awscala = "com.github.seratch" %% "awscala" % Version.awscala 5 | val decline = "com.monovore" %% "decline" % Version.decline 6 | val spark = "org.apache.spark" %% "spark-core" % Version.spark 7 | val sparkSql = "org.apache.spark" %% "spark-sql" % Version.spark 8 | val sparkHive = "org.apache.spark" %% "spark-hive" % Version.spark 9 | val sparkStreaming = "org.apache.spark" %% "spark-streaming" % Version.spark 10 | val sparkJts = "org.locationtech.geomesa" %% "geomesa-spark-jts" % Version.geomesa 11 | val gtGeomesa = "org.locationtech.geotrellis" %% "geotrellis-geomesa" % Version.geotrellis 12 | val gtGeotools = "org.locationtech.geotrellis" %% "geotrellis-geotools" % Version.geotrellis 13 | val gtS3 = "org.locationtech.geotrellis" %% "geotrellis-s3" % Version.geotrellis 14 | 15 | val gtS3Spark = "org.locationtech.geotrellis" %% "geotrellis-s3-spark" % Version.geotrellis 16 | val gtSpark = "org.locationtech.geotrellis" %% "geotrellis-spark" % Version.geotrellis 17 | val gtSparkTestKit = "org.locationtech.geotrellis" %% "geotrellis-spark-testkit" % Version.geotrellis % "test" 18 | val gtVector = "org.locationtech.geotrellis" %% "geotrellis-vector" % Version.geotrellis 19 | val gtShapefile = "org.locationtech.geotrellis" %% "geotrellis-shapefile" % Version.geotrellis 20 | val gtVectorTile = 
"org.locationtech.geotrellis" %% "geotrellis-vectortile" % Version.geotrellis 21 | val cats = "org.typelevel" %% "cats-core" % Version.cats 22 | val scalactic = "org.scalactic" %% "scalactic" % Version.scalactic 23 | val scalatest = "org.scalatest" %% "scalatest" % Version.scalatest % "test" 24 | val jaiCore = "javax.media" % "jai_core" % "1.1.3" from "https://repo.osgeo.org/repository/release/javax/media/jai_core/1.1.3/jai_core-1.1.3.jar" 25 | val hbaseCommon = "org.apache.hbase" % "hbase-common" % "1.3.1" 26 | val hbaseClient = "org.apache.hbase" % "hbase-client" % "1.3.1" 27 | val hbaseServer = "org.apache.hbase" % "hbase-server" % "1.3.1" 28 | val geomesaHbaseDatastore = "org.locationtech.geomesa" % "geomesa-hbase-datastore_2.11" % Version.geomesa 29 | val kryo = "com.esotericsoftware" % "kryo-shaded" % Version.kryo 30 | val circeCore = "io.circe" %% "circe-core" % Version.circe 31 | val circeGeneric = "io.circe" %% "circe-generic" % Version.circe 32 | val circeExtras = "io.circe" %% "circe-generic-extras" % Version.circe 33 | val circeParser = "io.circe" %% "circe-parser" % Version.circe 34 | val circeOptics = "io.circe" %% "circe-optics" % Version.circe 35 | val circeJava8 = "io.circe" %% "circe-java8" % Version.circe 36 | val circeYaml = "io.circe" %% "circe-yaml" % "0.9.0" 37 | val commonsIO = "commons-io" % "commons-io" % Version.commonsIO 38 | val scalaj = "org.scalaj" %% "scalaj-http" % Version.scalaj 39 | } 40 | -------------------------------------------------------------------------------- /project/Version.scala: -------------------------------------------------------------------------------- 1 | object Version { 2 | val awscala = "0.8.1" 3 | val geotrellis = "3.5.1" 4 | val scala2_11 = "2.11.12" 5 | val scala2_12 = "2.12.12" 6 | val geomesa = "2.2.1" 7 | val decline = "0.6.1" 8 | val cats = "1.6.1" 9 | val scalactic = "3.0.6" 10 | val scalatest = "3.0.3" 11 | val spark = "2.4.4" 12 | val kryo = "4.0.2" 13 | val circe = "0.11.0" 14 | val scalaLogging = "3.9.2" 15 | val commonsIO = "2.6" 16 | val scalaj = "2.4.1" 17 | } 18 | -------------------------------------------------------------------------------- /project/assembly.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.5") 2 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.2.8 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.47deg" % "sbt-microsites" % "0.7.4") 2 | 3 | addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.2.27") 4 | 5 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.0") 6 | 7 | addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.0" cross CrossVersion.full) 8 | 9 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.5") 10 | 11 | addSbtPlugin("io.crashbox" % "sbt-gpg" % "0.2.0") 12 | 13 | addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "1.0.0") 14 | -------------------------------------------------------------------------------- /scripts/cibuild: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ -n "${VECTORPIPE_DEBUG}" ]]; then 6 | set -x 7 | fi 8 | 9 | function usage() { 10 | echo -n \ 11 | "Usage: $(basename 
"$0") 12 | Execute tests. 13 | " 14 | } 15 | 16 | if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then 17 | if [[ "${1:-}" == "--help" ]]; then 18 | usage 19 | else 20 | SCALA_VERSION="${SCALA_VERSION:-2.11.12}" ./scripts/test 21 | fi 22 | fi 23 | -------------------------------------------------------------------------------- /scripts/cipublish: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ -n "${VECTORPIPE_DEBUG}" ]]; then 6 | set -x 7 | fi 8 | 9 | function usage() { 10 | echo -n \ 11 | "Usage: $(basename "$0") 12 | Publish artifacts to Sonatype. 13 | " 14 | } 15 | 16 | if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then 17 | if [[ "${1:-}" == "--help" ]]; then 18 | usage 19 | else 20 | if [[ -n "${CIRCLE_TAG}" ]]; then 21 | echo "Publishing artifacts to Sonatype" 22 | ./sbt ";++${SCALA_VERSION:-2.11.12};sonatypeOpen ${CIRCLE_BUILD_NUM};publish;sonatypeRelease" 23 | else 24 | echo "Publishing artifacts to default location" 25 | ./sbt "++${SCALA_VERSION:-2.11.12}" publish 26 | fi 27 | fi 28 | fi 29 | -------------------------------------------------------------------------------- /scripts/test: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ -n "${VECTORPIPE_DEBUG}" ]]; then 6 | set -x 7 | fi 8 | 9 | function usage() { 10 | echo -n \ 11 | "Usage: $(basename "$0") 12 | Update Scala dependencies and execute tests. 13 | " 14 | } 15 | 16 | if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then 17 | if [[ "${1:-}" == "--help" ]]; then 18 | usage 19 | else 20 | echo "Executing Scala test suite" 21 | ./sbt "++${SCALA_VERSION:-2.11.12}" test 22 | fi 23 | fi 24 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | vectorpipe.sources.AugmentedDiffProvider 2 | vectorpipe.sources.ChangeProvider 3 | vectorpipe.sources.ChangesetProvider 4 | -------------------------------------------------------------------------------- /src/main/resources/microsite/data/menu.yml: -------------------------------------------------------------------------------- 1 | options: 2 | 3 | - title: Usage 4 | url: usage.html 5 | menu_type: usage 6 | menu_section: usage 7 | 8 | - title: Concepts 9 | url: usage/concepts.html 10 | menu_type: usage 11 | menu_section: concepts 12 | 13 | - title: Reading OpenStreetMap Data 14 | url: usage/osm.html 15 | menu_type: usage 16 | menu_section: osm 17 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/OSM.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe 2 | 3 | import java.sql.Timestamp 4 | 5 | import org.apache.spark.sql._ 6 | import org.apache.spark.sql.functions._ 7 | import geotrellis.vector._ 8 | import vectorpipe.functions.osm.removeUninterestingTags 9 | import vectorpipe.internal._ 10 | 11 | object OSM { 12 | /** 13 | * Convert a raw OSM dataframe into a frame containing JTS geometries for each unique id/changeset. 14 | * 15 | * This currently produces Points for nodes containing "interesting" tags, LineStrings and Polygons for ways 16 | * (according to OSM rules for defining areas), MultiPolygons for multipolygon and boundary relations, and 17 | * LineStrings / MultiLineStrings for route relations. 
18 | * 19 | * @param input DataFrame containing node, way, and relation elements 20 | * @return DataFrame containing geometries. 21 | */ 22 | def toGeometry(input: DataFrame): DataFrame = { 23 | import input.sparkSession.implicits._ 24 | 25 | val st_pointToGeom = org.apache.spark.sql.functions.udf { pt: Point => pt.asInstanceOf[Geometry] } 26 | 27 | val elements = input 28 | .withColumn("tags", removeUninterestingTags('tags)) 29 | 30 | val nodes = preprocessNodes(elements) 31 | 32 | val nodeGeoms = constructPointGeometries(nodes) 33 | .withColumn("minorVersion", lit(0)) 34 | .withColumn("geom", st_pointToGeom('geom)) 35 | 36 | val wayGeoms = reconstructWayGeometries(elements, nodes) 37 | 38 | val relationGeoms = reconstructRelationGeometries(elements, wayGeoms) 39 | 40 | nodeGeoms 41 | .union(wayGeoms.where(size('tags) > 0).drop('geometryChanged)) 42 | .union(relationGeoms) 43 | } 44 | 45 | /** 46 | * Snapshot pre-processed elements. 47 | * 48 | * A Time Pin is stuck through a set of elements that have been augmented with a 'validUntil column to identify all 49 | * that were valid at a specific point in time (i.e. updated before the target timestamp and valid after it). 50 | * 51 | * @param df Elements (including 'validUntil column) 52 | * @param timestamp Optional timestamp to snapshot at 53 | * @return DataFrame containing valid elements at timestamp (or now) 54 | */ 55 | def snapshot(df: DataFrame, timestamp: Timestamp = null): DataFrame = { 56 | import df.sparkSession.implicits._ 57 | 58 | df 59 | .where( 60 | 'updated <= coalesce(lit(timestamp), current_timestamp) 61 | and coalesce(lit(timestamp), current_timestamp) < coalesce('validUntil, date_add(current_timestamp, 1))) 62 | } 63 | 64 | /** 65 | * Augment geometries with user metadata. 66 | * 67 | * When 'changeset is included, user (name and 'uid) metadata is joined from a DataFrame containing changeset 68 | * metadata. 69 | * 70 | * @param geoms Geometries to augment. 71 | * @param changesets Changesets DataFrame with user metadata. 72 | * @return Geometries augmented with user metadata. 
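   *
   * A hypothetical sketch of wiring this together (source options elided; `geoms`,
   * `changesetOptions`, and `spark` are assumed, not defined in this file):
   * {{{
   *   import vectorpipe.sources.Source
   *   val changesets = spark.read.format(Source.Changesets).options(changesetOptions).load
   *   val withUsers  = OSM.addUserMetadata(geoms, changesets)
   * }}}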
73 | */ 74 | def addUserMetadata(geoms: DataFrame, changesets: DataFrame): DataFrame = { 75 | import geoms.sparkSession.implicits._ 76 | 77 | geoms 78 | .join(changesets.select('id as 'changeset, 'uid, 'user), Seq("changeset")) 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/examples/AugmentedDiffProcessor.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.examples 2 | 3 | import java.net.URI 4 | 5 | import cats.implicits._ 6 | import com.monovore.decline._ 7 | import org.apache.spark.sql._ 8 | import vectorpipe.VectorPipe 9 | import vectorpipe.model.AugmentedDiff 10 | import vectorpipe.sources.Source 11 | 12 | /* 13 | * Usage example: 14 | * 15 | * sbt assembly 16 | * 17 | * spark-submit \ 18 | * --class vectorpipe.examples.AugmentedDiffProcessor \ 19 | * target/scala-2.11/vectorpipe.jar \ 20 | * --augmented-diff-source s3://somewhere/diffs/ 21 | */ 22 | object AugmentedDiffProcessor 23 | extends CommandApp( 24 | name = "augmented-diff-processor", 25 | header = "Read from augmented diffs", 26 | main = { 27 | val augmentedDiffSourceOpt = Opts.option[URI]( 28 | "augmented-diff-source", 29 | short = "a", 30 | metavar = "uri", 31 | help = "Location of augmented diffs to process" 32 | ) 33 | val startSequenceOpt = Opts 34 | .option[Int]( 35 | "start-sequence", 36 | short = "s", 37 | metavar = "sequence", 38 | help = "Starting sequence. If absent, the current (remote) sequence will be used." 39 | ) 40 | .orNone 41 | val endSequenceOpt = Opts 42 | .option[Int]( 43 | "end-sequence", 44 | short = "e", 45 | metavar = "sequence", 46 | help = "Ending sequence. If absent, the current (remote) sequence will be used." 47 | ) 48 | .orNone 49 | 50 | (augmentedDiffSourceOpt, startSequenceOpt, endSequenceOpt) 51 | .mapN { 52 | (augmentedDiffSource, startSequence, endSequence) => 53 | implicit val ss: SparkSession = 54 | VectorPipe.defaultSparkSessionWithJTS("AugmentedDiffProcessor") 55 | 56 | import ss.implicits._ 57 | 58 | val options = Map(Source.BaseURI -> augmentedDiffSource.toString) ++ 59 | startSequence 60 | .map(s => Map(Source.StartSequence -> s.toString)) 61 | .getOrElse(Map.empty[String, String]) ++ 62 | endSequence 63 | .map(s => Map(Source.EndSequence -> s.toString)) 64 | .getOrElse(Map.empty[String, String]) 65 | 66 | val geoms = 67 | ss.read.format(Source.AugmentedDiffs).options(options).load 68 | 69 | // aggregations are triggered when an event with a later timestamp ("event time") is received 70 | // geoms.select('sequence).distinct.show 71 | geoms.as[AugmentedDiff].show 72 | 73 | ss.stop() 74 | } 75 | } 76 | ) 77 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/examples/AugmentedDiffStreamProcessor.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.examples 2 | 3 | import java.net.URI 4 | 5 | import cats.implicits._ 6 | import com.monovore.decline._ 7 | import geotrellis.vector.{Feature, Geometry} 8 | import org.apache.spark.sql._ 9 | import vectorpipe.VectorPipe 10 | import vectorpipe.model.ElementWithSequence 11 | import vectorpipe.sources.Source 12 | 13 | /* 14 | * Usage example: 15 | * 16 | * sbt assembly 17 | * 18 | * spark-submit \ 19 | * --class vectorpipe.examples.AugmentedDiffStreamProcessor \ 20 | * target/scala-2.11/vectorpipe.jar \ 21 | * --augmented-diff-source s3://somewhere/diffs/ 22 | */ 23 | object 
AugmentedDiffStreamProcessor 24 | extends CommandApp( 25 | name = "augmented-diff-stream-processor", 26 | header = "Read OSM augmented diffs as an open stream", 27 | main = { 28 | type AugmentedDiffFeature = Feature[Geometry, ElementWithSequence] 29 | 30 | val augmentedDiffSourceOpt = Opts.option[URI]( 31 | "augmented-diff-source", 32 | short = "a", 33 | metavar = "uri", 34 | help = "Location of augmented diffs to process" 35 | ) 36 | val startSequenceOpt = Opts 37 | .option[Int]( 38 | "start-sequence", 39 | short = "s", 40 | metavar = "sequence", 41 | help = "Starting sequence. If absent, the current (remote) sequence will be used." 42 | ) 43 | .orNone 44 | val endSequenceOpt = Opts 45 | .option[Int]( 46 | "end-sequence", 47 | short = "e", 48 | metavar = "sequence", 49 | help = "Ending sequence. If absent, this will be an infinite stream." 50 | ) 51 | .orNone 52 | 53 | (augmentedDiffSourceOpt, startSequenceOpt, endSequenceOpt) 54 | .mapN { 55 | (augmentedDiffSource, startSequence, endSequence) => 56 | implicit val ss: SparkSession = 57 | VectorPipe.defaultSparkSessionWithJTS("AugmentedDiffStreamProcessor") 58 | 59 | val options = Map(Source.BaseURI -> augmentedDiffSource.toString, 60 | Source.ProcessName -> "AugmentedDiffStreamProcessor") ++ 61 | startSequence 62 | .map(s => Map(Source.StartSequence -> s.toString)) 63 | .getOrElse(Map.empty[String, String]) ++ 64 | endSequence 65 | .map(s => Map(Source.EndSequence -> s.toString)) 66 | .getOrElse(Map.empty[String, String]) 67 | 68 | val geoms = 69 | ss.readStream.format(Source.AugmentedDiffs).options(options).load 70 | 71 | // aggregations are triggered when an event with a later timestamp ("event time") is received 72 | val query = geoms.writeStream 73 | .format("console") 74 | .start 75 | 76 | query.awaitTermination() 77 | 78 | ss.stop() 79 | } 80 | } 81 | ) 82 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/examples/ChangeProcessor.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.examples 2 | 3 | import java.net.URI 4 | 5 | import cats.implicits._ 6 | import com.monovore.decline._ 7 | import org.apache.spark.sql._ 8 | import vectorpipe.VectorPipe 9 | import vectorpipe.model.Change 10 | import vectorpipe.sources.Source 11 | 12 | /* 13 | * Usage example: 14 | * 15 | * sbt assembly 16 | * 17 | * spark-submit \ 18 | * --class vectorpipe.examples.ChangeProcessor \ 19 | * target/scala-2.11/vectorpipe.jar 20 | */ 21 | object ChangeProcessor 22 | extends CommandApp( 23 | name = "change-processor", 24 | header = "Read minutely changes from start sequence to end sequence", 25 | main = { 26 | val changeSourceOpt = Opts 27 | .option[URI]("change-source", 28 | short = "d", 29 | metavar = "uri", 30 | help = "Location of minutely diffs to process") 31 | .withDefault(new URI("https://planet.osm.org/replication/minute/")) 32 | val startSequenceOpt = Opts 33 | .option[Int]( 34 | "start-sequence", 35 | short = "s", 36 | metavar = "sequence", 37 | help = "Starting sequence. If absent, the current (remote) sequence will be used." 38 | ) 39 | .orNone 40 | val endSequenceOpt = Opts 41 | .option[Int]( 42 | "end-sequence", 43 | short = "e", 44 | metavar = "sequence", 45 | help = "Ending sequence. If absent, this will be an infinite stream." 
46 | ) 47 | .orNone 48 | 49 | (changeSourceOpt, startSequenceOpt, endSequenceOpt) 50 | .mapN { 51 | (changeSource, startSequence, endSequence) => 52 | implicit val ss: SparkSession = 53 | VectorPipe.defaultSparkSessionWithJTS("ChangeProcessor") 54 | 55 | import ss.implicits._ 56 | 57 | val options = Map(Source.BaseURI -> changeSource.toString) ++ 58 | startSequence 59 | .map(s => Map(Source.StartSequence -> s.toString)) 60 | .getOrElse(Map.empty[String, String]) ++ 61 | endSequence 62 | .map(s => Map(Source.EndSequence -> s.toString)) 63 | .getOrElse(Map.empty[String, String]) 64 | 65 | val changes = 66 | ss.read.format(Source.Changes).options(options).load 67 | 68 | // aggregations are triggered when an event with a later timestamp ("event time") is received 69 | // changes.select('sequence).distinct.show 70 | changes.as[Change].show 71 | 72 | ss.stop() 73 | } 74 | } 75 | ) 76 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/examples/ChangeStreamProcessor.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.examples 2 | 3 | import java.net.URI 4 | 5 | import cats.implicits._ 6 | import com.monovore.decline._ 7 | import org.apache.spark.sql._ 8 | import vectorpipe.VectorPipe 9 | import vectorpipe.sources.Source 10 | 11 | /* 12 | * Usage example: 13 | * 14 | * sbt assembly 15 | * 16 | * spark-submit \ 17 | * --class vectorpipe.examples.ChangeStreamProcessor \ 18 | * target/scala-2.11/vectorpipe.jar \ 19 | * --augmented-diff-source s3://somewhere/diffs/ 20 | */ 21 | object ChangeStreamProcessor 22 | extends CommandApp( 23 | name = "change-stream-processor", 24 | header = "Read OSM minutely diffs as a stream", 25 | main = { 26 | val changeSourceOpt = Opts 27 | .option[URI]("change-source", 28 | short = "d", 29 | metavar = "uri", 30 | help = "Location of minutely diffs to process") 31 | .withDefault(new URI("https://planet.osm.org/replication/minute/")) 32 | val startSequenceOpt = Opts 33 | .option[Int]( 34 | "start-sequence", 35 | short = "s", 36 | metavar = "sequence", 37 | help = "Starting sequence. If absent, the current (remote) sequence will be used." 38 | ) 39 | .orNone 40 | val endSequenceOpt = Opts 41 | .option[Int]( 42 | "end-sequence", 43 | short = "e", 44 | metavar = "sequence", 45 | help = "Ending sequence. If absent, this will be an infinite stream." 
46 | ) 47 | .orNone 48 | val partitionCountOpt = Opts 49 | .option[Int]("partitions", 50 | short = "p", 51 | metavar = "partition count", 52 | help = "Change partition count.") 53 | .orNone 54 | 55 | (changeSourceOpt, startSequenceOpt, endSequenceOpt, partitionCountOpt) 56 | .mapN { 57 | (changeSource, startSequence, endSequence, partitionCount) => 58 | implicit val ss: SparkSession = 59 | VectorPipe.defaultSparkSessionWithJTS("ChangeStreamProcessor") 60 | 61 | val options = Map(Source.BaseURI -> changeSource.toString, Source.ProcessName -> "ChangeStreamProcessor") ++ 62 | startSequence.map(s => Map(Source.StartSequence -> s.toString)) 63 | .getOrElse(Map.empty[String, String]) ++ 64 | endSequence.map(s => Map(Source.EndSequence -> s.toString)) 65 | .getOrElse(Map.empty[String, String]) ++ 66 | partitionCount.map(s => Map(Source.PartitionCount -> s.toString)) 67 | .getOrElse(Map.empty[String, String]) 68 | 69 | val changes = 70 | ss.readStream.format(Source.Changes).options(options).load 71 | 72 | val query = changes.writeStream 73 | .format("console") 74 | .start 75 | 76 | query.awaitTermination() 77 | 78 | ss.stop() 79 | } 80 | } 81 | ) 82 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/examples/ChangesetProcessor.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.examples 2 | 3 | import java.net.URI 4 | 5 | import cats.implicits._ 6 | import com.monovore.decline._ 7 | import org.apache.spark.sql._ 8 | import vectorpipe.VectorPipe 9 | import vectorpipe.model.Changeset 10 | import vectorpipe.sources.Source 11 | 12 | /* 13 | * Usage example: 14 | * 15 | * sbt assembly 16 | * 17 | * spark-submit \ 18 | * --class vectorpipe.examples.ChangesetProcessor \ 19 | * target/scala-2.11/vectorpipe.jar 20 | */ 21 | object ChangesetProcessor 22 | extends CommandApp( 23 | name = "changeset-processor", 24 | header = "Read changesets between start sequence and end sequence", 25 | main = { 26 | val changesetSourceOpt = 27 | Opts.option[URI]("changeset-source", 28 | short = "c", 29 | metavar = "uri", 30 | help = "Location of changesets to process" 31 | ).withDefault(new URI("https://planet.osm.org/replication/changesets/")) 32 | val startSequenceOpt = Opts 33 | .option[Int]( 34 | "start-sequence", 35 | short = "s", 36 | metavar = "sequence", 37 | help = "Starting sequence. If absent, the current (remote) sequence will be used." 38 | ) 39 | .orNone 40 | val endSequenceOpt = Opts 41 | .option[Int]( 42 | "end-sequence", 43 | short = "e", 44 | metavar = "sequence", 45 | help = "Ending sequence. If absent, this will be an infinite stream." 
46 | ) 47 | .orNone 48 | 49 | (changesetSourceOpt, startSequenceOpt, endSequenceOpt) 50 | .mapN { 51 | (changesetSource, startSequence, endSequence) => 52 | implicit val ss: SparkSession = 53 | VectorPipe.defaultSparkSessionWithJTS("ChangesetProcessor") 54 | 55 | import ss.implicits._ 56 | 57 | val options = Map(Source.BaseURI -> changesetSource.toString) ++ 58 | startSequence 59 | .map(s => Map(Source.StartSequence -> s.toString)) 60 | .getOrElse(Map.empty[String, String]) ++ 61 | endSequence 62 | .map(s => Map(Source.EndSequence -> s.toString)) 63 | .getOrElse(Map.empty[String, String]) 64 | 65 | val changes = 66 | ss.read.format(Source.Changesets).options(options).load 67 | 68 | // aggregations are triggered when an event with a later timestamp ("event time") is received 69 | // changes.select('sequence).distinct.show 70 | changes.as[Changeset].show 71 | 72 | ss.stop() 73 | } 74 | } 75 | ) 76 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/examples/ChangesetStreamProcessor.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.examples 2 | 3 | import java.net.URI 4 | 5 | import cats.implicits._ 6 | import com.monovore.decline._ 7 | import org.apache.spark.sql._ 8 | import vectorpipe.VectorPipe 9 | import vectorpipe.sources.Source 10 | 11 | /* 12 | * Usage example: 13 | * 14 | * sbt assembly 15 | * 16 | * spark-submit \ 17 | * --class vectorpipe.examples.ChangesetStreamProcessor \ 18 | * target/scala-2.11/vectorpipe.jar \ 19 | * --augmented-diff-source s3://somewhere/diffs/ 20 | */ 21 | object ChangesetStreamProcessor 22 | extends CommandApp( 23 | name = "changeset-stream-processor", 24 | header = "Read OSM changesets from start sequence to end sequence as a stream", 25 | main = { 26 | val changesetSourceOpt = 27 | Opts.option[URI]("changeset-source", 28 | short = "c", 29 | metavar = "uri", 30 | help = "Location of changesets to process" 31 | ).withDefault(new URI("https://planet.osm.org/replication/changesets/")) 32 | val startSequenceOpt = Opts 33 | .option[Int]( 34 | "start-sequence", 35 | short = "s", 36 | metavar = "sequence", 37 | help = "Starting sequence. If absent, the current (remote) sequence will be used." 38 | ) 39 | .orNone 40 | val endSequenceOpt = Opts 41 | .option[Int]( 42 | "end-sequence", 43 | short = "e", 44 | metavar = "sequence", 45 | help = "Ending sequence. If absent, this will be an infinite stream." 
46 | ) 47 | .orNone 48 | val batchSizeOpt = Opts 49 | .option[Int]("batch-size", 50 | short = "b", 51 | metavar = "batch size", 52 | help = "Change batch size.") 53 | .orNone 54 | 55 | (changesetSourceOpt, startSequenceOpt, endSequenceOpt, batchSizeOpt) 56 | .mapN { 57 | (changesetSource, startSequence, endSequence, batchSize) => 58 | implicit val ss: SparkSession = 59 | VectorPipe.defaultSparkSessionWithJTS("ChangesetStreamProcessor") 60 | 61 | val options = Map(Source.BaseURI -> changesetSource.toString, Source.ProcessName -> "ChangesetStreamProcessor") ++ 62 | startSequence.map(s => Map(Source.StartSequence -> s.toString)) 63 | .getOrElse(Map.empty[String, String]) ++ 64 | endSequence.map(s => Map(Source.EndSequence -> s.toString)) 65 | .getOrElse(Map.empty[String, String]) ++ 66 | batchSize.map(s => Map(Source.BatchSize -> s.toString)) 67 | .getOrElse(Map.empty[String, String]) 68 | 69 | val changesets = 70 | ss.readStream.format(Source.Changesets).options(options).load 71 | 72 | val query = changesets.writeStream 73 | .format("console") 74 | .start 75 | 76 | query.awaitTermination() 77 | 78 | ss.stop() 79 | } 80 | } 81 | ) 82 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/functions/package.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe 2 | 3 | import org.apache.spark.sql.Column 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.functions._ 6 | import org.apache.spark.sql.types.{DoubleType, FloatType} 7 | import vectorpipe.util._ 8 | 9 | package object functions { 10 | // A brief note about style 11 | // Spark functions are typically defined using snake_case, therefore so are the UDFs 12 | // internal helper functions use standard Scala naming conventions 13 | 14 | @transient lazy val merge_counts: UserDefinedFunction = udf(_mergeCounts) 15 | 16 | @transient lazy val sum_counts: UserDefinedFunction = udf { counts: Iterable[Map[String, Int]] => 17 | counts.reduce(_mergeCounts(_, _)) 18 | } 19 | 20 | // Convert BigDecimals to doubles 21 | // Reduces size taken for representation at the expense of some precision loss. 22 | def asDouble(value: Column): Column = 23 | when(value.isNotNull, value.cast(DoubleType)) 24 | .otherwise(lit(Double.NaN)) as s"asDouble($value)" 25 | 26 | // Convert BigDecimals to floats 27 | // Reduces size taken for representation at the expense of more precision loss. 
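  // A hypothetical usage sketch (column names assumed):
  //   df.select(asFloat(col("lat")) as "lat", asFloat(col("lon")) as "lon")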
28 | def asFloat(value: Column): Column = 29 | when(value.isNotNull, value.cast(FloatType)) 30 | .otherwise(lit(Float.NaN)) as s"asFloat($value)" 31 | 32 | @transient lazy val count_values: UserDefinedFunction = udf { 33 | (_: Seq[String]).groupBy(identity).mapValues(_.size) 34 | } 35 | 36 | @transient lazy val flatten: UserDefinedFunction = udf { 37 | (_: Seq[Seq[String]]).flatten 38 | } 39 | 40 | @transient lazy val flatten_set: UserDefinedFunction = udf { 41 | (_: Seq[Seq[String]]).flatten.distinct 42 | } 43 | 44 | @transient lazy val merge_sets: UserDefinedFunction = udf { (a: Iterable[String], b: Iterable[String]) => 45 | (Option(a).getOrElse(Set.empty).toSet ++ Option(b).getOrElse(Set.empty).toSet).toArray 46 | } 47 | 48 | @transient lazy val without: UserDefinedFunction = udf { (list: Seq[String], without: String) => 49 | list.filterNot(x => x == without) 50 | } 51 | 52 | private val _mergeCounts = (a: Map[String, Int], b: Map[String, Int]) => 53 | mergeMaps(Option(a).getOrElse(Map.empty[String, Int]), 54 | Option(b).getOrElse(Map.empty[String, Int]))(_ + _) 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/model/Actions.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.model 2 | 3 | object Actions { 4 | type Action = Byte 5 | 6 | val Create: Action = 1.byteValue 7 | val Modify: Action = 2.byteValue 8 | val Delete: Action = 3.byteValue 9 | 10 | def fromString(str: String): Action = 11 | str.toLowerCase match { 12 | case "create" => Actions.Create 13 | case "delete" => Actions.Delete 14 | case "modify" => Actions.Modify 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/model/AugmentedDiff.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.model 2 | 3 | import java.sql.Timestamp 4 | 5 | import geotrellis.vector._ 6 | 7 | case class AugmentedDiff(sequence: Int, 8 | `type`: Byte, 9 | id: Long, 10 | prevGeom: Option[Geometry], 11 | geom: Geometry, 12 | prevTags: Option[Map[String, String]], 13 | tags: Map[String, String], 14 | prevNds: Option[Seq[Long]], 15 | nds: Seq[Long], 16 | prevChangeset: Option[Long], 17 | changeset: Long, 18 | prevUid: Option[Long], 19 | uid: Long, 20 | prevUser: Option[String], 21 | user: String, 22 | prevUpdated: Option[Timestamp], 23 | updated: Timestamp, 24 | prevVisible: Option[Boolean], 25 | visible: Boolean, 26 | prevVersion: Option[Int], 27 | version: Int, 28 | minorVersion: Boolean) 29 | 30 | object AugmentedDiff { 31 | def apply(sequence: Int, 32 | prev: Option[Feature[Geometry, ElementWithSequence]], 33 | curr: Feature[Geometry, ElementWithSequence]): AugmentedDiff = { 34 | val `type` = Member.typeFromString(curr.data.`type`) 35 | val minorVersion = prev.map(_.data.version).getOrElse(Int.MinValue) == curr.data.version 36 | 37 | AugmentedDiff( 38 | sequence, 39 | `type`, 40 | curr.data.id, 41 | prev.map(_.geom), 42 | curr.geom, 43 | prev.map(_.data.tags), 44 | curr.data.tags, 45 | prev.map(_.data.nds), 46 | curr.data.nds, 47 | prev.map(_.data.changeset), 48 | curr.data.changeset, 49 | prev.map(_.data.uid), 50 | curr.data.uid, 51 | prev.map(_.data.user), 52 | curr.data.user, 53 | prev.map(_.data.timestamp), 54 | curr.data.timestamp, 55 | prev.map(_.data.visible.getOrElse(true)), 56 | curr.data.visible.getOrElse(true), 57 | prev.map(_.data.version), 58 | curr.data.version, 59 | minorVersion 
60 | ) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/model/Change.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.model 2 | 3 | import java.sql.Timestamp 4 | 5 | import org.joda.time.DateTime 6 | import org.xml.sax 7 | import org.xml.sax.helpers.DefaultHandler 8 | 9 | import scala.collection.mutable 10 | import scala.collection.mutable.ListBuffer 11 | 12 | // TODO at some point user metadata (changeset, uid, user, timestamp?) should become options, as they may not be 13 | // available 14 | case class Change(id: Long, 15 | `type`: String, 16 | tags: Map[String, String], 17 | lat: Option[Double], 18 | lon: Option[Double], 19 | nds: Option[Seq[Nd]], 20 | members: Option[Seq[Member]], 21 | changeset: Long, 22 | timestamp: Timestamp, 23 | uid: Long, 24 | user: String, 25 | version: Long, 26 | visible: Boolean, 27 | sequence: Int) 28 | 29 | object Change { 30 | implicit def stringToTimestamp(s: String): Timestamp = 31 | Timestamp.from(DateTime.parse(s).toDate.toInstant) 32 | 33 | class ChangeHandler(sequence: Int) extends DefaultHandler { 34 | final val ActionLabels: Set[String] = Set("create", "delete", "modify") 35 | final val ElementLabels: Set[String] = Set("node", "way", "relation") 36 | 37 | private val changeSeq: ListBuffer[Change] = ListBuffer.empty 38 | private val tags: mutable.Map[String, String] = mutable.Map.empty 39 | private val nds: ListBuffer[Nd] = ListBuffer.empty 40 | private val members: ListBuffer[Member] = ListBuffer.empty 41 | private var action: Actions.Action = _ 42 | private var attrs: Map[String, String] = _ 43 | 44 | def changes: Seq[Change] = changeSeq 45 | 46 | override def startElement(uri: String, 47 | localName: String, 48 | qName: String, 49 | attributes: sax.Attributes): Unit = { 50 | val attrs = 51 | (for { 52 | i <- Range(0, attributes.getLength) 53 | } yield attributes.getQName(i) -> attributes.getValue(i)).toMap 54 | 55 | qName.toLowerCase match { 56 | case label if ActionLabels.contains(label) => 57 | action = Actions.fromString(qName) 58 | 59 | case label if ElementLabels.contains(label) => 60 | reset() 61 | 62 | this.attrs = attrs 63 | 64 | case "tag" => 65 | tags.update(attrs("k"), attrs("v")) 66 | 67 | case "nd" => 68 | nds.append(Nd(attrs("ref").toLong)) 69 | 70 | case "member" => 71 | members.append( 72 | Member(Member.typeFromString(attrs("type")), attrs("ref").toLong, attrs("role"))) 73 | 74 | case _ => () // no-op 75 | } 76 | } 77 | 78 | def reset(): Unit = { 79 | tags.clear() 80 | nds.clear() 81 | members.clear() 82 | } 83 | 84 | override def endElement(uri: String, localName: String, qName: String): Unit = { 85 | if (ElementLabels.contains(qName.toLowerCase)) { 86 | changeSeq.append( 87 | Change( 88 | attrs("id").toLong, 89 | qName, 90 | tags.toMap, 91 | attrs.get("lat").map(_.toDouble), 92 | attrs.get("lon").map(_.toDouble), 93 | Option(nds).filter(_.nonEmpty), 94 | Option(members).filter(_.nonEmpty).map(_.toSeq), 95 | attrs.get("changeset").map(_.toLong).getOrElse(-1L), 96 | stringToTimestamp(attrs.getOrElse("timestamp", "1970-01-01T00:00:00Z")), 97 | attrs.get("uid").map(_.toLong).getOrElse(-1L), 98 | attrs.getOrElse("user", ""), 99 | attrs.get("version").map(_.toLong).getOrElse(-1L), 100 | action != Actions.Delete, 101 | sequence 102 | )) 103 | } 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- 
/src/main/scala/vectorpipe/model/Changeset.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.model 2 | 3 | import java.sql.Timestamp 4 | 5 | import org.joda.time.DateTime 6 | 7 | import scala.util.Try 8 | 9 | case class Changeset(id: Long, 10 | tags: Map[String, String], 11 | createdAt: Timestamp, 12 | open: Boolean, 13 | closedAt: Option[Timestamp], 14 | commentsCount: Int, 15 | minLat: Option[Double], 16 | maxLat: Option[Double], 17 | minLon: Option[Double], 18 | maxLon: Option[Double], 19 | numChanges: Int, 20 | uid: Long, 21 | user: String, 22 | comments: Seq[ChangesetComment], 23 | sequence: Int) 24 | 25 | object Changeset { 26 | implicit def stringToTimestamp(s: String): Timestamp = 27 | Timestamp.from(DateTime.parse(s).toDate.toInstant) 28 | 29 | implicit def stringToOptionalTimestamp(s: String): Option[Timestamp] = 30 | s match { 31 | case "" => None 32 | case ts => Some(ts) 33 | } 34 | 35 | implicit def stringToOptionalDouble(s: String): Option[Double] = 36 | s match { 37 | case "" => None 38 | case c => Some(c.toDouble) 39 | } 40 | 41 | def fromXML(node: scala.xml.Node, sequence: Int): Changeset = { 42 | val id = (node \@ "id").toLong 43 | // Old changesets lack the appropriate field 44 | val commentsCount = Try((node \@ "comments_count").toInt).toOption.getOrElse(0) 45 | val uid = (node \@ "uid").toLong 46 | val user = node \@ "user" 47 | val numChanges = Try((node \@ "num_changes").toInt).toOption.getOrElse(0) 48 | val open = (node \@ "open").toBoolean 49 | val closedAt = node \@ "closed_at" 50 | val createdAt = node \@ "created_at" 51 | 52 | val maxLon = node \@ "max_lon" 53 | val minLon = node \@ "min_lon" 54 | val maxLat = node \@ "max_lat" 55 | val minLat = node \@ "min_lat" 56 | val tags = 57 | (node \ "tag").map(tag => (tag \@ "k", tag \@ "v")).toMap 58 | val comments = (node \ "discussion" \ "comment").map(ChangesetComment.fromXML) 59 | 60 | Changeset( 61 | id, 62 | tags, 63 | createdAt, 64 | open, 65 | closedAt, 66 | commentsCount, 67 | minLat, 68 | maxLat, 69 | minLon, 70 | maxLon, 71 | numChanges, 72 | uid, 73 | user, 74 | comments, 75 | sequence 76 | ) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/model/ChangesetComment.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.model 2 | 3 | import java.sql.Timestamp 4 | 5 | import org.joda.time.DateTime 6 | 7 | case class ChangesetComment(date: Timestamp, user: String, uid: Long, body: String) 8 | 9 | object ChangesetComment { 10 | implicit def stringToTimestamp(s: String): Timestamp = 11 | Timestamp.from(DateTime.parse(s).toDate.toInstant) 12 | 13 | def fromXML(node: scala.xml.Node): ChangesetComment = { 14 | val date = node \@ "date" 15 | val user = node \@ "user" 16 | val uid = (node \@ "uid").toLong 17 | val body = (node \ "text").text 18 | 19 | ChangesetComment(date, user, uid, body) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/model/ElementWithSequence.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.model 2 | 3 | import vectorpipe.model 4 | 5 | import org.joda.time.format.ISODateTimeFormat 6 | 7 | import io.circe._ 8 | import cats.syntax.either._ 9 | 10 | import java.sql.Timestamp 11 | 12 | 13 | // TODO is this an AugmentedDiff or an OSM Element w/ a sequence property? 
14 | // an AugmentedDiff may be (Option[Element with Sequence], Element with Sequence) 15 | case class ElementWithSequence(id: Long, 16 | `type`: String, 17 | tags: Map[String, String], 18 | nds: Seq[Long], 19 | changeset: Long, 20 | timestamp: Timestamp, 21 | uid: Long, 22 | user: String, 23 | version: Int, 24 | visible: Option[Boolean], 25 | sequence: Option[Long]) { 26 | // TODO extract this; it's used in MakeTiles and elsewhere 27 | val elementId: String = `type` match { 28 | case "node" => s"n$id" 29 | case "way" => s"w$id" 30 | case "relation" => s"r$id" 31 | case _ => id.toString 32 | } 33 | } 34 | 35 | object ElementWithSequence { 36 | implicit val decodeFoo: Decoder[ElementWithSequence] = new Decoder[ElementWithSequence] { 37 | final def apply(c: HCursor): Decoder.Result[ElementWithSequence] = 38 | for { 39 | id <- c.downField("id").as[Long] 40 | `type` <- c.downField("type").as[String] 41 | tags <- c.downField("tags").as[Map[String, String]] 42 | nds <- c.downField("nds").as[Option[Seq[Long]]] 43 | changeset <- c.downField("changeset").as[Long] 44 | timestampS <- c.downField("timestamp").as[String] 45 | uid <- c.downField("uid").as[Long] 46 | user <- c.downField("user").as[String] 47 | version <- c.downField("version").as[Int] 48 | visible <- c.downField("visible").as[Option[Boolean]] 49 | sequence <- c.downField("augmentedDiff").as[Option[Long]] 50 | } yield { 51 | val timestamp = 52 | Timestamp.from( 53 | ISODateTimeFormat 54 | .dateTimeParser() 55 | .parseDateTime(timestampS) 56 | .toDate 57 | .toInstant 58 | ) 59 | model.ElementWithSequence( 60 | id, 61 | `type`, 62 | tags, 63 | nds.getOrElse(Seq.empty[Long]), 64 | changeset, 65 | timestamp, 66 | uid, 67 | user, 68 | version, 69 | visible, 70 | sequence 71 | ) 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/model/Member.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.model 2 | 3 | import vectorpipe.internal.{NodeType, RelationType, WayType} 4 | 5 | import scala.xml.Node 6 | 7 | case class Member(`type`: Byte, ref: Long, role: String) 8 | 9 | object Member { 10 | def typeFromString(str: String): Byte = str match { 11 | case "node" => NodeType 12 | case "way" => WayType 13 | case "relation" => RelationType 14 | case _ => null.asInstanceOf[Byte] 15 | } 16 | 17 | def stringFromByte(b: Byte): String = b match { 18 | case NodeType => "node" 19 | case WayType => "way" 20 | case RelationType => "relation" 21 | } 22 | 23 | def fromXML(node: Node): Member = { 24 | val `type` = typeFromString(node \@ "type") 25 | val ref = (node \@ "ref").toLong 26 | val role = node \@ "role" 27 | 28 | Member(`type`, ref, role) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/model/Nd.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.model 2 | import scala.xml.Node 3 | 4 | case class Nd(ref: Long) 5 | 6 | object Nd { 7 | def fromXML(node: Node): Nd = 8 | Nd((node \@ "ref").toLong) 9 | } 10 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/relations/MultiPolygons.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.relations 2 | import java.sql.Timestamp 3 | 4 | import org.locationtech.jts.geom.prep.PreparedGeometryFactory 5 | import 
org.locationtech.jts.geom.{Geometry, LineString, Polygon, TopologyException} 6 | import org.apache.log4j.Logger 7 | import vectorpipe.internal.WayType 8 | 9 | object MultiPolygons { 10 | @transient private lazy val logger = Logger.getLogger(getClass) 11 | val prepGeomFactory = new PreparedGeometryFactory 12 | 13 | def build(id: Long, 14 | version: Int, 15 | timestamp: Timestamp, 16 | types: Seq[Byte], 17 | roles: Seq[String], 18 | _geoms: Seq[Geometry]): Option[Geometry] = { 19 | if (types.zip(_geoms).exists { case (t, g) => t == WayType && Option(g).isEmpty }) { 20 | // bail early if null values are present where they should exist (members w/ type=way) 21 | logger.debug(s"Incomplete relation: $id @ $version ($timestamp)") 22 | None 23 | } else if (types.isEmpty) { 24 | // empty relation 25 | None 26 | } else { 27 | val geomCount = _geoms.map(Option(_)).count(_.isDefined) 28 | 29 | logger.debug(s"$id @ $version ($timestamp) ${geomCount.formatted("%,d")} geoms") 30 | val geoms = _geoms.map { 31 | case geom: Polygon => Some(geom.getExteriorRing) 32 | case geom: LineString => Some(geom) 33 | case _ => None 34 | } 35 | 36 | val vertexCount = geoms.filter(_.isDefined).map(_.get).map(_.getNumPoints).sum 37 | logger.warn(s"${vertexCount.formatted("%,d")} vertices (${geomCount 38 | .formatted("%,d")} geoms) from ${types.size} members in $id @ $version ($timestamp)") 39 | 40 | val members: Seq[(String, LineString)] = roles 41 | .zip(geoms) 42 | .filter(_._2.isDefined) 43 | .map(x => (x._1, x._2.get)) 44 | 45 | val (complete, partial) = 46 | members.foldLeft((Vector.empty[Polygon], Vector.empty[LineString])) { 47 | case ((c, p), (role, line: LineString)) => 48 | role match { 49 | case "outer" if line.isClosed && line.getNumPoints >= 4 => 50 | (c :+ geometryFactory.createPolygon(line.getCoordinates), p) 51 | case "outer" => 52 | (c, p :+ line) 53 | case "inner" if line.isClosed && line.getNumPoints >= 4 => 54 | (c :+ geometryFactory.createPolygon(line.getCoordinates), p) 55 | case "inner" => (c, p :+ line) 56 | case "" if line.isClosed && line.getNumPoints >= 4 => 57 | (c :+ geometryFactory.createPolygon(line.getCoordinates), p) 58 | case "" => 59 | (c, p :+ line) 60 | case _ => 61 | (c, p) 62 | } 63 | } 64 | 65 | try { 66 | val rings = complete ++ formRings(partial.sortWith(_.getNumPoints > _.getNumPoints)) 67 | val preparedRings = rings.map(prepGeomFactory.create) 68 | 69 | // reclassify rings according to their topology (ignoring roles) 70 | val (classifiedOuters, classifiedInners) = rings.sortWith(_.getArea > _.getArea) match { 71 | case Seq(h, t @ _*) => 72 | t.foldLeft((Array(h), Array.empty[Polygon])) { 73 | case ((os, is), ring) => 74 | // check the number of containing elements 75 | preparedRings.count(r => r.getGeometry != ring && r.contains(ring)) % 2 match { 76 | // if even, it's an outer ring 77 | case 0 => (os :+ ring, is) 78 | // if odd, it's an inner ring 79 | case 1 => (os, is :+ ring) 80 | } 81 | } 82 | case rs if rs.isEmpty => (Array.empty[Polygon], Array.empty[Polygon]) 83 | } 84 | 85 | val (dissolvedOuters, addlInners) = 86 | dissolveRings(classifiedOuters) 87 | val (dissolvedInners, addlOuters) = 88 | dissolveRings( 89 | classifiedInners 90 | .map(_.getExteriorRing.getCoordinates) 91 | .map(geometryFactory.createPolygon) ++ addlInners) 92 | 93 | val (polygons, _) = 94 | (dissolvedOuters ++ addlOuters) 95 | // sort by size (descending) to use rings as part of the largest available polygon 96 | .sortWith(_.getArea > _.getArea) 97 | // only use inners once if they're contained by 
multiple outer rings 98 | .foldLeft((Vector.empty[Polygon], dissolvedInners)) { 99 | case ((ps, is), outer) => 100 | val preparedOuter = prepGeomFactory.create(outer) 101 | (ps :+ geometryFactory.createPolygon( 102 | geometryFactory.createLinearRing(outer.getExteriorRing.getCoordinates), 103 | is.filter(inner => preparedOuter.contains(inner)) 104 | .map({ x => geometryFactory.createLinearRing(x.getExteriorRing.getCoordinates) 105 | }) 106 | .toArray 107 | ), 108 | is.filterNot(inner => preparedOuter.contains(inner))) 109 | } 110 | 111 | polygons match { 112 | case v @ Vector(p: Polygon) if v.length == 1 => Some(p) 113 | case ps => Some(geometryFactory.createMultiPolygon(ps.toArray)) 114 | } 115 | } catch { 116 | case e @ (_: AssemblyException | _: IllegalArgumentException | _: TopologyException) => 117 | logger.warn( 118 | s"Could not reconstruct relation $id @ $version ($timestamp): ${e.getMessage}") 119 | None 120 | case e: Throwable => 121 | logger.warn(s"Could not reconstruct relation $id @ $version ($timestamp): $e") 122 | e.getStackTrace.foreach(logger.warn) 123 | None 124 | } 125 | } 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/relations/Routes.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.relations 2 | import java.sql.Timestamp 3 | 4 | import geotrellis.vector._ 5 | import org.locationtech.jts.geom.TopologyException 6 | import org.apache.log4j.Logger 7 | import vectorpipe.internal.WayType 8 | 9 | object Routes { 10 | @transient private lazy val logger = Logger.getLogger(getClass) 11 | 12 | def build(id: Long, 13 | version: Int, 14 | timestamp: Timestamp, 15 | types: Seq[Byte], 16 | roles: Seq[String], 17 | geoms: Seq[Geometry]): Option[Seq[(String, Geometry)]] = { 18 | if (types.zip(geoms).exists { case (t, g) => t == WayType && Option(g).isEmpty }) { 19 | // bail early if null values are present where they should exist (members w/ type=way) 20 | logger.debug(s"Incomplete relation: $id @ $version ($timestamp)") 21 | None 22 | } else if (types.isEmpty) { 23 | // empty relation 24 | None 25 | } else { 26 | 27 | try { 28 | val res = roles 29 | .zip(geoms.map(Option.apply)) 30 | .filter(_._2.isDefined) 31 | .map(x => (x._1, x._2.get)) 32 | .groupBy { 33 | case (role, _) => role 34 | } 35 | .mapValues(_.map(_._2)) 36 | .mapValues(connectSegments) 37 | .map { 38 | case (role, lines) => 39 | lines match { 40 | case Seq(line) => (role, line) 41 | case _ => (role, geometryFactory.createMultiLineString(lines.toArray)) 42 | } 43 | } 44 | .toSeq 45 | 46 | Some(res) 47 | } catch { 48 | case e @ (_: AssemblyException | _: IllegalArgumentException | _: TopologyException) => 49 | logger.warn( 50 | s"Could not reconstruct route relation $id @ $version ($timestamp): ${e.getMessage}") 51 | None 52 | case e: Throwable => 53 | logger.warn(s"Could not reconstruct route relation $id @ $version ($timestamp): $e") 54 | e.getStackTrace.foreach(logger.warn) 55 | None 56 | } 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/relations/package.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe 2 | 3 | import org.locationtech.jts.geom._ 4 | import vectorpipe.relations.utils.{ 5 | PartialCoordinateSequence, 6 | ReversedCoordinateSequence, 7 | VirtualCoordinateSequence, 8 | isEqual 9 | } 10 | 11 | import 
scala.annotation.tailrec 12 | import scala.collection.GenTraversable 13 | 14 | package object relations { 15 | 16 | // join segments together 17 | @tailrec 18 | def connectSegments(segments: GenTraversable[VirtualCoordinateSequence], 19 | lines: Seq[CoordinateSequence] = Vector.empty[CoordinateSequence]) 20 | : GenTraversable[CoordinateSequence] = { 21 | segments match { 22 | case Nil => 23 | lines 24 | case Seq(h, t @ _*) => 25 | val x = h.getX(h.size - 1) 26 | val y = h.getY(h.size - 1) 27 | 28 | t.find(line => x == line.getX(0) && y == line.getY(0)) match { 29 | case Some(next) => 30 | connectSegments(h.append(new PartialCoordinateSequence(next, 1)) +: t.filterNot(line => 31 | isEqual(line, next)), 32 | lines) 33 | case None => 34 | t.find(line => x == line.getX(line.size - 1) && y == line.getY(line.size - 1)) match { 35 | case Some(next) => 36 | connectSegments(h.append( 37 | new PartialCoordinateSequence( 38 | new ReversedCoordinateSequence(next), 39 | 1)) +: t.filterNot(line => isEqual(line, next)), 40 | lines) 41 | case None => connectSegments(t, lines :+ h) 42 | } 43 | } 44 | } 45 | } 46 | 47 | def connectSegments(segments: GenTraversable[Geometry])( 48 | implicit geometryFactory: GeometryFactory): GenTraversable[LineString] = 49 | connectSegments( 50 | segments 51 | .flatMap { 52 | case geom: LineString => Some(geom.getCoordinateSequence) 53 | case _ => None 54 | } 55 | .map(s => new VirtualCoordinateSequence(Seq(s))) 56 | ).map(geometryFactory.createLineString) 57 | 58 | // since GeoTrellis's GeometryFactory is unavailable 59 | implicit val geometryFactory: GeometryFactory = new GeometryFactory() 60 | 61 | // join segments together into rings 62 | @tailrec 63 | def formRings(segments: GenTraversable[VirtualCoordinateSequence], 64 | rings: Seq[CoordinateSequence] = Vector.empty[CoordinateSequence]) 65 | : GenTraversable[CoordinateSequence] = { 66 | segments match { 67 | case Nil => 68 | rings 69 | case Seq(h, t @ _*) if h.getX(0) == h.getX(h.size - 1) && h.getY(0) == h.getY(h.size - 1) => 70 | formRings(t, rings :+ h) 71 | case Seq(h, t @ _*) => 72 | val x = h.getX(h.size - 1) 73 | val y = h.getY(h.size - 1) 74 | 75 | formRings( 76 | t.find(line => x == line.getX(0) && y == line.getY(0)) match { 77 | case Some(next) => 78 | h.append(new PartialCoordinateSequence(next, 1)) +: t.filterNot(line => 79 | isEqual(line, next)) 80 | case None => 81 | t.find(line => x == line.getX(line.size - 1) && y == line.getY(line.size - 1)) match { 82 | case Some(next) => 83 | h.append(new PartialCoordinateSequence(new ReversedCoordinateSequence(next), 1)) +: t 84 | .filterNot(line => isEqual(line, next)) 85 | case None => throw new AssemblyException("Unable to connect segments.") 86 | } 87 | }, 88 | rings 89 | ) 90 | } 91 | } 92 | 93 | def formRings(segments: GenTraversable[LineString])( 94 | implicit geometryFactory: GeometryFactory): GenTraversable[Polygon] = { 95 | val csf = geometryFactory.getCoordinateSequenceFactory 96 | formRings(segments.map(_.getCoordinateSequence).map(s => new VirtualCoordinateSequence(Seq(s)))) 97 | .map(csf.create(_)) 98 | .map(geometryFactory.createPolygon) 99 | } 100 | 101 | def dissolveRings(rings: Array[Polygon]): (Seq[Polygon], Seq[Polygon]) = { 102 | Option(geometryFactory.createGeometryCollection(rings.asInstanceOf[Array[Geometry]]).union) match { 103 | case Some(mp) => 104 | val polygons = for (i <- 0 until mp.getNumGeometries) yield { 105 | mp.getGeometryN(i).asInstanceOf[Polygon] 106 | } 107 | 108 | 
(polygons.map(_.getExteriorRing.getCoordinates).map(geometryFactory.createPolygon), 109 | polygons.flatMap(getInteriorRings).map(geometryFactory.createPolygon)) 110 | case None => 111 | (Vector.empty[Polygon], Vector.empty[Polygon]) 112 | } 113 | } 114 | 115 | def getInteriorRings(p: Polygon): Seq[LinearRing] = 116 | for (i <- 0 until p.getNumInteriorRing) 117 | yield geometryFactory.createLinearRing(p.getInteriorRingN(i).getCoordinates) 118 | 119 | class AssemblyException(msg: String) extends Exception(msg) 120 | } 121 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/relations/utils/PartialCoordinateSequence.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.relations.utils 2 | import org.locationtech.jts.geom.{Coordinate, CoordinateSequence, Envelope} 3 | 4 | class PartialCoordinateSequence(sequence: CoordinateSequence, offset: Int) 5 | extends CoordinateSequence { 6 | private lazy val _size: Int = sequence.size() - offset 7 | 8 | private lazy val coordinates: Array[Coordinate] = { 9 | val coords = new Array[Coordinate](size()) 10 | 11 | for (i <- 0 until size) { 12 | coords(i) = getCoordinate(i) 13 | } 14 | 15 | coords 16 | } 17 | 18 | override def getDimension: Int = sequence.getDimension 19 | 20 | override def getCoordinate(i: Int): Coordinate = sequence.getCoordinate(offset + i) 21 | 22 | override def getCoordinateCopy(i: Int): Coordinate = sequence.getCoordinateCopy(offset + i) 23 | 24 | override def getCoordinate(index: Int, coord: Coordinate): Unit = 25 | sequence.getCoordinate(offset + index, coord) 26 | 27 | override def getOrdinate(index: Int, ordinateIndex: Int): Double = 28 | sequence.getOrdinate(offset + index, ordinateIndex) 29 | 30 | override def setOrdinate(index: Int, ordinateIndex: Int, value: Double): Unit = 31 | sequence.setOrdinate(offset + index, ordinateIndex, value) 32 | 33 | override def toCoordinateArray: Array[Coordinate] = coordinates 34 | 35 | override def expandEnvelope(env: Envelope): Envelope = { 36 | for (i <- 0 until size) { 37 | env.expandToInclude(getX(i), getY(i)) 38 | } 39 | 40 | env 41 | } 42 | 43 | override def getX(index: Int): Double = sequence.getX(offset + index) 44 | 45 | override def getY(index: Int): Double = sequence.getY(offset + index) 46 | 47 | override def size(): Int = _size 48 | 49 | override def clone(): AnyRef = new PartialCoordinateSequence(sequence, offset) 50 | 51 | override def copy(): PartialCoordinateSequence = new PartialCoordinateSequence(sequence.copy, offset) 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/relations/utils/ReversedCoordinateSequence.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.relations.utils 2 | import org.locationtech.jts.geom.{Coordinate, CoordinateSequence, Envelope} 3 | 4 | class ReversedCoordinateSequence(sequence: CoordinateSequence) extends CoordinateSequence { 5 | private lazy val coordinates: Array[Coordinate] = { 6 | val coords = new Array[Coordinate](size()) 7 | 8 | for (i <- size - 1 to 0 by -1) { 9 | coords(i) = getCoordinate(i) 10 | } 11 | 12 | coords 13 | } 14 | 15 | override def getDimension: Int = sequence.getDimension 16 | 17 | override def getCoordinate(i: Int): Coordinate = sequence.getCoordinate(getIndex(i)) 18 | 19 | override def getCoordinateCopy(i: Int): Coordinate = sequence.getCoordinateCopy(getIndex(i)) 20 | 21 | override def 
getCoordinate(index: Int, coord: Coordinate): Unit = 22 | sequence.getCoordinate(getIndex(index), coord) 23 | 24 | private def getIndex(i: Int): Int = size - 1 - i 25 | 26 | override def size(): Int = sequence.size 27 | 28 | override def getX(index: Int): Double = sequence.getX(getIndex(index)) 29 | 30 | override def getY(index: Int): Double = sequence.getY(getIndex(index)) 31 | 32 | override def getOrdinate(index: Int, ordinateIndex: Int): Double = 33 | sequence.getOrdinate(getIndex(index), ordinateIndex) 34 | 35 | override def setOrdinate(index: Int, ordinateIndex: Int, value: Double): Unit = 36 | sequence.setOrdinate(getIndex(index), ordinateIndex, value) 37 | 38 | override def toCoordinateArray: Array[Coordinate] = coordinates 39 | 40 | override def expandEnvelope(env: Envelope): Envelope = sequence.expandEnvelope(env) 41 | 42 | override def clone(): AnyRef = new ReversedCoordinateSequence(sequence) 43 | 44 | override def copy(): ReversedCoordinateSequence = new ReversedCoordinateSequence(sequence.copy) 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/relations/utils/VirtualCoordinateSequence.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.relations.utils 2 | import com.google.common.collect.{Range, RangeMap, TreeRangeMap} 3 | import org.locationtech.jts.geom.{Coordinate, CoordinateSequence, Envelope} 4 | 5 | // rather than being a nested set of CoordinateSequences, this is a mutable wrapper to avoid deep call stacks 6 | class VirtualCoordinateSequence(sequences: Seq[CoordinateSequence]) extends CoordinateSequence { 7 | // TODO this should be invalidated after append (but it doesn't actually matter because all of the appending will 8 | // occur ahead of time) 9 | private lazy val coordinates: Array[Coordinate] = { 10 | val coords = new Array[Coordinate](size()) 11 | 12 | for (i <- 0 until size) { 13 | coords(i) = getCoordinate(i) 14 | } 15 | 16 | coords 17 | } 18 | 19 | private val rangeMap: RangeMap[Integer, CoordinateSequence] = { 20 | val rm = TreeRangeMap.create[Integer, CoordinateSequence] 21 | 22 | sequences 23 | .zip(sequences.map(_.size).scanLeft(0)(_ + _).dropRight(1)) 24 | .map { 25 | case (seq, offset) => (seq, Range.closed(offset: Integer, offset + seq.size - 1: Integer)) 26 | } 27 | .foreach { case (seq, range) => rm.put(range, seq) } 28 | 29 | rm 30 | } 31 | 32 | private var dimension: Int = sequences.map(_.getDimension).min 33 | 34 | private var _size: Int = sequences.map(_.size).sum 35 | 36 | def append(sequence: CoordinateSequence): VirtualCoordinateSequence = { 37 | val upperEndpoint = rangeMap.span.upperEndpoint 38 | val range = Range.closed(upperEndpoint + 1: Integer, upperEndpoint + sequence.size: Integer) 39 | rangeMap.put(range, sequence) 40 | 41 | dimension = Math.min(dimension, sequence.getDimension) 42 | _size += sequence.size 43 | 44 | this 45 | } 46 | 47 | override def getDimension: Int = dimension 48 | 49 | override def getCoordinate(i: Int): Coordinate = { 50 | val (sequence, index) = getSequence(i) 51 | 52 | // bypass PackedCoordinateSequence.getCoordinate to prevent caching and associated allocation 53 | new Coordinate(sequence.getX(index), sequence.getY(index)) 54 | } 55 | 56 | private def getSequence(i: Int): (CoordinateSequence, Int) = { 57 | val entry = rangeMap.getEntry(i: Integer) 58 | 59 | (entry.getValue, i - entry.getKey.lowerEndpoint) 60 | } 61 | 62 | override def getCoordinateCopy(i: Int): Coordinate = { 63 | 
val (sequence, index) = getSequence(i) 64 | 65 | sequence.getCoordinateCopy(index) 66 | } 67 | 68 | override def getCoordinate(i: Int, coord: Coordinate): Unit = { 69 | val (sequence, index) = getSequence(i) 70 | 71 | sequence.getCoordinate(index, coord) 72 | } 73 | 74 | override def getOrdinate(i: Int, ordinateIndex: Int): Double = { 75 | val (sequence, index) = getSequence(i) 76 | 77 | sequence.getOrdinate(index, ordinateIndex) 78 | } 79 | 80 | override def setOrdinate(i: Int, ordinateIndex: Int, value: Double): Unit = { 81 | val (sequence, index) = getSequence(i) 82 | 83 | sequence.setOrdinate(index, ordinateIndex, value) 84 | } 85 | 86 | override def toCoordinateArray: Array[Coordinate] = coordinates 87 | 88 | override def expandEnvelope(env: Envelope): Envelope = { 89 | for (i <- 0 until size) { 90 | env.expandToInclude(getX(i), getY(i)) 91 | } 92 | 93 | env 94 | } 95 | 96 | override def getX(i: Int): Double = { 97 | val (sequence, index) = getSequence(i) 98 | 99 | sequence.getX(index) 100 | } 101 | 102 | override def getY(i: Int): Double = { 103 | val (sequence, index) = getSequence(i) 104 | 105 | sequence.getY(index) 106 | } 107 | 108 | override def size(): Int = _size 109 | 110 | override def clone(): AnyRef = { 111 | // we're already playing fast and loose 112 | this 113 | } 114 | 115 | override def copy(): VirtualCoordinateSequence = new VirtualCoordinateSequence(sequences.map(_.copy)) 116 | } 117 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/relations/utils/package.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.relations 2 | 3 | import org.locationtech.jts.geom.CoordinateSequence 4 | 5 | package object utils { 6 | 7 | /** 8 | * Tests whether two {@link CoordinateSequence}s are equal. 9 | * To be equal, the sequences must be the same length. 10 | * They do not need to be of the same dimension, 11 | * but the ordinate values for the smallest dimension of the two 12 | * must be equal. 13 | * Two NaN ordinates values are considered to be equal. 
14 | * 15 | * Ported to Scala from JTS 1.15.0 16 | * 17 | * @param cs1 a CoordinateSequence 18 | * @param cs2 a CoordinateSequence 19 | * @return true if the sequences are equal in the common dimensions 20 | */ 21 | def isEqual(cs1: CoordinateSequence, cs2: CoordinateSequence): Boolean = { 22 | if (cs1.size != cs2.size) { 23 | false 24 | } else { 25 | val dim = Math.min(cs1.getDimension, cs2.getDimension) 26 | (0 until cs1.size).forall(i => { 27 | (0 until dim).forall(d => { 28 | val v1 = cs1.getOrdinate(i, d) 29 | val v2 = cs2.getOrdinate(i, d) 30 | 31 | v1 == v2 || (v1.isNaN && v2.isNaN) 32 | }) 33 | }) 34 | } 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/AugmentedDiffMicroBatchReader.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import java.net.URI 4 | import java.util 5 | 6 | import org.apache.spark.internal.Logging 7 | import org.apache.spark.sql.catalyst.InternalRow 8 | import org.apache.spark.sql.sources.v2.DataSourceOptions 9 | import org.apache.spark.sql.sources.v2.reader.{InputPartition, InputPartitionReader} 10 | import vectorpipe.model.AugmentedDiff 11 | 12 | import scala.collection.JavaConverters._ 13 | import scala.compat.java8.OptionConverters._ 14 | 15 | case class AugmentedDiffStreamBatchTask(baseURI: URI, sequences: Seq[Int], handler: (Int, AugmentedDiffSource.RF) => Unit) 16 | extends InputPartition[InternalRow] { 17 | override def createPartitionReader(): InputPartitionReader[InternalRow] = 18 | AugmentedDiffStreamBatchReader(baseURI, sequences, handler) 19 | } 20 | 21 | case class AugmentedDiffStreamBatchReader(baseURI: URI, sequences: Seq[Int], handler: (Int, AugmentedDiffSource.RF) => Unit) 22 | extends ReplicationStreamBatchReader[AugmentedDiff](baseURI, sequences) { 23 | 24 | override def getSequence(baseURI: URI, sequence: Int): Seq[AugmentedDiff] = 25 | AugmentedDiffSource.getSequence(baseURI, sequence, handler) 26 | } 27 | 28 | case class AugmentedDiffMicroBatchReader(options: DataSourceOptions, checkpointLocation: String) 29 | extends ReplicationStreamMicroBatchReader[AugmentedDiff](options, checkpointLocation) 30 | with Logging { 31 | 32 | override def getCurrentSequence: Option[Int] = 33 | AugmentedDiffSource.getCurrentSequence(baseURI) 34 | 35 | private def baseURI: URI = 36 | options 37 | .get(Source.BaseURI) 38 | .asScala 39 | .map(new URI(_)) 40 | .getOrElse( 41 | throw new RuntimeException( 42 | s"${Source.BaseURI} is a required option for ${Source.AugmentedDiffs}" 43 | ) 44 | ) 45 | 46 | private def errorHandler: AugmentedDiffSourceErrorHandler = { 47 | val handlerClass = options 48 | .get(Source.ErrorHandler) 49 | .asScala 50 | .getOrElse("vectorpipe.sources.AugmentedDiffSourceErrorHandler") 51 | 52 | val handler = Class.forName(handlerClass).newInstance.asInstanceOf[AugmentedDiffSourceErrorHandler] 53 | handler.setOptions(options.asMap.asScala.toMap) 54 | handler 55 | } 56 | 57 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] = 58 | sequenceRange 59 | .map(seq => 60 | AugmentedDiffStreamBatchTask(baseURI, Seq(seq), errorHandler.handle).asInstanceOf[InputPartition[InternalRow]]) 61 | .asJava 62 | } 63 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/AugmentedDiffProvider.scala: -------------------------------------------------------------------------------- 1 | package 
vectorpipe.sources 2 | 3 | import java.util.Optional 4 | 5 | import org.apache.spark.sql.sources.DataSourceRegister 6 | import org.apache.spark.sql.sources.v2.reader.DataSourceReader 7 | import org.apache.spark.sql.sources.v2.reader.streaming.MicroBatchReader 8 | import org.apache.spark.sql.sources.v2.{ 9 | DataSourceOptions, 10 | DataSourceV2, 11 | MicroBatchReadSupport, 12 | ReadSupport 13 | } 14 | import org.apache.spark.sql.types.StructType 15 | 16 | class AugmentedDiffProvider 17 | extends DataSourceV2 18 | with ReadSupport 19 | with MicroBatchReadSupport 20 | with DataSourceRegister { 21 | override def createMicroBatchReader( 22 | schema: Optional[StructType], 23 | checkpointLocation: String, 24 | options: DataSourceOptions 25 | ): MicroBatchReader = { 26 | if (schema.isPresent) { 27 | throw new IllegalStateException( 28 | "The augmented diff source does not support a user-specified schema." 29 | ) 30 | } 31 | 32 | AugmentedDiffMicroBatchReader(options, checkpointLocation) 33 | } 34 | 35 | override def shortName(): String = Source.AugmentedDiffs 36 | override def createReader(options: DataSourceOptions): DataSourceReader = 37 | AugmentedDiffReader(options) 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/AugmentedDiffReader.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import java.net.URI 4 | import java.util 5 | 6 | import geotrellis.vector.Geometry 7 | import org.apache.spark.sql.catalyst.InternalRow 8 | import org.apache.spark.sql.sources.v2.DataSourceOptions 9 | import org.apache.spark.sql.sources.v2.reader.InputPartition 10 | import vectorpipe.model.{AugmentedDiff, ElementWithSequence} 11 | import vectorpipe.util.RobustFeature 12 | 13 | import scala.collection.JavaConverters._ 14 | import scala.compat.java8.OptionConverters._ 15 | import scala.util.Random 16 | 17 | case class AugmentedDiffReader(options: DataSourceOptions) 18 | extends ReplicationReader[AugmentedDiff](options) { 19 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] = { 20 | // prevent sequential diffs from being assigned to the same task 21 | val sequences = Random.shuffle((startSequence to endSequence).toList) 22 | 23 | sequences 24 | .grouped(Math.max(1, sequences.length / partitionCount)) 25 | .toList 26 | .map( 27 | AugmentedDiffStreamBatchTask(baseURI, _, errorHandler.handle) 28 | .asInstanceOf[InputPartition[InternalRow]] 29 | ) 30 | .asJava 31 | } 32 | 33 | private def baseURI: URI = 34 | options 35 | .get(Source.BaseURI) 36 | .asScala 37 | .map(new URI(_)) 38 | .getOrElse( 39 | throw new RuntimeException( 40 | s"${Source.BaseURI} is a required option for ${Source.AugmentedDiffs}" 41 | ) 42 | ) 43 | 44 | 45 | private def errorHandler: AugmentedDiffSourceErrorHandler = { 46 | val handlerClass = options 47 | .get(Source.ErrorHandler) 48 | .asScala 49 | .getOrElse("vectorpipe.sources.AugmentedDiffSourceErrorHandler") 50 | 51 | val handler = Class.forName(handlerClass).newInstance.asInstanceOf[AugmentedDiffSourceErrorHandler] 52 | handler.setOptions(options.asMap.asScala.toMap) 53 | handler 54 | } 55 | 56 | override def getCurrentSequence: Option[Int] = AugmentedDiffSource.getCurrentSequence(baseURI) 57 | } 58 | 59 | 60 | class AugmentedDiffSourceErrorHandler extends Serializable { 61 | def setOptions(options: Map[String, String]): Unit = () 62 | 63 | def handle(sequence: Int, feature: RobustFeature[Geometry, 
ElementWithSequence]): Unit = () 64 | } 65 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/AugmentedDiffSource.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import java.io.{BufferedInputStream, File} 4 | import java.net.URI 5 | import java.nio.charset.StandardCharsets 6 | import java.sql.Timestamp 7 | import java.time.Instant 8 | import java.util.zip.GZIPInputStream 9 | 10 | import geotrellis.store.s3._ 11 | import geotrellis.vector._ 12 | 13 | import vectorpipe.model.{AugmentedDiff, ElementWithSequence} 14 | import vectorpipe.util._ 15 | //import vectorpipe.util.RobustFeatureFormats._ 16 | 17 | import org.apache.commons.io.IOUtils 18 | import org.apache.spark.internal.Logging 19 | import org.apache.spark.sql.Column 20 | import org.apache.spark.sql.functions.{floor, from_unixtime, to_timestamp, unix_timestamp} 21 | 22 | import _root_.io.circe._ 23 | import _root_.io.circe.generic.auto._ 24 | import cats.implicits._ 25 | 26 | import software.amazon.awssdk.services.s3.model.{GetObjectRequest, NoSuchKeyException, S3Exception} 27 | import software.amazon.awssdk.services.s3.S3Client 28 | import com.softwaremill.macmemo.memoize 29 | import org.joda.time.DateTime 30 | 31 | import scala.concurrent.duration.{Duration, _} 32 | 33 | 34 | object AugmentedDiffSource extends Logging { 35 | type RF = RobustFeature[Geometry, ElementWithSequence] 36 | 37 | private lazy val s3: S3Client = S3ClientProducer.get() 38 | val Delay: Duration = 15.seconds 39 | 40 | private implicit val dateTimeDecoder: Decoder[DateTime] = 41 | Decoder.instance(a => a.as[String].map(DateTime.parse)) 42 | 43 | def getFeatures(baseURI: URI, sequence: Int): Seq[Map[String, RF]] = { 44 | val bucket = baseURI.getHost 45 | val prefix = new File(baseURI.getPath.drop(1)).toPath 46 | // left-pad sequence 47 | val s = f"$sequence%09d" 48 | val key = prefix.resolve(s"${s.slice(0, 3)}/${s.slice(3, 6)}/${s.slice(6, 9)}.json.gz").toString 49 | 50 | logDebug(s"Fetching sequence $sequence") 51 | 52 | val obj = s3.getObject( 53 | GetObjectRequest 54 | .builder 55 | .bucket(bucket) 56 | .key(key) 57 | .build 58 | ) 59 | 60 | val bis = new BufferedInputStream(obj) 61 | val gzis = new GZIPInputStream(bis) 62 | 63 | try { 64 | IOUtils 65 | .toString(gzis, StandardCharsets.UTF_8) 66 | .lines 67 | .map { line => 68 | // Spark doesn't like RS-delimited JSON; perhaps Spray doesn't either 69 | line 70 | .replace("\u001e", "") 71 | .parseGeoJson[JsonRobustFeatureCollectionMap] 72 | .getAll[RF] 73 | } 74 | .toSeq 75 | } finally { 76 | gzis.close() 77 | bis.close() 78 | } 79 | } 80 | 81 | /** 82 | * Fetch all augmented diffs from a sequence number. 83 | * 84 | * This function collects the data in an augmented diff sequence file into 85 | * vectorpipe.model.AugmentedDiff objects. These diff files are expected to be 86 | * stored on S3 in .json.gz files. This method provides the option to process errors 87 | * generated when the new geometry in the diff is faulty. If `waitUntilAvailable` is 88 | * set to true, the process will block, in 15 second increments, until the sequence 89 | * file is available. 
90 | */ 91 | def getSequence(baseURI: URI, sequence: Int, badGeometryHandler: (Int, RF) => Unit, waitUntilAvailable: Boolean): Seq[AugmentedDiff] = { 92 | logDebug(s"Fetching sequence $sequence") 93 | 94 | try { 95 | val robustFeatureMaps = getFeatures(baseURI, sequence) 96 | 97 | robustFeatureMaps.map{ m => 98 | if (m.contains("new") && !m("new").geom.isDefined) badGeometryHandler(sequence, m("new")) 99 | AugmentedDiff(sequence, m.get("old").map(_.toFeature), m("new").toFeature) 100 | } 101 | } catch { 102 | case e: S3Exception if e.isInstanceOf[NoSuchKeyException] || e.statusCode == 403 => 103 | logInfo(s"Encountered missing sequence (baseURI = ${baseURI}, sequence = ${sequence}), comparing with current for validity") 104 | getCurrentSequence(baseURI) match { 105 | case Some(s) if s > sequence => 106 | logInfo(s"$sequence is missing, continuing") 107 | Seq.empty[AugmentedDiff] 108 | case _ => 109 | if (waitUntilAvailable) { 110 | logInfo(s"$sequence is not yet available, sleeping.") 111 | Thread.sleep(Delay.toMillis) 112 | getSequence(baseURI, sequence, waitUntilAvailable) 113 | } else 114 | throw e 115 | } 116 | case t: Throwable => 117 | if (waitUntilAvailable) { 118 | logError(s"sequence $sequence caused an error", t) 119 | Thread.sleep(Delay.toMillis) 120 | getSequence(baseURI, sequence) 121 | } else 122 | throw t 123 | } 124 | } 125 | 126 | def getSequence(baseURI: URI, sequence: Int): Seq[AugmentedDiff] = 127 | getSequence(baseURI, sequence, {(_: Int, _: RF) => ()}, true) 128 | 129 | def getSequence(baseURI: URI, sequence: Int, waitUntilAvailable: Boolean): Seq[AugmentedDiff] = 130 | getSequence(baseURI, sequence, {(_: Int, _: RF) => ()}, waitUntilAvailable) 131 | 132 | def getSequence(baseURI: URI, sequence: Int, badGeometryHandler: (Int, RF) => Unit): Seq[AugmentedDiff] = 133 | getSequence(baseURI, sequence, badGeometryHandler, true) 134 | 135 | @memoize(maxSize = 1, expiresAfter = 30 seconds) 136 | def getCurrentSequence(baseURI: URI): Option[Int] = { 137 | val bucket = baseURI.getHost 138 | val prefix = new File(baseURI.getPath.drop(1)).toPath 139 | val key = prefix.resolve("state.yaml").toString 140 | 141 | try { 142 | val request = GetObjectRequest.builder() 143 | .bucket(bucket) 144 | .key(key) 145 | .build() 146 | val response = s3.getObjectAsBytes(request) 147 | 148 | val body = IOUtils 149 | .toString(response.asInputStream, StandardCharsets.UTF_8.toString) 150 | 151 | val state = yaml.parser 152 | .parse(body) 153 | .leftMap(err => err: Error) 154 | .flatMap(_.as[State]) 155 | .valueOr(throw _) 156 | 157 | logDebug(s"$baseURI state: ${state.sequence} @ ${state.last_run}") 158 | 159 | Some(state.sequence) 160 | } catch { 161 | case err: Throwable => 162 | logError("Error fetching / parsing changeset state.", err) 163 | 164 | None 165 | } 166 | } 167 | 168 | def timestampToSequence(timestamp: Timestamp): Int = 169 | ((timestamp.toInstant.getEpochSecond - 1347432900) / 60).toInt 170 | 171 | def timestampToSequence(timestamp: Column): Column = 172 | floor((unix_timestamp(timestamp) - 1347432900) / 60) 173 | 174 | def sequenceToTimestamp(sequence: Int): Timestamp = 175 | Timestamp.from(Instant.ofEpochSecond(sequence.toLong * 60 + 1347432900L)) 176 | 177 | def sequenceToTimestamp(sequence: Column): Column = 178 | to_timestamp(from_unixtime(sequence * 60 + 1347432900)) 179 | 180 | case class State(last_run: DateTime, sequence: Int) 181 | } 182 | -------------------------------------------------------------------------------- 
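AugmentedDiffSource can also be driven directly, without going through the Spark data source. The sketch below is a hypothetical example of fetching a single sequence and converting between timestamps and sequence numbers; the S3 base URI is a placeholder, and the handler merely logs faulty "new" geometries rather than silently ignoring them, which is the default behaviour of the two- and three-argument overloads.

// Hypothetical sketch: reading one augmented diff sequence outside of Spark.
import java.net.URI
import java.sql.Timestamp
import vectorpipe.sources.AugmentedDiffSource

object AugmentedDiffExample {
  def main(args: Array[String]): Unit = {
    val baseURI  = new URI("s3://my-bucket/augmented-diffs/") // placeholder location
    val sequence = AugmentedDiffSource.timestampToSequence(Timestamp.valueOf("2019-01-01 00:00:00"))

    // Log faulty "new" geometries; do not block waiting for a sequence file that is not yet available.
    val diffs = AugmentedDiffSource.getSequence(
      baseURI,
      sequence,
      (seq: Int, _: AugmentedDiffSource.RF) => println(s"faulty new geometry in sequence $seq"),
      waitUntilAvailable = false)

    println(s"${diffs.length} diffs at ${AugmentedDiffSource.sequenceToTimestamp(sequence)}")
  }
}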
/src/main/scala/vectorpipe/sources/ChangeMicroBatchReader.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import java.net.URI 4 | import java.util 5 | 6 | import org.apache.spark.sql.catalyst.InternalRow 7 | import org.apache.spark.sql.sources.v2.DataSourceOptions 8 | import org.apache.spark.sql.sources.v2.reader.{InputPartition, InputPartitionReader} 9 | import vectorpipe.model.Change 10 | 11 | import scala.collection.JavaConverters._ 12 | 13 | case class ChangeStreamBatchTask(baseURI: URI, sequences: Seq[Int]) extends InputPartition[InternalRow] { 14 | override def createPartitionReader(): InputPartitionReader[InternalRow] = 15 | new ChangeStreamBatchReader(baseURI, sequences) 16 | } 17 | 18 | class ChangeStreamBatchReader(baseURI: URI, sequences: Seq[Int]) 19 | extends ReplicationStreamBatchReader[Change](baseURI, sequences) { 20 | 21 | override def getSequence(baseURI: URI, sequence: Int): Seq[Change] = 22 | ChangeSource.getSequence(baseURI, sequence) 23 | } 24 | 25 | case class ChangeMicroBatchReader(options: DataSourceOptions, checkpointLocation: String) 26 | extends ReplicationStreamMicroBatchReader[Change](options, checkpointLocation) { 27 | private lazy val baseURI = new URI( 28 | options 29 | .get(Source.BaseURI) 30 | .orElse("https://planet.osm.org/replication/minute/") 31 | ) 32 | 33 | override def getCurrentSequence: Option[Int] = 34 | ChangeSource.getCurrentSequence(baseURI) 35 | 36 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] = 37 | sequenceRange 38 | .map( 39 | seq => ChangeStreamBatchTask(baseURI, Seq(seq)).asInstanceOf[InputPartition[InternalRow]] 40 | ) 41 | .asJava 42 | } 43 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/ChangeProvider.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import java.util.Optional 4 | 5 | import org.apache.spark.sql.sources.DataSourceRegister 6 | import org.apache.spark.sql.sources.v2.reader.DataSourceReader 7 | import org.apache.spark.sql.sources.v2.reader.streaming.MicroBatchReader 8 | import org.apache.spark.sql.sources.v2.{DataSourceOptions, DataSourceV2, MicroBatchReadSupport, ReadSupport} 9 | import org.apache.spark.sql.types.StructType 10 | 11 | class ChangeProvider 12 | extends DataSourceV2 13 | with ReadSupport 14 | with MicroBatchReadSupport 15 | with DataSourceRegister { 16 | override def createMicroBatchReader( 17 | schema: Optional[StructType], 18 | checkpointLocation: String, 19 | options: DataSourceOptions 20 | ): MicroBatchReader = { 21 | if (schema.isPresent) { 22 | throw new IllegalStateException( 23 | "The changes source does not support a user-specified schema." 
24 | ) 25 | } 26 | 27 | ChangeMicroBatchReader(options, checkpointLocation) 28 | } 29 | 30 | override def shortName(): String = Source.Changes 31 | override def createReader(options: DataSourceOptions): DataSourceReader = 32 | ChangeReader(options) 33 | } 34 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/ChangeReader.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import java.net.URI 4 | import java.util 5 | 6 | import org.apache.spark.sql.catalyst.InternalRow 7 | import org.apache.spark.sql.sources.v2.DataSourceOptions 8 | import org.apache.spark.sql.sources.v2.reader.InputPartition 9 | import vectorpipe.model.Change 10 | 11 | import scala.collection.JavaConverters._ 12 | import scala.util.Random 13 | 14 | case class ChangeReader(options: DataSourceOptions) extends ReplicationReader[Change](options) { 15 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] = { 16 | // prevent sequential diffs from being assigned to the same task 17 | val sequences = Random.shuffle((startSequence to endSequence).toList) 18 | 19 | sequences 20 | .grouped(Math.max(1, sequences.length / partitionCount)) 21 | .toList 22 | .map( 23 | ChangeStreamBatchTask(baseURI, _) 24 | .asInstanceOf[InputPartition[InternalRow]] 25 | ) 26 | .asJava 27 | } 28 | 29 | private def baseURI = 30 | new URI( 31 | options 32 | .get(Source.BaseURI) 33 | .orElse("https://planet.osm.org/replication/minute/")) 34 | 35 | override def getCurrentSequence: Option[Int] = ChangeSource.getCurrentSequence(baseURI) 36 | } 37 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/ChangeSource.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import java.io.{ByteArrayInputStream, IOException, StringReader} 4 | import java.net.URI 5 | import java.util.Properties 6 | import java.util.zip.GZIPInputStream 7 | 8 | import com.softwaremill.macmemo.memoize 9 | import javax.xml.parsers.SAXParserFactory 10 | import org.apache.spark.internal.Logging 11 | import org.joda.time.DateTime 12 | import vectorpipe.model 13 | import vectorpipe.model.Change 14 | import scalaj.http.Http 15 | 16 | import scala.concurrent.duration.{Duration, _} 17 | 18 | object ChangeSource extends Logging { 19 | val Delay: Duration = 15 seconds 20 | private val saxParserFactory = SAXParserFactory.newInstance 21 | 22 | def getSequence(baseURI: URI, sequence: Int): Seq[Change] = { 23 | val s = f"$sequence%09d" 24 | val path = s"${s.slice(0, 3)}/${s.slice(3, 6)}/${s.slice(6, 9)}.osc.gz" 25 | 26 | logInfo(s"Fetching sequence $sequence") 27 | 28 | try { 29 | val response = 30 | Http(baseURI.resolve(path).toString).asBytes 31 | 32 | if (response.code == 404) { 33 | logInfo(s"$sequence is not yet available, sleeping.") 34 | Thread.sleep(Delay.toMillis) 35 | getSequence(baseURI, sequence) 36 | } else { 37 | val bais = new ByteArrayInputStream(response.body) 38 | val gzis = new GZIPInputStream(bais) 39 | val parser = saxParserFactory.newSAXParser 40 | val handler = new model.Change.ChangeHandler(sequence) 41 | try { 42 | parser.parse(gzis, handler) 43 | val changes = handler.changes 44 | 45 | logDebug(s"Received ${changes.length} changes from sequence $sequence") 46 | 47 | changes 48 | } finally { 49 | gzis.close() 50 | bais.close() 51 | } 52 | } 53 | } catch { 54 | case e: IOException => 
55 | logWarning(s"Error fetching change $sequence", e) 56 | Thread.sleep(Delay.toMillis) 57 | getSequence(baseURI, sequence) 58 | } 59 | } 60 | 61 | @memoize(maxSize = 1, expiresAfter = 30 seconds) 62 | def getCurrentSequence(baseURI: URI): Option[Int] = { 63 | try { 64 | val response = 65 | Http(baseURI.resolve("state.txt").toString).asString 66 | 67 | val state = new Properties 68 | state.load(new StringReader(response.body)) 69 | 70 | val sequence = state.getProperty("sequenceNumber").toInt 71 | val timestamp = DateTime.parse(state.getProperty("timestamp")) 72 | 73 | logDebug(s"$baseURI state: $sequence @ $timestamp") 74 | 75 | Some(sequence) 76 | } catch { 77 | case err: Throwable => 78 | logError("Error fetching or parsing changeset state.", err) 79 | logError(baseURI.toString) 80 | 81 | None 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/ChangesetMicroBatchReader.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import java.net.URI 4 | import java.util 5 | 6 | import org.apache.spark.sql.catalyst.InternalRow 7 | import org.apache.spark.sql.sources.v2.DataSourceOptions 8 | import org.apache.spark.sql.sources.v2.reader.{InputPartition, InputPartitionReader} 9 | import vectorpipe.model.Changeset 10 | 11 | import scala.collection.JavaConverters._ 12 | 13 | case class ChangesetStreamBatchTask(baseURI: URI, sequences: Seq[Int]) 14 | extends InputPartition[InternalRow] { 15 | override def createPartitionReader(): InputPartitionReader[InternalRow] = 16 | new ChangesetStreamBatchReader(baseURI, sequences) 17 | } 18 | 19 | class ChangesetStreamBatchReader(baseURI: URI, sequences: Seq[Int]) 20 | extends ReplicationStreamBatchReader[Changeset](baseURI, sequences) { 21 | 22 | override def getSequence(baseURI: URI, sequence: Int): Seq[Changeset] = 23 | ChangesetSource.getChangeset(baseURI, sequence) 24 | } 25 | 26 | class ChangesetMicroBatchReader(options: DataSourceOptions, checkpointLocation: String) 27 | extends ReplicationStreamMicroBatchReader[Changeset](options, checkpointLocation) { 28 | private lazy val baseURI = new URI( 29 | options 30 | .get(Source.BaseURI) 31 | .orElse("https://planet.osm.org/replication/changesets/") 32 | ) 33 | 34 | override def getCurrentSequence: Option[Int] = 35 | ChangesetSource.getCurrentSequence(baseURI).map(_.sequence.toInt) 36 | 37 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] = 38 | sequenceRange 39 | .map( 40 | seq => ChangesetStreamBatchTask(baseURI, Seq(seq)).asInstanceOf[InputPartition[InternalRow]] 41 | ) 42 | .asJava 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/ChangesetProvider.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import java.util.Optional 4 | 5 | import org.apache.spark.sql.sources.DataSourceRegister 6 | import org.apache.spark.sql.sources.v2.reader.DataSourceReader 7 | import org.apache.spark.sql.sources.v2.reader.streaming.MicroBatchReader 8 | import org.apache.spark.sql.sources.v2.{ 9 | DataSourceOptions, 10 | DataSourceV2, 11 | MicroBatchReadSupport, 12 | ReadSupport 13 | } 14 | import org.apache.spark.sql.types.StructType 15 | 16 | class ChangesetProvider 17 | extends DataSourceV2 18 | with ReadSupport 19 | with MicroBatchReadSupport 20 | with DataSourceRegister { 21 | 
override def createMicroBatchReader( 22 | schema: Optional[StructType], 23 | checkpointLocation: String, 24 | options: DataSourceOptions 25 | ): MicroBatchReader = { 26 | if (schema.isPresent) { 27 | throw new IllegalStateException( 28 | "The changesets source does not support a user-specified schema." 29 | ) 30 | } 31 | 32 | new ChangesetMicroBatchReader(options, checkpointLocation) 33 | } 34 | 35 | override def shortName(): String = Source.Changesets 36 | override def createReader(options: DataSourceOptions): DataSourceReader = 37 | ChangesetReader(options) 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/ChangesetReader.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import java.net.URI 4 | import java.util 5 | 6 | import org.apache.spark.sql.catalyst.InternalRow 7 | import org.apache.spark.sql.sources.v2.DataSourceOptions 8 | import org.apache.spark.sql.sources.v2.reader.InputPartition 9 | import vectorpipe.model.Changeset 10 | 11 | import scala.collection.JavaConverters._ 12 | import scala.util.Random 13 | 14 | case class ChangesetReader(options: DataSourceOptions) 15 | extends ReplicationReader[Changeset](options) { 16 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] = { 17 | // prevent sequential diffs from being assigned to the same task 18 | val sequences = Random.shuffle((startSequence to endSequence).toList) 19 | 20 | sequences 21 | .grouped(Math.max(1, sequences.length / partitionCount)) 22 | .toList 23 | .map( 24 | ChangesetStreamBatchTask(baseURI, _) 25 | .asInstanceOf[InputPartition[InternalRow]] 26 | ) 27 | .asJava 28 | } 29 | 30 | override protected def getCurrentSequence: Option[Int] = 31 | ChangesetSource.getCurrentSequence(baseURI).map(_.sequence.toInt) 32 | 33 | private def baseURI = 34 | new URI( 35 | options 36 | .get(Source.BaseURI) 37 | .orElse("https://planet.osm.org/replication/changesets/")) 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/ChangesetSource.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import java.io.{ByteArrayInputStream, IOException} 4 | import java.net.URI 5 | import java.nio.charset.StandardCharsets 6 | import java.time.Instant 7 | import java.util.zip.GZIPInputStream 8 | 9 | import cats.implicits._ 10 | import io.circe.generic.auto._ 11 | import io.circe.{yaml, _} 12 | import org.apache.commons.io.IOUtils 13 | import org.apache.spark.internal.Logging 14 | import org.joda.time.DateTime 15 | import org.joda.time.format.DateTimeFormat 16 | import vectorpipe.model.Changeset 17 | import scalaj.http.Http 18 | 19 | import scala.concurrent.duration.{Duration, _} 20 | import scala.util.Try 21 | import scala.xml.XML 22 | 23 | object ChangesetSource extends Logging { 24 | val Delay: Duration = 15 seconds 25 | // state.yaml uses a custom date format 26 | private val formatter = DateTimeFormat.forPattern("y-M-d H:m:s.SSSSSSSSS Z") 27 | 28 | private implicit val dateTimeDecoder: Decoder[DateTime] = 29 | Decoder.instance(a => a.as[String].map(DateTime.parse(_, formatter))) 30 | 31 | def getChangeset(baseURI: URI, sequence: Int, retry: Boolean = true): Seq[Changeset] = { 32 | val s = f"$sequence%09d" 33 | val path = s"${s.slice(0, 3)}/${s.slice(3, 6)}/${s.slice(6, 9)}.osm.gz" 34 | 35 | logDebug(s"Fetching sequence 
$sequence") 36 | 37 | try { 38 | val response = 39 | Http(baseURI.resolve(path).toString).asBytes 40 | 41 | if (response.code == 404) { 42 | if (retry) { 43 | logDebug(s"$sequence is not yet available, sleeping.") 44 | Thread.sleep(Delay.toMillis) 45 | getChangeset(baseURI, sequence) 46 | } else { 47 | logDebug(s"$sequence is not yet available, skipping.") 48 | Seq() 49 | } 50 | } else { 51 | // NOTE: if diff bodies get really large, switch to a SAX parser to help with the memory footprint 52 | val bais = new ByteArrayInputStream(response.body) 53 | val gzis = new GZIPInputStream(bais) 54 | try { 55 | val data = XML.loadString(IOUtils.toString(gzis, StandardCharsets.UTF_8)) 56 | 57 | val changesets = (data \ "changeset").map(Changeset.fromXML(_, sequence)) 58 | 59 | logDebug(s"Received ${changesets.length} changesets") 60 | 61 | changesets 62 | } finally { 63 | gzis.close() 64 | bais.close() 65 | } 66 | } 67 | } catch { 68 | case e: IOException => 69 | logWarning(s"Error fetching changeset $sequence", e) 70 | Thread.sleep(Delay.toMillis) 71 | getChangeset(baseURI, sequence) 72 | } 73 | } 74 | 75 | case class Sequence(last_run: DateTime, sequence: Long) 76 | 77 | private def grabSequence(baseURI: URI, filename: String): Sequence = { 78 | val response = 79 | Http(baseURI.resolve(filename).toString).asString 80 | 81 | val state = yaml.parser 82 | .parse(response.body) 83 | .leftMap(err => err: Error) 84 | .flatMap(_.as[Sequence]) 85 | .valueOr(throw _) 86 | 87 | state 88 | } 89 | 90 | def getCurrentSequence(baseURI: URI): Option[Sequence] = { 91 | var state: Try[Sequence] = null 92 | 93 | for (i <- Range(0, 5)) { 94 | state = Try(grabSequence(baseURI, "state.yaml")) 95 | 96 | if (state.isSuccess) { 97 | logDebug(s"$baseURI state: ${state.get.sequence} @ ${state.get.last_run}") 98 | 99 | return Some(state.get) 100 | } 101 | 102 | Thread.sleep(5000) 103 | } 104 | 105 | logError("Error fetching / parsing changeset state.", state.failed.get) 106 | None 107 | } 108 | 109 | def getSequence(baseURI: URI, sequence: Long): Option[Sequence] = { 110 | val s = f"${sequence+1}%09d" 111 | val path = s"${s.slice(0, 3)}/${s.slice(3, 6)}/${s.slice(6, 9)}.state.txt" 112 | 113 | try { 114 | val state = grabSequence(baseURI, path) 115 | 116 | Some(state) 117 | } catch { 118 | case err: Throwable => 119 | logError("Error fetching / parsing changeset state.", err) 120 | 121 | None 122 | } 123 | } 124 | 125 | def estimateSequenceNumber(modifiedTime: Instant, baseURI: URI, maxIters: Int = 1000): Long = { 126 | val current = getCurrentSequence(baseURI) 127 | if (current.isDefined) { 128 | val diffMinutes = (current.get.last_run.getMillis/1000 - 129 | modifiedTime.getEpochSecond) / 60 130 | current.get.sequence - diffMinutes 131 | } else { 132 | // Some queries on the state.yaml fail, set up a failsafe 133 | // ###.state.txt may not be provided for all replications, so use changesets 134 | var i = 0 135 | var baseTime: Long = -1 136 | while (baseTime == -1 && i < maxIters) { 137 | baseTime = getChangeset(baseURI, i, false).map(_.createdAt.toInstant.getEpochSecond).sorted.lastOption.getOrElse(-1L) 138 | i += 1 139 | } 140 | if (i == maxIters) 141 | throw new IndexOutOfBoundsException(s"Couldn't find non-empty changeset in ${maxIters} attempts") 142 | 143 | val query = modifiedTime.getEpochSecond 144 | 145 | (query - baseTime) / 60 + i 146 | } 147 | } 148 | 149 | private def safeSequenceTime(baseURI: URI, sequence: Long): Option[Instant] = { 150 | val res = getSequence(baseURI, sequence) 151 | if (res.isDefined) { 152 | 
Some(Instant.parse(res.get.last_run.toString)) 153 | } else { 154 | getChangeset(baseURI, sequence.toInt, false).map(_.createdAt.toInstant).sortBy(_.getEpochSecond).lastOption.map{ inst => Instant.parse(inst.toString).plusSeconds(60) } 155 | } 156 | } 157 | 158 | def findSequenceFor(modifiedTime: Instant, baseURI: URI): Long = { 159 | var guess = estimateSequenceNumber(modifiedTime, baseURI) 160 | 161 | while (safeSequenceTime(baseURI, guess).map(_.isAfter(modifiedTime)).getOrElse(false)) { guess -= 1 } 162 | while (safeSequenceTime(baseURI, guess).map(_.isBefore(modifiedTime)).getOrElse(false)) { guess += 1 } 163 | 164 | guess 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/ReplicationReader.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import org.apache.spark.SparkEnv 4 | import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder 5 | import org.apache.spark.sql.internal.SQLConf 6 | import org.apache.spark.sql.sources.v2.DataSourceOptions 7 | import org.apache.spark.sql.sources.v2.reader.DataSourceReader 8 | import org.apache.spark.sql.types.StructType 9 | 10 | import scala.compat.java8.OptionConverters._ 11 | import scala.reflect.runtime.universe.TypeTag 12 | 13 | abstract class ReplicationReader[T <: Product: TypeTag](options: DataSourceOptions) 14 | extends DataSourceReader { 15 | private lazy val schema: StructType = ExpressionEncoder[T].schema 16 | 17 | val DefaultPartitionCount: Int = 18 | SparkEnv.get.conf 19 | .getInt(SQLConf.SHUFFLE_PARTITIONS.key, SQLConf.SHUFFLE_PARTITIONS.defaultValue.get) 20 | 21 | protected val partitionCount: Int = 22 | options.getInt(Source.PartitionCount, DefaultPartitionCount) 23 | 24 | protected var endSequence: Int = 25 | options 26 | .get(Source.EndSequence) 27 | .asScala 28 | .map(s => s.toInt - 1) 29 | .getOrElse(getCurrentSequence 30 | .getOrElse(throw new RuntimeException("Could not determine end sequence."))) 31 | 32 | override def readSchema(): StructType = schema 33 | 34 | protected def startSequence: Int = 35 | options 36 | .get(Source.StartSequence) 37 | .asScala 38 | .map(s => s.toInt) 39 | .getOrElse(getCurrentSequence 40 | .getOrElse(throw new RuntimeException("Could not determine start sequence."))) 41 | 42 | protected def getCurrentSequence: Option[Int] 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/ReplicationStreamBatchReader.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import java.net.URI 4 | 5 | import org.apache.spark.internal.Logging 6 | import org.apache.spark.sql.catalyst.InternalRow 7 | import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} 8 | import org.apache.spark.sql.sources.v2.reader.InputPartitionReader 9 | 10 | import scala.collection.parallel.ForkJoinTaskSupport 11 | import scala.concurrent.forkjoin.ForkJoinPool 12 | import scala.reflect.runtime.universe.TypeTag 13 | 14 | abstract class ReplicationStreamBatchReader[T <: Product: TypeTag](baseURI: URI, 15 | sequences: Seq[Int]) 16 | extends InputPartitionReader[InternalRow] 17 | with Logging { 18 | org.apache.spark.sql.jts.registerTypes() 19 | private lazy val rowEncoder = RowEncoder(encoder.schema).resolveAndBind() 20 | protected var index: Int = -1 21 | protected var items: Vector[T] = _ 22 | val Concurrency: Int 
= 8 23 | private lazy val encoder = ExpressionEncoder[T] 24 | 25 | override def next(): Boolean = { 26 | index += 1 27 | 28 | if (Option(items).isEmpty) { 29 | val parSequences = sequences.par 30 | val taskSupport = new ForkJoinTaskSupport(new ForkJoinPool(Concurrency)) 31 | parSequences.tasksupport = taskSupport 32 | 33 | items = parSequences.flatMap(seq => getSequence(baseURI, seq)).toVector 34 | 35 | taskSupport.environment.shutdown() 36 | } 37 | 38 | index < items.length 39 | } 40 | 41 | override def get(): InternalRow = encoder.toRow(items(index)) 42 | 43 | override def close(): Unit = Unit 44 | 45 | protected def getSequence(baseURI: URI, sequence: Int): Seq[T] 46 | } 47 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/SequenceOffset.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | import org.apache.spark.sql.sources.v2.reader.streaming.Offset 3 | 4 | case class SequenceOffset(sequence: Int, pending: Boolean = false) 5 | extends Offset 6 | with Ordered[SequenceOffset] { 7 | override val json: String = s"[$sequence,${pending.compare(false)}]" 8 | 9 | def +(increment: Int): SequenceOffset = SequenceOffset(sequence + increment) 10 | def -(decrement: Int): SequenceOffset = SequenceOffset(sequence - decrement) 11 | def next: SequenceOffset = SequenceOffset(sequence, pending = true) 12 | 13 | override def compare(that: SequenceOffset): Int = 14 | sequence.compare(that.sequence) match { 15 | case 0 => pending.compare(that.pending) 16 | case x => x 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/sources/Source.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | object Source { 4 | val AugmentedDiffs: String = "augmented-diffs" 5 | val Changes: String = "changes" 6 | val Changesets: String = "changesets" 7 | 8 | val BaseURI: String = "base_uri" 9 | val BatchSize: String = "batch_size" 10 | val DatabaseURI: String = "db_uri" 11 | val PartitionCount: String = "partition_count" 12 | val ProcessName: String = "proc_name" 13 | val StartSequence: String = "start_sequence" 14 | val EndSequence: String = "end_sequence" 15 | 16 | val ErrorHandler: String = "error_handler" 17 | val ErrorCodes: Set[Int] = Set(403, 404) 18 | } 19 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/util/Auth.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.util 2 | 3 | import java.net.URI 4 | 5 | case class Auth(user: Option[String], password: Option[String]) { 6 | def isDefined: Boolean = (user.isDefined && password.isDefined) 7 | } 8 | 9 | object Auth { 10 | def fromUri(uri: URI, userParam: String = "user", passwordParam: String = "password"): Auth = { 11 | val auth = getUriUserInfo(uri) 12 | if (auth.isDefined) { 13 | auth 14 | } else { 15 | val params = getUriParams(uri) 16 | auth.copy( 17 | user = auth.user.orElse(params.get(userParam)), 18 | password = auth.password.orElse(params.get(passwordParam)) 19 | ) 20 | } 21 | } 22 | 23 | /** Parse only the URI auth section */ 24 | def getUriUserInfo(uri: URI): Auth = { 25 | val info = uri.getUserInfo 26 | if (null == info) 27 | Auth(None, None) 28 | else { 29 | val chunk = info.split(":") 30 | if (chunk.length == 1) 31 | Auth(Some(chunk(0)), None) 32 | else 33 | 
Auth(Some(chunk(0)), Some(chunk(1))) 34 | } 35 | } 36 | 37 | /** Parse URI parameters */ 38 | def getUriParams(uri: URI): Map[String, String] = { 39 | val query = uri.getQuery 40 | if (null == query) 41 | Map.empty[String, String] 42 | else { 43 | query.split("&").map{ param => 44 | val arr = param.split("=") 45 | arr(0) -> arr(1) 46 | }.toMap 47 | } 48 | } 49 | } -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/util/DBUtils.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.util 2 | 3 | import java.net.URI 4 | import java.sql.{Connection, DriverManager} 5 | 6 | object DBUtils { 7 | def getJdbcConnection(uri: URI): Connection = { 8 | 9 | val cleanUri = new URI( 10 | uri.getScheme, 11 | Option(uri.getHost).getOrElse("localhost") + (if (uri.getPort > 0) ":" + uri.getPort else ""), 12 | uri.getPath, 13 | null.asInstanceOf[String], 14 | null.asInstanceOf[String] 15 | ) 16 | // also drops UserInfo 17 | 18 | val auth = Auth.fromUri(uri) 19 | (auth.user, auth.password) match { 20 | case (Some(user), Some(pass)) => 21 | DriverManager.getConnection(s"jdbc:${cleanUri.toString}", user, pass) 22 | case _ => 23 | DriverManager.getConnection(s"jdbc:${cleanUri.toString}") 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/util/Geocode.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.util 2 | 3 | import geotrellis.vector._ 4 | import geotrellis.vector.io.json._ 5 | 6 | import org.apache.spark.sql._ 7 | import org.apache.spark.sql.catalyst.encoders.RowEncoder 8 | import org.apache.spark.sql.functions._ 9 | import org.apache.spark.sql.types._ 10 | 11 | import org.locationtech.jts.geom.prep._ 12 | import org.locationtech.jts.index.ItemVisitor 13 | 14 | import _root_.io.circe.{Encoder => CirceEncoder, Decoder => CirceDecoder, _} 15 | import cats.syntax.either._ 16 | 17 | object Geocode { 18 | 19 | case class CountryId(code: String) 20 | 21 | object CountryIdCodecs { 22 | implicit val encodeCountryId: CirceEncoder[CountryId] = new CirceEncoder[CountryId] { 23 | final def apply(a: CountryId): Json = Json.obj( 24 | ("code", Json.fromString(a.code)) 25 | ) 26 | } 27 | implicit val decodeCountryId: CirceDecoder[CountryId] = new CirceDecoder[CountryId] { 28 | final def apply(c: HCursor): CirceDecoder.Result[CountryId] = 29 | for { 30 | code <- c.downField("ADM0_A3").as[String] 31 | } yield { 32 | CountryId(code) 33 | } 34 | } 35 | } 36 | 37 | import CountryIdCodecs._ 38 | 39 | object Countries { 40 | lazy val all: Vector[MultiPolygonFeature[CountryId]] = { 41 | val collection = 42 | Resource("countries.geojson"). 43 | parseGeoJson[JsonFeatureCollection] 44 | 45 | val polys = 46 | collection. 47 | getAllPolygonFeatures[CountryId]. 48 | map(_.mapGeom(MultiPolygon(_))) 49 | 50 | val mps = 51 | collection. 52 | getAllMultiPolygonFeatures[CountryId] 53 | 54 | polys ++ mps 55 | } 56 | 57 | def indexed: SpatialIndex[MultiPolygonFeature[CountryId]] = 58 | SpatialIndex.fromExtents(all) { mpf => mpf.geom.extent } 59 | } 60 | 61 | class CountryLookup() extends Serializable { 62 | private val index = 63 | geotrellis.vector.SpatialIndex.fromExtents( 64 | Countries.all. 
65 | map { mpf => 66 | (PreparedGeometryFactory.prepare(mpf.geom), mpf.data) 67 | } 68 | ) { case (pg, _) => pg.getGeometry().extent } 69 | 70 | def lookup(geom: geotrellis.vector.Geometry): Traversable[CountryId] = { 71 | val t = 72 | new Traversable[(PreparedGeometry, CountryId)] { 73 | override def foreach[U](f: ((PreparedGeometry, CountryId)) => U): Unit = { 74 | val visitor = new ItemVisitor { 75 | override def visitItem(obj: AnyRef): Unit = 76 | f(obj.asInstanceOf[(PreparedGeometry, CountryId)]) 77 | } 78 | index.rtree.query(geom.getEnvelopeInternal, visitor) 79 | } 80 | } 81 | 82 | t. 83 | filter(_._1.intersects(geom)). 84 | map(_._2) 85 | } 86 | } 87 | 88 | def apply(geoms: DataFrame): DataFrame = { 89 | val newSchema = StructType(geoms.schema.fields :+ StructField( 90 | "countries", ArrayType(StringType, containsNull = false), nullable = true)) 91 | implicit val encoder: Encoder[Row] = RowEncoder(newSchema) 92 | 93 | geoms 94 | .mapPartitions { partition => 95 | val countryLookup = new CountryLookup() 96 | 97 | partition.map { row => 98 | val countryCodes = Option(row.getAs[Geometry]("geom")) match { 99 | case Some(geom) => countryLookup.lookup(geom).map(x => x.code) 100 | case None => Seq.empty[String] 101 | } 102 | 103 | Row.fromSeq(row.toSeq :+ countryCodes) 104 | } 105 | } 106 | } 107 | 108 | def regionsByChangeset(geomCountries: Dataset[Row]): DataFrame = { 109 | import geomCountries.sparkSession.implicits._ 110 | 111 | geomCountries 112 | .where('country.isNotNull) 113 | .groupBy('changeset) 114 | .agg(collect_set('country) as 'countries) 115 | 116 | } 117 | 118 | } 119 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/util/Implicits.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.util 2 | 3 | import geotrellis.vector._ 4 | import _root_.io.circe._ 5 | 6 | import scala.reflect.ClassTag 7 | 8 | object Implicits extends Implicits 9 | 10 | trait Implicits extends RobustFeatureFormats { 11 | implicit class RobustFeaturesToGeoJson[G <: Geometry: ClassTag, D: Encoder](features: Traversable[RobustFeature[G, D]]) { 12 | def toGeoJson(): String = { 13 | val fc = new JsonRobustFeatureCollection 14 | 15 | features.foreach(fc.add(_)) 16 | 17 | fc.asJson.noSpaces 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/util/JsonRobustFeatureCollection.scala: -------------------------------------------------------------------------------- 1 | 2 | package vectorpipe.util 3 | 4 | import cats.syntax.either._ 5 | import geotrellis.vector._ 6 | import _root_.io.circe._ 7 | import _root_.io.circe.syntax._ 8 | 9 | import scala.collection.immutable.VectorBuilder 10 | import scala.collection.mutable 11 | import scala.reflect.ClassTag 12 | 13 | class JsonRobustFeatureCollection(features: List[Json] = Nil) { 14 | private val buffer = mutable.ListBuffer(features: _*) 15 | 16 | def add[G <: Geometry: ClassTag, D: Encoder](feature: RobustFeature[G, D]) = 17 | buffer += RobustFeatureFormats.writeRobustFeatureJson(feature) 18 | 19 | def getAll[F: Decoder]: Vector[F] = { 20 | val ret = new VectorBuilder[F]() 21 | features.foreach{ _.as[F].foreach(ret += _) } 22 | ret.result() 23 | } 24 | 25 | def getAllRobustFeatures[F <: RobustFeature[_, _] :Decoder]: Vector[F] = 26 | getAll[F] 27 | 28 | def getAllPointFeatures[D: Decoder]() = getAll[RobustFeature[Point, D]] 29 | def getAllLineStringFeatures[D: Decoder]() = 
getAll[RobustFeature[LineString, D]] 30 | def getAllPolygonFeatures[D: Decoder]() = getAll[RobustFeature[Polygon, D]] 31 | def getAllMultiPointFeatures[D: Decoder]() = getAll[RobustFeature[MultiPoint, D]] 32 | def getAllMultiLineStringFeatures[D: Decoder]() = getAll[RobustFeature[MultiLineString, D]] 33 | def getAllMultiPolygonFeatures[D: Decoder]() = getAll[RobustFeature[MultiPolygon, D]] 34 | 35 | def getAllGeometries(): Vector[Geometry] = 36 | getAll[Point] ++ getAll[LineString] ++ getAll[Polygon] ++ 37 | getAll[MultiPoint] ++ getAll[MultiLineString] ++ getAll[MultiPolygon] 38 | 39 | def asJson: Json = { 40 | val bboxOption = getAllGeometries.map(_.extent).reduceOption(_ combine _) 41 | bboxOption match { 42 | case Some(bbox) => 43 | Json.obj( 44 | "type" -> "FeatureCollection".asJson, 45 | "bbox" -> Extent.listEncoder(bbox), 46 | "features" -> buffer.toVector.asJson 47 | ) 48 | case _ => 49 | Json.obj( 50 | "type" -> "FeatureCollection".asJson, 51 | "features" -> buffer.toVector.asJson 52 | ) 53 | } 54 | } 55 | } 56 | 57 | object JsonRobustFeatureCollection { 58 | def apply() = new JsonRobustFeatureCollection() 59 | 60 | def apply[G <: Geometry: ClassTag, D: Encoder](features: Traversable[RobustFeature[G, D]]) = { 61 | val fc = new JsonRobustFeatureCollection() 62 | features.foreach(fc.add(_)) 63 | fc 64 | } 65 | 66 | def apply(features: Traversable[Json])(implicit d: DummyImplicit): JsonRobustFeatureCollection = 67 | new JsonRobustFeatureCollection(features.toList) 68 | } 69 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/util/JsonRobustFeatureCollectionMap.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.util 2 | 3 | import cats.syntax.either._ 4 | import geotrellis.vector._ 5 | import _root_.io.circe._ 6 | import _root_.io.circe.syntax._ 7 | 8 | import scala.collection.mutable 9 | import scala.reflect.ClassTag 10 | 11 | class JsonRobustFeatureCollectionMap(features: List[Json] = Nil) { 12 | private val buffer = mutable.ListBuffer(features:_*) 13 | 14 | def add[G <: Geometry: ClassTag, D: Encoder](featureMap: (String, RobustFeature[G, D])) = 15 | buffer += RobustFeatureFormats.writeRobustFeatureJsonWithID(featureMap) 16 | 17 | def asJson: Json = { 18 | val bboxOption = getAll[Geometry].map(_._2.extent).reduceOption(_ combine _) 19 | bboxOption match { 20 | case Some(bbox) => 21 | Json.obj( 22 | "type" -> "FeatureCollection".asJson, 23 | "bbox" -> Extent.listEncoder(bbox), 24 | "features" -> buffer.toVector.asJson 25 | ) 26 | case _ => 27 | Json.obj( 28 | "type" -> "FeatureCollection".asJson, 29 | "features" -> buffer.toVector.asJson 30 | ) 31 | } 32 | } 33 | 34 | private def getFeatureID(js: Json): String = { 35 | val c = js.hcursor 36 | val id = c.downField("id") 37 | id.as[String] match { 38 | case Right(i) => i 39 | case _ => 40 | id.as[Int] match { 41 | case Right(i) => i.toString 42 | case _ => throw DecodingFailure("Feature expected to have \"ID\" field", c.history) 43 | } 44 | } 45 | } 46 | 47 | def getAll[F: Decoder]: Map[String, F] = { 48 | var ret = Map[String, F]() 49 | features.foreach{ f => f.as[F].foreach(ret += getFeatureID(f) -> _) } 50 | ret 51 | } 52 | } 53 | 54 | object JsonRobustFeatureCollectionMap { 55 | def apply() = new JsonRobustFeatureCollectionMap() 56 | 57 | def apply[G <: Geometry: ClassTag, D: Encoder](features: Traversable[(String, RobustFeature[G, D])]) = { 58 | val fc = new JsonRobustFeatureCollectionMap() 59 | 
features.foreach(fc.add(_)) 60 | fc 61 | } 62 | 63 | def apply(features: Traversable[Json])(implicit d: DummyImplicit): JsonRobustFeatureCollectionMap = 64 | new JsonRobustFeatureCollectionMap(features.toList) 65 | } 66 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/util/Resource.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.util 2 | 3 | import java.io.InputStream 4 | 5 | object Resource { 6 | def apply(name: String): String = { 7 | val stream: InputStream = getClass.getResourceAsStream(s"/$name") 8 | try { 9 | scala.io.Source.fromInputStream(stream).getLines.mkString(" ") 10 | } finally { 11 | stream.close() 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/util/RobustFeature.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.util 2 | 3 | import cats.syntax.either._ 4 | import geotrellis.vector._ 5 | import geotrellis.vector.io.json._ 6 | import _root_.io.circe._ 7 | import _root_.io.circe.syntax._ 8 | 9 | import scala.reflect.ClassTag 10 | import scala.util.{Try, Success, Failure} 11 | 12 | case class RobustFeature[+G <: Geometry: ClassTag, D](geom: Option[G], data: D) { 13 | def toFeature(): Feature[G, D] = { 14 | val g = geom match { 15 | case Some(gg) => gg 16 | case _ => MultiPoint.EMPTY 17 | } 18 | Feature(g.asInstanceOf[G], data) 19 | } 20 | } 21 | 22 | trait RobustFeatureFormats { 23 | def writeRobustFeatureJson[G <: Geometry: ClassTag, D: Encoder](obj: RobustFeature[G, D]): Json = { 24 | val feature = obj.toFeature 25 | Json.obj( 26 | "type" -> "Feature".asJson, 27 | "geometry" -> GeometryFormats.geometryEncoder(feature.geom), 28 | "bbox" -> Extent.listEncoder(feature.geom.extent), 29 | "properties" -> obj.data.asJson 30 | ) 31 | } 32 | 33 | def writeRobustFeatureJsonWithID[G <: Geometry: ClassTag, D: Encoder](idFeature: (String, RobustFeature[G, D])): Json = { 34 | val feature = idFeature._2.toFeature 35 | Json.obj( 36 | "type" -> "Feature".asJson, 37 | "geometry" -> GeometryFormats.geometryEncoder(feature.geom), 38 | "bbox" -> Extent.listEncoder(feature.geom.extent), 39 | "properties" -> idFeature._2.data.asJson, 40 | "id" -> idFeature._1.asJson 41 | ) 42 | } 43 | 44 | def readRobustFeatureJson[D: Decoder, G <: Geometry: Decoder: ClassTag](value: Json): RobustFeature[G, D] = { 45 | val c = value.hcursor 46 | (c.downField("type").as[String], c.downField("geometry").focus, c.downField("properties").focus) match { 47 | case (Right("Feature"), Some(geom), Some(data)) => 48 | //val g = Try(geom.convertTo[G]).toOption 49 | //val d = data.convertTo[D] 50 | (Try(geom.as[G].toOption).toOption.getOrElse(None), data.as[D].toOption) match { 51 | case (Some(g), Some(d)) if g isEmpty => RobustFeature(None, d) 52 | case (Some(g), Some(d)) => RobustFeature(Some(g), d) 53 | case (None, Some(d)) => RobustFeature(None, d) 54 | case (_, None) => throw new Exception(s"Feature expected well-formed data; got $data") 55 | } 56 | case _ => throw new Exception("Feature expected") 57 | } 58 | } 59 | 60 | def readRobustFeatureJsonWithID[D: Decoder, G <: Geometry: Decoder: ClassTag](value: Json): (String, RobustFeature[G, D]) = { 61 | val c = value.hcursor 62 | (c.downField("type").as[String], c.downField("geometry").focus, c.downField("properties").focus, c.downField("id").focus) match { 63 | case (Right("Feature"), Some(geom), 
Some(data), Some(id)) => 64 | //val g = Try(geom.convertTo[G]).toOption 65 | //val d = data.convertTo[D] 66 | (Try(geom.as[G].toOption).toOption.getOrElse(None), data.as[D].toOption, id.as[String].toOption) match { 67 | case (Some(g), Some(d), Some(i)) if g isEmpty => (i, RobustFeature(None, d)) 68 | case (Some(g), Some(d), Some(i)) => (i, RobustFeature(Some(g), d)) 69 | case (None, Some(d), Some(i)) => (i, RobustFeature(None, d)) 70 | case _ => throw new Exception(s"Feature expected well-formed id and data; got (${id}, ${data})") 71 | } 72 | case _ => throw new Exception("Feature expected") 73 | } 74 | } 75 | 76 | implicit def robustFeatureDecoder[G <: Geometry: Decoder: ClassTag, D: Decoder]: Decoder[RobustFeature[G, D]] = 77 | Decoder.decodeJson.emap { json: Json => 78 | Try(readRobustFeatureJson[D, G](json)) match { 79 | case Success(f) => Right(f) 80 | case Failure(e) => Left(e.getMessage) 81 | } 82 | } 83 | 84 | implicit def robustFeatureEncoder[G <: Geometry: Encoder: ClassTag, D: Encoder]: Encoder[RobustFeature[G, D]] = 85 | Encoder.encodeJson.contramap[RobustFeature[G, D]] { writeRobustFeatureJson } 86 | 87 | implicit val robustFeatureCollectionEncoder: Encoder[JsonRobustFeatureCollection] = 88 | Encoder.encodeJson.contramap[JsonRobustFeatureCollection] { _.asJson } 89 | 90 | implicit val robustFeatureCollectionDecoder: Decoder[JsonRobustFeatureCollection] = 91 | Decoder.decodeHCursor.emap { c: HCursor => 92 | (c.downField("type").as[String], c.downField("features").focus) match { 93 | case (Right("FeatureCollection"), Some(features)) => Right(JsonRobustFeatureCollection(features.asArray.toVector.flatten)) 94 | case _ => Left("FeatureCollection expected") 95 | } 96 | } 97 | 98 | implicit val robustFeatureCollectionMapEncoder: Encoder[JsonRobustFeatureCollectionMap] = 99 | Encoder.encodeJson.contramap[JsonRobustFeatureCollectionMap] { _.asJson } 100 | 101 | implicit val robustFeatureCollectionMapDecoder: Decoder[JsonRobustFeatureCollectionMap] = 102 | Decoder.decodeHCursor.emap { c: HCursor => 103 | (c.downField("type").as[String], c.downField("features").focus) match { 104 | case (Right("FeatureCollection"), Some(features)) => Right(JsonRobustFeatureCollectionMap(features.asArray.toVector.flatten)) 105 | case _ => Left("FeatureCollection expected") 106 | } 107 | } 108 | } 109 | 110 | object RobustFeatureFormats extends RobustFeatureFormats 111 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/util/package.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe 2 | 3 | package object util extends Implicits { 4 | def mergeMaps[K, V](m1: Map[K, V], m2: Map[K, V])(f: (V, V) => V): Map[K, V] = 5 | (m1.toSeq ++ m2.toSeq). 6 | groupBy(_._1). 
7 | map { case (k, vs) => 8 | (k, vs.map(_._2).reduce(f)) 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/vectortile/Clipping.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.vectortile 2 | 3 | import geotrellis.layer.SpatialKey 4 | import geotrellis.layer.LayoutLevel 5 | import geotrellis.vector._ 6 | 7 | import scala.concurrent.ExecutionContext.Implicits.global 8 | 9 | object Clipping { 10 | def byLayoutCell(geom: Geometry, key: SpatialKey, layoutLevel: LayoutLevel): Geometry = { 11 | val ex = layoutLevel.layout.mapTransform.keyToExtent(key) 12 | 13 | // Preserve dimension of resultant geometry 14 | val clipped = geom match { 15 | case p: Point => p // points with the current key intersect the extent by definition 16 | case mp: MultiPoint => 17 | timedIntersect(mp, ex) match { 18 | case PointResult(pr) => pr 19 | case MultiPointResult(mpr) => mpr 20 | case NoResult => 21 | logger.warn(s"$geom was keyed to layout cell $key, but did not intersect $ex [zoom=${layoutLevel.zoom}]") 22 | geom 23 | case _ => // should never match here; just shut the compiler up 24 | geom 25 | } 26 | case l: LineString => 27 | timedIntersect(l, ex) match { 28 | case LineStringResult(lr) => lr 29 | case MultiLineStringResult(mlr) => mlr 30 | case GeometryCollectionResult(gcr) => 31 | gcr.getAll[LineString].length match { 32 | case 0 => MultiLineString() 33 | case 1 => gcr.getAll[LineString].head 34 | case _ => MultiLineString(gcr.getAll[LineString]) 35 | } 36 | case NoResult => 37 | logger.warn(s"$geom was keyed to layout cell $key, but did not intersect $ex [zoom=${layoutLevel.zoom}]") 38 | geom 39 | case _ => 40 | MultiLineString() // Discard (multi-)point results 41 | } 42 | case ml: MultiLineString => 43 | timedIntersect(ml, ex) match { 44 | case LineStringResult(lr) => lr 45 | case MultiLineStringResult(mlr) => mlr 46 | case GeometryCollectionResult(gcr) => 47 | (gcr.getAll[LineString].length, gcr.getAll[MultiLineString].length) match { 48 | case (0, 0) => MultiLineString() 49 | case (1, 0) => gcr.getAll[LineString].head 50 | case (0, 1) => gcr.getAll[MultiLineString].head 51 | case _ => MultiLineString(gcr.getAll[LineString] ++ gcr.getAll[MultiLineString].flatMap(_.lines.toSeq)) 52 | } 53 | case NoResult => 54 | logger.warn(s"$geom was keyed to layout cell $key, but did not intersect $ex [zoom=${layoutLevel.zoom}]") 55 | geom 56 | case _ => 57 | MultiLineString() // Discard (multi-)point results 58 | } 59 | case poly: Polygon => 60 | timedIntersect(poly, ex) match { 61 | case PolygonResult(pr) => pr 62 | case MultiPolygonResult(mpr) => mpr 63 | case GeometryCollectionResult(gcr) => 64 | gcr.getAll[Polygon].length match { 65 | case 0 => MultiPolygon() 66 | case 1 => gcr.getAll[Polygon].head 67 | case _ => MultiPolygon(gcr.getAll[Polygon]) 68 | } 69 | case NoResult => 70 | logger.warn(s"$geom was keyed to layout cell $key, but did not intersect $ex [zoom=${layoutLevel.zoom}]") 71 | geom 72 | case _ => MultiPolygon() // ignore point/line results 73 | } 74 | case mp: MultiPolygon => 75 | timedIntersect(mp, ex) match { 76 | case PolygonResult(pr) => pr 77 | case MultiPolygonResult(mpr) => mpr 78 | case GeometryCollectionResult(gcr) => 79 | (gcr.getAll[Polygon].length, gcr.getAll[MultiPolygon].length) match { 80 | case (0, 0) => MultiPolygon() 81 | case (1, 0) => gcr.getAll[Polygon].head 82 | case (0, 1) => gcr.getAll[MultiPolygon].head 83 | case _ => 
MultiPolygon(gcr.getAll[Polygon] ++ gcr.getAll[MultiPolygon].flatMap(_.polygons.toSeq)) 84 | } 85 | case NoResult => 86 | logger.warn(s"$geom was keyed to layout cell $key, but did not intersect $ex [zoom=${layoutLevel.zoom}]") 87 | geom 88 | case _ => MultiPolygon() // ignore point/line results 89 | } 90 | } 91 | clipped 92 | } 93 | 94 | } 95 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/vectortile/Simplify.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.vectortile 2 | 3 | import geotrellis.vector._ 4 | import geotrellis.layer._ 5 | import org.locationtech.jts.simplify.TopologyPreservingSimplifier 6 | 7 | object Simplify { 8 | 9 | /** 10 | * Simplifies geometry using JTS's topology-preserving simplifier. 11 | * 12 | * Note that there are known bugs with this simplifier. Please refer to the 13 | * JTS documentation. Faster simplifiers with fewer guarantees are available 14 | * there as well. 15 | */ 16 | def withJTS(g: Geometry, ld: LayoutDefinition): Geometry = { 17 | TopologyPreservingSimplifier.simplify(g, ld.cellSize.resolution) 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/vectortile/export/package.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.vectortile 2 | 3 | import geotrellis.layer.SpatialKey 4 | import geotrellis.spark.store.hadoop._ 5 | import geotrellis.spark.store.s3._ 6 | import geotrellis.vectortile._ 7 | import org.apache.spark.rdd.RDD 8 | 9 | import software.amazon.awssdk.services.s3.model.ObjectCannedACL 10 | 11 | import java.net.URI 12 | import java.io.ByteArrayOutputStream 13 | import java.util.zip.GZIPOutputStream 14 | 15 | package object export { 16 | def saveVectorTiles(vectorTiles: RDD[(SpatialKey, VectorTile)], zoom: Int, uri: URI): Unit = { 17 | uri.getScheme match { 18 | case "s3" => 19 | val path = uri.getPath 20 | val prefix = path.stripPrefix("/").stripSuffix("/") 21 | saveToS3(vectorTiles, zoom, uri.getAuthority, prefix) 22 | case _ => 23 | saveHadoop(vectorTiles, zoom, uri) 24 | } 25 | } 26 | 27 | private def saveToS3(vectorTiles: RDD[(SpatialKey, VectorTile)], zoom: Int, bucket: String, prefix: String) = { 28 | vectorTiles 29 | .mapValues { tile => 30 | val byteStream = new ByteArrayOutputStream() 31 | 32 | try { 33 | val gzipStream = new GZIPOutputStream(byteStream) 34 | try { 35 | gzipStream.write(tile.toBytes) 36 | } finally { 37 | gzipStream.close() 38 | } 39 | } finally { 40 | byteStream.close() 41 | } 42 | 43 | byteStream.toByteArray 44 | } 45 | .saveToS3( 46 | { sk: SpatialKey => s"s3://${bucket}/${prefix}/${zoom}/${sk.col}/${sk.row}.mvt" }, 47 | putObjectModifier = { request => 48 | request 49 | .toBuilder() 50 | .contentEncoding("gzip") 51 | .acl(ObjectCannedACL.PUBLIC_READ) 52 | .build() 53 | }) 54 | } 55 | 56 | private def saveHadoop(vectorTiles: RDD[(SpatialKey, VectorTile)], zoom: Int, uri: URI) = { 57 | vectorTiles 58 | .mapValues(_.toBytes) 59 | .saveToHadoop({ sk: SpatialKey => s"${uri}/${zoom}/${sk.col}/${sk.row}.mvt" }) 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/main/scala/vectorpipe/vectortile/package.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe 2 | 3 | import geotrellis.proj4._ 4 | import geotrellis.layer.SpatialKey 5 | import 
geotrellis.layer.LayoutDefinition 6 | import geotrellis.vector._ 7 | import geotrellis.vectortile._ 8 | import org.apache.spark.sql._ 9 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema 10 | import org.apache.spark.sql.functions._ 11 | 12 | import scala.concurrent._ 13 | import scala.concurrent.duration._ 14 | import scala.util.{Try, Success, Failure} 15 | 16 | package object vectortile { 17 | type VectorTileFeature[+G <: Geometry] = Feature[G, Map[String, Value]] 18 | 19 | def vtf2mvtf[G <: Geometry](vtf: VectorTileFeature[G]): MVTFeature[G] = 20 | MVTFeature(vtf.geom, vtf.data) 21 | 22 | sealed trait LayerMultiplicity { val name: String } 23 | case class SingleLayer(val name: String) extends LayerMultiplicity 24 | case class LayerNamesInColumn(val name: String) extends LayerMultiplicity 25 | 26 | @transient lazy val logger = org.apache.log4j.Logger.getRootLogger 27 | 28 | @transient lazy val st_reprojectGeom = udf { (g: Geometry, srcProj: String, destProj: String) => 29 | val trans = Proj4Transform(CRS.fromString(srcProj), CRS.fromString(destProj)) 30 | if (Option(g).isDefined) { 31 | if (g.isEmpty) 32 | g 33 | else { 34 | g.reproject(trans) 35 | } 36 | } else { 37 | null 38 | } 39 | } 40 | 41 | def keyTo(layout: LayoutDefinition) = udf { g: Geometry => 42 | if (Option(g).isDefined && !g.isEmpty) { 43 | layout.mapTransform.keysForGeometry(g).toArray 44 | } else { 45 | Array.empty[SpatialKey] 46 | } 47 | } 48 | 49 | def getSpatialKey(k: GenericRowWithSchema): SpatialKey = SpatialKey(k.getInt(0), k.getInt(1)) 50 | 51 | def getSpatialKey(row: Row, field: String): SpatialKey = { 52 | val k = row.getAs[Row](field) 53 | SpatialKey(k.getInt(0), k.getInt(1)) 54 | } 55 | 56 | // case class IdFeature[+G <: Geometry, +D](geom: Geometry, data: D, id: Int) extends Feature[G, D](geom, data) { 57 | // override def mapGeom[T <: Geometry](f: G => T): IdFeature[T, D] = 58 | // IdFeature(f(geom), data, id) 59 | 60 | // override def mapData[T](f: D => T): IdFeature[G, T] = 61 | // IdFeature(geom, f(data), id) 62 | // } 63 | 64 | def timedIntersect[G <: Geometry](geom: G, ex: Extent)(implicit ec: ExecutionContext) = { 65 | val future = Future { geom.&(ex) } 66 | Try(Await.result(future, 5000 milliseconds)) match { 67 | case Success(res) => res 68 | case Failure(_) => 69 | logger.warn(s"Could not intersect $geom with $ex in 5000 milliseconds") 70 | NoResult 71 | } 72 | } 73 | 74 | case class VTContents(points: List[VectorTileFeature[Point]] = Nil, 75 | multipoints: List[VectorTileFeature[MultiPoint]] = Nil, 76 | lines: List[VectorTileFeature[LineString]] = Nil, 77 | multilines: List[VectorTileFeature[MultiLineString]] = Nil, 78 | polygons: List[VectorTileFeature[Polygon]] = Nil, 79 | multipolygons: List[VectorTileFeature[MultiPolygon]] = Nil) { 80 | def +(other: VTContents) = VTContents(points ++ other.points, 81 | multipoints ++ other.multipoints, 82 | lines ++ other.lines, 83 | multilines ++ other.multilines, 84 | polygons ++ other.polygons, 85 | multipolygons ++ other.multipolygons) 86 | def +[G <: Geometry](other: VectorTileFeature[G]) = other.geom match { 87 | case p : Point => copy(points=other.asInstanceOf[VectorTileFeature[Point]] :: points) 88 | case mp: MultiPoint => copy(multipoints=other.asInstanceOf[VectorTileFeature[MultiPoint]] :: multipoints) 89 | case l : LineString => copy(lines=other.asInstanceOf[VectorTileFeature[LineString]] :: lines) 90 | case ml: MultiLineString => copy(multilines=other.asInstanceOf[VectorTileFeature[MultiLineString]] :: multilines) 91 | case p : 
Polygon => copy(polygons=other.asInstanceOf[VectorTileFeature[Polygon]] :: polygons) 92 | case mp: MultiPolygon => copy(multipolygons=other.asInstanceOf[VectorTileFeature[MultiPolygon]] :: multipolygons) 93 | } 94 | } 95 | object VTContents { 96 | def empty() = VTContents() 97 | } 98 | 99 | def buildLayer[G <: Geometry](features: Iterable[VectorTileFeature[G]], layerName: String, ex: Extent, tileWidth: Int): Layer = { 100 | val contents = features.foldLeft(VTContents.empty){ (accum, feature) => accum + feature } 101 | val VTContents(pts, mpts, ls, mls, ps, mps) = contents 102 | StrictLayer( 103 | name=layerName, 104 | tileWidth=tileWidth, 105 | version=2, 106 | tileExtent=ex, 107 | points=pts.map(vtf2mvtf), 108 | multiPoints=mpts.map(vtf2mvtf), 109 | lines=ls.map(vtf2mvtf), 110 | multiLines=mls.map(vtf2mvtf), 111 | polygons=ps.map(vtf2mvtf), 112 | multiPolygons=mps.map(vtf2mvtf) 113 | ) 114 | } 115 | 116 | def buildSortedLayer[G <: Geometry](features: Iterable[VectorTileFeature[G]], layerName: String, ex: Extent, tileWidth: Int): Layer = { 117 | val contents = features.foldLeft(VTContents.empty){ (accum, feature) => accum + feature } 118 | val VTContents(pts, mpts, ls, mls, ps, mps) = contents 119 | StrictLayer( 120 | name=layerName, 121 | tileWidth=tileWidth, 122 | version=2, 123 | tileExtent=ex, 124 | points=pts.map(vtf2mvtf), 125 | multiPoints=mpts.map(vtf2mvtf), 126 | lines=ls.map(vtf2mvtf), 127 | multiLines=mls.map(vtf2mvtf), 128 | polygons=ps.sortWith(_.getArea > _.getArea).map(vtf2mvtf), 129 | multiPolygons=mps.sortWith(_.getArea > _.getArea).map(vtf2mvtf) 130 | ) 131 | } 132 | 133 | def buildVectorTile[G <: Geometry]( 134 | features: Iterable[VectorTileFeature[G]], 135 | layerName: String, 136 | ex: Extent, 137 | tileWidth: Int, 138 | sorted: Boolean 139 | ): VectorTile = { 140 | val layer = 141 | if (sorted) 142 | buildSortedLayer(features, layerName, ex, tileWidth) 143 | else 144 | buildLayer(features, layerName, ex, tileWidth) 145 | VectorTile(Map(layerName -> layer), ex) 146 | } 147 | 148 | def buildVectorTile[G <: Geometry]( 149 | layerFeatures: Map[String, Iterable[VectorTileFeature[G]]], 150 | ex: Extent, 151 | tileWidth: Int, 152 | sorted: Boolean 153 | ): VectorTile = { 154 | VectorTile(layerFeatures.map{ case (layerName, features) => (layerName, 155 | if (sorted) 156 | buildSortedLayer(features, layerName, ex, tileWidth) 157 | else 158 | buildLayer(features, layerName, ex, tileWidth)) 159 | }, ex) 160 | } 161 | 162 | } 163 | -------------------------------------------------------------------------------- /src/main/tut/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: home 3 | title: "Home" 4 | section: "section_home" 5 | position: 1 6 | technologies: 7 | - first: ["GeoTrellis", "Geographic data processing engine for high performance applications"] 8 | - second: ["Apache Spark", "An engine for large-scale data processing"] 9 | - third: ["Scala", "Functional Programming on the JVM"] 10 | --- 11 | 12 | # VectorPipe 13 | 14 | VectorPipe is a Scala library for transforming vector data of arbitrary 15 | sources into [Mapbox Vector Tiles](https://www.mapbox.com/vector-tiles/). It 16 | uses the VectorTile codec from the [GeoTrellis library 17 | suite](https://geotrellis.io/), which in turn is powered by [Apache 18 | Spark](https://spark.apache.org/). 
19 | 20 | Currently VectorPipe can process: 21 | 22 | - OpenStreetMap XML / PBF* / ORC 23 | 24 | And produce: 25 | 26 | - Analytic Vector Tiles (AVTs) 27 | - Custom Vector Tile schemes (by writing a custom *Collator* function) 28 | 29 | Of course, you're not limited to just producing Vector Tiles. Once you've 30 | extracted your raw data into [GeoTrellis](https://geotrellis.io/) Geometries, 31 | you can do whatever you want with them (analytics, rasterizing, etc.). 32 | 33 | ### Dependencies 34 | 35 | - Scala 2.11 36 | - Apache Spark 2.1.0+ 37 | 38 | ### Getting Started 39 | 40 | To use VectorPipe, add the following to your `build.sbt`: 41 | 42 | ``` 43 | resolvers += Resolver.bintrayRepo("azavea", "maven") 44 | 45 | libraryDependencies += "com.azavea" %% "vectorpipe" % "0.1.0" 46 | ``` 47 | 48 | Now import the following, and you're good to go: 49 | 50 | ```tut:silent 51 | import vectorpipe._ 52 | ``` 53 | 54 | ### Performance 55 | 56 | Wow, fast! 57 | 58 | ### Related Projects 59 | 60 | - [OpenMapTiles](https://openmaptiles.org/) 61 | - [Mapbox](https://www.mapbox.com/) 62 | -------------------------------------------------------------------------------- /src/main/tut/outputs.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: "Outputs" 4 | section: "section4" 5 | position: 4 6 | --- 7 | 8 | Types of VectorTiles! 9 | 10 | - AVTs 11 | - OpenMapTiles 12 | - Custom! 13 | -------------------------------------------------------------------------------- /src/main/tut/sources.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: "Data Sources" 4 | section: "section3" 5 | position: 3 6 | --- 7 | 8 | Sources of Vector data! 9 | -------------------------------------------------------------------------------- /src/main/tut/usage.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: docs 3 | title: "Usage" 4 | section: "usage" 5 | position: 2 6 | --- 7 | 8 | {% include_relative usage/usage.md %} 9 | -------------------------------------------------------------------------------- /src/main/tut/usage/concepts.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: docs 3 | title: "Concepts" 4 | section: "usage" 5 | --- 6 | 7 | # Concepts 8 | 9 | VectorPipe strives to be straight-forward. With only a few simple function 10 | applications we can transform completely raw data into a grid of 11 | VectorTiles, ready for further processing. "Clipping" and "Collation" 12 | functions help us customize this process along the way. 13 | 14 | 15 | 16 | ### Data Sources 17 | 18 | Some source of Vector (re: geometric) data on the earth. Could come in any 19 | format (example: OpenStreetMap). 20 | 21 | For each data source that has first-class support, we expose a 22 | `vectorpipe.*` module with a matching name. Example: `vectorpipe.osm`. These 23 | modules expose all the types and functions necessary for transforming the 24 | raw data into the "Middle Ground" types. 25 | 26 | No first-class support for your favourite data source? Want to write it 27 | yourself, and maybe even keep it private? That's okay, just provide the 28 | function `YourData => RDD[Feature[G, D]]` and VectorPipe can handle the 29 | rest. 30 | 31 | ### The "Middle Ground" 32 | 33 | A collection of Geometries on the earth. The actual data can be distributed 34 | across multiple machines via Spark's `RDD` type. 
From this "middle ground", 35 | we can proceed with creating Vector Tiles, or (with the right supporting 36 | code) we could convert *back* into the format of the original source data. 37 | 38 | Note that via the method `VectorTile.toIterable`, the following conversion 39 | is possible: 40 | 41 | ```tut:silent 42 | import geotrellis.spark._ 43 | import geotrellis.vector._ 44 | import geotrellis.vectortile._ 45 | import org.apache.spark._ 46 | import org.apache.spark.rdd.RDD 47 | 48 | implicit val sc: SparkContext = new SparkContext( 49 | new SparkConf().setMaster("local[*]").setAppName("back-to-middle-ground") 50 | ) 51 | 52 | /* Mocked as `empty` for the example */ 53 | val tiles: RDD[(SpatialKey, VectorTile)] = sc.emptyRDD 54 | 55 | /* A VT layer converted back to the "middle ground", possibly for recollation */ 56 | val backToMiddle: RDD[(SpatialKey, Iterable[Feature[Geometry, Map[String, Value]]])] = 57 | tiles.mapValues(_.toIterable) 58 | 59 | /* Close up Spark nicely */ 60 | sc.stop() 61 | ``` 62 | 63 | ### Clipping Functions 64 | 65 | GeoTrellis has a consistent `RDD[(K, V)]` pattern for handling grids of 66 | tiled data, where `K` is the grid index and `V` is the actual value type. 67 | Before `RDD[(SpatialKey, VectorTile)]` can be achieved, we need to convert 68 | our gridless `RDD[Feature[G, D]]` into such a grid, such that each Feature's 69 | `Geometry` is reasonably clipped to the size of an individual tile. Depending 70 | on which clipping function you choose (from the `vectorpipe.Clip` object, or 71 | even your own custom one) the shape of the clipped Geometry will vary. See 72 | our Scaladocs for more detail on the available options. 73 | 74 | Admittedly, we sometimes can't guarantee the validity of incoming vector data. 75 | Clipping is known to occasionally fail on large, complex multipolygons, so 76 | we skip over these failures while optionally allowing to log them. Any logging 77 | framework can be used. 78 | 79 | ### Collation Functions 80 | 81 | Once clipped and gridded by `VectorPipe.toGrid`, we have a `RDD[(SpatialKey, 82 | Iterable[Feature[G, D]])]` that represents all the Geometry fragments 83 | present at each tiled location on the earth. This is the perfect shape to 84 | turn into a `VectorTile`. To do so, we need to choose a *Collator* function, 85 | which determines what VectorTile Layer each `Feature` should be placed into, 86 | and how (if at all) its corresponding metadata (the `D`) should be 87 | processed. 88 | 89 | Want to write your own Collator? The `Collate.generically` function will be 90 | of interest to you. 91 | 92 | ### Output Targets 93 | 94 | We can imagine two possible outputs for our completed grid of Vector Tiles: 95 | 96 | - A compressed GeoTrellis layer, saved to S3 [or 97 | elsewhere](https://geotrellis.readthedocs.io/en/latest/guide/tile-backends.html) 98 | - A dump of every tile as an `.mvt`, readable by other software 99 | 100 | Either option is simple, but outputting an `RDD[(SpatialKey, VectorTile)]` 101 | isn't actually the concern of VectorPipe - it can be handled entirely in 102 | client code via GeoTrellis functionality. An example of this can be found 103 | [in this repository](https://github.com/fosskers/vectorpipe-io). 
104 | -------------------------------------------------------------------------------- /src/main/tut/usage/osm.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: docs 3 | title: "Reading OpenStreetMap Data" 4 | section: "usage" 5 | --- 6 | 7 | ## From XML 8 | 9 | OSM XML files usually appear with the extension `.osm`. Since the data is all string-based, 10 | these files can be quite large compared to their PBF or ORC equivalents. 11 | 12 | ```tut:silent 13 | import org.apache.spark._ 14 | import scala.util.{Success, Failure} 15 | import vectorpipe._ 16 | 17 | implicit val sc: SparkContext = new SparkContext( 18 | new SparkConf().setMaster("local[*]").setAppName("xml-example") 19 | ) 20 | 21 | val path: String = "/some/path/on/your/machine/foo.osm" 22 | 23 | osm.fromLocalXML(path) match { 24 | case Failure(e) => { } /* Parsing failed somehow... is the filepath correct? */ 25 | case Success((ns,ws,rs)) => { } /* (RDD[(Long, Node)], RDD[(Long, Way)], RDD[(Long, Relation)]) */ 26 | } 27 | 28 | sc.stop() 29 | ``` 30 | 31 | ## From PBF 32 | 33 | For the time being, `.osm.pbf` files can be used by first converting them to `.orc` 34 | files using the [osm2orc](https://github.com/mojodna/osm2orc) tool, and then following 35 | VectorPipe's ORC instructions given below. 36 | 37 | ## From ORC 38 | 39 | You must first include an extra dependency to the `libraryDependencies` list in your `build.sbt`: 40 | 41 | ``` 42 | "org.apache.spark" %% "spark-hive" % "2.2.0" 43 | ``` 44 | 45 | And then we can read our OSM data in parallel via Spark. Notice the use of `SparkSession` 46 | instead of `SparkContext` here: 47 | 48 | ```tut:silent 49 | import org.apache.spark.sql._ 50 | import scala.util.{Success, Failure} 51 | import vectorpipe._ 52 | 53 | implicit val ss: SparkSession = 54 | SparkSession.builder.master("local[*]").appName("orc-example").enableHiveSupport.getOrCreate 55 | 56 | val path: String = "s3://bucket/key/foo.orc" 57 | 58 | osm.fromORC(path) match { 59 | case Failure(err) => { } /* Does the file exist? Do you have the right AWS credentials? */ 60 | case Success((ns,ws,rs)) => { } /* (RDD[(Long, Node)], RDD[(Long, Way)], RDD[(Long, Relation)]) */ 61 | } 62 | 63 | ss.stop() 64 | ``` 65 | 66 | This approach will be particularly efficient when run on an EMR cluster, since 67 | EMR clusters have privileged access to S3. 68 | -------------------------------------------------------------------------------- /src/main/tut/usage/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | Writing a small executable that uses VectorPipe is straight-forward. The 4 | entire `main` isn't much more than: 5 | 6 | ```tut:silent 7 | import geotrellis.proj4.WebMercator 8 | import geotrellis.spark._ 9 | import geotrellis.spark.tiling._ 10 | import geotrellis.vectortile.VectorTile 11 | import org.apache.spark._ 12 | import org.apache.spark.rdd.RDD 13 | import vectorpipe._ /* All types and functions. Also exposes the `osm` submodule used below. 
*/ 14 | 15 | /* Initialize a `SparkContext`, necessary for all `RDD` work */ 16 | implicit val sc: SparkContext = new SparkContext( 17 | new SparkConf().setMaster("local[*]").setAppName("vectorpipe-example") 18 | ) 19 | 20 | /* Describe the dimensions of your data area */ 21 | val layout: LayoutDefinition = 22 | ZoomedLayoutScheme.layoutForZoom(15, WebMercator.worldExtent, 512) 23 | 24 | /* From an OSM data source, mocked as "empty" for this example */ 25 | val (nodes, ways, relations): (RDD[(Long, osm.Node)], RDD[(Long, osm.Way)], RDD[(Long, osm.Relation)]) = 26 | (sc.emptyRDD, sc.emptyRDD, sc.emptyRDD) 27 | 28 | /* All OSM Elements lifted into GeoTrellis Geometry types. 29 | * Note: type OSMFeature = Feature[Geometry, ElementData] 30 | */ 31 | val features: RDD[osm.OSMFeature] = 32 | osm.features(nodes, ways, relations).geometries 33 | 34 | /* All Geometries clipped to your `layout` grid */ 35 | val featGrid: RDD[(SpatialKey, Iterable[osm.OSMFeature])] = 36 | grid(Clip.byHybrid, logToStdout, layout, features) 37 | 38 | /* A grid of Vector Tiles */ 39 | val tiles: RDD[(SpatialKey, VectorTile)] = 40 | vectortiles(Collate.byOSM, layout, featGrid) 41 | 42 | /* Further processing here, writing to S3, etc. */ 43 | 44 | /* Halt Spark nicely */ 45 | sc.stop() 46 | ``` 47 | 48 | A full example of processing some OSM XML [can be found 49 | here](https://github.com/fosskers/vectorpipe-io). 50 | -------------------------------------------------------------------------------- /src/test/resources/.gitignore: -------------------------------------------------------------------------------- 1 | !*.orc 2 | -------------------------------------------------------------------------------- /src/test/resources/isle-of-man-latest.osm.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/isle-of-man-latest.osm.orc -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=WARN, console 2 | log4j.appender.console=org.apache.log4j.ConsoleAppender 3 | log4j.appender.console.target=System.out 4 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 5 | # log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c: %m%n 6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 7 | log4j.logger.osmesa=DEBUG -------------------------------------------------------------------------------- /src/test/resources/relation-110564.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-110564.orc -------------------------------------------------------------------------------- /src/test/resources/relation-110564.wkt: -------------------------------------------------------------------------------- 1 | MULTIPOLYGON (((-85.982597 34.392855, -85.979971 34.392887, -85.979885 34.392888, -85.976397 34.392888, -85.969464 34.39293, -85.96946 34.392309, -85.969469 34.390934, -85.969527 34.38977, -85.969542 34.389184, -85.978326 34.389014, -85.980136 34.388975, -85.982477 34.38893, -85.982539 34.390479, -85.982597 34.392855)), ((-85.991477 34.381899, -85.991142 34.3819, -85.987173 34.381924, -85.987092 34.389011, -85.983147 
34.388938, -85.982574 34.388926, -85.982477 34.38893, -85.982563 34.387774, -85.982684 34.386311, -85.982673 34.385681, -85.982714 34.382035, -85.986874 34.381928, -85.987021 34.381777, -85.987076 34.38115, -85.98709 34.378146, -85.99014 34.378181, -85.990431 34.378189, -85.991498 34.378197, -85.991477 34.381899)), ((-85.969523 34.400126, -85.96934 34.400318, -85.969242 34.402769, -85.969203 34.403786, -85.968504 34.403761, -85.966925 34.403706, -85.966119 34.403681, -85.96504 34.403639, -85.965057 34.402798, -85.965119 34.400062, -85.969291 34.400119, -85.96938 34.396425, -85.972964 34.396491, -85.973719 34.396504, -85.97364 34.40018, -85.969523 34.400126)), ((-85.965119 34.400062, -85.962384 34.400035, -85.960656 34.400001, -85.960724 34.397709, -85.960747 34.397073, -85.96075 34.396963, -85.960772 34.396316, -85.963846 34.396282, -85.965205 34.396349, -85.965205 34.396921, -85.965119 34.400062)), ((-85.96938 34.396425, -85.965205 34.396349, -85.965206 34.394604, -85.965209 34.393635, -85.965212 34.392946, -85.967966 34.392944, -85.969464 34.39293, -85.96938 34.396425))) 2 | -------------------------------------------------------------------------------- /src/test/resources/relation-191199.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-191199.orc -------------------------------------------------------------------------------- /src/test/resources/relation-191199.wkt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-191199.wkt -------------------------------------------------------------------------------- /src/test/resources/relation-191204.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-191204.orc -------------------------------------------------------------------------------- /src/test/resources/relation-191204.wkt: -------------------------------------------------------------------------------- 1 | MULTIPOLYGON EMPTY 2 | -------------------------------------------------------------------------------- /src/test/resources/relation-1949938.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-1949938.orc -------------------------------------------------------------------------------- /src/test/resources/relation-1949938.wkt: -------------------------------------------------------------------------------- 1 | POLYGON ((-71.342046 41.8333126, -71.3423551 41.8336345, -71.3424089 41.8338235, -71.3423936 41.8340698, -71.3420938 41.8353984, -71.3418786 41.8357421, -71.3412098 41.8367385, -71.3411176 41.8371451, -71.3410484 41.8376434, -71.3411483 41.8384852, -71.3413405 41.8388403, -71.3415711 41.839264, -71.342063 41.8398481, -71.342432 41.840312, -71.3427856 41.8407071, -71.3432852 41.8412511, -71.3434005 41.8414115, -71.3435465 41.8415718, -71.3435542 41.8416863, -71.3434927 41.8417951, -71.3434159 41.8418352, -71.3432156 41.8418625, -71.3429086 41.8418868, -71.3418709 41.8419669, -71.3410945 41.8421101, -71.3408101 41.842196, -71.3403951 41.8422189, -71.3402874 
41.8422991, -71.3401875 41.842488, -71.3400107 41.8425739, -71.3396187 41.8426197, -71.3393266 41.8425281, -71.3391037 41.8425625, -71.3390038 41.8426255, -71.3387117 41.8426197, -71.3385657 41.8425567, -71.3383197 41.842425, -71.3375818 41.8420013, -71.3372128 41.8418639, -71.3371052 41.8417722, -71.3371206 41.8415432, -71.3372436 41.8413714, -71.3374204 41.8412511, -71.3374204 41.8410851, -71.3372897 41.8410106, -71.3369899 41.8410965, -71.336744 41.8411366, -71.3362751 41.8410736, -71.3359061 41.8409763, -71.3355372 41.8407415, -71.3352297 41.8404666, -71.335076 41.8402662, -71.335076 41.8400657, -71.3351221 41.839768, -71.3351913 41.839579, -71.3355372 41.8391438, -71.3358523 41.8390292, -71.3361367 41.8389548, -71.3365134 41.8389548, -71.3367824 41.8390063, -71.3369438 41.8390235, -71.3370975 41.8390063, -71.3372052 41.8389204, -71.3373051 41.8386685, -71.3373819 41.8384566, -71.3373973 41.8383936, -71.3375972 41.8380958, -71.337651 41.8378954, -71.3376663 41.8376434, -71.3375126 41.8375059, -71.337282 41.8374601, -71.3370745 41.8375117, -71.3367824 41.8377465, -71.3365979 41.8379698, -71.3363904 41.8381359, -71.3362136 41.8382676, -71.335983 41.8384165, -71.3358139 41.8384222, -71.3356525 41.8383649, -71.3355295 41.8382447, -71.3355141 41.8381359, -71.3356294 41.8379297, -71.3357524 41.8378553, -71.33586 41.8376949, -71.3358446 41.8374945, -71.3358446 41.8372482, -71.3359984 41.8370993, -71.3361214 41.8370134, -71.3365595 41.8369562, -71.3368746 41.8368416, -71.3370207 41.8367042, -71.3370053 41.8365037, -71.3367286 41.8364465, -71.3365979 41.8363434, -71.3365595 41.8362346, -71.3365518 41.8360456, -71.3366902 41.8359081, -71.336767 41.8357363, -71.3368746 41.8355588, -71.3370822 41.8353927, -71.337136 41.8352381, -71.3371667 41.8350491, -71.3371513 41.8348258, -71.3370591 41.8346654, -71.3370591 41.8345509, -71.3371206 41.8343905, -71.337259 41.834316, -71.3373281 41.8342072, -71.3373743 41.83415, -71.337159 41.8337376, -71.3371437 41.8336173, -71.3371975 41.83352, -71.3372974 41.8334856, -71.3375126 41.8335715, -71.3378969 41.8337033, -71.3380968 41.8336116, -71.3381583 41.8335314, -71.3382813 41.8333596, -71.3385426 41.833142, -71.3388501 41.8329244, -71.3391037 41.8327697, -71.3393266 41.8325406, -71.3395649 41.8324032, -71.3398263 41.8322715, -71.3400338 41.832197, -71.3402029 41.8322027, -71.3403874 41.8322829, -71.3415865 41.8330561, -71.3417633 41.8331592, -71.342046 41.8333126), (-71.3413636 41.8354672, -71.3414174 41.8353641, -71.3414942 41.835324, -71.3415942 41.8353984, -71.341525 41.8355073, -71.3414174 41.8356275, -71.3413482 41.8355817, -71.3413636 41.8354672), (-71.3411945 41.8359024, -71.3413482 41.8359425, -71.3413636 41.8360513, -71.3412559 41.8362346, -71.3411868 41.8364007, -71.3410715 41.8364407, -71.3410177 41.8363434, -71.3410484 41.8361372, -71.3411253 41.8359941, -71.3411945 41.8359024), (-71.3400799 41.841944, -71.3398954 41.8417379, -71.3398186 41.8415145, -71.3398186 41.8414286, -71.3400338 41.8413084, -71.3402106 41.8413943, -71.3402951 41.8415432, -71.3403643 41.8416749, -71.3403413 41.841778, -71.3402106 41.8419383, -71.3400799 41.841944)) 2 | -------------------------------------------------------------------------------- /src/test/resources/relation-2554903.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-2554903.orc 
-------------------------------------------------------------------------------- /src/test/resources/relation-2580685.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-2580685.orc -------------------------------------------------------------------------------- /src/test/resources/relation-2580685.wkt: -------------------------------------------------------------------------------- 1 | MULTIPOLYGON (((-71.4589656 41.799364, -71.4585222 41.7994796, -71.4586724 41.7990153, -71.4587368 41.7985914, -71.458887 41.7982555, -71.4592947 41.7980635, -71.4597882 41.7978955, -71.4601423 41.797712, -71.4604534 41.7974156, -71.4602925 41.7971037, -71.4600564 41.7967998, -71.4600564 41.7965438, -71.4601423 41.7962559, -71.4599599 41.7959519, -71.4596082 41.7958057, -71.4588853 41.7942745, -71.4589886 41.7938896, -71.4597229 41.7935902, -71.4599065 41.7935474, -71.4609162 41.7953609, -71.4611801 41.7962419, -71.461008 41.7966012, -71.4614784 41.797448, -71.4620408 41.7979202, -71.4622242 41.7985172, -71.4622109 41.799042, -71.46188 41.7993469, -71.4614585 41.7994322, -71.4610194 41.7993555, -71.460354 41.7990476, -71.4595852 41.7990133, -71.4589656 41.799364), (-71.4614062 41.7986518, -71.4612426 41.7988664, -71.4609089 41.7989786, -71.4606145 41.7989932, -71.4602024 41.7989005, -71.4600126 41.7988176, -71.4599407 41.7987152, -71.4600061 41.7985591, -71.4602285 41.7984079, -71.4606865 41.798286, -71.461151 41.7982421, -71.46138 41.7984079, -71.4614062 41.7986518)), ((-71.4584952 41.7995265, -71.4589656 41.799364, -71.4590804 41.8006898, -71.4592704 41.8011716, -71.4593557 41.8017247, -71.4592489 41.8021513, -71.4588976 41.8024816, -71.4593012 41.8030966, -71.4596196 41.8037689, -71.4610194 41.8044617, -71.4619832 41.8048123, -71.4625684 41.8046413, -71.4626372 41.8042735, -71.4634404 41.8040854, -71.4640944 41.8041196, -71.4647713 41.8037774, -71.4648861 41.8032814, -71.464278 41.8029307, -71.4645304 41.8026912, -71.4652532 41.8026912, -71.4659072 41.8028281, -71.4662973 41.802734, -71.4670087 41.801682, -71.4669743 41.800724, -71.4671578 41.8007155, -71.4673185 41.8015708, -71.4667907 41.8029649, -71.4662744 41.8035636, -71.4654483 41.8041281, -71.4648631 41.8048038, -71.463151 41.805115, -71.462361 41.804966, -71.4614784 41.8049748, -71.4605375 41.8044702, -71.4597114 41.8042307, -71.4589656 41.8034524, -71.4586903 41.8028537, -71.458677 41.8027838, -71.4586418 41.802703, -71.458621 41.8024886, -71.4586706 41.8023715, -71.4587706 41.8021353, -71.459046 41.801853, -71.4587132 41.8009806, -71.4586788 41.8001424, -71.4584952 41.7995265))) 2 | -------------------------------------------------------------------------------- /src/test/resources/relation-3080946.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-3080946.orc -------------------------------------------------------------------------------- /src/test/resources/relation-3105056.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-3105056.orc -------------------------------------------------------------------------------- /src/test/resources/relation-3105056.wkt: 
-------------------------------------------------------------------------------- 1 | POLYGON ((-71.3264288 41.5069067, -71.3263203 41.5069497, -71.3262056 41.5070241, -71.3261505 41.5070885, -71.326092 41.5071901, -71.3260291 41.5073693, -71.325963 41.5075585, -71.325922 41.5076731, -71.325883 41.5076947, -71.3257676 41.5076736, -71.3256132 41.5076263, -71.3255477 41.507603, -71.3254939 41.5075638, -71.3254589 41.5075179, -71.3254449 41.5074619, -71.3254597 41.5074, -71.3255173 41.5072859, -71.3255791 41.5071711, -71.3255736 41.5068152, -71.3255687 41.50674, -71.325611 41.5067513, -71.3256436 41.5067582, -71.3256705 41.5067586, -71.3256924 41.5067488, -71.3261404 41.50684, -71.3261668 41.5068473, -71.3261869 41.5068513, -71.3262246 41.5068589, -71.3262246 41.5068679, -71.3262354 41.506883, -71.3262579 41.5068973, -71.3262824 41.5069051, -71.3263218 41.506903, -71.3263529 41.5068893, -71.3264288 41.5069067)) 2 | -------------------------------------------------------------------------------- /src/test/resources/relation-333501.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-333501.orc -------------------------------------------------------------------------------- /src/test/resources/relation-333501.wkt: -------------------------------------------------------------------------------- 1 | POLYGON ((-71.3364289 41.4799458, -71.3366843 41.4790572, -71.3368865 41.478988, -71.3368439 41.4783601, -71.336663 41.4783761, -71.3365442 41.4781598, -71.3369364 41.4780481, -71.3381528 41.477708, -71.3383414 41.4776608, -71.3384978 41.4776906, -71.3386815 41.4778123, -71.339168 41.4781548, -71.3392847 41.4782367, -71.3387163 41.4788549, -71.3384134 41.4792098, -71.3385822 41.4793315, -71.3382744 41.4797708, -71.3382223 41.4798627, -71.3377854 41.4798081, -71.3377208 41.4796616, -71.3369141 41.4796889, -71.3369066 41.4798677, -71.3365343 41.4799322, -71.3364289 41.4799458), (-71.3370199 41.4782179, -71.3383813 41.4778507, -71.3389892 41.4782749, -71.3388056 41.4784946, -71.3382821 41.4791493, -71.3381738 41.4792934, -71.3383417 41.4794107, -71.3381156 41.4797654, -71.3378676 41.4797206, -71.3378061 41.4795872, -71.337443 41.4796016, -71.3374347 41.4795441, -71.3372111 41.4795585, -71.3372193 41.4796118, -71.3368911 41.4796057, -71.3368911 41.4795708, -71.3368378 41.479577, -71.3368234 41.4796241, -71.3367023 41.4796077, -71.3367844 41.4792856, -71.3370347 41.4792097, -71.3370199 41.4782179)) 2 | -------------------------------------------------------------------------------- /src/test/resources/relation-393502.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-393502.orc -------------------------------------------------------------------------------- /src/test/resources/relation-393502.wkt: -------------------------------------------------------------------------------- 1 | POLYGON ((-71.4562227 41.6485987, -71.4561617 41.6486401, -71.4560719 41.648621, -71.4559852 41.648499, -71.4560684 41.6483234, -71.4561891 41.6481462, -71.456312 41.6479512, -71.4561934 41.6478207, -71.4558499 41.6476399, -71.4556242 41.6475887, -71.4553623 41.6474994, -71.455278 41.6475307, -71.4554576 41.6477624, -71.4556047 41.6478445, -71.4554322 41.6479837, -71.4554172 41.6482545, -71.4551891 41.6484802, 
-71.4547765 41.6488669, -71.4544546 41.6490673, -71.45424 41.6491475, -71.4540314 41.649123, -71.4535844 41.6491252, -71.4529913 41.6493056, -71.4527857 41.6494481, -71.4527976 41.6497243, -71.4528423 41.6500026, -71.4529466 41.6503055, -71.4528852 41.650835, -71.4529712 41.6510973, -71.4529405 41.6516331, -71.4529913 41.6520714, -71.4528935 41.6522887, -71.4521329 41.6531935, -71.4518542 41.6534193, -71.4516462 41.653417, -71.4515167 41.6533123, -71.4513968 41.6533739, -71.4515094 41.6534801, -71.4514263 41.6535232, -71.4513257 41.653429, -71.4511044 41.6535533, -71.450994 41.6536187, -71.4510103 41.6537167, -71.4503749 41.6541715, -71.4501844 41.654423, -71.449888 41.6546385, -71.4497378 41.6547958, -71.4495944 41.6548359, -71.449477 41.6549516, -71.449357 41.6549751, -71.4492382 41.6549484, -71.4491915 41.6549919, -71.4491545 41.6553574, -71.4491894 41.6556444, -71.4488881 41.6563126, -71.4486687 41.6565095, -71.4485691 41.6565879, -71.448441 41.6566786, -71.4482743 41.6567632, -71.4475918 41.6569869, -71.4473949 41.6571615, -71.4472649 41.6574506, -71.4469975 41.6577686, -71.446965 41.6583328, -71.4467593 41.6585282, -71.4464288 41.6586792, -71.4463475 41.6586284, -71.446196 41.6587191, -71.4462188 41.6587466, -71.4457896 41.6589791, -71.4454817 41.6590878, -71.4453846 41.6592176, -71.4454959 41.6592606, -71.4453015 41.6593598, -71.4452371 41.6593168, -71.4449099 41.6596544, -71.4447699 41.6596156, -71.4447248 41.6598738, -71.4449286 41.6599069, -71.4449152 41.659957, -71.4447476 41.659935, -71.4446804 41.6602495, -71.4445897 41.6604577, -71.4445357 41.660459, -71.4445424 41.6608086, -71.4446044 41.6610053, -71.4445981 41.6610969, -71.444818 41.6610641, -71.4448737 41.6612109, -71.4449199 41.6612159, -71.4449065 41.6613427, -71.4449374 41.6614677, -71.4450876 41.661627, -71.4451607 41.6617069, -71.4451862 41.6617783, -71.4451318 41.6617983, -71.4452552 41.6620057, -71.4452773 41.6620019, -71.4452597 41.6622439, -71.4452016 41.6622652, -71.4450983 41.66267, -71.4449401 41.6629243, -71.4449405 41.6631424, -71.4451117 41.6631449, -71.4449562 41.6635917, -71.4448563 41.6640225, -71.4428492 41.664013, -71.4409667 41.6640028, -71.4410086 41.6635043, -71.4409704 41.6630459, -71.4406563 41.6627032, -71.4405231 41.6624939, -71.4405799 41.6621481, -71.4410582 41.6614303, -71.4410348 41.6611838, -71.4411192 41.6609324, -71.441648 41.6604529, -71.441765 41.6602931, -71.4417262 41.6601353, -71.441636 41.6599996, -71.441442 41.6597342, -71.4414015 41.6595652, -71.441427 41.6591274, -71.4415719 41.6588499, -71.4415607 41.6585328, -71.4420302 41.6575009, -71.4421048 41.6572404, -71.4421307 41.6567142, -71.4424257 41.6562739, -71.4432187 41.6557317, -71.4437089 41.65514, -71.4438941 41.6547956, -71.4441515 41.6545408, -71.4442445 41.6543352, -71.4446573 41.654132, -71.4458846 41.653978, -71.4465442 41.6537136, -71.4468603 41.6534841, -71.4470949 41.6532021, -71.4473923 41.6525943, -71.4478523 41.652443, -71.4483847 41.6523728, -71.448728 41.6521774, -71.4489359 41.6519299, -71.4489171 41.6518417, -71.4487246 41.651737, -71.4486033 41.6515712, -71.4486176 41.6513594, -71.4489403 41.6510273, -71.4492344 41.6508874, -71.449339 41.6508027, -71.4493249 41.6506844, -71.4492149 41.6503867, -71.4494166 41.6495338, -71.449869 41.6492269, -71.4503998 41.6486283, -71.4504804 41.6483137, -71.4503091 41.6480198, -71.450056 41.6477186, -71.4501647 41.6474817, -71.4502232 41.6471998, -71.4510559 41.6469478, -71.4512425 41.6469841, -71.451467 41.6469178, -71.4515282 41.646955, -71.4515222 41.6477833, -71.4516529 
41.6478644, -71.4517596 41.6478784, -71.4518637 41.6478163, -71.4521768 41.6474964, -71.4525273 41.6474225, -71.4530853 41.6470536, -71.4531583 41.6470422, -71.4532339 41.6470851, -71.453267 41.647215, -71.4532408 41.6472952, -71.4532515 41.6473503, -71.4533246 41.6474295, -71.4533823 41.647455, -71.4533743 41.6475001, -71.4535238 41.6476389, -71.4535612 41.6477262, -71.4537196 41.6478033, -71.4540525 41.6477985, -71.4541949 41.6476977, -71.4543566 41.647678, -71.4546696 41.6474009, -71.4553603 41.6472376, -71.4557675 41.6473042, -71.4561148 41.6475294, -71.4563012 41.6476389, -71.4565258 41.6476734, -71.4565082 41.6480817, -71.4563099 41.6482686, -71.4561667 41.6485057, -71.4562227 41.6485987)) -------------------------------------------------------------------------------- /src/test/resources/relation-5448156.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-5448156.orc -------------------------------------------------------------------------------- /src/test/resources/relation-5448691.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-5448691.orc -------------------------------------------------------------------------------- /src/test/resources/relation-5612959.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-5612959.orc -------------------------------------------------------------------------------- /src/test/resources/relation-61315.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-61315.orc -------------------------------------------------------------------------------- /src/test/resources/relation-61315.wkt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-61315.wkt -------------------------------------------------------------------------------- /src/test/resources/relation-6710544.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-6710544.orc -------------------------------------------------------------------------------- /src/test/resources/view/cluster-view.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 8 | 9 | 10 | Point cluster view 11 | 12 | 13 | 14 | 18 | 19 | 20 | 21 |
22 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/test/resources/view/layer-test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 8 | 9 | 10 | Add a third party vector tile source 11 | 12 | 13 | 14 | 59 | 60 | 61 | 62 | 63 |
64 | 65 | 66 | 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /src/test/scala/vectorpipe/MultiPolygonRelationReconstructionSpec.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe 2 | 3 | import java.sql.Timestamp 4 | 5 | import geotrellis.spark.store.kryo.KryoRegistrator 6 | import geotrellis.vector._ 7 | import org.apache.spark.SparkConf 8 | import org.apache.spark.serializer.KryoSerializer 9 | import org.apache.spark.sql._ 10 | import org.apache.spark.sql.functions._ 11 | import org.scalatest.prop.{TableDrivenPropertyChecks, Tables} 12 | import org.scalatest.{Matchers, PropSpec} 13 | import vectorpipe.model.Member 14 | import org.locationtech.jts.io.WKTReader 15 | import org.locationtech.geomesa.spark.jts._ 16 | import vectorpipe.relations.MultiPolygons.build 17 | 18 | import scala.io.Source 19 | 20 | case class Fixture(id: Int, members: DataFrame, wkt: Seq[String]) 21 | 22 | trait SparkPoweredTables extends Tables { 23 | def wktReader = new WKTReader() 24 | 25 | val spark: SparkSession = SparkSession 26 | .builder 27 | .config( 28 | /* Settings compatible with both local and EMR execution */ 29 | new SparkConf() 30 | .setAppName(getClass.getName) 31 | .setIfMissing("spark.master", "local[*]") 32 | .setIfMissing("spark.serializer", classOf[KryoSerializer].getName) 33 | .setIfMissing("spark.kryo.registrator", classOf[KryoRegistrator].getName) 34 | .setIfMissing("spark.sql.orc.impl", "native") 35 | ).getOrCreate() 36 | spark.withJTS 37 | 38 | def relation(relation: Int): Fixture = Fixture(relation, orc(s"relation-$relation.orc"), readWktFile(s"relation-$relation.wkt")) 39 | 40 | def orc(filename: String): DataFrame = spark.read.orc(getClass.getResource("/" + filename).getPath) 41 | 42 | // osm2pgsql -c -d rhode_island -j -K -l rhode-island-latest.osm.pbf 43 | // select ST_AsText(way) from planet_osm_polygon where osm_id=-333501; 44 | 45 | def readWktFile(filename: String): Seq[String] = 46 | try { 47 | Source.fromInputStream(getClass.getResourceAsStream("/" + filename)).getLines.toSeq match { 48 | case expected if expected.isEmpty => 49 | Seq() 50 | case expected => 51 | expected 52 | } 53 | } catch { 54 | case _: Exception => Seq("[not provided]") 55 | } 56 | 57 | def asGeoms(relations: DataFrame): Seq[Geometry] = { 58 | import relations.sparkSession.implicits._ 59 | 60 | relations.select('geom).collect.map { row => 61 | row.getAs[Geometry]("geom") 62 | } 63 | } 64 | } 65 | 66 | // osm2pgsql -c -d rhode_island -j -K -l rhode-island-latest.osm.pbf 67 | // select ST_AsText(way) from planet_osm_polygon where osm_id=-333501; 68 | // to debug / visually validate (geoms won't match exactly), load WKT into geojson.io from Meta → Load WKT String 69 | // https://www.openstreetmap.org/relation/64420 70 | // to find multipolygons: select osm_id from planet_osm_polygon where osm_id < 0 and ST_GeometryType(way) = 'ST_MultiPolygon' order by osm_id desc; 71 | class MultiPolygonRelationExamples extends SparkPoweredTables { 72 | def examples = Table("multipolygon relation", 73 | relation(333501), // unordered, single polygon with 1 hole 74 | relation(393502), // single polygon, multiple outer parts, no holes 75 | relation(1949938), // unordered, single polygon with multiple holes 76 | relation(3105056), // multiple unordered outer parts in varying directions 77 | relation(2580685), // multipolygon: 2 polygons, one with 1 hole 78 | relation(3080946), // multipolygon: many polygons, no holes 
79 | relation(5448156), // multipolygon made up of parcels 80 | relation(5448691), // multipolygon made up of parcels 81 | relation(6710544), // complex multipolygon 82 | relation(191199), // 4 segments; 2 are components of another (thus duplicates) 83 | relation(61315), // incomplete member list (sourced from an extract of a neighboring state) 84 | relation(2554903), // boundary w/ admin_centre + label node members 85 | relation(191204), // no members 86 | /* relation(5612959), // pathological case for unioning --- removed test, too pathological (address later?) */ 87 | relation(110564) // touching but not dissolve-able 88 | ) 89 | } 90 | 91 | class MultiPolygonRelationReconstructionSpec extends PropSpec with TableDrivenPropertyChecks with Matchers { 92 | property("should match expected WKT") { 93 | new MultiPolygonRelationExamples { 94 | forAll(examples) { fixture => 95 | import fixture.members.sparkSession.implicits._ 96 | 97 | // TODO rewrite fixtures with additional columns added below 98 | val actual: Seq[Geometry] = asGeoms(fixture.members 99 | .withColumn("version", lit(1)) 100 | .withColumn("minorVersion", lit(0)) 101 | .withColumn("updated", lit(Timestamp.valueOf("2001-01-01 00:00:00"))) 102 | .withColumn("validUntil", lit(Timestamp.valueOf("2002-01-01 00:00:00"))) 103 | .withColumn("geometry", st_geomFromWKB('geom)) 104 | .groupByKey { row => 105 | (row.getAs[Long]("changeset"), row.getAs[Long]("id"), row.getAs[Integer]("version"), row.getAs[Integer] 106 | ("minorVersion"), row.getAs[Timestamp]("updated"), row.getAs[Timestamp]("validUntil")) 107 | } 108 | .mapGroups { 109 | case ((changeset, id, version, minorVersion, updated, validUntil), rows) => 110 | val members = rows.toVector 111 | // TODO store Bytes as the type in fixtures 112 | val types = members.map { x => Member.typeFromString(x.getAs[String]("type")) } 113 | val roles = members.map(_.getAs[String]("role")) 114 | val geoms = members.map(_.getAs[Geometry]("geometry")) 115 | val mp = build(id, version, updated, types, roles, geoms).orNull 116 | 117 | (changeset, id, version, minorVersion, updated, validUntil, mp) 118 | } 119 | .toDF("changeset", "id", "version", "minorVersion", "updated", "validUntil", "geom") 120 | ).flatMap(Option.apply(_)) 121 | 122 | val expected = fixture.wkt.map(wktReader.read) 123 | 124 | try { 125 | actual should ===(expected) 126 | } catch { 127 | case e: Throwable => 128 | println(s"${fixture.id} actual:") 129 | actual.foreach(println) 130 | println(s"${fixture.id} expected:") 131 | fixture.wkt.foreach(println) 132 | 133 | throw e 134 | } 135 | } 136 | } 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/test/scala/vectorpipe/ProcessOSMTest.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe 2 | 3 | import org.scalatest._ 4 | import vectorpipe.{internal => ProcessOSM} 5 | 6 | class ProcessOSMTest extends FunSpec with TestEnvironment with Matchers { 7 | val orcFile = getClass.getResource("/isle-of-man-latest.osm.orc").getPath 8 | 9 | val elements = ss.read.orc(orcFile) 10 | val nodes = ProcessOSM.preprocessNodes(elements).cache 11 | val nodeGeoms = ProcessOSM.constructPointGeometries(nodes).cache 12 | val wayGeoms = ProcessOSM.reconstructWayGeometries(elements, nodes).cache 13 | val relationGeoms = ProcessOSM.reconstructRelationGeometries(elements, wayGeoms).cache 14 | 15 | it("parses isle of man nodes") { 16 | info(s"Nodes: ${nodeGeoms.count}") 17 | } 18 | 19 | it("parses isle 
of man ways") { 20 | info(s"Ways: ${wayGeoms.count}") 21 | } 22 | 23 | it("parses isle of man relations") { 24 | info(s"Relations: ${relationGeoms.count}") 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/test/scala/vectorpipe/TestEnvironment.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Azavea 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package vectorpipe 18 | 19 | import org.apache.spark.serializer.KryoSerializer 20 | import org.apache.spark.sql.SparkSession 21 | import org.scalatest._ 22 | 23 | object TestEnvironment { 24 | } 25 | 26 | /* 27 | * These set of traits handle the creation and deletion of test directories on the local fs and hdfs, 28 | * It uses commons-io in at least one case (recursive directory deletion) 29 | */ 30 | trait TestEnvironment extends BeforeAndAfterAll { self: Suite with BeforeAndAfterAll => 31 | implicit val ss: SparkSession = SparkSession.builder 32 | .master("local[*]") 33 | .appName("VectorPipe Test") 34 | .config("spark.ui.enabled", "false") 35 | .config("spark.default.parallelism","8") 36 | .config("spark.serializer", classOf[KryoSerializer].getName) 37 | .config("spark.kryo.registrationRequired", "false") 38 | .config("spark.kryoserializer.buffer.max", "500m") 39 | .config("spark.sql.orc.impl", "native") 40 | .getOrCreate() 41 | 42 | // get the name of the class which mixes in this trait 43 | val name = this.getClass.getName 44 | 45 | override def beforeAll() = { 46 | ss.sparkContext.setJobGroup(this.getClass.getName, "test") 47 | } 48 | 49 | override def afterAll() = { 50 | ss.sparkContext.clearJobGroup() 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/test/scala/vectorpipe/functions/osm/FunctionSpec.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.functions.osm 2 | 3 | import org.apache.spark.sql.Row 4 | import org.scalatest.{FunSpec, Matchers} 5 | import vectorpipe.TestEnvironment 6 | 7 | class FunctionSpec extends FunSpec with TestEnvironment with Matchers { 8 | 9 | import ss.implicits._ 10 | 11 | describe("isArea") { 12 | it("marks 'area=*' appropriately") { 13 | Seq( 14 | Map("area" -> "yes") -> true, 15 | Map("area" -> "YES") -> true, 16 | Map("area" -> "YeS") -> true, 17 | Map("area" -> "1") -> true, 18 | Map("area" -> "true") -> true, 19 | Map("area" -> "True") -> true, 20 | Map("area" -> "no") -> false, 21 | Map("area" -> "no") -> false, 22 | Map("area" -> "0") -> false, 23 | Map("area" -> "something") -> false, 24 | Map("area" -> "yes;no") -> true, 25 | Map("area" -> "yes; no") -> true, 26 | Map("area" -> "yes ; no") -> true, 27 | Map("area" -> "yes ;no") -> true 28 | ) 29 | .toDF("tags", "value") 30 | .where(isArea('tags) =!= 'value) 31 | .count should equal(0) 32 | } 33 | 34 | it("respects area-keys") { 35 | Seq( 36 | Map("office" -> 
"architect") -> true, 37 | Map("waterway" -> "riverbank") -> true, 38 | Map("waterway" -> "canal") -> false, 39 | Map("aeroway" -> "aerodrome;apron") -> true, 40 | Map("aeroway" -> "aerodrome ; runway") -> true, 41 | Map("aeroway" -> "taxiway;runway") -> false 42 | ) 43 | .toDF("tags", "value") 44 | .where(isArea('tags) =!= 'value) 45 | .count should equal(0) 46 | } 47 | } 48 | 49 | describe("isMultiPolygon") { 50 | it("marks multipolygons and boundaries appropriately") { 51 | Seq( 52 | Map("type" -> "multipolygon") -> true, 53 | Map("type" -> "boundary") -> true, 54 | Map("type" -> "route") -> false, 55 | Map("type" -> "multipolygon;boundary") -> true, 56 | Map("type" -> "multipolygon ; boundary") -> true 57 | ) 58 | .toDF("tags", "value") 59 | .where(isMultiPolygon('tags) =!= 'value) 60 | .count should equal(0) 61 | } 62 | } 63 | 64 | describe("isRoute") { 65 | it("marks routes appropriately") { 66 | Seq( 67 | Map("type" -> "multipolygon") -> false, 68 | Map("type" -> "boundary") -> false, 69 | Map("type" -> "route") -> true, 70 | Map("type" -> "route;boundary") -> true, 71 | Map("type" -> "route ; boundary") -> true 72 | ) 73 | .toDF("tags", "value") 74 | .where(isRoute('tags) =!= 'value) 75 | .count should equal(0) 76 | } 77 | } 78 | 79 | describe("isBuilding") { 80 | it("marks buildings appropriately") { 81 | Seq( 82 | Map("building" -> "yes") -> true, 83 | Map("building" -> "no") -> false, 84 | Map("building" -> "false") -> false, 85 | Map("building" -> "farm") -> true, 86 | Map("building" -> "farm;apartments") -> true 87 | ) 88 | .toDF("tags", "value") 89 | .where(isBuilding('tags) =!= 'value) 90 | .count should equal(0) 91 | } 92 | } 93 | 94 | describe("isPOI") { 95 | it("marks POIs appropriately") { 96 | Seq( 97 | Map("amenity" -> "cafe") -> true, 98 | Map("shop" -> "bakery") -> true, 99 | Map("craft" -> "bakery") -> true, 100 | Map("office" -> "architect") -> true, 101 | Map("leisure" -> "disc_golf_course") -> true, 102 | Map("aeroway" -> "aerodrome") -> true, 103 | Map("highway" -> "motorway") -> false, 104 | Map("shop" -> "bakery ; dairy") -> true 105 | ) 106 | .toDF("tags", "value") 107 | .where(isPOI('tags) =!= 'value) 108 | .count should equal(0) 109 | } 110 | } 111 | 112 | describe("isRoad") { 113 | it("marks roads appropriately") { 114 | Seq( 115 | Map("highway" -> "motorway") -> true, 116 | Map("highway" -> "path") -> true, 117 | Map("highway" -> "path ;footway") -> true, 118 | Map("building" -> "yes") -> false 119 | ) 120 | .toDF("tags", "value") 121 | .where(isRoad('tags) =!= 'value) 122 | .count should equal(0) 123 | } 124 | } 125 | 126 | describe("isCoastline") { 127 | it("marks coastline appropriately") { 128 | Seq( 129 | Map("natural" -> "coastline") -> true, 130 | Map("natural" -> "water") -> false, 131 | Map("natural" -> "coastline ; water") -> true 132 | ) 133 | .toDF("tags", "value") 134 | .where(isCoastline('tags) =!= 'value) 135 | .count should equal(0) 136 | } 137 | } 138 | 139 | describe("isWaterway") { 140 | it("marks waterways appropriately") { 141 | Seq( 142 | Map("waterway" -> "river") -> true, 143 | Map("waterway" -> "riverbank") -> true, 144 | Map("waterway" -> "canal") -> true, 145 | Map("waterway" -> "stream") -> true, 146 | Map("waterway" -> "brook") -> true, 147 | Map("waterway" -> "drain") -> true, 148 | Map("waterway" -> "ditch") -> true, 149 | Map("waterway" -> "dam") -> true, 150 | Map("waterway" -> "weir") -> true, 151 | Map("waterway" -> "waterfall") -> true, 152 | Map("waterway" -> "pressurised") -> true, 153 | Map("waterway" -> "fuel") -> 
false, 154 | Map("waterway" -> "canal ; stream") -> true, 155 | Map("waterway" -> "canal ; fuel") -> true 156 | ) 157 | .toDF("tags", "value") 158 | .where(isWaterway('tags) =!= 'value) 159 | .count should equal(0) 160 | } 161 | } 162 | 163 | describe("removeUninterestingTags") { 164 | it("drops uninteresting tags") { 165 | Seq( 166 | Map("building" -> "yes", "created_by" -> "JOSM") 167 | ) 168 | .toDF("tags") 169 | .withColumn("tags", removeUninterestingTags('tags)) 170 | .collect() should equal(Array(Row(Map("building" -> "yes")))) 171 | } 172 | 173 | it("drops uninteresting single tags") { 174 | Seq( 175 | Map("building" -> "yes", "colour" -> "grey"), 176 | Map("colour" -> "grey") 177 | ) 178 | .toDF("tags") 179 | .withColumn("tags", removeUninterestingTags('tags)) 180 | .collect() should equal(Array(Row(Map("building" -> "yes", "colour" -> "grey")), Row(Map.empty))) 181 | } 182 | 183 | it("drops uninteresting prefixed tags") { 184 | Seq( 185 | Map("highway" -> "motorway", "tiger:reviewed" -> "no"), 186 | Map("building" -> "yes", "CLC:something" -> "something") 187 | ) 188 | .toDF("tags") 189 | .withColumn("tags", removeUninterestingTags('tags)) 190 | .collect() should equal(Array(Row(Map("highway" -> "motorway")), Row(Map("building" -> "yes")))) 191 | } 192 | 193 | it("drops tags with invalid keys") { 194 | Seq( 195 | Map("highway" -> "motorway", "k=v" -> "value"), 196 | Map("building" -> "yes", "land use" -> "something") 197 | ) 198 | .toDF("tags") 199 | .withColumn("tags", removeUninterestingTags('tags)) 200 | .collect() should equal(Array(Row(Map("highway" -> "motorway")), Row(Map("building" -> "yes")))) 201 | } 202 | } 203 | 204 | describe("removeSemiInterestingTags") { 205 | it("drops semi-interesting tags") { 206 | Seq( 207 | Map("building" -> "yes", "source" -> "MassGIS") 208 | ) 209 | .toDF("tags") 210 | .withColumn("tags", removeSemiInterestingTags('tags)) 211 | .collect() should equal(Array(Row(Map("building" -> "yes")))) 212 | } 213 | 214 | it("drops semi-interesting prefixed tags") { 215 | Seq( 216 | Map("highway" -> "motorway", "source:geometry" -> "MassGIS") 217 | ) 218 | .toDF("tags") 219 | .withColumn("tags", removeSemiInterestingTags('tags)) 220 | .collect() should equal(Array(Row(Map("highway" -> "motorway")))) 221 | } 222 | } 223 | 224 | } 225 | -------------------------------------------------------------------------------- /src/test/scala/vectorpipe/sources/AugmentedDiffSourceTest.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.sources 2 | 3 | import geotrellis.vector.Geometry 4 | import org.apache.spark.internal.Logging 5 | import org.scalatest.{FunSpec, Matchers} 6 | import vectorpipe.TestEnvironment 7 | import vectorpipe.model.ElementWithSequence 8 | import vectorpipe.util.RobustFeature 9 | 10 | class AugmentedDiffSourceSpec extends FunSpec with TestEnvironment with Matchers { 11 | 12 | import ss.implicits._ 13 | 14 | describe("Timestamp to sequence conversion") { 15 | it("should provide a round trip for simple conversion") { 16 | AugmentedDiffSource.timestampToSequence(AugmentedDiffSource.sequenceToTimestamp(3700047)) should be (3700047) 17 | } 18 | 19 | it("should provide a round trip for column functions") { 20 | val df = ss.createDataset(Seq(3700047)).toDF 21 | (df.select(AugmentedDiffSource.sequenceToTimestamp('value) as 'time) 22 | .select(AugmentedDiffSource.timestampToSequence('time) as 'value) 23 | .first 24 | .getLong(0)) should be (3700047) 25 | } 26 | } 27 | 28 | } 29 | 30 | class 
LogErrors extends AugmentedDiffSourceErrorHandler with Logging { 31 | override def handle(sequence: Int, feature: RobustFeature[Geometry, ElementWithSequence]) = { 32 | logWarning(s"Error in sequence ${sequence} for feature with metadata: ${feature.data}") 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/test/scala/vectorpipe/vectortile/LayerTestPipeline.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.vectortile 2 | 3 | import geotrellis.vector._ 4 | import org.apache.spark.sql.DataFrame 5 | import org.apache.spark.sql.functions 6 | import org.apache.spark.sql.functions.when 7 | 8 | import vectorpipe._ 9 | import vectorpipe.functions.osm._ 10 | 11 | case class LayerTestPipeline(geometryColumn: String, baseOutputURI: java.net.URI) extends Pipeline with Pipeline.Output { 12 | val layerMultiplicity = LayerNamesInColumn("layers") 13 | 14 | override def select(wayGeoms: DataFrame, targetZoom: Int, keyColumn: String): DataFrame = { 15 | import wayGeoms.sparkSession.implicits._ 16 | 17 | wayGeoms 18 | .withColumn("layers", when(isBuilding('tags), "buildings").when(isRoad('tags), "roads")) 19 | .where(functions.not(functions.isnull('layers))) 20 | } 21 | 22 | override def clip(geom: Geometry, key: geotrellis.layer.SpatialKey, layoutLevel: geotrellis.layer.LayoutLevel): Geometry = 23 | Clipping.byLayoutCell(geom, key, layoutLevel) 24 | } 25 | -------------------------------------------------------------------------------- /src/test/scala/vectorpipe/vectortile/PipelineSpec.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.vectortile 2 | 3 | import org.apache.spark.sql.functions 4 | import org.apache.spark.sql.functions.{isnull, lit} 5 | import org.locationtech.geomesa.spark.jts._ 6 | import org.scalatest._ 7 | import vectorpipe.{TestEnvironment, internal => vp, _} 8 | 9 | class PipelineSpec extends FunSpec with TestEnvironment with Matchers { 10 | import ss.implicits._ 11 | 12 | ss.withJTS 13 | val orcFile = getClass.getResource("/isle-of-man-latest.osm.orc").getPath 14 | val df = ss.read.orc(orcFile) 15 | 16 | describe("Vectortile Pipelines") { 17 | val nodes = vp.preprocessNodes(df, None) 18 | 19 | val nodeGeoms = nodes 20 | .filter(functions.not(isnull('lat))) 21 | .withColumn("geometry", st_makePoint('lon, 'lat)) 22 | .drop("lat", "lon") 23 | .withColumn("weight", lit(1)) 24 | .cache 25 | 26 | val wayGeoms = vp.reconstructWayGeometries(df, nodes).cache 27 | 28 | it("should generate a single zoom level") { 29 | val pipeline = TestPipeline("geometry", new java.net.URI("file:///tmp/iom-tiles"), 16) 30 | VectorPipe(nodeGeoms, pipeline, VectorPipe.Options.forZoom(8)) 31 | } 32 | 33 | it("should generate multiple zoom levels") { 34 | val pipeline = TestPipeline("geometry", new java.net.URI("file:///tmp/iom-tiles-pyramid"), 16) 35 | VectorPipe(nodeGeoms, pipeline, VectorPipe.Options.forZoomRange(6, 8)) 36 | } 37 | 38 | it("should generate multiple layers") { 39 | val pipeline = LayerTestPipeline("geom", new java.net.URI("file:///tmp/iom-layers")) 40 | VectorPipe(wayGeoms, pipeline, VectorPipe.Options.forZoom(14)) 41 | } 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/test/scala/vectorpipe/vectortile/TestPipeline.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.vectortile 2 | 3 | import 
geotrellis.raster.RasterExtent 4 | import geotrellis.layer._ 5 | import geotrellis.vector._ 6 | import geotrellis.vectortile._ 7 | 8 | import org.apache.spark.sql.{DataFrame, Row} 9 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema 10 | import org.apache.spark.sql.functions 11 | import org.apache.spark.sql.functions.{array, col, explode, sum} 12 | 13 | import vectorpipe._ 14 | 15 | case class Bin(x: Int, y: Int) 16 | object Bin { 17 | def apply(tup: (Int, Int)): Bin = Bin(tup._1, tup._2) 18 | } 19 | 20 | case class TestPipeline(geometryColumn: String, baseOutputURI: java.net.URI, gridResolution: Int) extends Pipeline with Pipeline.Output { 21 | val weightedCentroid = new WeightedCentroid 22 | 23 | val layerMultiplicity = SingleLayer("points") 24 | 25 | override def reduce(input: DataFrame, layoutLevel: LayoutLevel, keyColumn: String): DataFrame = { 26 | import input.sparkSession.implicits._ 27 | 28 | val layout = layoutLevel.layout 29 | val binOfTile = functions.udf { (g: Geometry, key: GenericRowWithSchema) => 30 | val pt = g.asInstanceOf[Point] 31 | val k = getSpatialKey(key) 32 | val re = RasterExtent(layout.mapTransform.keyToExtent(k), gridResolution, gridResolution) 33 | val c = pt.getCoordinate 34 | Bin(re.mapToGrid(c.x, c.y)) 35 | } 36 | 37 | val st_geomToPoint = functions.udf { g: Geometry => g.asInstanceOf[Point] } 38 | 39 | input.withColumn(keyColumn, explode(col(keyColumn))) 40 | .withColumn("bin", binOfTile(col(geometryColumn), col(keyColumn))) 41 | .groupBy(col(keyColumn), col("bin")) 42 | .agg(sum('weight) as 'weight, weightedCentroid(st_geomToPoint(col(geometryColumn)), 'weight) as geometryColumn) 43 | .drop('bin) 44 | .withColumn(keyColumn, array(col(keyColumn))) 45 | } 46 | 47 | override def pack(row: Row, zoom: Int): VectorTileFeature[Point] = { 48 | val g = row.getAs[Point](geometryColumn) 49 | val weight = row.getAs[Long]("weight") 50 | 51 | Feature(g, Map( "weight" -> VInt64(weight) )) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/test/scala/vectorpipe/vectortile/WeightedCentroid.scala: -------------------------------------------------------------------------------- 1 | package vectorpipe.vectortile 2 | 3 | import geotrellis.vector._ 4 | import org.apache.spark.sql.Row 5 | import org.apache.spark.sql.expressions.MutableAggregationBuffer 6 | import org.apache.spark.sql.expressions.UserDefinedAggregateFunction 7 | import org.apache.spark.sql.jts.PointUDT 8 | import org.apache.spark.sql.types._ 9 | import org.locationtech.jts.geom.{Coordinate, GeometryFactory} 10 | 11 | class WeightedCentroid extends UserDefinedAggregateFunction { 12 | 13 | // Define the schema of the input data 14 | override def inputSchema: org.apache.spark.sql.types.StructType = 15 | StructType(StructField("point", PointUDT) :: StructField("weight", DoubleType) :: Nil) 16 | 17 | // Define the types of the intermediate data structure 18 | override def bufferSchema: StructType = StructType( 19 | StructField("x", DoubleType) :: StructField("y", DoubleType) :: StructField("weight", DoubleType) :: Nil 20 | ) 21 | 22 | // Define the return type 23 | override def dataType: DataType = PointUDT 24 | 25 | // Does the function return the same value for the same input? 
26 | override def deterministic: Boolean = true 27 | 28 | // Create a new, empty buffer structure 29 | override def initialize(buffer: MutableAggregationBuffer): Unit = { 30 | buffer(0) = 0.0 31 | buffer(1) = 0.0 32 | buffer(2) = 0.0 33 | } 34 | 35 | // Combine a new input with an existing buffer 36 | override def update(buffer: MutableAggregationBuffer, input: Row): Unit = { 37 | val c = input.getAs[Point](0).getCoordinate 38 | val wt = input.getAs[Double](1) 39 | buffer(0) = buffer.getAs[Double](0) + c.x * wt 40 | buffer(1) = buffer.getAs[Double](1) + c.y * wt 41 | buffer(2) = buffer.getAs[Double](2) + wt 42 | } 43 | 44 | // Merge two intermediate buffers 45 | override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = { 46 | buffer1(0) = buffer1.getAs[Double](0) + buffer2.getAs[Double](0) 47 | buffer1(1) = buffer1.getAs[Double](1) + buffer2.getAs[Double](1) 48 | buffer1(2) = buffer1.getAs[Double](2) + buffer2.getAs[Double](2) 49 | } 50 | 51 | // Produce the final output from a Row encoded with the bufferSchema 52 | override def evaluate(buffer: Row): Any = { 53 | val wx = buffer.getDouble(0) 54 | val wy = buffer.getDouble(1) 55 | val wt = buffer.getDouble(2) 56 | (new GeometryFactory).createPoint(new Coordinate(wx/wt, wy/wt)) 57 | } 58 | } 59 | --------------------------------------------------------------------------------
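A minimal sketch (not taken from the repository) of exercising the WeightedCentroid UDAF above on its own, assuming the same SparkSession `ss` and GeoMesa JTS helpers (`withJTS`, `st_makePoint`) that the other test sources in this tree already use; the toy column names `x`, `y`, `weight`, and `geom` are illustrative only. Two points at x = 0 and x = 3 with weights 1 and 2 should reduce to the weighted mean POINT (2 0), since (1·0 + 2·3) / (1 + 2) = 2:

    import org.apache.spark.sql.functions.col
    import org.locationtech.geomesa.spark.jts._
    import vectorpipe.vectortile.WeightedCentroid

    // assumes an existing SparkSession `ss`, as provided by TestEnvironment
    ss.withJTS
    import ss.implicits._

    val weightedCentroid = new WeightedCentroid

    // build a tiny DataFrame of weighted points (weights as Double to match the UDAF's input schema)
    val pts = Seq((0.0, 0.0, 1.0), (3.0, 0.0, 2.0))
      .toDF("x", "y", "weight")
      .withColumn("geom", st_makePoint(col("x"), col("y")))

    // an empty groupBy collapses all rows into a single weighted centroid
    pts.groupBy()
      .agg(weightedCentroid(col("geom"), col("weight")) as "centroid")
      .show()
    // expected: POINT (2 0)

This mirrors how TestPipeline.scala applies the aggregator inside its `reduce` step, just without the tile-key grouping.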