├── .circleci
└── config.yml
├── .github
├── docker-compose.yml
├── pull_request_template.md
└── workflows
│ └── ci.yml
├── .gitignore
├── CHANGELOG.md
├── CONTRIBUTING.org
├── LICENSE
├── README.md
├── RELEASING.md
├── bench
└── src
│ └── main
│ └── scala
│ └── vectorpipe
│ └── Bench.scala
├── build.sbt
├── data
├── 8shapedmultipolygon.osm
├── diomede.osm
├── india-pakistan.osm
├── linestring.mvt
├── onepoint.mvt
├── polygon.mvt
├── quarry-rock.osm
└── roads.mvt
├── project
├── Dependencies.scala
├── Version.scala
├── assembly.sbt
├── build.properties
└── plugins.sbt
├── sbt
├── scripts
├── cibuild
├── cipublish
└── test
└── src
├── main
├── resources
│ ├── META-INF
│ │ └── services
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ └── microsite
│ │ └── data
│ │ └── menu.yml
├── scala
│ └── vectorpipe
│ │ ├── OSM.scala
│ │ ├── VectorPipe.scala
│ │ ├── examples
│ │ ├── AugmentedDiffProcessor.scala
│ │ ├── AugmentedDiffStreamProcessor.scala
│ │ ├── ChangeProcessor.scala
│ │ ├── ChangeStreamProcessor.scala
│ │ ├── ChangesetProcessor.scala
│ │ └── ChangesetStreamProcessor.scala
│ │ ├── functions
│ │ ├── osm
│ │ │ └── package.scala
│ │ └── package.scala
│ │ ├── internal
│ │ └── package.scala
│ │ ├── model
│ │ ├── Actions.scala
│ │ ├── AugmentedDiff.scala
│ │ ├── Change.scala
│ │ ├── Changeset.scala
│ │ ├── ChangesetComment.scala
│ │ ├── ElementWithSequence.scala
│ │ ├── Member.scala
│ │ └── Nd.scala
│ │ ├── relations
│ │ ├── MultiPolygons.scala
│ │ ├── Routes.scala
│ │ ├── package.scala
│ │ └── utils
│ │ │ ├── PartialCoordinateSequence.scala
│ │ │ ├── ReversedCoordinateSequence.scala
│ │ │ ├── VirtualCoordinateSequence.scala
│ │ │ └── package.scala
│ │ ├── sources
│ │ ├── AugmentedDiffMicroBatchReader.scala
│ │ ├── AugmentedDiffProvider.scala
│ │ ├── AugmentedDiffReader.scala
│ │ ├── AugmentedDiffSource.scala
│ │ ├── ChangeMicroBatchReader.scala
│ │ ├── ChangeProvider.scala
│ │ ├── ChangeReader.scala
│ │ ├── ChangeSource.scala
│ │ ├── ChangesetMicroBatchReader.scala
│ │ ├── ChangesetProvider.scala
│ │ ├── ChangesetReader.scala
│ │ ├── ChangesetSource.scala
│ │ ├── ReplicationReader.scala
│ │ ├── ReplicationStreamBatchReader.scala
│ │ ├── ReplicationStreamMicroBatchReader.scala
│ │ ├── SequenceOffset.scala
│ │ └── Source.scala
│ │ ├── util
│ │ ├── Auth.scala
│ │ ├── DBUtils.scala
│ │ ├── Geocode.scala
│ │ ├── Implicits.scala
│ │ ├── JsonRobustFeatureCollection.scala
│ │ ├── JsonRobustFeatureCollectionMap.scala
│ │ ├── Resource.scala
│ │ ├── RobustFeature.scala
│ │ └── package.scala
│ │ └── vectortile
│ │ ├── Clipping.scala
│ │ ├── Pipeline.scala
│ │ ├── Simplify.scala
│ │ ├── export
│ │ └── package.scala
│ │ └── package.scala
└── tut
│ ├── index.md
│ ├── outputs.md
│ ├── sources.md
│ ├── usage.md
│ └── usage
│ ├── concepts.md
│ ├── osm.md
│ └── usage.md
└── test
├── resources
├── .gitignore
├── isle-of-man-latest.osm.orc
├── log4j.properties
├── relation-110564.orc
├── relation-110564.wkt
├── relation-191199.orc
├── relation-191199.wkt
├── relation-191204.orc
├── relation-191204.wkt
├── relation-1949938.orc
├── relation-1949938.wkt
├── relation-2554903.orc
├── relation-2554903.wkt
├── relation-2580685.orc
├── relation-2580685.wkt
├── relation-3080946.orc
├── relation-3080946.wkt
├── relation-3105056.orc
├── relation-3105056.wkt
├── relation-333501.orc
├── relation-333501.wkt
├── relation-393502.orc
├── relation-393502.wkt
├── relation-5448156.orc
├── relation-5448156.wkt
├── relation-5448691.orc
├── relation-5448691.wkt
├── relation-5612959.orc
├── relation-5612959.wkt
├── relation-61315.orc
├── relation-61315.wkt
├── relation-6710544.orc
├── relation-6710544.wkt
└── view
│ ├── cluster-view.html
│ └── layer-test.html
└── scala
└── vectorpipe
├── MultiPolygonRelationReconstructionSpec.scala
├── ProcessOSMTest.scala
├── TestEnvironment.scala
├── functions
└── osm
│ └── FunctionSpec.scala
├── sources
└── AugmentedDiffSourceTest.scala
└── vectortile
├── LayerTestPipeline.scala
├── PipelineSpec.scala
├── TestPipeline.scala
└── WeightedCentroid.scala
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | aliases:
2 | - &restore_sbt_cache
3 | key: sbt-cache-{{ checksum "/tmp/scala_version" }}
4 |
5 | - &save_sbt_cache
6 | key: sbt-cache-{{ checksum "/tmp/scala_version" }}-{{ epoch }}
7 | paths:
8 | - "~/.ivy2/cache"
9 | - "~/.sbt"
10 | - "~/.cache/coursier"
11 |
12 | - &run_cibuild
13 | - checkout
14 | - run: echo "${SCALA_VERSION}" > /tmp/scala_version
15 | - restore_cache: *restore_sbt_cache
16 | - run:
17 | name: Executing cibuild
18 | command: ./scripts/cibuild
19 | - save_cache: *save_sbt_cache
20 |
21 | - &run_cipublish
22 | - checkout
23 | - run: echo "${SCALA_VERSION}" > /tmp/scala_version
24 | - restore_cache: *restore_sbt_cache
25 | - run:
26 | name: "Import signing key"
27 | command: |
28 | gpg --keyserver keyserver.ubuntu.com \
29 | --recv-keys 0x13E9AA1D8153E95E && \
30 | echo "${GPG_KEY}" | base64 -d > signing_key.asc && \
31 | gpg --import signing_key.asc
32 | - run:
33 | name: Executing cipublish
34 | command: ./scripts/cipublish
35 |
36 | # Build environments
37 | - &machine-openjdk8-scala2_11_12-environment
38 | machine:
39 | image: ubuntu-1604:201903-01
40 | environment:
41 | SCALA_VERSION: 2.11.12
42 |
43 | - &openjdk8-scala2_11_12-environment
44 | docker:
45 | - image: circleci/openjdk:8-jdk
46 | environment:
47 | SCALA_VERSION: 2.11.12
48 |
49 | version: 2
50 | workflows:
51 | version: 2
52 | build:
53 | jobs:
54 | - "openjdk8-scala2.11.12":
55 | filters: # required since `openjdk8-scala2.11.12_deploy` has tag filters AND requires `openjdk8-scala2.11.12`
56 | tags:
57 | only:
58 | - /^(.*)$/
59 | - "openjdk8-scala2.11.12_deploy":
60 | requires:
61 | - "openjdk8-scala2.11.12"
62 | filters:
63 | tags:
64 | only:
65 | - /^(.*)$/
66 |
67 | jobs:
68 | # Execute cibuild in machine executor so we can use our existing
69 | # docker-compose test setup
70 | "openjdk8-scala2.11.12":
71 | <<: *machine-openjdk8-scala2_11_12-environment
72 | steps: *run_cibuild
73 |
74 | "openjdk8-scala2.11.12_deploy":
75 | <<: *openjdk8-scala2_11_12-environment
76 | steps: *run_cipublish
77 |
--------------------------------------------------------------------------------
/.github/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.7"
2 |
3 | services:
4 | test:
5 | image: openjdk:8-jdk
6 | working_dir: /usr/local/src
7 | command: ./sbt ++$SCALA_VERSION test
8 | environment:
9 | - CI
10 | - SCALA_VERSION
11 | volumes:
12 | - ./../:/usr/local/src
13 | network_mode: host
14 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | # Overview
2 |
3 | Brief description of what this PR does and why it's important
4 |
5 | ## Demo
6 |
7 | Optional. Screenshots, etc.
8 |
9 | ## Notes
10 |
11 | Optional. Extra context, ancillary topics, alternative strategies that didn't work out, etc.
12 |
13 | ## Testing Instructions
14 |
15 | Optional. Include if there are more specifics than "CI tests should pass".
16 |
17 | ## Checklist
18 |
19 | - [ ] Add entry to CHANGELOG.md
20 |
21 | Closes #XXX
22 |
23 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | pull_request:
5 | branches: ['**']
6 | push:
7 | branches: ['master']
8 | tags: [v*]
9 | # release:
10 | # types: [published]
11 |
12 | jobs:
13 | build:
14 | name: Build and Test
15 | strategy:
16 | matrix:
17 | scala: ["2.12.7"]
18 | runs-on: ubuntu-latest
19 |
20 | env:
21 | SCALA_VERSION: ${{ matrix.scala }}
22 | BUILD_NUMBER: ${{ github.run_id }}
23 |
24 | steps:
25 | - uses: actions/checkout@v2
26 | with:
27 | fetch-depth: 0
28 |
29 | - uses: coursier/cache-action@v6
30 | # - uses: olafurpg/setup-scala@v13
31 | # with:
32 | # java-version: adopt@1.8
33 |
34 | - name: run tests
35 | run: docker compose -f .github/docker-compose.yml up test --abort-on-container-exit --exit-code-from test
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /RUNNING_PID
2 | /logs/
3 | /project/*-shim.sbt
4 | /project/project/
5 | /project/target/
6 | /target/
7 | /data/*.osm
8 | /data/*.geojson
9 | /data/*.osm.json
10 | /data/*.osm.pbf
11 | /images/*
12 | .ensime
13 | .ensime_cache/*
14 | clipping/*
15 | osmosis/*
16 | .idea
17 | target
18 | .metals
19 | \#*
20 | .\#*
21 |
22 | derby.log
23 | metastore_db/*
24 | bench/target/
25 | idea.sbt
26 | mainRunner/
27 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 |
2 | # Changelog
3 |
4 | All notable changes to this project will be documented in this file.
5 |
6 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
7 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
8 |
9 | ## [Unreleased]
10 |
11 | ### Added
12 |
13 | - GitHub actions config
14 |
15 | ### Changed
16 |
17 | ### Fixed
18 |
19 | ## [2.2.0]
20 |
21 | ### Added
22 |
23 | - Feature parsing in `AugmentedDiffSource` uses `vectorpipe.util.RobustFeature` to tolerate bad geometries in the stream [#148](https://github.com/geotrellis/vectorpipe/pull/148).
24 | - Receive GPG key while publishing artifacts [#138](https://github.com/geotrellis/vectorpipe/pull/138)
25 | - `Pipeline#finalize(vectorTiles, zoom)` method to receive the final RDD of generated vector tiles for a zoom level
26 | - `Pipeline.Output` mixin trait that overrides `finalize` with default implementation using `saveVectorTiles(vectorTiles, zoom, pipeline.baseOutputURI)`
27 |
28 | ### Changed
29 |
30 | - `VectorPipe.Options` now supports any square layout level (not just from ZoomedLayoutScheme)
31 | - `Pipeline#baseOutputURI` moved to `Pipeline.Output#baseOutputURI`
32 | - Updated GeoTrellis dependency to 3.5.1
33 | - Improved robustness of functions in `vectorpipe.sources.ChangesetSource`
34 |
35 | ### Fixed
36 |
37 | ## [2.1.3] - 2019-12-18
38 |
39 | ### Fixed
40 |
41 | - Catch 403 S3Exceptions when checking minutely diffs in AugmentedDiffSource
42 |
43 | ## [2.1.2] - 2019-12-17
44 |
45 | ### Fixed
46 |
47 | - Catch proper AWS SDK v2 NoSuchKeyException when checking minutely diffs in AugmentedDiffSource
48 |
49 | ## [2.1.1] - 2019-12-16
50 |
51 | ### Fixed
52 |
53 | - AugmentedDiffSource failed to properly decode from JSON sources
54 | - MicroBatchReader null pointer exception when reading baseURI from DataSourceOptions
55 |
56 | ## [2.1.0] - 2019-12-12
57 |
58 | ### Added
59 |
60 | - `vectorpipe.examples`: VectorPipe examples moved from https://github.com/azavea/osmesa
61 | - `VectorPipe.defaultSparkSessionWithJTS` method to construct a VectorPipe tailored `SparkSession`. Users with more complicated use cases will still want to manually construct their own session.
62 |
63 | ## [2.0.0] - 2019-11-29
64 |
65 | This is the first release to depend on GeoTrellis 3.0.
66 |
67 | ### Changed
68 |
69 | - Streaming sources now fall back to the current remote sequence if no database
70 | checkpoint or option can be found
71 | - Depend on Spark 2.4.4
72 | - Depend on GeoTrellis 3.1.0
73 |
74 | ## [1.1.0] - 2019-09-26
75 |
76 | ### Added
77 |
78 | - `useCaching` option to VectorPipe.Options allows for persisting to disk.
79 | Helps avoid repeated computations.
80 | - Functions for converting sequence numbers to timestamps and back for both
81 | changeset replications and augmented diff replications. See `ChangesetSource`
82 | and `AugmentedDiffSource` in `vectorpipe.sources`.
83 |
84 | ### Changed
85 |
86 | - Improved empty geometry handling in UDFs
87 |
88 | ### Fixed
89 |
90 | ## [1.0.0] - 2019-07-09
91 |
92 | ### Added
93 |
94 | - RELEASING.md - Instructions for releasing new versions of this project
95 | - Support for semicolon-delimited tag values in UDFs, e.g. `shop=bakery;dairy`
96 | - Support for `nds` in augmented diff GeoJSON (matching
97 | [`osm-replication-streams@^0.7.0`](https://github.com/mojodna/osm-replication-streams/tree/v0.7.0)
98 | output)
99 | - "Uninteresting" tags are dropped when processing OSM inputs; this will result
100 | in fewer point features being generated (as those nodes previously had tags
101 | applied).
102 |
103 | ### Changed
104 |
105 | - Sync with [id-area-keys@2.13.0](https://github.com/osmlab/id-area-keys/blob/v2.13.0/areaKeys.json) for determining area-ness of a way.
106 | - Fetch gzipped augmented diff JSON (produced by [overpass-diff-publisher](https://github.com/mojodna/overpass-diff-publisher))
107 | - Preserve the last-known coordinates of deleted nodes
108 | - Better handling of falsy boolean values in tag UDFs
109 | - Adds `riverbank`, `stream_end`, `dam`, `weir`, `waterfall`, and `pressurised`
110 | to the list of waterway features
111 | - Populates `nds` and `members` for deleted elements from the previous version
112 |
113 | ### Fixed
114 |
115 | - Resolve commons-io deprecation warnings
116 | - Convert coordinates to Doubles (expected by VP internals) when pre-processing
117 |
118 | ## [1.0.0-RC3] - 2019-04-24
119 |
120 | ### Fixed
121 |
122 | - Mark all logger vals and some UDF vals as @transient lazy to avoid Spark serialization issues
123 | - Properly strip leading and trailing slashes from S3 URIs when exporting vector tiles
124 |
--------------------------------------------------------------------------------
/CONTRIBUTING.org:
--------------------------------------------------------------------------------
1 | #+TITLE: Contributing to VectorPipe
2 | #+AUTHOR: Colin
3 | #+HTML_HEAD:
4 |
5 | ** Prerequisite Knowledge
6 |
7 | *** GeoTrellis
8 |
9 | GeoTrellis sublibraries and types are used heavily throughout ~vectorpipe~,
10 | particularly its ~vector~ and ~vectortile~ packages.
11 |
12 | *** Apache Spark
13 |
14 | ~RDD~ usage is fairly prevalent, so knowledge of Spark internals may help
15 | you, depending on your task.
16 |
17 | *** Cats
18 |
19 | The Functional Programming library that adds certain necessities missing
20 | from vanilla Scala. This is not at all necessary for /using/ ~vectorpipe~,
21 | but is used here and there within its internal machinery.
22 |
23 | *** OpenStreetMap
24 |
25 | Knowledge of how OpenStreetMap data is formatted will help you immensely. Terms:
26 |
27 | - Element
28 | - Node
29 | - Way
30 | - Relation
31 |
32 | ** Development Dependencies
33 |
34 | - [[http://www.scala-sbt.org/][SBT]]
35 | - [[https://spark.apache.org/][Apache Spark]] (a local install on your machine)
36 | - [[https://jekyllrb.com/][Jekyll]] (if editing the microsite)
37 |
38 | Otherwise, all Scala dependencies (including compilers) will be
39 | automatically downloaded by sbt.
40 |
41 | ** Style Guide
42 |
43 | When contributing code changes to ~vectorpipe~, bear in mind that we make a
44 | few stylistic choices in order to minimize code complexity:
45 |
46 | *** Code and Directory Layout
47 |
48 | - Code mechanics relevant to the workings of the library but irrelevant to the
49 | user should be relegated to a module under ~vectorpipe.*.internal~, where
50 | the ~*~ is whatever parent module you're working in.
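
  For example, a hypothetical ~vectorpipe.foo.internal~ package object (~foo~
  is only an illustrative name) might look like:

  #+BEGIN_SRC scala
  // src/main/scala/vectorpipe/foo/internal/package.scala
  package vectorpipe.foo

  package object internal {
    /* Helper used within vectorpipe.foo but not part of the public API. */
    private[foo] def normalize(n: Int): Int = math.max(0, n)
  }
  #+END_SRC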
51 |
52 | - Type aliases live in *package objects*:
53 |
54 | #+BEGIN_SRC scala
55 | package vectorpipe
56 |
57 | package object foo {
58 | type Bar = Int
59 | }
60 | #+END_SRC
61 |
62 | - Typeclass instances live in the companion object of the class they're for:
63 |
64 | #+BEGIN_SRC scala
65 | import cats._
66 |
67 | case class Foo[T](t: T)
68 |
69 | object Foo {
70 | implicit val fooFunctor: Functor[Foo] = new Functor[Foo] {
71 | def map[A, B](fa: Foo[A])(f: A => B): Foo[B] = ???
72 | }
73 | }
74 | #+END_SRC
75 |
76 | This is to give immediate "visibility" of instances to their corresponding
77 | types. Just by importing ~Foo~, you have access to all its instances without
78 | having to think about them. This decreases ~import~ confusion.
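
  As a minimal sketch building on the ~Foo~ example above, the instance is
  found by implicit resolution without any extra import for the instance itself:

  #+BEGIN_SRC scala
  import cats.Functor

  // Resolves to Foo.fooFunctor from Foo's companion object.
  val instance: Functor[Foo] = Functor[Foo]
  #+END_SRC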
79 |
80 | *** Scala Features to Avoid
81 |
82 | **** Method Overloading and Default Arguments
83 |
84 | We [[https://stackoverflow.com/a/2512001/643684][avoid method overloading]]:
85 |
86 | #+BEGIN_SRC scala
87 | case class Foo[T](t: T) {
88 | def bar(a: Int): Bar = ???
89 |
90 | // avoid
91 | def bar(a: Int, b: Int): Bar = ???
92 | }
93 | #+END_SRC
94 |
95 | We avoid default arguments:
96 |
97 | #+BEGIN_SRC scala
98 | case class Foo[T](t: T) {
99 | // avoid
100 | def bar(a: Int, b: Option[Int] = None): Bar = ???
101 | }
102 | #+END_SRC
103 |
104 | Default arguments are just method overloading in disguise.
105 |
106 | **** Exceptions
107 |
108 | We avoid throwing Exceptions:
109 |
110 | #+BEGIN_SRC scala
111 | /* Surely this function will obey its contract... */
112 | def innocent(path: String): Foo
113 |
114 | sbt> innocent("/wrong/file/path/or/bad/data.txt")
115 | java.lang.YouCouldntHaveForeseenThisException
116 | #+END_SRC
117 |
118 | Exceptions were intentionally left out of new languages like [[https://golang.org/doc/faq#exceptions][Golang]], [[https://www.rust-lang.org/en-US/faq.html#error-handling][Rust]], and Elm.
119 | In Scala, we can use vanilla ~Try~ and ~Either~, or ~EitherT~ from [[http://typelevel.org/cats/][Cats]] or [[https://github.com/scalaz/scalaz][ScalaZ]]
120 | to model potential errors:
121 |
122 | #+BEGIN_SRC scala
123 | def innocent(path: String): Either[String, Foo]
124 |
125 | /* "Mixing Contexts", i.e. the ability to run concurrently and to fail safely */
126 | def innocentIO(path: String): EitherT[Future, String, Foo]
127 | #+END_SRC
128 |
129 | **** Non-data Classes
130 |
131 | We [[https://www.youtube.com/watch?v=o9pEzgHorH0][avoid classes that don't represent data]]:
132 |
133 | #+BEGIN_SRC scala
134 | class Fooifizer(val bestArg: Type) {
135 | def work(arg: Type): Unit = { ??? }
136 | }
137 | #+END_SRC
138 |
139 | Instead, we call a spade a spade and write a stand-alone function:
140 |
141 | #+BEGIN_SRC scala
142 | /* Put this in an appropriate companion object, or the package object */
143 | def fooifize(bestArg: Type, arg: Type): Unit = { ??? }
144 | #+END_SRC
145 |
146 | **** Miscellaneous
147 |
148 | We avoid ~.apply~ returning a type other than the parent object:
149 |
150 | #+BEGIN_SRC scala
151 | object Foo {
152 | // avoid
153 | def apply(...): Bar = ...
154 | }
155 |
156 | // Or else you can write code like:
157 | val x = Foo(...) // hard to know what x's type is.
158 | #+END_SRC
159 |
160 | We [[https://github.com/circe/circe/blame/master/DESIGN.md#L77][avoid implicit conversions]]:
161 |
162 | #+BEGIN_SRC scala
163 | case class Foo(...)
164 |
165 | case class Bar(...) {
166 | def bar: ??? = ...
167 | }
168 |
169 | object Foo {
170 | // avoid
171 | implicit def foo2Bar(foo: Foo): Bar = ...
172 | }
173 |
174 | // Or else you can write code like:
175 | val x = Foo(...).bar // where did `bar` come from?
176 | #+END_SRC
177 |
178 | Typeclasses should be implemented via the implicit-val-within-companion-object
179 | pattern.
180 |
181 | ** Updating the Microsite
182 |
183 | All content files can be found in ~src/main/tut/~. After making your desired
184 | changes, you can confirm them by running the following in sbt:
185 |
186 | #+BEGIN_EXAMPLE
187 | sbt> makeMicrosite
188 | #+END_EXAMPLE
189 |
190 | This will build the site as well as compile every Scala example. If
191 | something about the API has changed and the examples are no longer valid,
192 | these docs will fail to build. This is a good thing! Just make the
193 | appropriate extra changes and rebuild.
194 |
195 | To view your built site locally, navigate to ~target/site/~ and run ~jekyll
196 | serve~. Be careful: The main content of the site will be visible at
197 | [[http://127.0.0.1:4000/vectorpipe/][127.0.0.1:4000/vectorpipe/]]. Without
198 | the ~vectorpipe~ on the end, you won't see anything.
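
That is (assuming the site has already been built with ~makeMicrosite~):

#+BEGIN_EXAMPLE
cd target/site
jekyll serve
# then browse to http://127.0.0.1:4000/vectorpipe/
#+END_EXAMPLE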
199 |
200 | If you have write permission to the main VectorPipe repo on GitHub, then
201 | your updated microsite can be published to
202 | [[https://geotrellis.github.io/vectorpipe/]] via:
203 |
204 | #+BEGIN_EXAMPLE
205 | sbt> publishMicrosite
206 | #+END_EXAMPLE
207 | ** Publishing to Bintray
208 |
209 | Provided you have permissions to publish to [[https://bintray.com/azavea][Azavea's Bintray]], all that's necessary
210 | to proceed is:
211 |
212 | #+BEGIN_EXAMPLE
213 | sbt> publish
214 | #+END_EXAMPLE
215 |
216 | in your SBT shell.
217 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This software is licensed under the Apache 2 license, quoted below.
2 |
3 | Copyright 2011-2017 Azavea [http://www.azavea.com]
4 |
5 | Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 | use this file except in compliance with the License. You may obtain a copy of
7 | the License at
8 |
9 | [http://www.apache.org/licenses/LICENSE-2.0]
10 |
11 | Unless required by applicable law or agreed to in writing, software
12 | distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 | WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 | License for the specific language governing permissions and limitations under
15 | the License.
16 |
--------------------------------------------------------------------------------
/RELEASING.md:
--------------------------------------------------------------------------------
1 | # Publishing a release
2 |
3 | 1. Create a new release branch from up-to-date master named `release/x.y.z`
4 | 1. Review CHANGELOG.md. Move the `[Unreleased]` header to a new empty section and replace it with an `[x.y.z]` header plus the release date.
5 | 1. Update the version numbers in the build.sbt and spark-shell examples in the README's "Getting Started" section.
6 | 1. Commit these changes as a single commit, with the message "Release vx.y.z"
7 | 1. Push branch and make a PR on GitHub
8 | 1. Ensure CI succeeds
9 | 1. Ensure there are no new commits on master. If there are new commits that you wish to include, rebase this branch on master and start over at step 2. Otherwise, merge.
10 | 1. Tag the merge commit on the master branch: `git tag -a vx.y.z -m "Release x.y.z"`
11 | 1. Push the new tag: `git push --tags`; if you have multiple remotes, you may need to target the proper upstream repo: `git push <remote> --tags`.
12 | 1. Review the CircleCI build status to ensure that the tag was successfully published to Sonatype.
13 |
--------------------------------------------------------------------------------
/bench/src/main/scala/vectorpipe/Bench.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe
2 |
3 | import java.util.concurrent.TimeUnit
4 |
5 | import geotrellis.vector.{Extent, Line, Point}
6 | import org.openjdk.jmh.annotations._
7 |
8 | // --- //
9 |
10 | @BenchmarkMode(Array(Mode.AverageTime))
11 | @OutputTimeUnit(TimeUnit.MICROSECONDS)
12 | @State(Scope.Thread)
13 | class LineBench {
14 | val extent = Extent(0, 0, 5, 5)
15 |
16 | var line: Line = _
17 |
18 | @Setup
19 | def setup: Unit = {
20 | line = Line(
21 | List.range(4, -100, -2).map(n => Point(n, 1)) ++ List(Point(-3,4), Point(-1,4), Point(2,4), Point(4,4))
22 | )
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/data/linestring.mvt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/data/linestring.mvt
--------------------------------------------------------------------------------
/data/onepoint.mvt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/data/onepoint.mvt
--------------------------------------------------------------------------------
/data/polygon.mvt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/data/polygon.mvt
--------------------------------------------------------------------------------
/data/roads.mvt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/data/roads.mvt
--------------------------------------------------------------------------------
/project/Dependencies.scala:
--------------------------------------------------------------------------------
1 | import sbt._
2 |
3 | object Dependencies {
4 | val awscala = "com.github.seratch" %% "awscala" % Version.awscala
5 | val decline = "com.monovore" %% "decline" % Version.decline
6 | val spark = "org.apache.spark" %% "spark-core" % Version.spark
7 | val sparkSql = "org.apache.spark" %% "spark-sql" % Version.spark
8 | val sparkHive = "org.apache.spark" %% "spark-hive" % Version.spark
9 | val sparkStreaming = "org.apache.spark" %% "spark-streaming" % Version.spark
10 | val sparkJts = "org.locationtech.geomesa" %% "geomesa-spark-jts" % Version.geomesa
11 | val gtGeomesa = "org.locationtech.geotrellis" %% "geotrellis-geomesa" % Version.geotrellis
12 | val gtGeotools = "org.locationtech.geotrellis" %% "geotrellis-geotools" % Version.geotrellis
13 | val gtS3 = "org.locationtech.geotrellis" %% "geotrellis-s3" % Version.geotrellis
14 |
15 | val gtS3Spark = "org.locationtech.geotrellis" %% "geotrellis-s3-spark" % Version.geotrellis
16 | val gtSpark = "org.locationtech.geotrellis" %% "geotrellis-spark" % Version.geotrellis
17 | val gtSparkTestKit = "org.locationtech.geotrellis" %% "geotrellis-spark-testkit" % Version.geotrellis % "test"
18 | val gtVector = "org.locationtech.geotrellis" %% "geotrellis-vector" % Version.geotrellis
19 | val gtShapefile = "org.locationtech.geotrellis" %% "geotrellis-shapefile" % Version.geotrellis
20 | val gtVectorTile = "org.locationtech.geotrellis" %% "geotrellis-vectortile" % Version.geotrellis
21 | val cats = "org.typelevel" %% "cats-core" % Version.cats
22 | val scalactic = "org.scalactic" %% "scalactic" % Version.scalactic
23 | val scalatest = "org.scalatest" %% "scalatest" % Version.scalatest % "test"
24 | val jaiCore = "javax.media" % "jai_core" % "1.1.3" from "https://repo.osgeo.org/repository/release/javax/media/jai_core/1.1.3/jai_core-1.1.3.jar"
25 | val hbaseCommon = "org.apache.hbase" % "hbase-common" % "1.3.1"
26 | val hbaseClient = "org.apache.hbase" % "hbase-client" % "1.3.1"
27 | val hbaseServer = "org.apache.hbase" % "hbase-server" % "1.3.1"
28 | val geomesaHbaseDatastore = "org.locationtech.geomesa" % "geomesa-hbase-datastore_2.11" % Version.geomesa
29 | val kryo = "com.esotericsoftware" % "kryo-shaded" % Version.kryo
30 | val circeCore = "io.circe" %% "circe-core" % Version.circe
31 | val circeGeneric = "io.circe" %% "circe-generic" % Version.circe
32 | val circeExtras = "io.circe" %% "circe-generic-extras" % Version.circe
33 | val circeParser = "io.circe" %% "circe-parser" % Version.circe
34 | val circeOptics = "io.circe" %% "circe-optics" % Version.circe
35 | val circeJava8 = "io.circe" %% "circe-java8" % Version.circe
36 | val circeYaml = "io.circe" %% "circe-yaml" % "0.9.0"
37 | val commonsIO = "commons-io" % "commons-io" % Version.commonsIO
38 | val scalaj = "org.scalaj" %% "scalaj-http" % Version.scalaj
39 | }
40 |
--------------------------------------------------------------------------------
/project/Version.scala:
--------------------------------------------------------------------------------
1 | object Version {
2 | val awscala = "0.8.1"
3 | val geotrellis = "3.5.1"
4 | val scala2_11 = "2.11.12"
5 | val scala2_12 = "2.12.12"
6 | val geomesa = "2.2.1"
7 | val decline = "0.6.1"
8 | val cats = "1.6.1"
9 | val scalactic = "3.0.6"
10 | val scalatest = "3.0.3"
11 | val spark = "2.4.4"
12 | val kryo = "4.0.2"
13 | val circe = "0.11.0"
14 | val scalaLogging = "3.9.2"
15 | val commonsIO = "2.6"
16 | val scalaj = "2.4.1"
17 | }
18 |
--------------------------------------------------------------------------------
/project/assembly.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.5")
2 |
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=1.2.8
2 |
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("com.47deg" % "sbt-microsites" % "0.7.4")
2 |
3 | addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.2.27")
4 |
5 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.0")
6 |
7 | addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.0" cross CrossVersion.full)
8 |
9 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.5")
10 |
11 | addSbtPlugin("io.crashbox" % "sbt-gpg" % "0.2.0")
12 |
13 | addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "1.0.0")
14 |
--------------------------------------------------------------------------------
/scripts/cibuild:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | if [[ -n "${VECTORPIPE_DEBUG}" ]]; then
6 | set -x
7 | fi
8 |
9 | function usage() {
10 | echo -n \
11 | "Usage: $(basename "$0")
12 | Execute tests.
13 | "
14 | }
15 |
16 | if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
17 | if [[ "${1:-}" == "--help" ]]; then
18 | usage
19 | else
20 | SCALA_VERSION="${SCALA_VERSION:-2.11.12}" ./scripts/test
21 | fi
22 | fi
23 |
--------------------------------------------------------------------------------
/scripts/cipublish:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | if [[ -n "${VECTORPIPE_DEBUG}" ]]; then
6 | set -x
7 | fi
8 |
9 | function usage() {
10 | echo -n \
11 | "Usage: $(basename "$0")
12 | Publish artifacts to Sonatype.
13 | "
14 | }
15 |
16 | if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
17 | if [[ "${1:-}" == "--help" ]]; then
18 | usage
19 | else
20 | if [[ -n "${CIRCLE_TAG}" ]]; then
21 | echo "Publishing artifacts to Sonatype"
22 | ./sbt ";++${SCALA_VERSION:-2.11.12};sonatypeOpen ${CIRCLE_BUILD_NUM};publish;sonatypeRelease"
23 | else
24 | echo "Publishing artifacts to default location"
25 | ./sbt "++${SCALA_VERSION:-2.11.12}" publish
26 | fi
27 | fi
28 | fi
29 |
--------------------------------------------------------------------------------
/scripts/test:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | if [[ -n "${VECTORPIPE_DEBUG}" ]]; then
6 | set -x
7 | fi
8 |
9 | function usage() {
10 | echo -n \
11 | "Usage: $(basename "$0")
12 | Update Scala dependencies and execute tests.
13 | "
14 | }
15 |
16 | if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
17 | if [[ "${1:-}" == "--help" ]]; then
18 | usage
19 | else
20 | echo "Executing Scala test suite"
21 | ./sbt "++${SCALA_VERSION:-2.11.12}" test
22 | fi
23 | fi
24 |
--------------------------------------------------------------------------------
/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister:
--------------------------------------------------------------------------------
1 | vectorpipe.sources.AugmentedDiffProvider
2 | vectorpipe.sources.ChangeProvider
3 | vectorpipe.sources.ChangesetProvider
4 |
--------------------------------------------------------------------------------
/src/main/resources/microsite/data/menu.yml:
--------------------------------------------------------------------------------
1 | options:
2 |
3 | - title: Usage
4 | url: usage.html
5 | menu_type: usage
6 | menu_section: usage
7 |
8 | - title: Concepts
9 | url: usage/concepts.html
10 | menu_type: usage
11 | menu_section: concepts
12 |
13 | - title: Reading OpenStreetMap Data
14 | url: usage/osm.html
15 | menu_type: usage
16 | menu_section: osm
17 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/OSM.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe
2 |
3 | import java.sql.Timestamp
4 |
5 | import org.apache.spark.sql._
6 | import org.apache.spark.sql.functions._
7 | import geotrellis.vector._
8 | import vectorpipe.functions.osm.removeUninterestingTags
9 | import vectorpipe.internal._
10 |
11 | object OSM {
12 | /**
13 | * Convert a raw OSM dataframe into a frame containing JTS geometries for each unique id/changeset.
14 | *
15 | * This currently produces Points for nodes containing "interesting" tags, LineStrings and Polygons for ways
16 | * (according to OSM rules for defining areas), MultiPolygons for multipolygon and boundary relations, and
17 | * LineStrings / MultiLineStrings for route relations.
18 | *
19 | * @param input DataFrame containing node, way, and relation elements
20 | * @return DataFrame containing geometries.
21 | */
22 | def toGeometry(input: DataFrame): DataFrame = {
23 | import input.sparkSession.implicits._
24 |
25 | val st_pointToGeom = org.apache.spark.sql.functions.udf { pt: Point => pt.asInstanceOf[Geometry] }
26 |
27 | val elements = input
28 | .withColumn("tags", removeUninterestingTags('tags))
29 |
30 | val nodes = preprocessNodes(elements)
31 |
32 | val nodeGeoms = constructPointGeometries(nodes)
33 | .withColumn("minorVersion", lit(0))
34 | .withColumn("geom", st_pointToGeom('geom))
35 |
36 | val wayGeoms = reconstructWayGeometries(elements, nodes)
37 |
38 | val relationGeoms = reconstructRelationGeometries(elements, wayGeoms)
39 |
40 | nodeGeoms
41 | .union(wayGeoms.where(size('tags) > 0).drop('geometryChanged))
42 | .union(relationGeoms)
43 | }
44 |
45 | /**
46 | * Snapshot pre-processed elements.
47 | *
48 |    * A "time pin" is placed in a set of elements that have been augmented with a 'validUntil column to identify all
49 | * that were valid at a specific point in time (i.e. updated before the target timestamp and valid after it).
50 | *
51 | * @param df Elements (including 'validUntil column)
52 | * @param timestamp Optional timestamp to snapshot at
53 | * @return DataFrame containing valid elements at timestamp (or now)
54 | */
55 | def snapshot(df: DataFrame, timestamp: Timestamp = null): DataFrame = {
56 | import df.sparkSession.implicits._
57 |
58 | df
59 | .where(
60 | 'updated <= coalesce(lit(timestamp), current_timestamp)
61 | and coalesce(lit(timestamp), current_timestamp) < coalesce('validUntil, date_add(current_timestamp, 1)))
62 | }
63 |
64 | /**
65 | * Augment geometries with user metadata.
66 | *
67 | * When 'changeset is included, user (name and 'uid) metadata is joined from a DataFrame containing changeset
68 | * metadata.
69 | *
70 | * @param geoms Geometries to augment.
71 | * @param changesets Changesets DataFrame with user metadata.
72 | * @return Geometries augmented with user metadata.
73 | */
74 | def addUserMetadata(geoms: DataFrame, changesets: DataFrame): DataFrame = {
75 | import geoms.sparkSession.implicits._
76 |
77 | geoms
78 | .join(changesets.select('id as 'changeset, 'uid, 'user), Seq("changeset"))
79 | }
80 |
81 | }
82 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/examples/AugmentedDiffProcessor.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.examples
2 |
3 | import java.net.URI
4 |
5 | import cats.implicits._
6 | import com.monovore.decline._
7 | import org.apache.spark.sql._
8 | import vectorpipe.VectorPipe
9 | import vectorpipe.model.AugmentedDiff
10 | import vectorpipe.sources.Source
11 |
12 | /*
13 | * Usage example:
14 | *
15 | * sbt assembly
16 | *
17 | * spark-submit \
18 | * --class vectorpipe.examples.AugmentedDiffProcessor \
19 | * target/scala-2.11/vectorpipe.jar \
20 | * --augmented-diff-source s3://somewhere/diffs/
21 | */
22 | object AugmentedDiffProcessor
23 | extends CommandApp(
24 | name = "augmented-diff-processor",
25 | header = "Read from augmented diffs",
26 | main = {
27 | val augmentedDiffSourceOpt = Opts.option[URI](
28 | "augmented-diff-source",
29 | short = "a",
30 | metavar = "uri",
31 | help = "Location of augmented diffs to process"
32 | )
33 | val startSequenceOpt = Opts
34 | .option[Int](
35 | "start-sequence",
36 | short = "s",
37 | metavar = "sequence",
38 | help = "Starting sequence. If absent, the current (remote) sequence will be used."
39 | )
40 | .orNone
41 | val endSequenceOpt = Opts
42 | .option[Int](
43 | "end-sequence",
44 | short = "e",
45 | metavar = "sequence",
46 | help = "Ending sequence. If absent, the current (remote) sequence will be used."
47 | )
48 | .orNone
49 |
50 | (augmentedDiffSourceOpt, startSequenceOpt, endSequenceOpt)
51 | .mapN {
52 | (augmentedDiffSource, startSequence, endSequence) =>
53 | implicit val ss: SparkSession =
54 | VectorPipe.defaultSparkSessionWithJTS("AugmentedDiffProcessor")
55 |
56 | import ss.implicits._
57 |
58 | val options = Map(Source.BaseURI -> augmentedDiffSource.toString) ++
59 | startSequence
60 | .map(s => Map(Source.StartSequence -> s.toString))
61 | .getOrElse(Map.empty[String, String]) ++
62 | endSequence
63 | .map(s => Map(Source.EndSequence -> s.toString))
64 | .getOrElse(Map.empty[String, String])
65 |
66 | val geoms =
67 | ss.read.format(Source.AugmentedDiffs).options(options).load
68 |
69 | // aggregations are triggered when an event with a later timestamp ("event time") is received
70 | // geoms.select('sequence).distinct.show
71 | geoms.as[AugmentedDiff].show
72 |
73 | ss.stop()
74 | }
75 | }
76 | )
77 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/examples/AugmentedDiffStreamProcessor.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.examples
2 |
3 | import java.net.URI
4 |
5 | import cats.implicits._
6 | import com.monovore.decline._
7 | import geotrellis.vector.{Feature, Geometry}
8 | import org.apache.spark.sql._
9 | import vectorpipe.VectorPipe
10 | import vectorpipe.model.ElementWithSequence
11 | import vectorpipe.sources.Source
12 |
13 | /*
14 | * Usage example:
15 | *
16 | * sbt assembly
17 | *
18 | * spark-submit \
19 | * --class vectorpipe.examples.AugmentedDiffStreamProcessor \
20 | * target/scala-2.11/vectorpipe.jar \
21 | * --augmented-diff-source s3://somewhere/diffs/
22 | */
23 | object AugmentedDiffStreamProcessor
24 | extends CommandApp(
25 | name = "augmented-diff-stream-processor",
26 | header = "Read OSM augmented diffs as an open stream",
27 | main = {
28 | type AugmentedDiffFeature = Feature[Geometry, ElementWithSequence]
29 |
30 | val augmentedDiffSourceOpt = Opts.option[URI](
31 | "augmented-diff-source",
32 | short = "a",
33 | metavar = "uri",
34 | help = "Location of augmented diffs to process"
35 | )
36 | val startSequenceOpt = Opts
37 | .option[Int](
38 | "start-sequence",
39 | short = "s",
40 | metavar = "sequence",
41 | help = "Starting sequence. If absent, the current (remote) sequence will be used."
42 | )
43 | .orNone
44 | val endSequenceOpt = Opts
45 | .option[Int](
46 | "end-sequence",
47 | short = "e",
48 | metavar = "sequence",
49 | help = "Ending sequence. If absent, this will be an infinite stream."
50 | )
51 | .orNone
52 |
53 | (augmentedDiffSourceOpt, startSequenceOpt, endSequenceOpt)
54 | .mapN {
55 | (augmentedDiffSource, startSequence, endSequence) =>
56 | implicit val ss: SparkSession =
57 | VectorPipe.defaultSparkSessionWithJTS("AugmentedDiffStreamProcessor")
58 |
59 | val options = Map(Source.BaseURI -> augmentedDiffSource.toString,
60 | Source.ProcessName -> "AugmentedDiffStreamProcessor") ++
61 | startSequence
62 | .map(s => Map(Source.StartSequence -> s.toString))
63 | .getOrElse(Map.empty[String, String]) ++
64 | endSequence
65 | .map(s => Map(Source.EndSequence -> s.toString))
66 | .getOrElse(Map.empty[String, String])
67 |
68 | val geoms =
69 | ss.readStream.format(Source.AugmentedDiffs).options(options).load
70 |
71 | // aggregations are triggered when an event with a later timestamp ("event time") is received
72 | val query = geoms.writeStream
73 | .format("console")
74 | .start
75 |
76 | query.awaitTermination()
77 |
78 | ss.stop()
79 | }
80 | }
81 | )
82 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/examples/ChangeProcessor.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.examples
2 |
3 | import java.net.URI
4 |
5 | import cats.implicits._
6 | import com.monovore.decline._
7 | import org.apache.spark.sql._
8 | import vectorpipe.VectorPipe
9 | import vectorpipe.model.Change
10 | import vectorpipe.sources.Source
11 |
12 | /*
13 | * Usage example:
14 | *
15 | * sbt assembly
16 | *
17 | * spark-submit \
18 | * --class vectorpipe.examples.ChangeProcessor \
19 | * target/scala-2.11/vectorpipe.jar
20 | */
21 | object ChangeProcessor
22 | extends CommandApp(
23 | name = "change-processor",
24 | header = "Read minutely changes from start sequence to end sequence",
25 | main = {
26 | val changeSourceOpt = Opts
27 | .option[URI]("change-source",
28 | short = "d",
29 | metavar = "uri",
30 | help = "Location of minutely diffs to process")
31 | .withDefault(new URI("https://planet.osm.org/replication/minute/"))
32 | val startSequenceOpt = Opts
33 | .option[Int](
34 | "start-sequence",
35 | short = "s",
36 | metavar = "sequence",
37 | help = "Starting sequence. If absent, the current (remote) sequence will be used."
38 | )
39 | .orNone
40 | val endSequenceOpt = Opts
41 | .option[Int](
42 | "end-sequence",
43 | short = "e",
44 | metavar = "sequence",
45 | help = "Ending sequence. If absent, this will be an infinite stream."
46 | )
47 | .orNone
48 |
49 | (changeSourceOpt, startSequenceOpt, endSequenceOpt)
50 | .mapN {
51 | (changeSource, startSequence, endSequence) =>
52 | implicit val ss: SparkSession =
53 | VectorPipe.defaultSparkSessionWithJTS("ChangeProcessor")
54 |
55 | import ss.implicits._
56 |
57 | val options = Map(Source.BaseURI -> changeSource.toString) ++
58 | startSequence
59 | .map(s => Map(Source.StartSequence -> s.toString))
60 | .getOrElse(Map.empty[String, String]) ++
61 | endSequence
62 | .map(s => Map(Source.EndSequence -> s.toString))
63 | .getOrElse(Map.empty[String, String])
64 |
65 | val changes =
66 | ss.read.format(Source.Changes).options(options).load
67 |
68 | // aggregations are triggered when an event with a later timestamp ("event time") is received
69 | // changes.select('sequence).distinct.show
70 | changes.as[Change].show
71 |
72 | ss.stop()
73 | }
74 | }
75 | )
76 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/examples/ChangeStreamProcessor.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.examples
2 |
3 | import java.net.URI
4 |
5 | import cats.implicits._
6 | import com.monovore.decline._
7 | import org.apache.spark.sql._
8 | import vectorpipe.VectorPipe
9 | import vectorpipe.sources.Source
10 |
11 | /*
12 | * Usage example:
13 | *
14 | * sbt assembly
15 | *
16 | * spark-submit \
17 | * --class vectorpipe.examples.ChangeStreamProcessor \
18 | * target/scala-2.11/vectorpipe.jar \
19 |  *   --change-source https://planet.osm.org/replication/minute/
20 | */
21 | object ChangeStreamProcessor
22 | extends CommandApp(
23 | name = "change-stream-processor",
24 | header = "Read OSM minutely diffs as a stream",
25 | main = {
26 | val changeSourceOpt = Opts
27 | .option[URI]("change-source",
28 | short = "d",
29 | metavar = "uri",
30 | help = "Location of minutely diffs to process")
31 | .withDefault(new URI("https://planet.osm.org/replication/minute/"))
32 | val startSequenceOpt = Opts
33 | .option[Int](
34 | "start-sequence",
35 | short = "s",
36 | metavar = "sequence",
37 | help = "Starting sequence. If absent, the current (remote) sequence will be used."
38 | )
39 | .orNone
40 | val endSequenceOpt = Opts
41 | .option[Int](
42 | "end-sequence",
43 | short = "e",
44 | metavar = "sequence",
45 | help = "Ending sequence. If absent, this will be an infinite stream."
46 | )
47 | .orNone
48 | val partitionCountOpt = Opts
49 | .option[Int]("partitions",
50 | short = "p",
51 | metavar = "partition count",
52 | help = "Change partition count.")
53 | .orNone
54 |
55 | (changeSourceOpt, startSequenceOpt, endSequenceOpt, partitionCountOpt)
56 | .mapN {
57 | (changeSource, startSequence, endSequence, partitionCount) =>
58 | implicit val ss: SparkSession =
59 | VectorPipe.defaultSparkSessionWithJTS("ChangeStreamProcessor")
60 |
61 | val options = Map(Source.BaseURI -> changeSource.toString, Source.ProcessName -> "ChangeStreamProcessor") ++
62 | startSequence.map(s => Map(Source.StartSequence -> s.toString))
63 | .getOrElse(Map.empty[String, String]) ++
64 | endSequence.map(s => Map(Source.EndSequence -> s.toString))
65 | .getOrElse(Map.empty[String, String]) ++
66 | partitionCount.map(s => Map(Source.PartitionCount -> s.toString))
67 | .getOrElse(Map.empty[String, String])
68 |
69 | val changes =
70 | ss.readStream.format(Source.Changes).options(options).load
71 |
72 | val query = changes.writeStream
73 | .format("console")
74 | .start
75 |
76 | query.awaitTermination()
77 |
78 | ss.stop()
79 | }
80 | }
81 | )
82 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/examples/ChangesetProcessor.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.examples
2 |
3 | import java.net.URI
4 |
5 | import cats.implicits._
6 | import com.monovore.decline._
7 | import org.apache.spark.sql._
8 | import vectorpipe.VectorPipe
9 | import vectorpipe.model.Changeset
10 | import vectorpipe.sources.Source
11 |
12 | /*
13 | * Usage example:
14 | *
15 | * sbt assembly
16 | *
17 | * spark-submit \
18 | * --class vectorpipe.examples.ChangesetProcessor \
19 | * target/scala-2.11/vectorpipe.jar
20 | */
21 | object ChangesetProcessor
22 | extends CommandApp(
23 | name = "changeset-processor",
24 | header = "Read changesets between start sequence and end sequence",
25 | main = {
26 | val changesetSourceOpt =
27 | Opts.option[URI]("changeset-source",
28 | short = "c",
29 | metavar = "uri",
30 | help = "Location of changesets to process"
31 | ).withDefault(new URI("https://planet.osm.org/replication/changesets/"))
32 | val startSequenceOpt = Opts
33 | .option[Int](
34 | "start-sequence",
35 | short = "s",
36 | metavar = "sequence",
37 | help = "Starting sequence. If absent, the current (remote) sequence will be used."
38 | )
39 | .orNone
40 | val endSequenceOpt = Opts
41 | .option[Int](
42 | "end-sequence",
43 | short = "e",
44 | metavar = "sequence",
45 | help = "Ending sequence. If absent, this will be an infinite stream."
46 | )
47 | .orNone
48 |
49 | (changesetSourceOpt, startSequenceOpt, endSequenceOpt)
50 | .mapN {
51 | (changesetSource, startSequence, endSequence) =>
52 | implicit val ss: SparkSession =
53 | VectorPipe.defaultSparkSessionWithJTS("ChangesetProcessor")
54 |
55 | import ss.implicits._
56 |
57 | val options = Map(Source.BaseURI -> changesetSource.toString) ++
58 | startSequence
59 | .map(s => Map(Source.StartSequence -> s.toString))
60 | .getOrElse(Map.empty[String, String]) ++
61 | endSequence
62 | .map(s => Map(Source.EndSequence -> s.toString))
63 | .getOrElse(Map.empty[String, String])
64 |
65 | val changes =
66 | ss.read.format(Source.Changesets).options(options).load
67 |
68 | // aggregations are triggered when an event with a later timestamp ("event time") is received
69 | // changes.select('sequence).distinct.show
70 | changes.as[Changeset].show
71 |
72 | ss.stop()
73 | }
74 | }
75 | )
76 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/examples/ChangesetStreamProcessor.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.examples
2 |
3 | import java.net.URI
4 |
5 | import cats.implicits._
6 | import com.monovore.decline._
7 | import org.apache.spark.sql._
8 | import vectorpipe.VectorPipe
9 | import vectorpipe.sources.Source
10 |
11 | /*
12 | * Usage example:
13 | *
14 | * sbt assembly
15 | *
16 | * spark-submit \
17 | * --class vectorpipe.examples.ChangesetStreamProcessor \
18 | * target/scala-2.11/vectorpipe.jar \
19 |  *   --changeset-source https://planet.osm.org/replication/changesets/
20 | */
21 | object ChangesetStreamProcessor
22 | extends CommandApp(
23 | name = "changeset-stream-processor",
24 | header = "Read OSM changesets from start sequence to end sequence as a stream",
25 | main = {
26 | val changesetSourceOpt =
27 | Opts.option[URI]("changeset-source",
28 | short = "c",
29 | metavar = "uri",
30 | help = "Location of changesets to process"
31 | ).withDefault(new URI("https://planet.osm.org/replication/changesets/"))
32 | val startSequenceOpt = Opts
33 | .option[Int](
34 | "start-sequence",
35 | short = "s",
36 | metavar = "sequence",
37 | help = "Starting sequence. If absent, the current (remote) sequence will be used."
38 | )
39 | .orNone
40 | val endSequenceOpt = Opts
41 | .option[Int](
42 | "end-sequence",
43 | short = "e",
44 | metavar = "sequence",
45 | help = "Ending sequence. If absent, this will be an infinite stream."
46 | )
47 | .orNone
48 | val batchSizeOpt = Opts
49 | .option[Int]("batch-size",
50 | short = "b",
51 | metavar = "batch size",
52 | help = "Change batch size.")
53 | .orNone
54 |
55 | (changesetSourceOpt, startSequenceOpt, endSequenceOpt, batchSizeOpt)
56 | .mapN {
57 | (changesetSource, startSequence, endSequence, batchSize) =>
58 | implicit val ss: SparkSession =
59 | VectorPipe.defaultSparkSessionWithJTS("ChangesetStreamProcessor")
60 |
61 | val options = Map(Source.BaseURI -> changesetSource.toString, Source.ProcessName -> "ChangesetStreamProcessor") ++
62 | startSequence.map(s => Map(Source.StartSequence -> s.toString))
63 | .getOrElse(Map.empty[String, String]) ++
64 | endSequence.map(s => Map(Source.EndSequence -> s.toString))
65 | .getOrElse(Map.empty[String, String]) ++
66 | batchSize.map(s => Map(Source.BatchSize -> s.toString))
67 | .getOrElse(Map.empty[String, String])
68 |
69 | val changesets =
70 | ss.readStream.format(Source.Changesets).options(options).load
71 |
72 | val query = changesets.writeStream
73 | .format("console")
74 | .start
75 |
76 | query.awaitTermination()
77 |
78 | ss.stop()
79 | }
80 | }
81 | )
82 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/functions/package.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe
2 |
3 | import org.apache.spark.sql.Column
4 | import org.apache.spark.sql.expressions.UserDefinedFunction
5 | import org.apache.spark.sql.functions._
6 | import org.apache.spark.sql.types.{DoubleType, FloatType}
7 | import vectorpipe.util._
8 |
9 | package object functions {
10 | // A brief note about style
11 | // Spark functions are typically defined using snake_case, therefore so are the UDFs
12 | // internal helper functions use standard Scala naming conventions
13 |
14 | @transient lazy val merge_counts: UserDefinedFunction = udf(_mergeCounts)
15 |
16 | @transient lazy val sum_counts: UserDefinedFunction = udf { counts: Iterable[Map[String, Int]] =>
17 | counts.reduce(_mergeCounts(_, _))
18 | }
19 |
20 | // Convert BigDecimals to doubles
21 | // Reduces size taken for representation at the expense of some precision loss.
22 | def asDouble(value: Column): Column =
23 | when(value.isNotNull, value.cast(DoubleType))
24 | .otherwise(lit(Double.NaN)) as s"asDouble($value)"
25 |
26 | // Convert BigDecimals to floats
27 | // Reduces size taken for representation at the expense of more precision loss.
28 | def asFloat(value: Column): Column =
29 | when(value.isNotNull, value.cast(FloatType))
30 | .otherwise(lit(Float.NaN)) as s"asFloat($value)"
31 |
32 | @transient lazy val count_values: UserDefinedFunction = udf {
33 | (_: Seq[String]).groupBy(identity).mapValues(_.size)
34 | }
35 |
36 | @transient lazy val flatten: UserDefinedFunction = udf {
37 | (_: Seq[Seq[String]]).flatten
38 | }
39 |
40 | @transient lazy val flatten_set: UserDefinedFunction = udf {
41 | (_: Seq[Seq[String]]).flatten.distinct
42 | }
43 |
44 | @transient lazy val merge_sets: UserDefinedFunction = udf { (a: Iterable[String], b: Iterable[String]) =>
45 | (Option(a).getOrElse(Set.empty).toSet ++ Option(b).getOrElse(Set.empty).toSet).toArray
46 | }
47 |
48 | @transient lazy val without: UserDefinedFunction = udf { (list: Seq[String], without: String) =>
49 | list.filterNot(x => x == without)
50 | }
51 |
52 | private val _mergeCounts = (a: Map[String, Int], b: Map[String, Int]) =>
53 | mergeMaps(Option(a).getOrElse(Map.empty[String, Int]),
54 | Option(b).getOrElse(Map.empty[String, Int]))(_ + _)
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/model/Actions.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.model
2 |
3 | object Actions {
4 | type Action = Byte
5 |
6 | val Create: Action = 1.byteValue
7 | val Modify: Action = 2.byteValue
8 | val Delete: Action = 3.byteValue
9 |
10 | def fromString(str: String): Action =
11 | str.toLowerCase match {
12 | case "create" => Actions.Create
13 | case "delete" => Actions.Delete
14 | case "modify" => Actions.Modify
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/model/AugmentedDiff.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.model
2 |
3 | import java.sql.Timestamp
4 |
5 | import geotrellis.vector._
6 |
7 | case class AugmentedDiff(sequence: Int,
8 | `type`: Byte,
9 | id: Long,
10 | prevGeom: Option[Geometry],
11 | geom: Geometry,
12 | prevTags: Option[Map[String, String]],
13 | tags: Map[String, String],
14 | prevNds: Option[Seq[Long]],
15 | nds: Seq[Long],
16 | prevChangeset: Option[Long],
17 | changeset: Long,
18 | prevUid: Option[Long],
19 | uid: Long,
20 | prevUser: Option[String],
21 | user: String,
22 | prevUpdated: Option[Timestamp],
23 | updated: Timestamp,
24 | prevVisible: Option[Boolean],
25 | visible: Boolean,
26 | prevVersion: Option[Int],
27 | version: Int,
28 | minorVersion: Boolean)
29 |
30 | object AugmentedDiff {
31 | def apply(sequence: Int,
32 | prev: Option[Feature[Geometry, ElementWithSequence]],
33 | curr: Feature[Geometry, ElementWithSequence]): AugmentedDiff = {
34 | val `type` = Member.typeFromString(curr.data.`type`)
35 | val minorVersion = prev.map(_.data.version).getOrElse(Int.MinValue) == curr.data.version
36 |
37 | AugmentedDiff(
38 | sequence,
39 | `type`,
40 | curr.data.id,
41 | prev.map(_.geom),
42 | curr.geom,
43 | prev.map(_.data.tags),
44 | curr.data.tags,
45 | prev.map(_.data.nds),
46 | curr.data.nds,
47 | prev.map(_.data.changeset),
48 | curr.data.changeset,
49 | prev.map(_.data.uid),
50 | curr.data.uid,
51 | prev.map(_.data.user),
52 | curr.data.user,
53 | prev.map(_.data.timestamp),
54 | curr.data.timestamp,
55 | prev.map(_.data.visible.getOrElse(true)),
56 | curr.data.visible.getOrElse(true),
57 | prev.map(_.data.version),
58 | curr.data.version,
59 | minorVersion
60 | )
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/model/Change.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.model
2 |
3 | import java.sql.Timestamp
4 |
5 | import org.joda.time.DateTime
6 | import org.xml.sax
7 | import org.xml.sax.helpers.DefaultHandler
8 |
9 | import scala.collection.mutable
10 | import scala.collection.mutable.ListBuffer
11 |
12 | // TODO at some point user metadata (changeset, uid, user, timestamp?) should become options, as they may not be
13 | // available
14 | case class Change(id: Long,
15 | `type`: String,
16 | tags: Map[String, String],
17 | lat: Option[Double],
18 | lon: Option[Double],
19 | nds: Option[Seq[Nd]],
20 | members: Option[Seq[Member]],
21 | changeset: Long,
22 | timestamp: Timestamp,
23 | uid: Long,
24 | user: String,
25 | version: Long,
26 | visible: Boolean,
27 | sequence: Int)
28 |
29 | object Change {
30 | implicit def stringToTimestamp(s: String): Timestamp =
31 | Timestamp.from(DateTime.parse(s).toDate.toInstant)
32 |
33 | class ChangeHandler(sequence: Int) extends DefaultHandler {
34 | final val ActionLabels: Set[String] = Set("create", "delete", "modify")
35 | final val ElementLabels: Set[String] = Set("node", "way", "relation")
36 |
37 | private val changeSeq: ListBuffer[Change] = ListBuffer.empty
38 | private val tags: mutable.Map[String, String] = mutable.Map.empty
39 | private val nds: ListBuffer[Nd] = ListBuffer.empty
40 | private val members: ListBuffer[Member] = ListBuffer.empty
41 | private var action: Actions.Action = _
42 | private var attrs: Map[String, String] = _
43 |
44 | def changes: Seq[Change] = changeSeq
45 |
46 | override def startElement(uri: String,
47 | localName: String,
48 | qName: String,
49 | attributes: sax.Attributes): Unit = {
50 | val attrs =
51 | (for {
52 | i <- Range(0, attributes.getLength)
53 | } yield attributes.getQName(i) -> attributes.getValue(i)).toMap
54 |
55 | qName.toLowerCase match {
56 | case label if ActionLabels.contains(label) =>
57 | action = Actions.fromString(qName)
58 |
59 | case label if ElementLabels.contains(label) =>
60 | reset()
61 |
62 | this.attrs = attrs
63 |
64 | case "tag" =>
65 | tags.update(attrs("k"), attrs("v"))
66 |
67 | case "nd" =>
68 | nds.append(Nd(attrs("ref").toLong))
69 |
70 | case "member" =>
71 | members.append(
72 | Member(Member.typeFromString(attrs("type")), attrs("ref").toLong, attrs("role")))
73 |
74 | case _ => () // no-op
75 | }
76 | }
77 |
78 | def reset(): Unit = {
79 | tags.clear()
80 | nds.clear()
81 | members.clear()
82 | }
83 |
84 | override def endElement(uri: String, localName: String, qName: String): Unit = {
85 | if (ElementLabels.contains(qName.toLowerCase)) {
86 | changeSeq.append(
87 | Change(
88 | attrs("id").toLong,
89 | qName,
90 | tags.toMap,
91 | attrs.get("lat").map(_.toDouble),
92 | attrs.get("lon").map(_.toDouble),
  93 |           Option(nds.toList).filter(_.nonEmpty),       // copy, since reset() clears the buffer
  94 |           Option(members.toList).filter(_.nonEmpty),   // copy, since reset() clears the buffer
95 | attrs.get("changeset").map(_.toLong).getOrElse(-1L),
96 | stringToTimestamp(attrs.getOrElse("timestamp", "1970-01-01T00:00:00Z")),
97 | attrs.get("uid").map(_.toLong).getOrElse(-1L),
98 | attrs.getOrElse("user", ""),
99 | attrs.get("version").map(_.toLong).getOrElse(-1L),
100 | action != Actions.Delete,
101 | sequence
102 | ))
103 | }
104 | }
105 | }
106 | }
107 |
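Change.ChangeHandler is the SAX callback that ChangeSource feeds with gunzipped .osc replication files. A minimal sketch parsing an inline osmChange fragment (illustrative values):

    import java.io.ByteArrayInputStream
    import javax.xml.parsers.SAXParserFactory
    import vectorpipe.model.Change

    val osc =
      """<osmChange version="0.6">
        |  <modify>
        |    <node id="1" lat="12.0" lon="34.0" changeset="42" uid="7" user="mapper"
        |          version="2" timestamp="2019-01-01T00:00:00Z"/>
        |  </modify>
        |</osmChange>""".stripMargin

    val handler = new Change.ChangeHandler(sequence = 1234)
    SAXParserFactory.newInstance.newSAXParser
      .parse(new ByteArrayInputStream(osc.getBytes("UTF-8")), handler)

    handler.changes   // one Change for node 1, visible = true (the enclosing action is "modify")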
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/model/Changeset.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.model
2 |
3 | import java.sql.Timestamp
4 |
5 | import org.joda.time.DateTime
6 |
7 | import scala.util.Try
8 |
9 | case class Changeset(id: Long,
10 | tags: Map[String, String],
11 | createdAt: Timestamp,
12 | open: Boolean,
13 | closedAt: Option[Timestamp],
14 | commentsCount: Int,
15 | minLat: Option[Double],
16 | maxLat: Option[Double],
17 | minLon: Option[Double],
18 | maxLon: Option[Double],
19 | numChanges: Int,
20 | uid: Long,
21 | user: String,
22 | comments: Seq[ChangesetComment],
23 | sequence: Int)
24 |
25 | object Changeset {
26 | implicit def stringToTimestamp(s: String): Timestamp =
27 | Timestamp.from(DateTime.parse(s).toDate.toInstant)
28 |
29 | implicit def stringToOptionalTimestamp(s: String): Option[Timestamp] =
30 | s match {
31 | case "" => None
32 | case ts => Some(ts)
33 | }
34 |
35 | implicit def stringToOptionalDouble(s: String): Option[Double] =
36 | s match {
37 | case "" => None
38 | case c => Some(c.toDouble)
39 | }
40 |
41 | def fromXML(node: scala.xml.Node, sequence: Int): Changeset = {
42 | val id = (node \@ "id").toLong
43 | // Old changesets lack the appropriate field
44 | val commentsCount = Try((node \@ "comments_count").toInt).toOption.getOrElse(0)
45 | val uid = (node \@ "uid").toLong
46 | val user = node \@ "user"
47 | val numChanges = Try((node \@ "num_changes").toInt).toOption.getOrElse(0)
48 | val open = (node \@ "open").toBoolean
49 | val closedAt = node \@ "closed_at"
50 | val createdAt = node \@ "created_at"
51 |
52 | val maxLon = node \@ "max_lon"
53 | val minLon = node \@ "min_lon"
54 | val maxLat = node \@ "max_lat"
55 | val minLat = node \@ "min_lat"
56 | val tags =
57 | (node \ "tag").map(tag => (tag \@ "k", tag \@ "v")).toMap
58 | val comments = (node \ "discussion" \ "comment").map(ChangesetComment.fromXML)
59 |
60 | Changeset(
61 | id,
62 | tags,
63 | createdAt,
64 | open,
65 | closedAt,
66 | commentsCount,
67 | minLat,
68 | maxLat,
69 | minLon,
70 | maxLon,
71 | numChanges,
72 | uid,
73 | user,
74 | comments,
75 | sequence
76 | )
77 | }
78 | }
79 |
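Changeset.fromXML expects a single <changeset> element from a changeset replication file; blank bounding-box and closed_at attributes become None via the implicit conversions above. A minimal sketch (illustrative values):

    import scala.xml.XML
    import vectorpipe.model.Changeset

    val node = XML.loadString(
      """<changeset id="123" created_at="2019-01-01T00:00:00Z" open="false"
        |           closed_at="2019-01-01T00:05:00Z" num_changes="3" comments_count="1"
        |           min_lat="1.0" min_lon="2.0" max_lat="3.0" max_lon="4.0"
        |           uid="7" user="mapper">
        |  <tag k="comment" v="fix a typo"/>
        |  <discussion>
        |    <comment date="2019-01-02T00:00:00Z" uid="8" user="reviewer"><text>thanks</text></comment>
        |  </discussion>
        |</changeset>""".stripMargin)

    val cs = Changeset.fromXML(node, sequence = 1000)
    // cs.tags == Map("comment" -> "fix a typo"); cs.comments contains one ChangesetComment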
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/model/ChangesetComment.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.model
2 |
3 | import java.sql.Timestamp
4 |
5 | import org.joda.time.DateTime
6 |
7 | case class ChangesetComment(date: Timestamp, user: String, uid: Long, body: String)
8 |
9 | object ChangesetComment {
10 | implicit def stringToTimestamp(s: String): Timestamp =
11 | Timestamp.from(DateTime.parse(s).toDate.toInstant)
12 |
13 | def fromXML(node: scala.xml.Node): ChangesetComment = {
14 | val date = node \@ "date"
15 | val user = node \@ "user"
16 | val uid = (node \@ "uid").toLong
17 | val body = (node \ "text").text
18 |
19 | ChangesetComment(date, user, uid, body)
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/model/ElementWithSequence.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.model
2 |
3 | import vectorpipe.model
4 |
5 | import org.joda.time.format.ISODateTimeFormat
6 |
7 | import io.circe._
8 | import cats.syntax.either._
9 |
10 | import java.sql.Timestamp
11 |
12 |
13 | // TODO is this an AugmentedDiff or an OSM Element w/ a sequence property?
14 | // an AugmentedDiff may be (Option[Element with Sequence], Element with Sequence)
15 | case class ElementWithSequence(id: Long,
16 | `type`: String,
17 | tags: Map[String, String],
18 | nds: Seq[Long],
19 | changeset: Long,
20 | timestamp: Timestamp,
21 | uid: Long,
22 | user: String,
23 | version: Int,
24 | visible: Option[Boolean],
25 | sequence: Option[Long]) {
26 | // TODO extract this; it's used in MakeTiles and elsewhere
27 | val elementId: String = `type` match {
28 | case "node" => s"n$id"
29 | case "way" => s"w$id"
30 | case "relation" => s"r$id"
31 | case _ => id.toString
32 | }
33 | }
34 |
35 | object ElementWithSequence {
36 | implicit val decodeFoo: Decoder[ElementWithSequence] = new Decoder[ElementWithSequence] {
37 | final def apply(c: HCursor): Decoder.Result[ElementWithSequence] =
38 | for {
39 | id <- c.downField("id").as[Long]
40 | `type` <- c.downField("type").as[String]
41 | tags <- c.downField("tags").as[Map[String, String]]
42 | nds <- c.downField("nds").as[Option[Seq[Long]]]
43 | changeset <- c.downField("changeset").as[Long]
44 | timestampS <- c.downField("timestamp").as[String]
45 | uid <- c.downField("uid").as[Long]
46 | user <- c.downField("user").as[String]
47 | version <- c.downField("version").as[Int]
48 | visible <- c.downField("visible").as[Option[Boolean]]
49 | sequence <- c.downField("augmentedDiff").as[Option[Long]]
50 | } yield {
51 | val timestamp =
52 | Timestamp.from(
53 | ISODateTimeFormat
54 | .dateTimeParser()
55 | .parseDateTime(timestampS)
56 | .toDate
57 | .toInstant
58 | )
59 | model.ElementWithSequence(
60 | id,
61 | `type`,
62 | tags,
63 | nds.getOrElse(Seq.empty[Long]),
64 | changeset,
65 | timestamp,
66 | uid,
67 | user,
68 | version,
69 | visible,
70 | sequence
71 | )
72 | }
73 | }
74 | }
75 |
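The hand-written circe decoder tolerates missing `nds`, `visible`, and `augmentedDiff` fields and converts the ISO-8601 timestamp into a java.sql.Timestamp. A minimal sketch (illustrative JSON):

    import io.circe.parser.decode
    import vectorpipe.model.ElementWithSequence

    val json =
      """{"id": 1, "type": "node", "tags": {"amenity": "cafe"}, "changeset": 42,
        | "timestamp": "2019-01-01T00:00:00Z", "uid": 7, "user": "mapper",
        | "version": 2, "visible": true, "augmentedDiff": 1234}""".stripMargin

    decode[ElementWithSequence](json)
    // Right(...): nds defaults to Seq(), visible is Some(true), sequence is Some(1234)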
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/model/Member.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.model
2 |
3 | import vectorpipe.internal.{NodeType, RelationType, WayType}
4 |
5 | import scala.xml.Node
6 |
7 | case class Member(`type`: Byte, ref: Long, role: String)
8 |
9 | object Member {
10 | def typeFromString(str: String): Byte = str match {
11 | case "node" => NodeType
12 | case "way" => WayType
13 | case "relation" => RelationType
14 | case _ => null.asInstanceOf[Byte]
15 | }
16 |
17 | def stringFromByte(b: Byte): String = b match {
18 | case NodeType => "node"
19 | case WayType => "way"
20 | case RelationType => "relation"
21 | }
22 |
23 | def fromXML(node: Node): Member = {
24 | val `type` = typeFromString(node \@ "type")
25 | val ref = (node \@ "ref").toLong
26 | val role = node \@ "role"
27 |
28 | Member(`type`, ref, role)
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/model/Nd.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.model
2 | import scala.xml.Node
3 |
4 | case class Nd(ref: Long)
5 |
6 | object Nd {
7 | def fromXML(node: Node): Nd =
8 | Nd((node \@ "ref").toLong)
9 | }
10 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/relations/MultiPolygons.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.relations
2 | import java.sql.Timestamp
3 |
4 | import org.locationtech.jts.geom.prep.PreparedGeometryFactory
5 | import org.locationtech.jts.geom.{Geometry, LineString, Polygon, TopologyException}
6 | import org.apache.log4j.Logger
7 | import vectorpipe.internal.WayType
8 |
9 | object MultiPolygons {
10 | @transient private lazy val logger = Logger.getLogger(getClass)
11 | val prepGeomFactory = new PreparedGeometryFactory
12 |
13 | def build(id: Long,
14 | version: Int,
15 | timestamp: Timestamp,
16 | types: Seq[Byte],
17 | roles: Seq[String],
18 | _geoms: Seq[Geometry]): Option[Geometry] = {
19 | if (types.zip(_geoms).exists { case (t, g) => t == WayType && Option(g).isEmpty }) {
20 | // bail early if null values are present where they should exist (members w/ type=way)
21 | logger.debug(s"Incomplete relation: $id @ $version ($timestamp)")
22 | None
23 | } else if (types.isEmpty) {
24 | // empty relation
25 | None
26 | } else {
27 | val geomCount = _geoms.map(Option(_)).count(_.isDefined)
28 |
29 | logger.debug(s"$id @ $version ($timestamp) ${geomCount.formatted("%,d")} geoms")
30 | val geoms = _geoms.map {
31 | case geom: Polygon => Some(geom.getExteriorRing)
32 | case geom: LineString => Some(geom)
33 | case _ => None
34 | }
35 |
36 | val vertexCount = geoms.filter(_.isDefined).map(_.get).map(_.getNumPoints).sum
37 | logger.warn(s"${vertexCount.formatted("%,d")} vertices (${geomCount
38 | .formatted("%,d")} geoms) from ${types.size} members in $id @ $version ($timestamp)")
39 |
40 | val members: Seq[(String, LineString)] = roles
41 | .zip(geoms)
42 | .filter(_._2.isDefined)
43 | .map(x => (x._1, x._2.get))
44 |
45 | val (complete, partial) =
46 | members.foldLeft((Vector.empty[Polygon], Vector.empty[LineString])) {
47 | case ((c, p), (role, line: LineString)) =>
48 | role match {
49 | case "outer" if line.isClosed && line.getNumPoints >= 4 =>
50 | (c :+ geometryFactory.createPolygon(line.getCoordinates), p)
51 | case "outer" =>
52 | (c, p :+ line)
53 | case "inner" if line.isClosed && line.getNumPoints >= 4 =>
54 | (c :+ geometryFactory.createPolygon(line.getCoordinates), p)
55 | case "inner" => (c, p :+ line)
56 | case "" if line.isClosed && line.getNumPoints >= 4 =>
57 | (c :+ geometryFactory.createPolygon(line.getCoordinates), p)
58 | case "" =>
59 | (c, p :+ line)
60 | case _ =>
61 | (c, p)
62 | }
63 | }
64 |
65 | try {
66 | val rings = complete ++ formRings(partial.sortWith(_.getNumPoints > _.getNumPoints))
67 | val preparedRings = rings.map(prepGeomFactory.create)
68 |
69 | // reclassify rings according to their topology (ignoring roles)
70 | val (classifiedOuters, classifiedInners) = rings.sortWith(_.getArea > _.getArea) match {
71 | case Seq(h, t @ _*) =>
72 | t.foldLeft((Array(h), Array.empty[Polygon])) {
73 | case ((os, is), ring) =>
74 | // check the number of containing elements
75 | preparedRings.count(r => r.getGeometry != ring && r.contains(ring)) % 2 match {
76 | // if even, it's an outer ring
77 | case 0 => (os :+ ring, is)
78 | // if odd, it's an inner ring
79 | case 1 => (os, is :+ ring)
80 | }
81 | }
82 | case rs if rs.isEmpty => (Array.empty[Polygon], Array.empty[Polygon])
83 | }
84 |
85 | val (dissolvedOuters, addlInners) =
86 | dissolveRings(classifiedOuters)
87 | val (dissolvedInners, addlOuters) =
88 | dissolveRings(
89 | classifiedInners
90 | .map(_.getExteriorRing.getCoordinates)
91 | .map(geometryFactory.createPolygon) ++ addlInners)
92 |
93 | val (polygons, _) =
94 | (dissolvedOuters ++ addlOuters)
95 | // sort by size (descending) to use rings as part of the largest available polygon
96 | .sortWith(_.getArea > _.getArea)
97 | // only use inners once if they're contained by multiple outer rings
98 | .foldLeft((Vector.empty[Polygon], dissolvedInners)) {
99 | case ((ps, is), outer) =>
100 | val preparedOuter = prepGeomFactory.create(outer)
101 | (ps :+ geometryFactory.createPolygon(
102 | geometryFactory.createLinearRing(outer.getExteriorRing.getCoordinates),
103 | is.filter(inner => preparedOuter.contains(inner))
104 | .map({ x => geometryFactory.createLinearRing(x.getExteriorRing.getCoordinates)
105 | })
106 | .toArray
107 | ),
108 | is.filterNot(inner => preparedOuter.contains(inner)))
109 | }
110 |
111 | polygons match {
112 | case v @ Vector(p: Polygon) if v.length == 1 => Some(p)
113 | case ps => Some(geometryFactory.createMultiPolygon(ps.toArray))
114 | }
115 | } catch {
116 | case e @ (_: AssemblyException | _: IllegalArgumentException | _: TopologyException) =>
117 | logger.warn(
118 | s"Could not reconstruct relation $id @ $version ($timestamp): ${e.getMessage}")
119 | None
120 | case e: Throwable =>
121 | logger.warn(s"Could not reconstruct relation $id @ $version ($timestamp): $e")
122 | e.getStackTrace.foreach(logger.warn)
123 | None
124 | }
125 | }
126 | }
127 | }
128 |
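A minimal sketch of assembling a multipolygon relation from two open "outer" ways that close into a single square ring (illustrative ids and coordinates):

    import java.sql.Timestamp
    import org.locationtech.jts.geom.Coordinate
    import vectorpipe.internal.WayType
    import vectorpipe.relations.{MultiPolygons, geometryFactory}

    def line(coords: Coordinate*) = geometryFactory.createLineString(coords.toArray)

    val geoms = Seq(
      line(new Coordinate(0, 0), new Coordinate(1, 0), new Coordinate(1, 1)),
      line(new Coordinate(1, 1), new Coordinate(0, 1), new Coordinate(0, 0)))

    MultiPolygons.build(1L, 1, Timestamp.valueOf("2019-01-01 00:00:00"),
                        Seq(WayType, WayType), Seq("outer", "outer"), geoms)
    // Some(POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0)))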
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/relations/Routes.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.relations
2 | import java.sql.Timestamp
3 |
4 | import geotrellis.vector._
5 | import org.locationtech.jts.geom.TopologyException
6 | import org.apache.log4j.Logger
7 | import vectorpipe.internal.WayType
8 |
9 | object Routes {
10 | @transient private lazy val logger = Logger.getLogger(getClass)
11 |
12 | def build(id: Long,
13 | version: Int,
14 | timestamp: Timestamp,
15 | types: Seq[Byte],
16 | roles: Seq[String],
17 | geoms: Seq[Geometry]): Option[Seq[(String, Geometry)]] = {
18 | if (types.zip(geoms).exists { case (t, g) => t == WayType && Option(g).isEmpty }) {
19 | // bail early if null values are present where they should exist (members w/ type=way)
20 | logger.debug(s"Incomplete relation: $id @ $version ($timestamp)")
21 | None
22 | } else if (types.isEmpty) {
23 | // empty relation
24 | None
25 | } else {
26 |
27 | try {
28 | val res = roles
29 | .zip(geoms.map(Option.apply))
30 | .filter(_._2.isDefined)
31 | .map(x => (x._1, x._2.get))
32 | .groupBy {
33 | case (role, _) => role
34 | }
35 | .mapValues(_.map(_._2))
36 | .mapValues(connectSegments)
37 | .map {
38 | case (role, lines) =>
39 | lines match {
40 | case Seq(line) => (role, line)
41 | case _ => (role, geometryFactory.createMultiLineString(lines.toArray))
42 | }
43 | }
44 | .toSeq
45 |
46 | Some(res)
47 | } catch {
48 | case e @ (_: AssemblyException | _: IllegalArgumentException | _: TopologyException) =>
49 | logger.warn(
50 | s"Could not reconstruct route relation $id @ $version ($timestamp): ${e.getMessage}")
51 | None
52 | case e: Throwable =>
53 | logger.warn(s"Could not reconstruct route relation $id @ $version ($timestamp): $e")
54 | e.getStackTrace.foreach(logger.warn)
55 | None
56 | }
57 | }
58 | }
59 | }
60 |
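Routes.build groups member ways by role and merges each group into a LineString or MultiLineString. A minimal sketch (illustrative values):

    import java.sql.Timestamp
    import org.locationtech.jts.geom.Coordinate
    import vectorpipe.internal.WayType
    import vectorpipe.relations.{Routes, geometryFactory}

    val a = geometryFactory.createLineString(Array(new Coordinate(0, 0), new Coordinate(1, 0)))
    val b = geometryFactory.createLineString(Array(new Coordinate(1, 0), new Coordinate(2, 0)))

    Routes.build(1L, 1, Timestamp.valueOf("2019-01-01 00:00:00"),
                 Seq(WayType, WayType), Seq("forward", "forward"), Seq(a, b))
    // Some(Seq(("forward", LINESTRING (0 0, 1 0, 2 0))))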
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/relations/package.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe
2 |
3 | import org.locationtech.jts.geom._
4 | import vectorpipe.relations.utils.{
5 | PartialCoordinateSequence,
6 | ReversedCoordinateSequence,
7 | VirtualCoordinateSequence,
8 | isEqual
9 | }
10 |
11 | import scala.annotation.tailrec
12 | import scala.collection.GenTraversable
13 |
14 | package object relations {
15 |
16 | // join segments together
17 | @tailrec
18 | def connectSegments(segments: GenTraversable[VirtualCoordinateSequence],
19 | lines: Seq[CoordinateSequence] = Vector.empty[CoordinateSequence])
20 | : GenTraversable[CoordinateSequence] = {
21 | segments match {
22 | case Nil =>
23 | lines
24 | case Seq(h, t @ _*) =>
25 | val x = h.getX(h.size - 1)
26 | val y = h.getY(h.size - 1)
27 |
28 | t.find(line => x == line.getX(0) && y == line.getY(0)) match {
29 | case Some(next) =>
30 | connectSegments(h.append(new PartialCoordinateSequence(next, 1)) +: t.filterNot(line =>
31 | isEqual(line, next)),
32 | lines)
33 | case None =>
34 | t.find(line => x == line.getX(line.size - 1) && y == line.getY(line.size - 1)) match {
35 | case Some(next) =>
36 | connectSegments(h.append(
37 | new PartialCoordinateSequence(
38 | new ReversedCoordinateSequence(next),
39 | 1)) +: t.filterNot(line => isEqual(line, next)),
40 | lines)
41 | case None => connectSegments(t, lines :+ h)
42 | }
43 | }
44 | }
45 | }
46 |
47 | def connectSegments(segments: GenTraversable[Geometry])(
48 | implicit geometryFactory: GeometryFactory): GenTraversable[LineString] =
49 | connectSegments(
50 | segments
51 | .flatMap {
52 | case geom: LineString => Some(geom.getCoordinateSequence)
53 | case _ => None
54 | }
55 | .map(s => new VirtualCoordinateSequence(Seq(s)))
56 | ).map(geometryFactory.createLineString)
57 |
58 | // since GeoTrellis's GeometryFactory is unavailable
59 | implicit val geometryFactory: GeometryFactory = new GeometryFactory()
60 |
61 | // join segments together into rings
62 | @tailrec
63 | def formRings(segments: GenTraversable[VirtualCoordinateSequence],
64 | rings: Seq[CoordinateSequence] = Vector.empty[CoordinateSequence])
65 | : GenTraversable[CoordinateSequence] = {
66 | segments match {
67 | case Nil =>
68 | rings
69 | case Seq(h, t @ _*) if h.getX(0) == h.getX(h.size - 1) && h.getY(0) == h.getY(h.size - 1) =>
70 | formRings(t, rings :+ h)
71 | case Seq(h, t @ _*) =>
72 | val x = h.getX(h.size - 1)
73 | val y = h.getY(h.size - 1)
74 |
75 | formRings(
76 | t.find(line => x == line.getX(0) && y == line.getY(0)) match {
77 | case Some(next) =>
78 | h.append(new PartialCoordinateSequence(next, 1)) +: t.filterNot(line =>
79 | isEqual(line, next))
80 | case None =>
81 | t.find(line => x == line.getX(line.size - 1) && y == line.getY(line.size - 1)) match {
82 | case Some(next) =>
83 | h.append(new PartialCoordinateSequence(new ReversedCoordinateSequence(next), 1)) +: t
84 | .filterNot(line => isEqual(line, next))
85 | case None => throw new AssemblyException("Unable to connect segments.")
86 | }
87 | },
88 | rings
89 | )
90 | }
91 | }
92 |
93 | def formRings(segments: GenTraversable[LineString])(
94 | implicit geometryFactory: GeometryFactory): GenTraversable[Polygon] = {
95 | val csf = geometryFactory.getCoordinateSequenceFactory
96 | formRings(segments.map(_.getCoordinateSequence).map(s => new VirtualCoordinateSequence(Seq(s))))
97 | .map(csf.create(_))
98 | .map(geometryFactory.createPolygon)
99 | }
100 |
101 | def dissolveRings(rings: Array[Polygon]): (Seq[Polygon], Seq[Polygon]) = {
102 | Option(geometryFactory.createGeometryCollection(rings.asInstanceOf[Array[Geometry]]).union) match {
103 | case Some(mp) =>
104 | val polygons = for (i <- 0 until mp.getNumGeometries) yield {
105 | mp.getGeometryN(i).asInstanceOf[Polygon]
106 | }
107 |
108 | (polygons.map(_.getExteriorRing.getCoordinates).map(geometryFactory.createPolygon),
109 | polygons.flatMap(getInteriorRings).map(geometryFactory.createPolygon))
110 | case None =>
111 | (Vector.empty[Polygon], Vector.empty[Polygon])
112 | }
113 | }
114 |
115 | def getInteriorRings(p: Polygon): Seq[LinearRing] =
116 | for (i <- 0 until p.getNumInteriorRing)
117 | yield geometryFactory.createLinearRing(p.getInteriorRingN(i).getCoordinates)
118 |
119 | class AssemblyException(msg: String) extends Exception(msg)
120 | }
121 |
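connectSegments stitches member ways end-to-end, reversing segments where needed; formRings additionally requires every assembled chain to close, throwing AssemblyException otherwise (caught by the relation builders above). A minimal sketch of the joining behaviour (illustrative coordinates):

    import org.locationtech.jts.geom.Coordinate
    import vectorpipe.relations.{connectSegments, geometryFactory}

    val a = geometryFactory.createLineString(Array(new Coordinate(0, 0), new Coordinate(1, 0)))
    // shares an endpoint with `a` but runs in the opposite direction
    val b = geometryFactory.createLineString(Array(new Coordinate(2, 0), new Coordinate(1, 0)))

    connectSegments(Seq(a, b))
    // one LINESTRING (0 0, 1 0, 2 0): `b` is reversed before being appended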
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/relations/utils/PartialCoordinateSequence.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.relations.utils
2 | import org.locationtech.jts.geom.{Coordinate, CoordinateSequence, Envelope}
3 |
4 | class PartialCoordinateSequence(sequence: CoordinateSequence, offset: Int)
5 | extends CoordinateSequence {
6 | private lazy val _size: Int = sequence.size() - offset
7 |
8 | private lazy val coordinates: Array[Coordinate] = {
9 | val coords = new Array[Coordinate](size())
10 |
11 | for (i <- 0 until size) {
12 | coords(i) = getCoordinate(i)
13 | }
14 |
15 | coords
16 | }
17 |
18 | override def getDimension: Int = sequence.getDimension
19 |
20 | override def getCoordinate(i: Int): Coordinate = sequence.getCoordinate(offset + i)
21 |
22 | override def getCoordinateCopy(i: Int): Coordinate = sequence.getCoordinateCopy(offset + i)
23 |
24 | override def getCoordinate(index: Int, coord: Coordinate): Unit =
25 | sequence.getCoordinate(offset + index, coord)
26 |
27 | override def getOrdinate(index: Int, ordinateIndex: Int): Double =
28 | sequence.getOrdinate(offset + index, ordinateIndex)
29 |
30 | override def setOrdinate(index: Int, ordinateIndex: Int, value: Double): Unit =
31 | sequence.setOrdinate(offset + index, ordinateIndex, value)
32 |
33 | override def toCoordinateArray: Array[Coordinate] = coordinates
34 |
35 | override def expandEnvelope(env: Envelope): Envelope = {
36 | for (i <- 0 until size) {
37 | env.expandToInclude(getX(i), getY(i))
38 | }
39 |
40 | env
41 | }
42 |
43 | override def getX(index: Int): Double = sequence.getX(offset + index)
44 |
45 | override def getY(index: Int): Double = sequence.getY(offset + index)
46 |
47 | override def size(): Int = _size
48 |
49 | override def clone(): AnyRef = new PartialCoordinateSequence(sequence, offset)
50 |
51 | override def copy(): PartialCoordinateSequence = new PartialCoordinateSequence(sequence.copy, offset)
52 | }
53 |
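PartialCoordinateSequence is a view that skips the first `offset` coordinates of the wrapped sequence; the segment-joining code above uses it to drop the duplicated join point when appending. A minimal sketch:

    import org.locationtech.jts.geom.Coordinate
    import org.locationtech.jts.geom.impl.CoordinateArraySequence
    import vectorpipe.relations.utils.PartialCoordinateSequence

    val full = new CoordinateArraySequence(
      Array(new Coordinate(0, 0), new Coordinate(1, 0), new Coordinate(2, 0)))
    val tail = new PartialCoordinateSequence(full, 1)
    // tail.size == 2; tail.getCoordinate(0) is (1, 0)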
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/relations/utils/ReversedCoordinateSequence.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.relations.utils
2 | import org.locationtech.jts.geom.{Coordinate, CoordinateSequence, Envelope}
3 |
4 | class ReversedCoordinateSequence(sequence: CoordinateSequence) extends CoordinateSequence {
5 | private lazy val coordinates: Array[Coordinate] = {
6 | val coords = new Array[Coordinate](size())
7 |
   8 |     for (i <- 0 until size) {
9 | coords(i) = getCoordinate(i)
10 | }
11 |
12 | coords
13 | }
14 |
15 | override def getDimension: Int = sequence.getDimension
16 |
17 | override def getCoordinate(i: Int): Coordinate = sequence.getCoordinate(getIndex(i))
18 |
19 | override def getCoordinateCopy(i: Int): Coordinate = sequence.getCoordinateCopy(getIndex(i))
20 |
21 | override def getCoordinate(index: Int, coord: Coordinate): Unit =
22 | sequence.getCoordinate(getIndex(index), coord)
23 |
24 | private def getIndex(i: Int): Int = size - 1 - i
25 |
26 | override def size(): Int = sequence.size
27 |
28 | override def getX(index: Int): Double = sequence.getX(getIndex(index))
29 |
30 | override def getY(index: Int): Double = sequence.getY(getIndex(index))
31 |
32 | override def getOrdinate(index: Int, ordinateIndex: Int): Double =
33 | sequence.getOrdinate(getIndex(index), ordinateIndex)
34 |
35 | override def setOrdinate(index: Int, ordinateIndex: Int, value: Double): Unit =
36 | sequence.setOrdinate(getIndex(index), ordinateIndex, value)
37 |
38 | override def toCoordinateArray: Array[Coordinate] = coordinates
39 |
40 | override def expandEnvelope(env: Envelope): Envelope = sequence.expandEnvelope(env)
41 |
42 | override def clone(): AnyRef = new ReversedCoordinateSequence(sequence)
43 |
44 | override def copy(): ReversedCoordinateSequence = new ReversedCoordinateSequence(sequence.copy)
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/relations/utils/VirtualCoordinateSequence.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.relations.utils
2 | import com.google.common.collect.{Range, RangeMap, TreeRangeMap}
3 | import org.locationtech.jts.geom.{Coordinate, CoordinateSequence, Envelope}
4 |
5 | // rather than being a nested set of CoordinateSequences, this is a mutable wrapper to avoid deep call stacks
6 | class VirtualCoordinateSequence(sequences: Seq[CoordinateSequence]) extends CoordinateSequence {
7 | // TODO this should be invalidated after append (but it doesn't actually matter because all of the appending will
8 | // occur ahead of time)
9 | private lazy val coordinates: Array[Coordinate] = {
10 | val coords = new Array[Coordinate](size())
11 |
12 | for (i <- 0 until size) {
13 | coords(i) = getCoordinate(i)
14 | }
15 |
16 | coords
17 | }
18 |
19 | private val rangeMap: RangeMap[Integer, CoordinateSequence] = {
20 | val rm = TreeRangeMap.create[Integer, CoordinateSequence]
21 |
22 | sequences
23 | .zip(sequences.map(_.size).scanLeft(0)(_ + _).dropRight(1))
24 | .map {
25 | case (seq, offset) => (seq, Range.closed(offset: Integer, offset + seq.size - 1: Integer))
26 | }
27 | .foreach { case (seq, range) => rm.put(range, seq) }
28 |
29 | rm
30 | }
31 |
32 | private var dimension: Int = sequences.map(_.getDimension).min
33 |
34 | private var _size: Int = sequences.map(_.size).sum
35 |
36 | def append(sequence: CoordinateSequence): VirtualCoordinateSequence = {
37 | val upperEndpoint = rangeMap.span.upperEndpoint
38 | val range = Range.closed(upperEndpoint + 1: Integer, upperEndpoint + sequence.size: Integer)
39 | rangeMap.put(range, sequence)
40 |
41 | dimension = Math.min(dimension, sequence.getDimension)
42 | _size += sequence.size
43 |
44 | this
45 | }
46 |
47 | override def getDimension: Int = dimension
48 |
49 | override def getCoordinate(i: Int): Coordinate = {
50 | val (sequence, index) = getSequence(i)
51 |
52 | // bypass PackedCoordinateSequence.getCoordinate to prevent caching and associated allocation
53 | new Coordinate(sequence.getX(index), sequence.getY(index))
54 | }
55 |
56 | private def getSequence(i: Int): (CoordinateSequence, Int) = {
57 | val entry = rangeMap.getEntry(i: Integer)
58 |
59 | (entry.getValue, i - entry.getKey.lowerEndpoint)
60 | }
61 |
62 | override def getCoordinateCopy(i: Int): Coordinate = {
63 | val (sequence, index) = getSequence(i)
64 |
65 | sequence.getCoordinateCopy(index)
66 | }
67 |
68 | override def getCoordinate(i: Int, coord: Coordinate): Unit = {
69 | val (sequence, index) = getSequence(i)
70 |
71 | sequence.getCoordinate(index, coord)
72 | }
73 |
74 | override def getOrdinate(i: Int, ordinateIndex: Int): Double = {
75 | val (sequence, index) = getSequence(i)
76 |
77 | sequence.getOrdinate(index, ordinateIndex)
78 | }
79 |
80 | override def setOrdinate(i: Int, ordinateIndex: Int, value: Double): Unit = {
81 | val (sequence, index) = getSequence(i)
82 |
83 | sequence.setOrdinate(index, ordinateIndex, value)
84 | }
85 |
86 | override def toCoordinateArray: Array[Coordinate] = coordinates
87 |
88 | override def expandEnvelope(env: Envelope): Envelope = {
89 | for (i <- 0 until size) {
90 | env.expandToInclude(getX(i), getY(i))
91 | }
92 |
93 | env
94 | }
95 |
96 | override def getX(i: Int): Double = {
97 | val (sequence, index) = getSequence(i)
98 |
99 | sequence.getX(index)
100 | }
101 |
102 | override def getY(i: Int): Double = {
103 | val (sequence, index) = getSequence(i)
104 |
105 | sequence.getY(index)
106 | }
107 |
108 | override def size(): Int = _size
109 |
110 | override def clone(): AnyRef = {
111 | // we're already playing fast and loose
112 | this
113 | }
114 |
115 | override def copy(): VirtualCoordinateSequence = new VirtualCoordinateSequence(sequences.map(_.copy))
116 | }
117 |
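VirtualCoordinateSequence presents several coordinate sequences as one logical sequence, backed by a Guava RangeMap, so appending during segment joining does not copy coordinates. A minimal sketch:

    import org.locationtech.jts.geom.Coordinate
    import org.locationtech.jts.geom.impl.CoordinateArraySequence
    import vectorpipe.relations.utils.VirtualCoordinateSequence

    val a = new CoordinateArraySequence(Array(new Coordinate(0, 0), new Coordinate(1, 0)))
    val b = new CoordinateArraySequence(Array(new Coordinate(1, 0), new Coordinate(2, 0)))

    val virtual = new VirtualCoordinateSequence(Seq(a)).append(b)
    // virtual.size == 4; virtual.getX(3) == 2.0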
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/relations/utils/package.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.relations
2 |
3 | import org.locationtech.jts.geom.CoordinateSequence
4 |
5 | package object utils {
6 |
7 | /**
8 | * Tests whether two {@link CoordinateSequence}s are equal.
9 | * To be equal, the sequences must be the same length.
10 | * They do not need to be of the same dimension,
11 | * but the ordinate values for the smallest dimension of the two
12 | * must be equal.
  13 |    * Two NaN ordinate values are considered to be equal.
14 | *
15 | * Ported to Scala from JTS 1.15.0
16 | *
17 | * @param cs1 a CoordinateSequence
18 | * @param cs2 a CoordinateSequence
19 | * @return true if the sequences are equal in the common dimensions
20 | */
21 | def isEqual(cs1: CoordinateSequence, cs2: CoordinateSequence): Boolean = {
22 | if (cs1.size != cs2.size) {
23 | false
24 | } else {
25 | val dim = Math.min(cs1.getDimension, cs2.getDimension)
26 | (0 until cs1.size).forall(i => {
27 | (0 until dim).forall(d => {
28 | val v1 = cs1.getOrdinate(i, d)
29 | val v2 = cs2.getOrdinate(i, d)
30 |
  31 |           v1 == v2 || (v1.isNaN && v2.isNaN)
32 | })
33 | })
34 | }
35 | }
36 |
37 | }
38 |
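A minimal sketch of isEqual (the sequences are built with an explicit dimension of 2, so only x and y are compared):

    import org.locationtech.jts.geom.Coordinate
    import org.locationtech.jts.geom.impl.CoordinateArraySequence
    import vectorpipe.relations.utils.isEqual

    val cs1 = new CoordinateArraySequence(Array(new Coordinate(0, 0), new Coordinate(1, 1)), 2)
    val cs2 = new CoordinateArraySequence(Array(new Coordinate(0, 0), new Coordinate(1, 1)), 2)
    val cs3 = new CoordinateArraySequence(Array(new Coordinate(0, 0)), 2)

    isEqual(cs1, cs2)   // true: same length, same ordinates in the shared dimensions
    isEqual(cs1, cs3)   // false: different lengths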
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/AugmentedDiffMicroBatchReader.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.net.URI
4 | import java.util
5 |
6 | import org.apache.spark.internal.Logging
7 | import org.apache.spark.sql.catalyst.InternalRow
8 | import org.apache.spark.sql.sources.v2.DataSourceOptions
9 | import org.apache.spark.sql.sources.v2.reader.{InputPartition, InputPartitionReader}
10 | import vectorpipe.model.AugmentedDiff
11 |
12 | import scala.collection.JavaConverters._
13 | import scala.compat.java8.OptionConverters._
14 |
15 | case class AugmentedDiffStreamBatchTask(baseURI: URI, sequences: Seq[Int], handler: (Int, AugmentedDiffSource.RF) => Unit)
16 | extends InputPartition[InternalRow] {
17 | override def createPartitionReader(): InputPartitionReader[InternalRow] =
18 | AugmentedDiffStreamBatchReader(baseURI, sequences, handler)
19 | }
20 |
21 | case class AugmentedDiffStreamBatchReader(baseURI: URI, sequences: Seq[Int], handler: (Int, AugmentedDiffSource.RF) => Unit)
22 | extends ReplicationStreamBatchReader[AugmentedDiff](baseURI, sequences) {
23 |
24 | override def getSequence(baseURI: URI, sequence: Int): Seq[AugmentedDiff] =
25 | AugmentedDiffSource.getSequence(baseURI, sequence, handler)
26 | }
27 |
28 | case class AugmentedDiffMicroBatchReader(options: DataSourceOptions, checkpointLocation: String)
29 | extends ReplicationStreamMicroBatchReader[AugmentedDiff](options, checkpointLocation)
30 | with Logging {
31 |
32 | override def getCurrentSequence: Option[Int] =
33 | AugmentedDiffSource.getCurrentSequence(baseURI)
34 |
35 | private def baseURI: URI =
36 | options
37 | .get(Source.BaseURI)
38 | .asScala
39 | .map(new URI(_))
40 | .getOrElse(
41 | throw new RuntimeException(
42 | s"${Source.BaseURI} is a required option for ${Source.AugmentedDiffs}"
43 | )
44 | )
45 |
46 | private def errorHandler: AugmentedDiffSourceErrorHandler = {
47 | val handlerClass = options
48 | .get(Source.ErrorHandler)
49 | .asScala
50 | .getOrElse("vectorpipe.sources.AugmentedDiffSourceErrorHandler")
51 |
52 | val handler = Class.forName(handlerClass).newInstance.asInstanceOf[AugmentedDiffSourceErrorHandler]
53 | handler.setOptions(options.asMap.asScala.toMap)
54 | handler
55 | }
56 |
57 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] =
58 | sequenceRange
59 | .map(seq =>
60 | AugmentedDiffStreamBatchTask(baseURI, Seq(seq), errorHandler.handle).asInstanceOf[InputPartition[InternalRow]])
61 | .asJava
62 | }
63 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/AugmentedDiffProvider.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.util.Optional
4 |
5 | import org.apache.spark.sql.sources.DataSourceRegister
6 | import org.apache.spark.sql.sources.v2.reader.DataSourceReader
7 | import org.apache.spark.sql.sources.v2.reader.streaming.MicroBatchReader
8 | import org.apache.spark.sql.sources.v2.{
9 | DataSourceOptions,
10 | DataSourceV2,
11 | MicroBatchReadSupport,
12 | ReadSupport
13 | }
14 | import org.apache.spark.sql.types.StructType
15 |
16 | class AugmentedDiffProvider
17 | extends DataSourceV2
18 | with ReadSupport
19 | with MicroBatchReadSupport
20 | with DataSourceRegister {
21 | override def createMicroBatchReader(
22 | schema: Optional[StructType],
23 | checkpointLocation: String,
24 | options: DataSourceOptions
25 | ): MicroBatchReader = {
26 | if (schema.isPresent) {
27 | throw new IllegalStateException(
28 | "The augmented diff source does not support a user-specified schema."
29 | )
30 | }
31 |
32 | AugmentedDiffMicroBatchReader(options, checkpointLocation)
33 | }
34 |
35 | override def shortName(): String = Source.AugmentedDiffs
36 | override def createReader(options: DataSourceOptions): DataSourceReader =
37 | AugmentedDiffReader(options)
38 | }
39 |
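A hedged sketch of a batch read through this provider, using the fully-qualified class name rather than the registered short name; the bucket and prefix are illustrative, and the sequence-bounding options defined in Source.scala (not shown here) would normally be supplied as well:

    import org.apache.spark.sql.SparkSession
    import vectorpipe.sources.Source

    val spark = SparkSession.builder.appName("augmented-diff-example").getOrCreate()

    val diffs = spark.read
      .format("vectorpipe.sources.AugmentedDiffProvider")
      .option(Source.BaseURI, "s3://some-bucket/augmented-diffs/")   // illustrative location
      .load()

    diffs.printSchema()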
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/AugmentedDiffReader.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.net.URI
4 | import java.util
5 |
6 | import geotrellis.vector.Geometry
7 | import org.apache.spark.sql.catalyst.InternalRow
8 | import org.apache.spark.sql.sources.v2.DataSourceOptions
9 | import org.apache.spark.sql.sources.v2.reader.InputPartition
10 | import vectorpipe.model.{AugmentedDiff, ElementWithSequence}
11 | import vectorpipe.util.RobustFeature
12 |
13 | import scala.collection.JavaConverters._
14 | import scala.compat.java8.OptionConverters._
15 | import scala.util.Random
16 |
17 | case class AugmentedDiffReader(options: DataSourceOptions)
18 | extends ReplicationReader[AugmentedDiff](options) {
19 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] = {
20 | // prevent sequential diffs from being assigned to the same task
21 | val sequences = Random.shuffle((startSequence to endSequence).toList)
22 |
23 | sequences
24 | .grouped(Math.max(1, sequences.length / partitionCount))
25 | .toList
26 | .map(
27 | AugmentedDiffStreamBatchTask(baseURI, _, errorHandler.handle)
28 | .asInstanceOf[InputPartition[InternalRow]]
29 | )
30 | .asJava
31 | }
32 |
33 | private def baseURI: URI =
34 | options
35 | .get(Source.BaseURI)
36 | .asScala
37 | .map(new URI(_))
38 | .getOrElse(
39 | throw new RuntimeException(
40 | s"${Source.BaseURI} is a required option for ${Source.AugmentedDiffs}"
41 | )
42 | )
43 |
44 |
45 | private def errorHandler: AugmentedDiffSourceErrorHandler = {
46 | val handlerClass = options
47 | .get(Source.ErrorHandler)
48 | .asScala
49 | .getOrElse("vectorpipe.sources.AugmentedDiffSourceErrorHandler")
50 |
51 | val handler = Class.forName(handlerClass).newInstance.asInstanceOf[AugmentedDiffSourceErrorHandler]
52 | handler.setOptions(options.asMap.asScala.toMap)
53 | handler
54 | }
55 |
56 | override def getCurrentSequence: Option[Int] = AugmentedDiffSource.getCurrentSequence(baseURI)
57 | }
58 |
59 |
60 | class AugmentedDiffSourceErrorHandler extends Serializable {
61 | def setOptions(options: Map[String, String]): Unit = ()
62 |
63 | def handle(sequence: Int, feature: RobustFeature[Geometry, ElementWithSequence]): Unit = ()
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/AugmentedDiffSource.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.io.{BufferedInputStream, File}
4 | import java.net.URI
5 | import java.nio.charset.StandardCharsets
6 | import java.sql.Timestamp
7 | import java.time.Instant
8 | import java.util.zip.GZIPInputStream
9 |
10 | import geotrellis.store.s3._
11 | import geotrellis.vector._
12 |
13 | import vectorpipe.model.{AugmentedDiff, ElementWithSequence}
14 | import vectorpipe.util._
15 | //import vectorpipe.util.RobustFeatureFormats._
16 |
17 | import org.apache.commons.io.IOUtils
18 | import org.apache.spark.internal.Logging
19 | import org.apache.spark.sql.Column
20 | import org.apache.spark.sql.functions.{floor, from_unixtime, to_timestamp, unix_timestamp}
21 |
22 | import _root_.io.circe._
23 | import _root_.io.circe.generic.auto._
24 | import cats.implicits._
25 |
26 | import software.amazon.awssdk.services.s3.model.{GetObjectRequest, NoSuchKeyException, S3Exception}
27 | import software.amazon.awssdk.services.s3.S3Client
28 | import com.softwaremill.macmemo.memoize
29 | import org.joda.time.DateTime
30 |
31 | import scala.concurrent.duration.{Duration, _}
32 |
33 |
34 | object AugmentedDiffSource extends Logging {
35 | type RF = RobustFeature[Geometry, ElementWithSequence]
36 |
37 | private lazy val s3: S3Client = S3ClientProducer.get()
38 | val Delay: Duration = 15.seconds
39 |
40 | private implicit val dateTimeDecoder: Decoder[DateTime] =
41 | Decoder.instance(a => a.as[String].map(DateTime.parse))
42 |
43 | def getFeatures(baseURI: URI, sequence: Int): Seq[Map[String, RF]] = {
44 | val bucket = baseURI.getHost
45 | val prefix = new File(baseURI.getPath.drop(1)).toPath
46 | // left-pad sequence
47 | val s = f"$sequence%09d"
48 | val key = prefix.resolve(s"${s.slice(0, 3)}/${s.slice(3, 6)}/${s.slice(6, 9)}.json.gz").toString
49 |
50 | logDebug(s"Fetching sequence $sequence")
51 |
52 | val obj = s3.getObject(
53 | GetObjectRequest
54 | .builder
55 | .bucket(bucket)
56 | .key(key)
57 | .build
58 | )
59 |
60 | val bis = new BufferedInputStream(obj)
61 | val gzis = new GZIPInputStream(bis)
62 |
63 | try {
64 | IOUtils
65 | .toString(gzis, StandardCharsets.UTF_8)
66 | .lines
67 | .map { line =>
68 | // Spark doesn't like RS-delimited JSON; perhaps Spray doesn't either
69 | line
70 | .replace("\u001e", "")
71 | .parseGeoJson[JsonRobustFeatureCollectionMap]
72 | .getAll[RF]
73 | }
74 | .toSeq
75 | } finally {
76 | gzis.close()
77 | bis.close()
78 | }
79 | }
80 |
81 | /**
82 | * Fetch all augmented diffs from a sequence number.
83 | *
84 | * This function collects the data in an augmented diff sequence file into
85 | * vectorpipe.model.AugmentedDiff objects. These diff files are expected to be
86 | * stored on S3 in .json.gz files. This method provides the option to process errors
87 | * generated when the new geometry in the diff is faulty. If `waitUntilAvailable` is
88 | * set to true, the process will block, in 15 second increments, until the sequence
89 | * file is available.
90 | */
91 | def getSequence(baseURI: URI, sequence: Int, badGeometryHandler: (Int, RF) => Unit, waitUntilAvailable: Boolean): Seq[AugmentedDiff] = {
92 | logDebug(s"Fetching sequence $sequence")
93 |
94 | try {
95 | val robustFeatureMaps = getFeatures(baseURI, sequence)
96 |
97 | robustFeatureMaps.map{ m =>
98 | if (m.contains("new") && !m("new").geom.isDefined) badGeometryHandler(sequence, m("new"))
99 | AugmentedDiff(sequence, m.get("old").map(_.toFeature), m("new").toFeature)
100 | }
101 | } catch {
102 | case e: S3Exception if e.isInstanceOf[NoSuchKeyException] || e.statusCode == 403 =>
103 | logInfo(s"Encountered missing sequence (baseURI = ${baseURI}, sequence = ${sequence}), comparing with current for validity")
104 | getCurrentSequence(baseURI) match {
105 | case Some(s) if s > sequence =>
106 | logInfo(s"$sequence is missing, continuing")
107 | Seq.empty[AugmentedDiff]
108 | case _ =>
109 | if (waitUntilAvailable) {
110 | logInfo(s"$sequence is not yet available, sleeping.")
111 | Thread.sleep(Delay.toMillis)
 112 |                 getSequence(baseURI, sequence, badGeometryHandler, waitUntilAvailable)
113 | } else
114 | throw e
115 | }
116 | case t: Throwable =>
117 | if (waitUntilAvailable) {
118 | logError(s"sequence $sequence caused an error", t)
119 | Thread.sleep(Delay.toMillis)
 120 |           getSequence(baseURI, sequence, badGeometryHandler, waitUntilAvailable)
121 | } else
122 | throw t
123 | }
124 | }
125 |
126 | def getSequence(baseURI: URI, sequence: Int): Seq[AugmentedDiff] =
127 | getSequence(baseURI, sequence, {(_: Int, _: RF) => ()}, true)
128 |
129 | def getSequence(baseURI: URI, sequence: Int, waitUntilAvailable: Boolean): Seq[AugmentedDiff] =
130 | getSequence(baseURI, sequence, {(_: Int, _: RF) => ()}, waitUntilAvailable)
131 |
132 | def getSequence(baseURI: URI, sequence: Int, badGeometryHandler: (Int, RF) => Unit): Seq[AugmentedDiff] =
133 | getSequence(baseURI, sequence, badGeometryHandler, true)
134 |
135 | @memoize(maxSize = 1, expiresAfter = 30 seconds)
136 | def getCurrentSequence(baseURI: URI): Option[Int] = {
137 | val bucket = baseURI.getHost
138 | val prefix = new File(baseURI.getPath.drop(1)).toPath
139 | val key = prefix.resolve("state.yaml").toString
140 |
141 | try {
142 | val request = GetObjectRequest.builder()
143 | .bucket(bucket)
144 | .key(key)
145 | .build()
146 | val response = s3.getObjectAsBytes(request)
147 |
148 | val body = IOUtils
149 | .toString(response.asInputStream, StandardCharsets.UTF_8.toString)
150 |
151 | val state = yaml.parser
152 | .parse(body)
153 | .leftMap(err => err: Error)
154 | .flatMap(_.as[State])
155 | .valueOr(throw _)
156 |
157 | logDebug(s"$baseURI state: ${state.sequence} @ ${state.last_run}")
158 |
159 | Some(state.sequence)
160 | } catch {
161 | case err: Throwable =>
162 | logError("Error fetching / parsing changeset state.", err)
163 |
164 | None
165 | }
166 | }
167 |
168 | def timestampToSequence(timestamp: Timestamp): Int =
169 | ((timestamp.toInstant.getEpochSecond - 1347432900) / 60).toInt
170 |
171 | def timestampToSequence(timestamp: Column): Column =
172 | floor((unix_timestamp(timestamp) - 1347432900) / 60)
173 |
174 | def sequenceToTimestamp(sequence: Int): Timestamp =
175 | Timestamp.from(Instant.ofEpochSecond(sequence.toLong * 60 + 1347432900L))
176 |
177 | def sequenceToTimestamp(sequence: Column): Column =
178 | to_timestamp(from_unixtime(sequence * 60 + 1347432900))
179 |
180 | case class State(last_run: DateTime, sequence: Int)
181 | }
182 |
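Augmented diff sequence numbers advance once per minute from a fixed epoch (unix time 1347432900), which is exactly what the arithmetic in timestampToSequence / sequenceToTimestamp encodes. A minimal sketch:

    import java.sql.Timestamp
    import java.time.Instant
    import vectorpipe.sources.AugmentedDiffSource

    val tenMinutesIn = Timestamp.from(Instant.ofEpochSecond(1347432900L + 600))
    AugmentedDiffSource.timestampToSequence(tenMinutesIn)   // 10

    AugmentedDiffSource.sequenceToTimestamp(10)             // 600 seconds after the epoch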
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/ChangeMicroBatchReader.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.net.URI
4 | import java.util
5 |
6 | import org.apache.spark.sql.catalyst.InternalRow
7 | import org.apache.spark.sql.sources.v2.DataSourceOptions
8 | import org.apache.spark.sql.sources.v2.reader.{InputPartition, InputPartitionReader}
9 | import vectorpipe.model.Change
10 |
11 | import scala.collection.JavaConverters._
12 |
13 | case class ChangeStreamBatchTask(baseURI: URI, sequences: Seq[Int]) extends InputPartition[InternalRow] {
14 | override def createPartitionReader(): InputPartitionReader[InternalRow] =
15 | new ChangeStreamBatchReader(baseURI, sequences)
16 | }
17 |
18 | class ChangeStreamBatchReader(baseURI: URI, sequences: Seq[Int])
19 | extends ReplicationStreamBatchReader[Change](baseURI, sequences) {
20 |
21 | override def getSequence(baseURI: URI, sequence: Int): Seq[Change] =
22 | ChangeSource.getSequence(baseURI, sequence)
23 | }
24 |
25 | case class ChangeMicroBatchReader(options: DataSourceOptions, checkpointLocation: String)
26 | extends ReplicationStreamMicroBatchReader[Change](options, checkpointLocation) {
27 | private lazy val baseURI = new URI(
28 | options
29 | .get(Source.BaseURI)
30 | .orElse("https://planet.osm.org/replication/minute/")
31 | )
32 |
33 | override def getCurrentSequence: Option[Int] =
34 | ChangeSource.getCurrentSequence(baseURI)
35 |
36 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] =
37 | sequenceRange
38 | .map(
39 | seq => ChangeStreamBatchTask(baseURI, Seq(seq)).asInstanceOf[InputPartition[InternalRow]]
40 | )
41 | .asJava
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/ChangeProvider.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.util.Optional
4 |
5 | import org.apache.spark.sql.sources.DataSourceRegister
6 | import org.apache.spark.sql.sources.v2.reader.DataSourceReader
7 | import org.apache.spark.sql.sources.v2.reader.streaming.MicroBatchReader
8 | import org.apache.spark.sql.sources.v2.{DataSourceOptions, DataSourceV2, MicroBatchReadSupport, ReadSupport}
9 | import org.apache.spark.sql.types.StructType
10 |
11 | class ChangeProvider
12 | extends DataSourceV2
13 | with ReadSupport
14 | with MicroBatchReadSupport
15 | with DataSourceRegister {
16 | override def createMicroBatchReader(
17 | schema: Optional[StructType],
18 | checkpointLocation: String,
19 | options: DataSourceOptions
20 | ): MicroBatchReader = {
21 | if (schema.isPresent) {
22 | throw new IllegalStateException(
23 | "The changes source does not support a user-specified schema."
24 | )
25 | }
26 |
27 | ChangeMicroBatchReader(options, checkpointLocation)
28 | }
29 |
30 | override def shortName(): String = Source.Changes
31 | override def createReader(options: DataSourceOptions): DataSourceReader =
32 | ChangeReader(options)
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/ChangeReader.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.net.URI
4 | import java.util
5 |
6 | import org.apache.spark.sql.catalyst.InternalRow
7 | import org.apache.spark.sql.sources.v2.DataSourceOptions
8 | import org.apache.spark.sql.sources.v2.reader.InputPartition
9 | import vectorpipe.model.Change
10 |
11 | import scala.collection.JavaConverters._
12 | import scala.util.Random
13 |
14 | case class ChangeReader(options: DataSourceOptions) extends ReplicationReader[Change](options) {
15 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] = {
16 | // prevent sequential diffs from being assigned to the same task
17 | val sequences = Random.shuffle((startSequence to endSequence).toList)
18 |
19 | sequences
20 | .grouped(Math.max(1, sequences.length / partitionCount))
21 | .toList
22 | .map(
23 | ChangeStreamBatchTask(baseURI, _)
24 | .asInstanceOf[InputPartition[InternalRow]]
25 | )
26 | .asJava
27 | }
28 |
29 | private def baseURI =
30 | new URI(
31 | options
32 | .get(Source.BaseURI)
33 | .orElse("https://planet.osm.org/replication/minute/"))
34 |
35 | override def getCurrentSequence: Option[Int] = ChangeSource.getCurrentSequence(baseURI)
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/ChangeSource.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.io.{ByteArrayInputStream, IOException, StringReader}
4 | import java.net.URI
5 | import java.util.Properties
6 | import java.util.zip.GZIPInputStream
7 |
8 | import com.softwaremill.macmemo.memoize
9 | import javax.xml.parsers.SAXParserFactory
10 | import org.apache.spark.internal.Logging
11 | import org.joda.time.DateTime
12 | import vectorpipe.model
13 | import vectorpipe.model.Change
14 | import scalaj.http.Http
15 |
16 | import scala.concurrent.duration.{Duration, _}
17 |
18 | object ChangeSource extends Logging {
19 | val Delay: Duration = 15 seconds
20 | private val saxParserFactory = SAXParserFactory.newInstance
21 |
22 | def getSequence(baseURI: URI, sequence: Int): Seq[Change] = {
23 | val s = f"$sequence%09d"
24 | val path = s"${s.slice(0, 3)}/${s.slice(3, 6)}/${s.slice(6, 9)}.osc.gz"
25 |
26 | logInfo(s"Fetching sequence $sequence")
27 |
28 | try {
29 | val response =
30 | Http(baseURI.resolve(path).toString).asBytes
31 |
32 | if (response.code == 404) {
33 | logInfo(s"$sequence is not yet available, sleeping.")
34 | Thread.sleep(Delay.toMillis)
35 | getSequence(baseURI, sequence)
36 | } else {
37 | val bais = new ByteArrayInputStream(response.body)
38 | val gzis = new GZIPInputStream(bais)
39 | val parser = saxParserFactory.newSAXParser
40 | val handler = new model.Change.ChangeHandler(sequence)
41 | try {
42 | parser.parse(gzis, handler)
43 | val changes = handler.changes
44 |
45 | logDebug(s"Received ${changes.length} changes from sequence $sequence")
46 |
47 | changes
48 | } finally {
49 | gzis.close()
50 | bais.close()
51 | }
52 | }
53 | } catch {
54 | case e: IOException =>
55 | logWarning(s"Error fetching change $sequence", e)
56 | Thread.sleep(Delay.toMillis)
57 | getSequence(baseURI, sequence)
58 | }
59 | }
60 |
61 | @memoize(maxSize = 1, expiresAfter = 30 seconds)
62 | def getCurrentSequence(baseURI: URI): Option[Int] = {
63 | try {
64 | val response =
65 | Http(baseURI.resolve("state.txt").toString).asString
66 |
67 | val state = new Properties
68 | state.load(new StringReader(response.body))
69 |
70 | val sequence = state.getProperty("sequenceNumber").toInt
71 | val timestamp = DateTime.parse(state.getProperty("timestamp"))
72 |
73 | logDebug(s"$baseURI state: $sequence @ $timestamp")
74 |
75 | Some(sequence)
76 | } catch {
77 | case err: Throwable =>
78 | logError("Error fetching or parsing changeset state.", err)
79 | logError(baseURI.toString)
80 |
81 | None
82 | }
83 | }
84 | }
85 |
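A minimal sketch of fetching the newest minutely diff through ChangeSource (the planet.osm.org base URI matches the default used by the readers above):

    import java.net.URI
    import vectorpipe.sources.ChangeSource

    val base = new URI("https://planet.osm.org/replication/minute/")

    // memoized for 30 seconds; None if state.txt cannot be fetched or parsed
    val current = ChangeSource.getCurrentSequence(base)

    // download and SAX-parse one .osc.gz file into Change rows
    val changes = current.map(seq => ChangeSource.getSequence(base, seq))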
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/ChangesetMicroBatchReader.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.net.URI
4 | import java.util
5 |
6 | import org.apache.spark.sql.catalyst.InternalRow
7 | import org.apache.spark.sql.sources.v2.DataSourceOptions
8 | import org.apache.spark.sql.sources.v2.reader.{InputPartition, InputPartitionReader}
9 | import vectorpipe.model.Changeset
10 |
11 | import scala.collection.JavaConverters._
12 |
13 | case class ChangesetStreamBatchTask(baseURI: URI, sequences: Seq[Int])
14 | extends InputPartition[InternalRow] {
15 | override def createPartitionReader(): InputPartitionReader[InternalRow] =
16 | new ChangesetStreamBatchReader(baseURI, sequences)
17 | }
18 |
19 | class ChangesetStreamBatchReader(baseURI: URI, sequences: Seq[Int])
20 | extends ReplicationStreamBatchReader[Changeset](baseURI, sequences) {
21 |
22 | override def getSequence(baseURI: URI, sequence: Int): Seq[Changeset] =
23 | ChangesetSource.getChangeset(baseURI, sequence)
24 | }
25 |
26 | class ChangesetMicroBatchReader(options: DataSourceOptions, checkpointLocation: String)
27 | extends ReplicationStreamMicroBatchReader[Changeset](options, checkpointLocation) {
28 | private lazy val baseURI = new URI(
29 | options
30 | .get(Source.BaseURI)
31 | .orElse("https://planet.osm.org/replication/changesets/")
32 | )
33 |
34 | override def getCurrentSequence: Option[Int] =
35 | ChangesetSource.getCurrentSequence(baseURI).map(_.sequence.toInt)
36 |
37 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] =
38 | sequenceRange
39 | .map(
40 | seq => ChangesetStreamBatchTask(baseURI, Seq(seq)).asInstanceOf[InputPartition[InternalRow]]
41 | )
42 | .asJava
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/ChangesetProvider.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.util.Optional
4 |
5 | import org.apache.spark.sql.sources.DataSourceRegister
6 | import org.apache.spark.sql.sources.v2.reader.DataSourceReader
7 | import org.apache.spark.sql.sources.v2.reader.streaming.MicroBatchReader
8 | import org.apache.spark.sql.sources.v2.{
9 | DataSourceOptions,
10 | DataSourceV2,
11 | MicroBatchReadSupport,
12 | ReadSupport
13 | }
14 | import org.apache.spark.sql.types.StructType
15 |
16 | class ChangesetProvider
17 | extends DataSourceV2
18 | with ReadSupport
19 | with MicroBatchReadSupport
20 | with DataSourceRegister {
21 | override def createMicroBatchReader(
22 | schema: Optional[StructType],
23 | checkpointLocation: String,
24 | options: DataSourceOptions
25 | ): MicroBatchReader = {
26 | if (schema.isPresent) {
27 | throw new IllegalStateException(
28 | "The changesets source does not support a user-specified schema."
29 | )
30 | }
31 |
32 | new ChangesetMicroBatchReader(options, checkpointLocation)
33 | }
34 |
35 | override def shortName(): String = Source.Changesets
36 | override def createReader(options: DataSourceOptions): DataSourceReader =
37 | ChangesetReader(options)
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/ChangesetReader.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.net.URI
4 | import java.util
5 |
6 | import org.apache.spark.sql.catalyst.InternalRow
7 | import org.apache.spark.sql.sources.v2.DataSourceOptions
8 | import org.apache.spark.sql.sources.v2.reader.InputPartition
9 | import vectorpipe.model.Changeset
10 |
11 | import scala.collection.JavaConverters._
12 | import scala.util.Random
13 |
14 | case class ChangesetReader(options: DataSourceOptions)
15 | extends ReplicationReader[Changeset](options) {
16 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] = {
17 | // prevent sequential diffs from being assigned to the same task
18 | val sequences = Random.shuffle((startSequence to endSequence).toList)
19 |
20 | sequences
21 | .grouped(Math.max(1, sequences.length / partitionCount))
22 | .toList
23 | .map(
24 | ChangesetStreamBatchTask(baseURI, _)
25 | .asInstanceOf[InputPartition[InternalRow]]
26 | )
27 | .asJava
28 | }
29 |
30 | override protected def getCurrentSequence: Option[Int] =
31 | ChangesetSource.getCurrentSequence(baseURI).map(_.sequence.toInt)
32 |
33 | private def baseURI =
34 | new URI(
35 | options
36 | .get(Source.BaseURI)
37 | .orElse("https://planet.osm.org/replication/changesets/"))
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/ChangesetSource.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.io.{ByteArrayInputStream, IOException}
4 | import java.net.URI
5 | import java.nio.charset.StandardCharsets
6 | import java.time.Instant
7 | import java.util.zip.GZIPInputStream
8 |
9 | import cats.implicits._
10 | import io.circe.generic.auto._
11 | import io.circe.{yaml, _}
12 | import org.apache.commons.io.IOUtils
13 | import org.apache.spark.internal.Logging
14 | import org.joda.time.DateTime
15 | import org.joda.time.format.DateTimeFormat
16 | import vectorpipe.model.Changeset
17 | import scalaj.http.Http
18 |
19 | import scala.concurrent.duration.{Duration, _}
20 | import scala.util.Try
21 | import scala.xml.XML
22 |
23 | object ChangesetSource extends Logging {
24 | val Delay: Duration = 15 seconds
25 | // state.yaml uses a custom date format
26 | private val formatter = DateTimeFormat.forPattern("y-M-d H:m:s.SSSSSSSSS Z")
27 |
28 | private implicit val dateTimeDecoder: Decoder[DateTime] =
29 | Decoder.instance(a => a.as[String].map(DateTime.parse(_, formatter)))
30 |
31 | def getChangeset(baseURI: URI, sequence: Int, retry: Boolean = true): Seq[Changeset] = {
32 | val s = f"$sequence%09d"
33 | val path = s"${s.slice(0, 3)}/${s.slice(3, 6)}/${s.slice(6, 9)}.osm.gz"
34 |
35 | logDebug(s"Fetching sequence $sequence")
36 |
37 | try {
38 | val response =
39 | Http(baseURI.resolve(path).toString).asBytes
40 |
41 | if (response.code == 404) {
42 | if (retry) {
43 | logDebug(s"$sequence is not yet available, sleeping.")
44 | Thread.sleep(Delay.toMillis)
45 | getChangeset(baseURI, sequence)
46 | } else {
47 | logDebug(s"$sequence is not yet available; not retrying.")
48 | Seq()
49 | }
50 | } else {
51 | // NOTE: if diff bodies get really large, switch to a SAX parser to help with the memory footprint
52 | val bais = new ByteArrayInputStream(response.body)
53 | val gzis = new GZIPInputStream(bais)
54 | try {
55 | val data = XML.loadString(IOUtils.toString(gzis, StandardCharsets.UTF_8))
56 |
57 | val changesets = (data \ "changeset").map(Changeset.fromXML(_, sequence))
58 |
59 | logDebug(s"Received ${changesets.length} changesets")
60 |
61 | changesets
62 | } finally {
63 | gzis.close()
64 | bais.close()
65 | }
66 | }
67 | } catch {
68 | case e: IOException =>
69 | logWarning(s"Error fetching changeset $sequence", e)
70 | Thread.sleep(Delay.toMillis)
71 | getChangeset(baseURI, sequence)
72 | }
73 | }
74 |
75 | case class Sequence(last_run: DateTime, sequence: Long)
76 |
77 | private def grabSequence(baseURI: URI, filename: String): Sequence = {
78 | val response =
79 | Http(baseURI.resolve(filename).toString).asString
80 |
81 | val state = yaml.parser
82 | .parse(response.body)
83 | .leftMap(err => err: Error)
84 | .flatMap(_.as[Sequence])
85 | .valueOr(throw _)
86 |
87 | state
88 | }
89 |
90 | def getCurrentSequence(baseURI: URI): Option[Sequence] = {
91 | var state: Try[Sequence] = null
92 |
93 | for (i <- Range(0, 5)) {
94 | state = Try(grabSequence(baseURI, "state.yaml"))
95 |
96 | if (state.isSuccess) {
97 | logDebug(s"$baseURI state: ${state.get.sequence} @ ${state.get.last_run}")
98 |
99 | return Some(state.get)
100 | }
101 |
102 | Thread.sleep(5000)
103 | }
104 |
105 | logError("Error fetching / parsing changeset state.", state.failed.get)
106 | None
107 | }
108 |
109 | def getSequence(baseURI: URI, sequence: Long): Option[Sequence] = {
110 | val s = f"${sequence+1}%09d"
111 | val path = s"${s.slice(0, 3)}/${s.slice(3, 6)}/${s.slice(6, 9)}.state.txt"
112 |
113 | try {
114 | val state = grabSequence(baseURI, path)
115 |
116 | Some(state)
117 | } catch {
118 | case err: Throwable =>
119 | logError("Error fetching / parsing changeset state.", err)
120 |
121 | None
122 | }
123 | }
124 |
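// NOTE: changeset replication advances roughly one sequence per minute, so the
// difference (in minutes) between the current state's `last_run` and the target
// `modifiedTime` is used as an estimate of the difference in sequence numbers.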
125 | def estimateSequenceNumber(modifiedTime: Instant, baseURI: URI, maxIters: Int = 1000): Long = {
126 | val current = getCurrentSequence(baseURI)
127 | if (current.isDefined) {
128 | val diffMinutes = (current.get.last_run.getMillis/1000 -
129 | modifiedTime.getEpochSecond) / 60
130 | current.get.sequence - diffMinutes
131 | } else {
132 | // Some queries against state.yaml fail, so fall back to a failsafe:
133 | // ###.state.txt may not be provided for all replications, so estimate from the changesets themselves
134 | var i = 0
135 | var baseTime: Long = -1
136 | while (baseTime == -1 && i < maxIters) {
137 | baseTime = getChangeset(baseURI, i, false).map(_.createdAt.toInstant.getEpochSecond).sorted.lastOption.getOrElse(-1L)
138 | i += 1
139 | }
140 | if (baseTime == -1L)
141 | throw new IndexOutOfBoundsException(s"Couldn't find non-empty changeset in ${maxIters} attempts")
142 |
143 | val query = modifiedTime.getEpochSecond
144 |
145 | (query - baseTime) / 60 + i
146 | }
147 | }
148 |
149 | private def safeSequenceTime(baseURI: URI, sequence: Long): Option[Instant] = {
150 | val res = getSequence(baseURI, sequence)
151 | if (res.isDefined) {
152 | Some(Instant.parse(res.get.last_run.toString))
153 | } else {
154 | getChangeset(baseURI, sequence.toInt, false).map(_.createdAt.toInstant).sortBy(_.getEpochSecond).lastOption.map{ inst => Instant.parse(inst.toString).plusSeconds(60) }
155 | }
156 | }
157 |
158 | def findSequenceFor(modifiedTime: Instant, baseURI: URI): Long = {
159 | var guess = estimateSequenceNumber(modifiedTime, baseURI)
160 |
161 | while (safeSequenceTime(baseURI, guess).map(_.isAfter(modifiedTime)).getOrElse(false)) { guess -= 1 }
162 | while (safeSequenceTime(baseURI, guess).map(_.isBefore(modifiedTime)).getOrElse(false)) { guess += 1 }
163 |
164 | guess
165 | }
166 | }
167 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/ReplicationReader.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import org.apache.spark.SparkEnv
4 | import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
5 | import org.apache.spark.sql.internal.SQLConf
6 | import org.apache.spark.sql.sources.v2.DataSourceOptions
7 | import org.apache.spark.sql.sources.v2.reader.DataSourceReader
8 | import org.apache.spark.sql.types.StructType
9 |
10 | import scala.compat.java8.OptionConverters._
11 | import scala.reflect.runtime.universe.TypeTag
12 |
13 | abstract class ReplicationReader[T <: Product: TypeTag](options: DataSourceOptions)
14 | extends DataSourceReader {
15 | private lazy val schema: StructType = ExpressionEncoder[T].schema
16 |
17 | val DefaultPartitionCount: Int =
18 | SparkEnv.get.conf
19 | .getInt(SQLConf.SHUFFLE_PARTITIONS.key, SQLConf.SHUFFLE_PARTITIONS.defaultValue.get)
20 |
21 | protected val partitionCount: Int =
22 | options.getInt(Source.PartitionCount, DefaultPartitionCount)
23 |
24 | protected var endSequence: Int =
25 | options
26 | .get(Source.EndSequence)
27 | .asScala
28 | .map(s => s.toInt - 1)
29 | .getOrElse(getCurrentSequence
30 | .getOrElse(throw new RuntimeException("Could not determine end sequence.")))
31 |
32 | override def readSchema(): StructType = schema
33 |
34 | protected def startSequence: Int =
35 | options
36 | .get(Source.StartSequence)
37 | .asScala
38 | .map(s => s.toInt)
39 | .getOrElse(getCurrentSequence
40 | .getOrElse(throw new RuntimeException("Could not determine start sequence.")))
41 |
42 | protected def getCurrentSequence: Option[Int]
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/ReplicationStreamBatchReader.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import java.net.URI
4 |
5 | import org.apache.spark.internal.Logging
6 | import org.apache.spark.sql.catalyst.InternalRow
7 | import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder}
8 | import org.apache.spark.sql.sources.v2.reader.InputPartitionReader
9 |
10 | import scala.collection.parallel.ForkJoinTaskSupport
11 | import scala.concurrent.forkjoin.ForkJoinPool
12 | import scala.reflect.runtime.universe.TypeTag
13 |
14 | abstract class ReplicationStreamBatchReader[T <: Product: TypeTag](baseURI: URI,
15 | sequences: Seq[Int])
16 | extends InputPartitionReader[InternalRow]
17 | with Logging {
18 | org.apache.spark.sql.jts.registerTypes()
19 | private lazy val rowEncoder = RowEncoder(encoder.schema).resolveAndBind()
20 | protected var index: Int = -1
21 | protected var items: Vector[T] = _
22 | val Concurrency: Int = 8
23 | private lazy val encoder = ExpressionEncoder[T]
24 |
25 | override def next(): Boolean = {
26 | index += 1
27 |
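// Lazily fetch and parse all assigned sequences in parallel on the first call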
28 | if (Option(items).isEmpty) {
29 | val parSequences = sequences.par
30 | val taskSupport = new ForkJoinTaskSupport(new ForkJoinPool(Concurrency))
31 | parSequences.tasksupport = taskSupport
32 |
33 | items = parSequences.flatMap(seq => getSequence(baseURI, seq)).toVector
34 |
35 | taskSupport.environment.shutdown()
36 | }
37 |
38 | index < items.length
39 | }
40 |
41 | override def get(): InternalRow = encoder.toRow(items(index))
42 |
43 | override def close(): Unit = ()
44 |
45 | protected def getSequence(baseURI: URI, sequence: Int): Seq[T]
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/SequenceOffset.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 | import org.apache.spark.sql.sources.v2.reader.streaming.Offset
3 |
4 | case class SequenceOffset(sequence: Int, pending: Boolean = false)
5 | extends Offset
6 | with Ordered[SequenceOffset] {
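// Encode the offset as a JSON array: [sequence, 0|1], where the flag marks a pending offset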
7 | override val json: String = s"[$sequence,${pending.compare(false)}]"
8 |
9 | def +(increment: Int): SequenceOffset = SequenceOffset(sequence + increment)
10 | def -(decrement: Int): SequenceOffset = SequenceOffset(sequence - decrement)
11 | def next: SequenceOffset = SequenceOffset(sequence, pending = true)
12 |
13 | override def compare(that: SequenceOffset): Int =
14 | sequence.compare(that.sequence) match {
15 | case 0 => pending.compare(that.pending)
16 | case x => x
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/sources/Source.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | object Source {
4 | val AugmentedDiffs: String = "augmented-diffs"
5 | val Changes: String = "changes"
6 | val Changesets: String = "changesets"
7 |
8 | val BaseURI: String = "base_uri"
9 | val BatchSize: String = "batch_size"
10 | val DatabaseURI: String = "db_uri"
11 | val PartitionCount: String = "partition_count"
12 | val ProcessName: String = "proc_name"
13 | val StartSequence: String = "start_sequence"
14 | val EndSequence: String = "end_sequence"
15 |
16 | val ErrorHandler: String = "error_handler"
17 | val ErrorCodes: Set[Int] = Set(403, 404)
18 | }
19 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/util/Auth.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.util
2 |
3 | import java.net.URI
4 |
5 | case class Auth(user: Option[String], password: Option[String]) {
6 | def isDefined: Boolean = (user.isDefined && password.isDefined)
7 | }
8 |
9 | object Auth {
10 | def fromUri(uri: URI, userParam: String = "user", passwordParam: String = "password"): Auth = {
11 | val auth = getUriUserInfo(uri)
12 | if (auth.isDefined) {
13 | auth
14 | } else {
15 | val params = getUriParams(uri)
16 | auth.copy(
17 | user = auth.user.orElse(params.get(userParam)),
18 | password = auth.password.orElse(params.get(passwordParam))
19 | )
20 | }
21 | }
22 |
23 | /** Parse only the URI auth section */
24 | def getUriUserInfo(uri: URI): Auth = {
25 | val info = uri.getUserInfo
26 | if (null == info)
27 | Auth(None, None)
28 | else {
29 | val chunk = info.split(":")
30 | if (chunk.length == 1)
31 | Auth(Some(chunk(0)), None)
32 | else
33 | Auth(Some(chunk(0)), Some(chunk(1)))
34 | }
35 | }
36 |
37 | /** Parse URI parameters */
38 | def getUriParams(uri: URI): Map[String, String] = {
39 | val query = uri.getQuery
40 | if (null == query)
41 | Map.empty[String, String]
42 | else {
43 | query.split("&").map{ param =>
44 | val arr = param.split("=")
45 | arr(0) -> arr(1)
46 | }.toMap
47 | }
48 | }
49 | }
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/util/DBUtils.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.util
2 |
3 | import java.net.URI
4 | import java.sql.{Connection, DriverManager}
5 |
6 | object DBUtils {
7 | def getJdbcConnection(uri: URI): Connection = {
8 |
9 | val cleanUri = new URI(
10 | uri.getScheme,
11 | Option(uri.getHost).getOrElse("localhost") + (if (uri.getPort > 0) ":" + uri.getPort else ""),
12 | uri.getPath,
13 | null.asInstanceOf[String],
14 | null.asInstanceOf[String]
15 | )
16 | // also drops UserInfo
17 |
18 | val auth = Auth.fromUri(uri)
19 | (auth.user, auth.password) match {
20 | case (Some(user), Some(pass)) =>
21 | DriverManager.getConnection(s"jdbc:${cleanUri.toString}", user, pass)
22 | case _ =>
23 | DriverManager.getConnection(s"jdbc:${cleanUri.toString}")
24 | }
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/util/Geocode.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.util
2 |
3 | import geotrellis.vector._
4 | import geotrellis.vector.io.json._
5 |
6 | import org.apache.spark.sql._
7 | import org.apache.spark.sql.catalyst.encoders.RowEncoder
8 | import org.apache.spark.sql.functions._
9 | import org.apache.spark.sql.types._
10 |
11 | import org.locationtech.jts.geom.prep._
12 | import org.locationtech.jts.index.ItemVisitor
13 |
14 | import _root_.io.circe.{Encoder => CirceEncoder, Decoder => CirceDecoder, _}
15 | import cats.syntax.either._
16 |
17 | object Geocode {
18 |
19 | case class CountryId(code: String)
20 |
21 | object CountryIdCodecs {
22 | implicit val encodeCountryId: CirceEncoder[CountryId] = new CirceEncoder[CountryId] {
23 | final def apply(a: CountryId): Json = Json.obj(
24 | ("code", Json.fromString(a.code))
25 | )
26 | }
27 | implicit val decodeCountryId: CirceDecoder[CountryId] = new CirceDecoder[CountryId] {
28 | final def apply(c: HCursor): CirceDecoder.Result[CountryId] =
29 | for {
30 | code <- c.downField("ADM0_A3").as[String]
31 | } yield {
32 | CountryId(code)
33 | }
34 | }
35 | }
36 |
37 | import CountryIdCodecs._
38 |
39 | object Countries {
40 | lazy val all: Vector[MultiPolygonFeature[CountryId]] = {
41 | val collection =
42 | Resource("countries.geojson").
43 | parseGeoJson[JsonFeatureCollection]
44 |
45 | val polys =
46 | collection.
47 | getAllPolygonFeatures[CountryId].
48 | map(_.mapGeom(MultiPolygon(_)))
49 |
50 | val mps =
51 | collection.
52 | getAllMultiPolygonFeatures[CountryId]
53 |
54 | polys ++ mps
55 | }
56 |
57 | def indexed: SpatialIndex[MultiPolygonFeature[CountryId]] =
58 | SpatialIndex.fromExtents(all) { mpf => mpf.geom.extent }
59 | }
60 |
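// Wraps a spatial index of prepared country geometries so that repeated
// intersection queries against country boundaries stay fast.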
61 | class CountryLookup() extends Serializable {
62 | private val index =
63 | geotrellis.vector.SpatialIndex.fromExtents(
64 | Countries.all.
65 | map { mpf =>
66 | (PreparedGeometryFactory.prepare(mpf.geom), mpf.data)
67 | }
68 | ) { case (pg, _) => pg.getGeometry().extent }
69 |
70 | def lookup(geom: geotrellis.vector.Geometry): Traversable[CountryId] = {
71 | val t =
72 | new Traversable[(PreparedGeometry, CountryId)] {
73 | override def foreach[U](f: ((PreparedGeometry, CountryId)) => U): Unit = {
74 | val visitor = new ItemVisitor {
75 | override def visitItem(obj: AnyRef): Unit =
76 | f(obj.asInstanceOf[(PreparedGeometry, CountryId)])
77 | }
78 | index.rtree.query(geom.getEnvelopeInternal, visitor)
79 | }
80 | }
81 |
82 | t.
83 | filter(_._1.intersects(geom)).
84 | map(_._2)
85 | }
86 | }
87 |
88 | def apply(geoms: DataFrame): DataFrame = {
89 | val newSchema = StructType(geoms.schema.fields :+ StructField(
90 | "countries", ArrayType(StringType, containsNull = false), nullable = true))
91 | implicit val encoder: Encoder[Row] = RowEncoder(newSchema)
92 |
93 | geoms
94 | .mapPartitions { partition =>
95 | val countryLookup = new CountryLookup()
96 |
97 | partition.map { row =>
98 | val countryCodes = Option(row.getAs[Geometry]("geom")) match {
99 | case Some(geom) => countryLookup.lookup(geom).map(x => x.code)
100 | case None => Seq.empty[String]
101 | }
102 |
103 | Row.fromSeq(row.toSeq :+ countryCodes)
104 | }
105 | }
106 | }
107 |
108 | def regionsByChangeset(geomCountries: Dataset[Row]): DataFrame = {
109 | import geomCountries.sparkSession.implicits._
110 |
111 | geomCountries
112 | .where('country.isNotNull)
113 | .groupBy('changeset)
114 | .agg(collect_set('country) as 'countries)
115 |
116 | }
117 |
118 | }
119 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/util/Implicits.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.util
2 |
3 | import geotrellis.vector._
4 | import _root_.io.circe._
5 |
6 | import scala.reflect.ClassTag
7 |
8 | object Implicits extends Implicits
9 |
10 | trait Implicits extends RobustFeatureFormats {
11 | implicit class RobustFeaturesToGeoJson[G <: Geometry: ClassTag, D: Encoder](features: Traversable[RobustFeature[G, D]]) {
12 | def toGeoJson(): String = {
13 | val fc = new JsonRobustFeatureCollection
14 |
15 | features.foreach(fc.add(_))
16 |
17 | fc.asJson.noSpaces
18 | }
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/util/JsonRobustFeatureCollection.scala:
--------------------------------------------------------------------------------
1 |
2 | package vectorpipe.util
3 |
4 | import cats.syntax.either._
5 | import geotrellis.vector._
6 | import _root_.io.circe._
7 | import _root_.io.circe.syntax._
8 |
9 | import scala.collection.immutable.VectorBuilder
10 | import scala.collection.mutable
11 | import scala.reflect.ClassTag
12 |
13 | class JsonRobustFeatureCollection(features: List[Json] = Nil) {
14 | private val buffer = mutable.ListBuffer(features: _*)
15 |
16 | def add[G <: Geometry: ClassTag, D: Encoder](feature: RobustFeature[G, D]) =
17 | buffer += RobustFeatureFormats.writeRobustFeatureJson(feature)
18 |
19 | def getAll[F: Decoder]: Vector[F] = {
20 | val ret = new VectorBuilder[F]()
21 | features.foreach{ _.as[F].foreach(ret += _) }
22 | ret.result()
23 | }
24 |
25 | def getAllRobustFeatures[F <: RobustFeature[_, _] :Decoder]: Vector[F] =
26 | getAll[F]
27 |
28 | def getAllPointFeatures[D: Decoder]() = getAll[RobustFeature[Point, D]]
29 | def getAllLineStringFeatures[D: Decoder]() = getAll[RobustFeature[LineString, D]]
30 | def getAllPolygonFeatures[D: Decoder]() = getAll[RobustFeature[Polygon, D]]
31 | def getAllMultiPointFeatures[D: Decoder]() = getAll[RobustFeature[MultiPoint, D]]
32 | def getAllMultiLineStringFeatures[D: Decoder]() = getAll[RobustFeature[MultiLineString, D]]
33 | def getAllMultiPolygonFeatures[D: Decoder]() = getAll[RobustFeature[MultiPolygon, D]]
34 |
35 | def getAllGeometries(): Vector[Geometry] =
36 | getAll[Point] ++ getAll[LineString] ++ getAll[Polygon] ++
37 | getAll[MultiPoint] ++ getAll[MultiLineString] ++ getAll[MultiPolygon]
38 |
39 | def asJson: Json = {
40 | val bboxOption = getAllGeometries.map(_.extent).reduceOption(_ combine _)
41 | bboxOption match {
42 | case Some(bbox) =>
43 | Json.obj(
44 | "type" -> "FeatureCollection".asJson,
45 | "bbox" -> Extent.listEncoder(bbox),
46 | "features" -> buffer.toVector.asJson
47 | )
48 | case _ =>
49 | Json.obj(
50 | "type" -> "FeatureCollection".asJson,
51 | "features" -> buffer.toVector.asJson
52 | )
53 | }
54 | }
55 | }
56 |
57 | object JsonRobustFeatureCollection {
58 | def apply() = new JsonRobustFeatureCollection()
59 |
60 | def apply[G <: Geometry: ClassTag, D: Encoder](features: Traversable[RobustFeature[G, D]]) = {
61 | val fc = new JsonRobustFeatureCollection()
62 | features.foreach(fc.add(_))
63 | fc
64 | }
65 |
66 | def apply(features: Traversable[Json])(implicit d: DummyImplicit): JsonRobustFeatureCollection =
67 | new JsonRobustFeatureCollection(features.toList)
68 | }
69 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/util/JsonRobustFeatureCollectionMap.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.util
2 |
3 | import cats.syntax.either._
4 | import geotrellis.vector._
5 | import _root_.io.circe._
6 | import _root_.io.circe.syntax._
7 |
8 | import scala.collection.mutable
9 | import scala.reflect.ClassTag
10 |
11 | class JsonRobustFeatureCollectionMap(features: List[Json] = Nil) {
12 | private val buffer = mutable.ListBuffer(features:_*)
13 |
14 | def add[G <: Geometry: ClassTag, D: Encoder](featureMap: (String, RobustFeature[G, D])) =
15 | buffer += RobustFeatureFormats.writeRobustFeatureJsonWithID(featureMap)
16 |
17 | def asJson: Json = {
18 | val bboxOption = getAll[Geometry].map(_._2.extent).reduceOption(_ combine _)
19 | bboxOption match {
20 | case Some(bbox) =>
21 | Json.obj(
22 | "type" -> "FeatureCollection".asJson,
23 | "bbox" -> Extent.listEncoder(bbox),
24 | "features" -> buffer.toVector.asJson
25 | )
26 | case _ =>
27 | Json.obj(
28 | "type" -> "FeatureCollection".asJson,
29 | "features" -> buffer.toVector.asJson
30 | )
31 | }
32 | }
33 |
34 | private def getFeatureID(js: Json): String = {
35 | val c = js.hcursor
36 | val id = c.downField("id")
37 | id.as[String] match {
38 | case Right(i) => i
39 | case _ =>
40 | id.as[Int] match {
41 | case Right(i) => i.toString
42 | case _ => throw DecodingFailure("Feature expected to have \"ID\" field", c.history)
43 | }
44 | }
45 | }
46 |
47 | def getAll[F: Decoder]: Map[String, F] = {
48 | var ret = Map[String, F]()
49 | features.foreach{ f => f.as[F].foreach(ret += getFeatureID(f) -> _) }
50 | ret
51 | }
52 | }
53 |
54 | object JsonRobustFeatureCollectionMap {
55 | def apply() = new JsonRobustFeatureCollectionMap()
56 |
57 | def apply[G <: Geometry: ClassTag, D: Encoder](features: Traversable[(String, RobustFeature[G, D])]) = {
58 | val fc = new JsonRobustFeatureCollectionMap()
59 | features.foreach(fc.add(_))
60 | fc
61 | }
62 |
63 | def apply(features: Traversable[Json])(implicit d: DummyImplicit): JsonRobustFeatureCollectionMap =
64 | new JsonRobustFeatureCollectionMap(features.toList)
65 | }
66 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/util/Resource.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.util
2 |
3 | import java.io.InputStream
4 |
5 | object Resource {
6 | def apply(name: String): String = {
7 | val stream: InputStream = getClass.getResourceAsStream(s"/$name")
8 | try {
9 | scala.io.Source.fromInputStream(stream).getLines.mkString(" ")
10 | } finally {
11 | stream.close()
12 | }
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/util/RobustFeature.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.util
2 |
3 | import cats.syntax.either._
4 | import geotrellis.vector._
5 | import geotrellis.vector.io.json._
6 | import _root_.io.circe._
7 | import _root_.io.circe.syntax._
8 |
9 | import scala.reflect.ClassTag
10 | import scala.util.{Try, Success, Failure}
11 |
12 | case class RobustFeature[+G <: Geometry: ClassTag, D](geom: Option[G], data: D) {
13 | def toFeature(): Feature[G, D] = {
14 | val g = geom match {
15 | case Some(gg) => gg
16 | case _ => MultiPoint.EMPTY
17 | }
18 | Feature(g.asInstanceOf[G], data)
19 | }
20 | }
21 |
22 | trait RobustFeatureFormats {
23 | def writeRobustFeatureJson[G <: Geometry: ClassTag, D: Encoder](obj: RobustFeature[G, D]): Json = {
24 | val feature = obj.toFeature
25 | Json.obj(
26 | "type" -> "Feature".asJson,
27 | "geometry" -> GeometryFormats.geometryEncoder(feature.geom),
28 | "bbox" -> Extent.listEncoder(feature.geom.extent),
29 | "properties" -> obj.data.asJson
30 | )
31 | }
32 |
33 | def writeRobustFeatureJsonWithID[G <: Geometry: ClassTag, D: Encoder](idFeature: (String, RobustFeature[G, D])): Json = {
34 | val feature = idFeature._2.toFeature
35 | Json.obj(
36 | "type" -> "Feature".asJson,
37 | "geometry" -> GeometryFormats.geometryEncoder(feature.geom),
38 | "bbox" -> Extent.listEncoder(feature.geom.extent),
39 | "properties" -> idFeature._2.data.asJson,
40 | "id" -> idFeature._1.asJson
41 | )
42 | }
43 |
44 | def readRobustFeatureJson[D: Decoder, G <: Geometry: Decoder: ClassTag](value: Json): RobustFeature[G, D] = {
45 | val c = value.hcursor
46 | (c.downField("type").as[String], c.downField("geometry").focus, c.downField("properties").focus) match {
47 | case (Right("Feature"), Some(geom), Some(data)) =>
48 | //val g = Try(geom.convertTo[G]).toOption
49 | //val d = data.convertTo[D]
50 | (Try(geom.as[G].toOption).toOption.getOrElse(None), data.as[D].toOption) match {
51 | case (Some(g), Some(d)) if g isEmpty => RobustFeature(None, d)
52 | case (Some(g), Some(d)) => RobustFeature(Some(g), d)
53 | case (None, Some(d)) => RobustFeature(None, d)
54 | case (_, None) => throw new Exception(s"Feature expected well-formed data; got $data")
55 | }
56 | case _ => throw new Exception("Feature expected")
57 | }
58 | }
59 |
60 | def readRobustFeatureJsonWithID[D: Decoder, G <: Geometry: Decoder: ClassTag](value: Json): (String, RobustFeature[G, D]) = {
61 | val c = value.hcursor
62 | (c.downField("type").as[String], c.downField("geometry").focus, c.downField("properties").focus, c.downField("id").focus) match {
63 | case (Right("Feature"), Some(geom), Some(data), Some(id)) =>
64 | //val g = Try(geom.convertTo[G]).toOption
65 | //val d = data.convertTo[D]
66 | (Try(geom.as[G].toOption).toOption.getOrElse(None), data.as[D].toOption, id.as[String].toOption) match {
67 | case (Some(g), Some(d), Some(i)) if g isEmpty => (i, RobustFeature(None, d))
68 | case (Some(g), Some(d), Some(i)) => (i, RobustFeature(Some(g), d))
69 | case (None, Some(d), Some(i)) => (i, RobustFeature(None, d))
70 | case _ => throw new Exception(s"Feature expected well-formed id and data; got (${id}, ${data})")
71 | }
72 | case _ => throw new Exception("Feature expected")
73 | }
74 | }
75 |
76 | implicit def robustFeatureDecoder[G <: Geometry: Decoder: ClassTag, D: Decoder]: Decoder[RobustFeature[G, D]] =
77 | Decoder.decodeJson.emap { json: Json =>
78 | Try(readRobustFeatureJson[D, G](json)) match {
79 | case Success(f) => Right(f)
80 | case Failure(e) => Left(e.getMessage)
81 | }
82 | }
83 |
84 | implicit def robustFeatureEncoder[G <: Geometry: Encoder: ClassTag, D: Encoder]: Encoder[RobustFeature[G, D]] =
85 | Encoder.encodeJson.contramap[RobustFeature[G, D]] { writeRobustFeatureJson }
86 |
87 | implicit val robustFeatureCollectionEncoder: Encoder[JsonRobustFeatureCollection] =
88 | Encoder.encodeJson.contramap[JsonRobustFeatureCollection] { _.asJson }
89 |
90 | implicit val robustFeatureCollectionDecoder: Decoder[JsonRobustFeatureCollection] =
91 | Decoder.decodeHCursor.emap { c: HCursor =>
92 | (c.downField("type").as[String], c.downField("features").focus) match {
93 | case (Right("FeatureCollection"), Some(features)) => Right(JsonRobustFeatureCollection(features.asArray.toVector.flatten))
94 | case _ => Left("FeatureCollection expected")
95 | }
96 | }
97 |
98 | implicit val robustFeatureCollectionMapEncoder: Encoder[JsonRobustFeatureCollectionMap] =
99 | Encoder.encodeJson.contramap[JsonRobustFeatureCollectionMap] { _.asJson }
100 |
101 | implicit val robustFeatureCollectionMapDecoder: Decoder[JsonRobustFeatureCollectionMap] =
102 | Decoder.decodeHCursor.emap { c: HCursor =>
103 | (c.downField("type").as[String], c.downField("features").focus) match {
104 | case (Right("FeatureCollection"), Some(features)) => Right(JsonRobustFeatureCollectionMap(features.asArray.toVector.flatten))
105 | case _ => Left("FeatureCollection expected")
106 | }
107 | }
108 | }
109 |
110 | object RobustFeatureFormats extends RobustFeatureFormats
111 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/util/package.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe
2 |
3 | package object util extends Implicits {
4 | def mergeMaps[K, V](m1: Map[K, V], m2: Map[K, V])(f: (V, V) => V): Map[K, V] =
5 | (m1.toSeq ++ m2.toSeq).
6 | groupBy(_._1).
7 | map { case (k, vs) =>
8 | (k, vs.map(_._2).reduce(f))
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/vectortile/Clipping.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.vectortile
2 |
3 | import geotrellis.layer.SpatialKey
4 | import geotrellis.layer.LayoutLevel
5 | import geotrellis.vector._
6 |
7 | import scala.concurrent.ExecutionContext.Implicits.global
8 |
9 | object Clipping {
10 | def byLayoutCell(geom: Geometry, key: SpatialKey, layoutLevel: LayoutLevel): Geometry = {
11 | val ex = layoutLevel.layout.mapTransform.keyToExtent(key)
12 |
13 | // Preserve dimension of resultant geometry
14 | val clipped = geom match {
15 | case p: Point => p // points with the current key intersect the extent by definition
16 | case mp: MultiPoint =>
17 | timedIntersect(mp, ex) match {
18 | case PointResult(pr) => pr
19 | case MultiPointResult(mpr) => mpr
20 | case NoResult =>
21 | logger.warn(s"$geom was keyed to layout cell $key, but did not intersect $ex [zoom=${layoutLevel.zoom}]")
22 | geom
23 | case _ => // should never match here; just shut the compiler up
24 | geom
25 | }
26 | case l: LineString =>
27 | timedIntersect(l, ex) match {
28 | case LineStringResult(lr) => lr
29 | case MultiLineStringResult(mlr) => mlr
30 | case GeometryCollectionResult(gcr) =>
31 | gcr.getAll[LineString].length match {
32 | case 0 => MultiLineString()
33 | case 1 => gcr.getAll[LineString].head
34 | case _ => MultiLineString(gcr.getAll[LineString])
35 | }
36 | case NoResult =>
37 | logger.warn(s"$geom was keyed to layout cell $key, but did not intersect $ex [zoom=${layoutLevel.zoom}]")
38 | geom
39 | case _ =>
40 | MultiLineString() // Discard (multi-)point results
41 | }
42 | case ml: MultiLineString =>
43 | timedIntersect(ml, ex) match {
44 | case LineStringResult(lr) => lr
45 | case MultiLineStringResult(mlr) => mlr
46 | case GeometryCollectionResult(gcr) =>
47 | (gcr.getAll[LineString].length, gcr.getAll[MultiLineString].length) match {
48 | case (0, 0) => MultiLineString()
49 | case (1, 0) => gcr.getAll[LineString].head
50 | case (0, 1) => gcr.getAll[MultiLineString].head
51 | case _ => MultiLineString(gcr.getAll[LineString] ++ gcr.getAll[MultiLineString].flatMap(_.lines.toSeq))
52 | }
53 | case NoResult =>
54 | logger.warn(s"$geom was keyed to layout cell $key, but did not intersect $ex [zoom=${layoutLevel.zoom}]")
55 | geom
56 | case _ =>
57 | MultiLineString() // Discard (multi-)point results
58 | }
59 | case poly: Polygon =>
60 | timedIntersect(poly, ex) match {
61 | case PolygonResult(pr) => pr
62 | case MultiPolygonResult(mpr) => mpr
63 | case GeometryCollectionResult(gcr) =>
64 | gcr.getAll[Polygon].length match {
65 | case 0 => MultiPolygon()
66 | case 1 => gcr.getAll[Polygon].head
67 | case _ => MultiPolygon(gcr.getAll[Polygon])
68 | }
69 | case NoResult =>
70 | logger.warn(s"$geom was keyed to layout cell $key, but did not intersect $ex [zoom=${layoutLevel.zoom}]")
71 | geom
72 | case _ => MultiPolygon() // ignore point/line results
73 | }
74 | case mp: MultiPolygon =>
75 | timedIntersect(mp, ex) match {
76 | case PolygonResult(pr) => pr
77 | case MultiPolygonResult(mpr) => mpr
78 | case GeometryCollectionResult(gcr) =>
79 | (gcr.getAll[Polygon].length, gcr.getAll[MultiPolygon].length) match {
80 | case (0, 0) => MultiPolygon()
81 | case (1, 0) => gcr.getAll[Polygon].head
82 | case (0, 1) => gcr.getAll[MultiPolygon].head
83 | case _ => MultiPolygon(gcr.getAll[Polygon] ++ gcr.getAll[MultiPolygon].flatMap(_.polygons.toSeq))
84 | }
85 | case NoResult =>
86 | logger.warn(s"$geom was keyed to layout cell $key, but did not intersect $ex [zoom=${layoutLevel.zoom}]")
87 | geom
88 | case _ => MultiPolygon() // ignore point/line results
89 | }
90 | }
91 | clipped
92 | }
93 |
94 | }
95 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/vectortile/Simplify.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.vectortile
2 |
3 | import geotrellis.vector._
4 | import geotrellis.layer._
5 | import org.locationtech.jts.simplify.TopologyPreservingSimplifier
6 |
7 | object Simplify {
8 |
9 | /**
10 | * Simplifies geometry using JTS's topology-preserving simplifier.
11 | *
12 | * Note that there are known bugs with this simplifier. Please refer to the
13 | * JTS documentation. Faster simplifiers with fewer guarantees are available
14 | * there as well.
15 | */
16 | def withJTS(g: Geometry, ld: LayoutDefinition): Geometry = {
17 | TopologyPreservingSimplifier.simplify(g, ld.cellSize.resolution)
18 | }
19 |
20 | }
21 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/vectortile/export/package.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.vectortile
2 |
3 | import geotrellis.layer.SpatialKey
4 | import geotrellis.spark.store.hadoop._
5 | import geotrellis.spark.store.s3._
6 | import geotrellis.vectortile._
7 | import org.apache.spark.rdd.RDD
8 |
9 | import software.amazon.awssdk.services.s3.model.ObjectCannedACL
10 |
11 | import java.net.URI
12 | import java.io.ByteArrayOutputStream
13 | import java.util.zip.GZIPOutputStream
14 |
15 | package object export {
16 | def saveVectorTiles(vectorTiles: RDD[(SpatialKey, VectorTile)], zoom: Int, uri: URI): Unit = {
17 | uri.getScheme match {
18 | case "s3" =>
19 | val path = uri.getPath
20 | val prefix = path.stripPrefix("/").stripSuffix("/")
21 | saveToS3(vectorTiles, zoom, uri.getAuthority, prefix)
22 | case _ =>
23 | saveHadoop(vectorTiles, zoom, uri)
24 | }
25 | }
26 |
27 | private def saveToS3(vectorTiles: RDD[(SpatialKey, VectorTile)], zoom: Int, bucket: String, prefix: String) = {
28 | vectorTiles
29 | .mapValues { tile =>
30 | val byteStream = new ByteArrayOutputStream()
31 |
32 | try {
33 | val gzipStream = new GZIPOutputStream(byteStream)
34 | try {
35 | gzipStream.write(tile.toBytes)
36 | } finally {
37 | gzipStream.close()
38 | }
39 | } finally {
40 | byteStream.close()
41 | }
42 |
43 | byteStream.toByteArray
44 | }
45 | .saveToS3(
46 | { sk: SpatialKey => s"s3://${bucket}/${prefix}/${zoom}/${sk.col}/${sk.row}.mvt" },
47 | putObjectModifier = { request =>
48 | request
49 | .toBuilder()
50 | .contentEncoding("gzip")
51 | .acl(ObjectCannedACL.PUBLIC_READ)
52 | .build()
53 | })
54 | }
55 |
56 | private def saveHadoop(vectorTiles: RDD[(SpatialKey, VectorTile)], zoom: Int, uri: URI) = {
57 | vectorTiles
58 | .mapValues(_.toBytes)
59 | .saveToHadoop({ sk: SpatialKey => s"${uri}/${zoom}/${sk.col}/${sk.row}.mvt" })
60 | }
61 |
62 | }
63 |
--------------------------------------------------------------------------------
/src/main/scala/vectorpipe/vectortile/package.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe
2 |
3 | import geotrellis.proj4._
4 | import geotrellis.layer.SpatialKey
5 | import geotrellis.layer.LayoutDefinition
6 | import geotrellis.vector._
7 | import geotrellis.vectortile._
8 | import org.apache.spark.sql._
9 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
10 | import org.apache.spark.sql.functions._
11 |
12 | import scala.concurrent._
13 | import scala.concurrent.duration._
14 | import scala.util.{Try, Success, Failure}
15 |
16 | package object vectortile {
17 | type VectorTileFeature[+G <: Geometry] = Feature[G, Map[String, Value]]
18 |
19 | def vtf2mvtf[G <: Geometry](vtf: VectorTileFeature[G]): MVTFeature[G] =
20 | MVTFeature(vtf.geom, vtf.data)
21 |
22 | sealed trait LayerMultiplicity { val name: String }
23 | case class SingleLayer(val name: String) extends LayerMultiplicity
24 | case class LayerNamesInColumn(val name: String) extends LayerMultiplicity
25 |
26 | @transient lazy val logger = org.apache.log4j.Logger.getRootLogger
27 |
28 | @transient lazy val st_reprojectGeom = udf { (g: Geometry, srcProj: String, destProj: String) =>
29 | val trans = Proj4Transform(CRS.fromString(srcProj), CRS.fromString(destProj))
30 | if (Option(g).isDefined) {
31 | if (g.isEmpty)
32 | g
33 | else {
34 | g.reproject(trans)
35 | }
36 | } else {
37 | null
38 | }
39 | }
40 |
41 | def keyTo(layout: LayoutDefinition) = udf { g: Geometry =>
42 | if (Option(g).isDefined && !g.isEmpty) {
43 | layout.mapTransform.keysForGeometry(g).toArray
44 | } else {
45 | Array.empty[SpatialKey]
46 | }
47 | }
48 |
49 | def getSpatialKey(k: GenericRowWithSchema): SpatialKey = SpatialKey(k.getInt(0), k.getInt(1))
50 |
51 | def getSpatialKey(row: Row, field: String): SpatialKey = {
52 | val k = row.getAs[Row](field)
53 | SpatialKey(k.getInt(0), k.getInt(1))
54 | }
55 |
56 | // case class IdFeature[+G <: Geometry, +D](geom: Geometry, data: D, id: Int) extends Feature[G, D](geom, data) {
57 | // override def mapGeom[T <: Geometry](f: G => T): IdFeature[T, D] =
58 | // IdFeature(f(geom), data, id)
59 |
60 | // override def mapData[T](f: D => T): IdFeature[G, T] =
61 | // IdFeature(geom, f(data), id)
62 | // }
63 |
64 | def timedIntersect[G <: Geometry](geom: G, ex: Extent)(implicit ec: ExecutionContext) = {
65 | val future = Future { geom.&(ex) }
66 | Try(Await.result(future, 5000 milliseconds)) match {
67 | case Success(res) => res
68 | case Failure(_) =>
69 | logger.warn(s"Could not intersect $geom with $ex in 5000 milliseconds")
70 | NoResult
71 | }
72 | }
73 |
74 | case class VTContents(points: List[VectorTileFeature[Point]] = Nil,
75 | multipoints: List[VectorTileFeature[MultiPoint]] = Nil,
76 | lines: List[VectorTileFeature[LineString]] = Nil,
77 | multilines: List[VectorTileFeature[MultiLineString]] = Nil,
78 | polygons: List[VectorTileFeature[Polygon]] = Nil,
79 | multipolygons: List[VectorTileFeature[MultiPolygon]] = Nil) {
80 | def +(other: VTContents) = VTContents(points ++ other.points,
81 | multipoints ++ other.multipoints,
82 | lines ++ other.lines,
83 | multilines ++ other.multilines,
84 | polygons ++ other.polygons,
85 | multipolygons ++ other.multipolygons)
86 | def +[G <: Geometry](other: VectorTileFeature[G]) = other.geom match {
87 | case p : Point => copy(points=other.asInstanceOf[VectorTileFeature[Point]] :: points)
88 | case mp: MultiPoint => copy(multipoints=other.asInstanceOf[VectorTileFeature[MultiPoint]] :: multipoints)
89 | case l : LineString => copy(lines=other.asInstanceOf[VectorTileFeature[LineString]] :: lines)
90 | case ml: MultiLineString => copy(multilines=other.asInstanceOf[VectorTileFeature[MultiLineString]] :: multilines)
91 | case p : Polygon => copy(polygons=other.asInstanceOf[VectorTileFeature[Polygon]] :: polygons)
92 | case mp: MultiPolygon => copy(multipolygons=other.asInstanceOf[VectorTileFeature[MultiPolygon]] :: multipolygons)
93 | }
94 | }
95 | object VTContents {
96 | def empty() = VTContents()
97 | }
98 |
99 | def buildLayer[G <: Geometry](features: Iterable[VectorTileFeature[G]], layerName: String, ex: Extent, tileWidth: Int): Layer = {
100 | val contents = features.foldLeft(VTContents.empty){ (accum, feature) => accum + feature }
101 | val VTContents(pts, mpts, ls, mls, ps, mps) = contents
102 | StrictLayer(
103 | name=layerName,
104 | tileWidth=tileWidth,
105 | version=2,
106 | tileExtent=ex,
107 | points=pts.map(vtf2mvtf),
108 | multiPoints=mpts.map(vtf2mvtf),
109 | lines=ls.map(vtf2mvtf),
110 | multiLines=mls.map(vtf2mvtf),
111 | polygons=ps.map(vtf2mvtf),
112 | multiPolygons=mps.map(vtf2mvtf)
113 | )
114 | }
115 |
116 | def buildSortedLayer[G <: Geometry](features: Iterable[VectorTileFeature[G]], layerName: String, ex: Extent, tileWidth: Int): Layer = {
117 | val contents = features.foldLeft(VTContents.empty){ (accum, feature) => accum + feature }
118 | val VTContents(pts, mpts, ls, mls, ps, mps) = contents
119 | StrictLayer(
120 | name=layerName,
121 | tileWidth=tileWidth,
122 | version=2,
123 | tileExtent=ex,
124 | points=pts.map(vtf2mvtf),
125 | multiPoints=mpts.map(vtf2mvtf),
126 | lines=ls.map(vtf2mvtf),
127 | multiLines=mls.map(vtf2mvtf),
128 | polygons=ps.sortWith(_.getArea > _.getArea).map(vtf2mvtf),
129 | multiPolygons=mps.sortWith(_.getArea > _.getArea).map(vtf2mvtf)
130 | )
131 | }
132 |
133 | def buildVectorTile[G <: Geometry](
134 | features: Iterable[VectorTileFeature[G]],
135 | layerName: String,
136 | ex: Extent,
137 | tileWidth: Int,
138 | sorted: Boolean
139 | ): VectorTile = {
140 | val layer =
141 | if (sorted)
142 | buildSortedLayer(features, layerName, ex, tileWidth)
143 | else
144 | buildLayer(features, layerName, ex, tileWidth)
145 | VectorTile(Map(layerName -> layer), ex)
146 | }
147 |
148 | def buildVectorTile[G <: Geometry](
149 | layerFeatures: Map[String, Iterable[VectorTileFeature[G]]],
150 | ex: Extent,
151 | tileWidth: Int,
152 | sorted: Boolean
153 | ): VectorTile = {
154 | VectorTile(layerFeatures.map{ case (layerName, features) => (layerName,
155 | if (sorted)
156 | buildSortedLayer(features, layerName, ex, tileWidth)
157 | else
158 | buildLayer(features, layerName, ex, tileWidth))
159 | }, ex)
160 | }
161 |
162 | }
163 |
--------------------------------------------------------------------------------
/src/main/tut/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: home
3 | title: "Home"
4 | section: "section_home"
5 | position: 1
6 | technologies:
7 | - first: ["GeoTrellis", "Geographic data processing engine for high performance applications"]
8 | - second: ["Apache Spark", "An engine for large-scale data processing"]
9 | - third: ["Scala", "Functional Programming on the JVM"]
10 | ---
11 |
12 | # VectorPipe
13 |
14 | VectorPipe is a Scala library for transforming vector data from arbitrary
15 | sources into [Mapbox Vector Tiles](https://www.mapbox.com/vector-tiles/). It
16 | uses the VectorTile codec from the [GeoTrellis library
17 | suite](https://geotrellis.io/), which in turn is powered by [Apache
18 | Spark](https://spark.apache.org/).
19 |
20 | Currently VectorPipe can process:
21 |
22 | - OpenStreetMap XML / PBF* / ORC
23 |
24 | And produce:
25 |
26 | - Analytic Vector Tiles (AVTs)
27 | - Custom Vector Tile schemes (by writing a custom *Collator* function)
28 |
29 | Of course, you're not limited to just producing Vector Tiles. Once you've
30 | extracted your raw data into [GeoTrellis](https://geotrellis.io/) Geometries,
31 | you can do whatever you want with them (analytics, rasterizing, etc.).
32 |
33 | ### Dependencies
34 |
35 | - Scala 2.11
36 | - Apache Spark 2.1.0+
37 |
38 | ### Getting Started
39 |
40 | To use VectorPipe, add the following to your `build.sbt`:
41 |
42 | ```
43 | resolvers += Resolver.bintrayRepo("azavea", "maven")
44 |
45 | libraryDependencies += "com.azavea" %% "vectorpipe" % "0.1.0"
46 | ```
47 |
48 | Now import the following, and you're good to go:
49 |
50 | ```tut:silent
51 | import vectorpipe._
52 | ```
53 |
54 | ### Performance
55 |
56 | Wow, fast!
57 |
58 | ### Related Projects
59 |
60 | - [OpenMapTiles](https://openmaptiles.org/)
61 | - [Mapbox](https://www.mapbox.com/)
62 |
--------------------------------------------------------------------------------
/src/main/tut/outputs.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: page
3 | title: "Outputs"
4 | section: "section4"
5 | position: 4
6 | ---
7 |
8 | Types of VectorTiles!
9 |
10 | - AVTs
11 | - OpenMapTiles
12 | - Custom!
13 |
--------------------------------------------------------------------------------
/src/main/tut/sources.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: page
3 | title: "Data Sources"
4 | section: "section3"
5 | position: 3
6 | ---
7 |
8 | Sources of Vector data!
9 |
--------------------------------------------------------------------------------
/src/main/tut/usage.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: docs
3 | title: "Usage"
4 | section: "usage"
5 | position: 2
6 | ---
7 |
8 | {% include_relative usage/usage.md %}
9 |
--------------------------------------------------------------------------------
/src/main/tut/usage/concepts.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: docs
3 | title: "Concepts"
4 | section: "usage"
5 | ---
6 |
7 | # Concepts
8 |
9 | VectorPipe strives to be straightforward. With only a few simple function
10 | applications we can transform completely raw data into a grid of
11 | VectorTiles, ready for further processing. "Clipping" and "Collation"
12 | functions help us customize this process along the way.
13 |
14 |
15 |
16 | ### Data Sources
17 |
18 | Any source of vector (i.e. geometric) data on the earth. It could come in any
19 | format (example: OpenStreetMap).
20 |
21 | For each data source that has first-class support, we expose a
22 | `vectorpipe.*` module with a matching name. Example: `vectorpipe.osm`. These
23 | modules expose all the types and functions necessary for transforming the
24 | raw data into the "Middle Ground" types.
25 |
26 | No first-class support for your favourite data source? Want to write it
27 | yourself, and maybe even keep it private? That's okay, just provide the
28 | function `YourData => RDD[Feature[G, D]]` and VectorPipe can handle the
29 | rest.
30 |
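For instance, a minimal custom source might look like the following sketch (the
`MyRecord` type and `fromMyData` helper are purely hypothetical; any code that
produces `Feature`s will do):

```
import geotrellis.vector._
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

/* A hypothetical record type from your own data source */
case class MyRecord(lon: Double, lat: Double, name: String)

/* The only contract VectorPipe needs: your data, lifted into Features */
def fromMyData(records: Seq[MyRecord])(implicit sc: SparkContext): RDD[Feature[Point, Map[String, String]]] =
  sc.parallelize(records).map { r =>
    Feature(Point(r.lon, r.lat), Map("name" -> r.name))
  }
```
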
31 | ### The "Middle Ground"
32 |
33 | A collection of Geometries on the earth. The actual data can be distributed
34 | across multiple machines via Spark's `RDD` type. From this "middle ground",
35 | we can proceed with creating Vector Tiles, or (with the right supporting
36 | code) we could convert *back* into the format of the original source data.
37 |
38 | Note that via the method `VectorTile.toIterable`, the following conversion
39 | is possible:
40 |
41 | ```tut:silent
42 | import geotrellis.spark._
43 | import geotrellis.vector._
44 | import geotrellis.vectortile._
45 | import org.apache.spark._
46 | import org.apache.spark.rdd.RDD
47 |
48 | implicit val sc: SparkContext = new SparkContext(
49 | new SparkConf().setMaster("local[*]").setAppName("back-to-middle-ground")
50 | )
51 |
52 | /* Mocked as `empty` for the example */
53 | val tiles: RDD[(SpatialKey, VectorTile)] = sc.emptyRDD
54 |
55 | /* A VT layer converted back to the "middle ground", possibly for recollation */
56 | val backToMiddle: RDD[(SpatialKey, Iterable[Feature[Geometry, Map[String, Value]]])] =
57 | tiles.mapValues(_.toIterable)
58 |
59 | /* Close up Spark nicely */
60 | sc.stop()
61 | ```
62 |
63 | ### Clipping Functions
64 |
65 | GeoTrellis has a consistent `RDD[(K, V)]` pattern for handling grids of
66 | tiled data, where `K` is the grid index and `V` is the actual value type.
67 | Before `RDD[(SpatialKey, VectorTile)]` can be achieved, we need to convert
68 | our gridless `RDD[Feature[G, D]]` into such a grid, such that each Feature's
69 | `Geometry` is reasonably clipped to the size of an individual tile. Depending
70 | on which clipping function you choose (from the `vectorpipe.Clip` object, or
71 | even your own custom one) the shape of the clipped Geometry will vary. See
72 | our Scaladocs for more detail on the available options.
73 |
74 | Admittedly, we sometimes can't guarantee the validity of incoming vector data.
75 | Clipping is known to occasionally fail on large, complex multipolygons, so
76 | we skip over these failures, optionally logging them. Any logging
77 | framework can be used.
78 |
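For example (a sketch along the lines of the Usage page, with the input
features mocked as empty), gridding with the `Clip.byHybrid` strategy looks
like:

```
import geotrellis.proj4.WebMercator
import geotrellis.spark._
import geotrellis.spark.tiling._
import org.apache.spark._
import org.apache.spark.rdd.RDD
import vectorpipe._

implicit val sc: SparkContext = new SparkContext(
  new SparkConf().setMaster("local[*]").setAppName("clipping-example")
)

val layout: LayoutDefinition =
  ZoomedLayoutScheme.layoutForZoom(15, WebMercator.worldExtent, 512)

/* Mocked as `empty`; normally the output of e.g. `osm.features(...).geometries` */
val features: RDD[osm.OSMFeature] = sc.emptyRDD

/* Each Feature is clipped to every layout cell it touches */
val featGrid: RDD[(SpatialKey, Iterable[osm.OSMFeature])] =
  grid(Clip.byHybrid, logToStdout, layout, features)

sc.stop()
```
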
79 | ### Collation Functions
80 |
81 | Once clipped and gridded by `VectorPipe.toGrid`, we have a `RDD[(SpatialKey,
82 | Iterable[Feature[G, D]])]` that represents all the Geometry fragments
83 | present at each tiled location on the earth. This is the perfect shape to
84 | turn into a `VectorTile`. To do so, we need to choose a *Collator* function,
85 | which determines what VectorTile Layer each `Feature` should be placed into,
86 | and how (if at all) its corresponding metadata (the `D`) should be
87 | processed.
88 |
89 | Want to write your own Collator? The `Collate.generically` function will be
90 | of interest to you.
91 |
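Continuing in the same vein (again a sketch, with the gridded features mocked
as empty), collating with the built-in OSM collator is a single call:

```
import geotrellis.proj4.WebMercator
import geotrellis.spark._
import geotrellis.spark.tiling._
import geotrellis.vectortile.VectorTile
import org.apache.spark._
import org.apache.spark.rdd.RDD
import vectorpipe._

implicit val sc: SparkContext = new SparkContext(
  new SparkConf().setMaster("local[*]").setAppName("collation-example")
)

val layout: LayoutDefinition =
  ZoomedLayoutScheme.layoutForZoom(15, WebMercator.worldExtent, 512)

/* Mocked as `empty`; normally produced by the gridding step above */
val featGrid: RDD[(SpatialKey, Iterable[osm.OSMFeature])] = sc.emptyRDD

/* One VectorTile per grid cell, with Features placed into OSM-themed layers */
val tiles: RDD[(SpatialKey, VectorTile)] =
  vectortiles(Collate.byOSM, layout, featGrid)

sc.stop()
```
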
92 | ### Output Targets
93 |
94 | We can imagine two possible outputs for our completed grid of Vector Tiles:
95 |
96 | - A compressed GeoTrellis layer, saved to S3 [or
97 | elsewhere](https://geotrellis.readthedocs.io/en/latest/guide/tile-backends.html)
98 | - A dump of every tile as an `.mvt`, readable by other software
99 |
100 | Either option is simple, but outputting an `RDD[(SpatialKey, VectorTile)]`
101 | isn't actually the concern of VectorPipe - it can be handled entirely in
102 | client code via GeoTrellis functionality. An example of this can be found
103 | [in this repository](https://github.com/fosskers/vectorpipe-io).
104 |
--------------------------------------------------------------------------------
/src/main/tut/usage/osm.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: docs
3 | title: "Reading OpenStreetMap Data"
4 | section: "usage"
5 | ---
6 |
7 | ## From XML
8 |
9 | OSM XML files usually appear with the extension `.osm`. Since the data is all string-based,
10 | these files can be quite large compared to their PBF or ORC equivalents.
11 |
12 | ```tut:silent
13 | import org.apache.spark._
14 | import scala.util.{Success, Failure}
15 | import vectorpipe._
16 |
17 | implicit val sc: SparkContext = new SparkContext(
18 | new SparkConf().setMaster("local[*]").setAppName("xml-example")
19 | )
20 |
21 | val path: String = "/some/path/on/your/machine/foo.osm"
22 |
23 | osm.fromLocalXML(path) match {
24 | case Failure(e) => { } /* Parsing failed somehow... is the filepath correct? */
25 | case Success((ns,ws,rs)) => { } /* (RDD[(Long, Node)], RDD[(Long, Way)], RDD[(Long, Relation)]) */
26 | }
27 |
28 | sc.stop()
29 | ```
30 |
31 | ## From PBF
32 |
33 | For the time being, `.osm.pbf` files can be used by first converting them to `.orc`
34 | files using the [osm2orc](https://github.com/mojodna/osm2orc) tool, and then following
35 | VectorPipe's ORC instructions given below.
36 |
37 | ## From ORC
38 |
39 | You must first add an extra dependency to the `libraryDependencies` list in your `build.sbt`:
40 |
41 | ```
42 | "org.apache.spark" %% "spark-hive" % "2.2.0"
43 | ```
44 |
45 | And then we can read our OSM data in parallel via Spark. Notice the use of `SparkSession`
46 | instead of `SparkContext` here:
47 |
48 | ```tut:silent
49 | import org.apache.spark.sql._
50 | import scala.util.{Success, Failure}
51 | import vectorpipe._
52 |
53 | implicit val ss: SparkSession =
54 | SparkSession.builder.master("local[*]").appName("orc-example").enableHiveSupport.getOrCreate
55 |
56 | val path: String = "s3://bucket/key/foo.orc"
57 |
58 | osm.fromORC(path) match {
59 | case Failure(err) => { } /* Does the file exist? Do you have the right AWS credentials? */
60 | case Success((ns,ws,rs)) => { } /* (RDD[(Long, Node)], RDD[(Long, Way)], RDD[(Long, Relation)]) */
61 | }
62 |
63 | ss.stop()
64 | ```
65 |
66 | This approach will be particularly efficient when run on an EMR cluster, since
67 | EMR clusters have privileged access to S3.
68 |
--------------------------------------------------------------------------------
/src/main/tut/usage/usage.md:
--------------------------------------------------------------------------------
1 | # Usage
2 |
3 | Writing a small executable that uses VectorPipe is straightforward. The
4 | entire `main` isn't much more than:
5 |
6 | ```tut:silent
7 | import geotrellis.proj4.WebMercator
8 | import geotrellis.spark._
9 | import geotrellis.spark.tiling._
10 | import geotrellis.vectortile.VectorTile
11 | import org.apache.spark._
12 | import org.apache.spark.rdd.RDD
13 | import vectorpipe._ /* All types and functions. Also exposes the `osm` submodule used below. */
14 |
15 | /* Initialize a `SparkContext`, necessary for all `RDD` work */
16 | implicit val sc: SparkContext = new SparkContext(
17 | new SparkConf().setMaster("local[*]").setAppName("vectorpipe-example")
18 | )
19 |
20 | /* Describe the dimensions of your data area */
21 | val layout: LayoutDefinition =
22 | ZoomedLayoutScheme.layoutForZoom(15, WebMercator.worldExtent, 512)
23 |
24 | /* From an OSM data source, mocked as "empty" for this example */
25 | val (nodes, ways, relations): (RDD[(Long, osm.Node)], RDD[(Long, osm.Way)], RDD[(Long, osm.Relation)]) =
26 | (sc.emptyRDD, sc.emptyRDD, sc.emptyRDD)
27 |
28 | /* All OSM Elements lifted into GeoTrellis Geometry types.
29 | * Note: type OSMFeature = Feature[Geometry, ElementData]
30 | */
31 | val features: RDD[osm.OSMFeature] =
32 | osm.features(nodes, ways, relations).geometries
33 |
34 | /* All Geometries clipped to your `layout` grid */
35 | val featGrid: RDD[(SpatialKey, Iterable[osm.OSMFeature])] =
36 | grid(Clip.byHybrid, logToStdout, layout, features)
37 |
38 | /* A grid of Vector Tiles */
39 | val tiles: RDD[(SpatialKey, VectorTile)] =
40 | vectortiles(Collate.byOSM, layout, featGrid)
41 |
42 | /* Further processing here, writing to S3, etc. */
43 |
44 | /* Halt Spark nicely */
45 | sc.stop()
46 | ```
47 |
48 | A full example of processing some OSM XML [can be found
49 | here](https://github.com/fosskers/vectorpipe-io).
50 |
--------------------------------------------------------------------------------
/src/test/resources/.gitignore:
--------------------------------------------------------------------------------
1 | !*.orc
2 |
--------------------------------------------------------------------------------
/src/test/resources/isle-of-man-latest.osm.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/isle-of-man-latest.osm.orc
--------------------------------------------------------------------------------
/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=WARN, console
2 | log4j.appender.console=org.apache.log4j.ConsoleAppender
3 | log4j.appender.console.target=System.out
4 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
5 | # log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c: %m%n
6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
7 | log4j.logger.osmesa=DEBUG
--------------------------------------------------------------------------------
/src/test/resources/relation-110564.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-110564.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-110564.wkt:
--------------------------------------------------------------------------------
1 | MULTIPOLYGON (((-85.982597 34.392855, -85.979971 34.392887, -85.979885 34.392888, -85.976397 34.392888, -85.969464 34.39293, -85.96946 34.392309, -85.969469 34.390934, -85.969527 34.38977, -85.969542 34.389184, -85.978326 34.389014, -85.980136 34.388975, -85.982477 34.38893, -85.982539 34.390479, -85.982597 34.392855)), ((-85.991477 34.381899, -85.991142 34.3819, -85.987173 34.381924, -85.987092 34.389011, -85.983147 34.388938, -85.982574 34.388926, -85.982477 34.38893, -85.982563 34.387774, -85.982684 34.386311, -85.982673 34.385681, -85.982714 34.382035, -85.986874 34.381928, -85.987021 34.381777, -85.987076 34.38115, -85.98709 34.378146, -85.99014 34.378181, -85.990431 34.378189, -85.991498 34.378197, -85.991477 34.381899)), ((-85.969523 34.400126, -85.96934 34.400318, -85.969242 34.402769, -85.969203 34.403786, -85.968504 34.403761, -85.966925 34.403706, -85.966119 34.403681, -85.96504 34.403639, -85.965057 34.402798, -85.965119 34.400062, -85.969291 34.400119, -85.96938 34.396425, -85.972964 34.396491, -85.973719 34.396504, -85.97364 34.40018, -85.969523 34.400126)), ((-85.965119 34.400062, -85.962384 34.400035, -85.960656 34.400001, -85.960724 34.397709, -85.960747 34.397073, -85.96075 34.396963, -85.960772 34.396316, -85.963846 34.396282, -85.965205 34.396349, -85.965205 34.396921, -85.965119 34.400062)), ((-85.96938 34.396425, -85.965205 34.396349, -85.965206 34.394604, -85.965209 34.393635, -85.965212 34.392946, -85.967966 34.392944, -85.969464 34.39293, -85.96938 34.396425)))
2 |
--------------------------------------------------------------------------------
/src/test/resources/relation-191199.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-191199.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-191199.wkt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-191199.wkt
--------------------------------------------------------------------------------
/src/test/resources/relation-191204.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-191204.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-191204.wkt:
--------------------------------------------------------------------------------
1 | MULTIPOLYGON EMPTY
2 |
--------------------------------------------------------------------------------
/src/test/resources/relation-1949938.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-1949938.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-1949938.wkt:
--------------------------------------------------------------------------------
1 | POLYGON ((-71.342046 41.8333126, -71.3423551 41.8336345, -71.3424089 41.8338235, -71.3423936 41.8340698, -71.3420938 41.8353984, -71.3418786 41.8357421, -71.3412098 41.8367385, -71.3411176 41.8371451, -71.3410484 41.8376434, -71.3411483 41.8384852, -71.3413405 41.8388403, -71.3415711 41.839264, -71.342063 41.8398481, -71.342432 41.840312, -71.3427856 41.8407071, -71.3432852 41.8412511, -71.3434005 41.8414115, -71.3435465 41.8415718, -71.3435542 41.8416863, -71.3434927 41.8417951, -71.3434159 41.8418352, -71.3432156 41.8418625, -71.3429086 41.8418868, -71.3418709 41.8419669, -71.3410945 41.8421101, -71.3408101 41.842196, -71.3403951 41.8422189, -71.3402874 41.8422991, -71.3401875 41.842488, -71.3400107 41.8425739, -71.3396187 41.8426197, -71.3393266 41.8425281, -71.3391037 41.8425625, -71.3390038 41.8426255, -71.3387117 41.8426197, -71.3385657 41.8425567, -71.3383197 41.842425, -71.3375818 41.8420013, -71.3372128 41.8418639, -71.3371052 41.8417722, -71.3371206 41.8415432, -71.3372436 41.8413714, -71.3374204 41.8412511, -71.3374204 41.8410851, -71.3372897 41.8410106, -71.3369899 41.8410965, -71.336744 41.8411366, -71.3362751 41.8410736, -71.3359061 41.8409763, -71.3355372 41.8407415, -71.3352297 41.8404666, -71.335076 41.8402662, -71.335076 41.8400657, -71.3351221 41.839768, -71.3351913 41.839579, -71.3355372 41.8391438, -71.3358523 41.8390292, -71.3361367 41.8389548, -71.3365134 41.8389548, -71.3367824 41.8390063, -71.3369438 41.8390235, -71.3370975 41.8390063, -71.3372052 41.8389204, -71.3373051 41.8386685, -71.3373819 41.8384566, -71.3373973 41.8383936, -71.3375972 41.8380958, -71.337651 41.8378954, -71.3376663 41.8376434, -71.3375126 41.8375059, -71.337282 41.8374601, -71.3370745 41.8375117, -71.3367824 41.8377465, -71.3365979 41.8379698, -71.3363904 41.8381359, -71.3362136 41.8382676, -71.335983 41.8384165, -71.3358139 41.8384222, -71.3356525 41.8383649, -71.3355295 41.8382447, -71.3355141 41.8381359, -71.3356294 41.8379297, -71.3357524 41.8378553, -71.33586 41.8376949, -71.3358446 41.8374945, -71.3358446 41.8372482, -71.3359984 41.8370993, -71.3361214 41.8370134, -71.3365595 41.8369562, -71.3368746 41.8368416, -71.3370207 41.8367042, -71.3370053 41.8365037, -71.3367286 41.8364465, -71.3365979 41.8363434, -71.3365595 41.8362346, -71.3365518 41.8360456, -71.3366902 41.8359081, -71.336767 41.8357363, -71.3368746 41.8355588, -71.3370822 41.8353927, -71.337136 41.8352381, -71.3371667 41.8350491, -71.3371513 41.8348258, -71.3370591 41.8346654, -71.3370591 41.8345509, -71.3371206 41.8343905, -71.337259 41.834316, -71.3373281 41.8342072, -71.3373743 41.83415, -71.337159 41.8337376, -71.3371437 41.8336173, -71.3371975 41.83352, -71.3372974 41.8334856, -71.3375126 41.8335715, -71.3378969 41.8337033, -71.3380968 41.8336116, -71.3381583 41.8335314, -71.3382813 41.8333596, -71.3385426 41.833142, -71.3388501 41.8329244, -71.3391037 41.8327697, -71.3393266 41.8325406, -71.3395649 41.8324032, -71.3398263 41.8322715, -71.3400338 41.832197, -71.3402029 41.8322027, -71.3403874 41.8322829, -71.3415865 41.8330561, -71.3417633 41.8331592, -71.342046 41.8333126), (-71.3413636 41.8354672, -71.3414174 41.8353641, -71.3414942 41.835324, -71.3415942 41.8353984, -71.341525 41.8355073, -71.3414174 41.8356275, -71.3413482 41.8355817, -71.3413636 41.8354672), (-71.3411945 41.8359024, -71.3413482 41.8359425, -71.3413636 41.8360513, -71.3412559 41.8362346, -71.3411868 41.8364007, -71.3410715 41.8364407, -71.3410177 41.8363434, -71.3410484 41.8361372, -71.3411253 41.8359941, -71.3411945 41.8359024), (-71.3400799 
41.841944, -71.3398954 41.8417379, -71.3398186 41.8415145, -71.3398186 41.8414286, -71.3400338 41.8413084, -71.3402106 41.8413943, -71.3402951 41.8415432, -71.3403643 41.8416749, -71.3403413 41.841778, -71.3402106 41.8419383, -71.3400799 41.841944))
2 |
--------------------------------------------------------------------------------
/src/test/resources/relation-2554903.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-2554903.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-2580685.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-2580685.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-2580685.wkt:
--------------------------------------------------------------------------------
1 | MULTIPOLYGON (((-71.4589656 41.799364, -71.4585222 41.7994796, -71.4586724 41.7990153, -71.4587368 41.7985914, -71.458887 41.7982555, -71.4592947 41.7980635, -71.4597882 41.7978955, -71.4601423 41.797712, -71.4604534 41.7974156, -71.4602925 41.7971037, -71.4600564 41.7967998, -71.4600564 41.7965438, -71.4601423 41.7962559, -71.4599599 41.7959519, -71.4596082 41.7958057, -71.4588853 41.7942745, -71.4589886 41.7938896, -71.4597229 41.7935902, -71.4599065 41.7935474, -71.4609162 41.7953609, -71.4611801 41.7962419, -71.461008 41.7966012, -71.4614784 41.797448, -71.4620408 41.7979202, -71.4622242 41.7985172, -71.4622109 41.799042, -71.46188 41.7993469, -71.4614585 41.7994322, -71.4610194 41.7993555, -71.460354 41.7990476, -71.4595852 41.7990133, -71.4589656 41.799364), (-71.4614062 41.7986518, -71.4612426 41.7988664, -71.4609089 41.7989786, -71.4606145 41.7989932, -71.4602024 41.7989005, -71.4600126 41.7988176, -71.4599407 41.7987152, -71.4600061 41.7985591, -71.4602285 41.7984079, -71.4606865 41.798286, -71.461151 41.7982421, -71.46138 41.7984079, -71.4614062 41.7986518)), ((-71.4584952 41.7995265, -71.4589656 41.799364, -71.4590804 41.8006898, -71.4592704 41.8011716, -71.4593557 41.8017247, -71.4592489 41.8021513, -71.4588976 41.8024816, -71.4593012 41.8030966, -71.4596196 41.8037689, -71.4610194 41.8044617, -71.4619832 41.8048123, -71.4625684 41.8046413, -71.4626372 41.8042735, -71.4634404 41.8040854, -71.4640944 41.8041196, -71.4647713 41.8037774, -71.4648861 41.8032814, -71.464278 41.8029307, -71.4645304 41.8026912, -71.4652532 41.8026912, -71.4659072 41.8028281, -71.4662973 41.802734, -71.4670087 41.801682, -71.4669743 41.800724, -71.4671578 41.8007155, -71.4673185 41.8015708, -71.4667907 41.8029649, -71.4662744 41.8035636, -71.4654483 41.8041281, -71.4648631 41.8048038, -71.463151 41.805115, -71.462361 41.804966, -71.4614784 41.8049748, -71.4605375 41.8044702, -71.4597114 41.8042307, -71.4589656 41.8034524, -71.4586903 41.8028537, -71.458677 41.8027838, -71.4586418 41.802703, -71.458621 41.8024886, -71.4586706 41.8023715, -71.4587706 41.8021353, -71.459046 41.801853, -71.4587132 41.8009806, -71.4586788 41.8001424, -71.4584952 41.7995265)))
2 |
--------------------------------------------------------------------------------
/src/test/resources/relation-3080946.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-3080946.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-3105056.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-3105056.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-3105056.wkt:
--------------------------------------------------------------------------------
1 | POLYGON ((-71.3264288 41.5069067, -71.3263203 41.5069497, -71.3262056 41.5070241, -71.3261505 41.5070885, -71.326092 41.5071901, -71.3260291 41.5073693, -71.325963 41.5075585, -71.325922 41.5076731, -71.325883 41.5076947, -71.3257676 41.5076736, -71.3256132 41.5076263, -71.3255477 41.507603, -71.3254939 41.5075638, -71.3254589 41.5075179, -71.3254449 41.5074619, -71.3254597 41.5074, -71.3255173 41.5072859, -71.3255791 41.5071711, -71.3255736 41.5068152, -71.3255687 41.50674, -71.325611 41.5067513, -71.3256436 41.5067582, -71.3256705 41.5067586, -71.3256924 41.5067488, -71.3261404 41.50684, -71.3261668 41.5068473, -71.3261869 41.5068513, -71.3262246 41.5068589, -71.3262246 41.5068679, -71.3262354 41.506883, -71.3262579 41.5068973, -71.3262824 41.5069051, -71.3263218 41.506903, -71.3263529 41.5068893, -71.3264288 41.5069067))
2 |
--------------------------------------------------------------------------------
/src/test/resources/relation-333501.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-333501.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-333501.wkt:
--------------------------------------------------------------------------------
1 | POLYGON ((-71.3364289 41.4799458, -71.3366843 41.4790572, -71.3368865 41.478988, -71.3368439 41.4783601, -71.336663 41.4783761, -71.3365442 41.4781598, -71.3369364 41.4780481, -71.3381528 41.477708, -71.3383414 41.4776608, -71.3384978 41.4776906, -71.3386815 41.4778123, -71.339168 41.4781548, -71.3392847 41.4782367, -71.3387163 41.4788549, -71.3384134 41.4792098, -71.3385822 41.4793315, -71.3382744 41.4797708, -71.3382223 41.4798627, -71.3377854 41.4798081, -71.3377208 41.4796616, -71.3369141 41.4796889, -71.3369066 41.4798677, -71.3365343 41.4799322, -71.3364289 41.4799458), (-71.3370199 41.4782179, -71.3383813 41.4778507, -71.3389892 41.4782749, -71.3388056 41.4784946, -71.3382821 41.4791493, -71.3381738 41.4792934, -71.3383417 41.4794107, -71.3381156 41.4797654, -71.3378676 41.4797206, -71.3378061 41.4795872, -71.337443 41.4796016, -71.3374347 41.4795441, -71.3372111 41.4795585, -71.3372193 41.4796118, -71.3368911 41.4796057, -71.3368911 41.4795708, -71.3368378 41.479577, -71.3368234 41.4796241, -71.3367023 41.4796077, -71.3367844 41.4792856, -71.3370347 41.4792097, -71.3370199 41.4782179))
2 |
--------------------------------------------------------------------------------
/src/test/resources/relation-393502.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-393502.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-393502.wkt:
--------------------------------------------------------------------------------
1 | POLYGON ((-71.4562227 41.6485987, -71.4561617 41.6486401, -71.4560719 41.648621, -71.4559852 41.648499, -71.4560684 41.6483234, -71.4561891 41.6481462, -71.456312 41.6479512, -71.4561934 41.6478207, -71.4558499 41.6476399, -71.4556242 41.6475887, -71.4553623 41.6474994, -71.455278 41.6475307, -71.4554576 41.6477624, -71.4556047 41.6478445, -71.4554322 41.6479837, -71.4554172 41.6482545, -71.4551891 41.6484802, -71.4547765 41.6488669, -71.4544546 41.6490673, -71.45424 41.6491475, -71.4540314 41.649123, -71.4535844 41.6491252, -71.4529913 41.6493056, -71.4527857 41.6494481, -71.4527976 41.6497243, -71.4528423 41.6500026, -71.4529466 41.6503055, -71.4528852 41.650835, -71.4529712 41.6510973, -71.4529405 41.6516331, -71.4529913 41.6520714, -71.4528935 41.6522887, -71.4521329 41.6531935, -71.4518542 41.6534193, -71.4516462 41.653417, -71.4515167 41.6533123, -71.4513968 41.6533739, -71.4515094 41.6534801, -71.4514263 41.6535232, -71.4513257 41.653429, -71.4511044 41.6535533, -71.450994 41.6536187, -71.4510103 41.6537167, -71.4503749 41.6541715, -71.4501844 41.654423, -71.449888 41.6546385, -71.4497378 41.6547958, -71.4495944 41.6548359, -71.449477 41.6549516, -71.449357 41.6549751, -71.4492382 41.6549484, -71.4491915 41.6549919, -71.4491545 41.6553574, -71.4491894 41.6556444, -71.4488881 41.6563126, -71.4486687 41.6565095, -71.4485691 41.6565879, -71.448441 41.6566786, -71.4482743 41.6567632, -71.4475918 41.6569869, -71.4473949 41.6571615, -71.4472649 41.6574506, -71.4469975 41.6577686, -71.446965 41.6583328, -71.4467593 41.6585282, -71.4464288 41.6586792, -71.4463475 41.6586284, -71.446196 41.6587191, -71.4462188 41.6587466, -71.4457896 41.6589791, -71.4454817 41.6590878, -71.4453846 41.6592176, -71.4454959 41.6592606, -71.4453015 41.6593598, -71.4452371 41.6593168, -71.4449099 41.6596544, -71.4447699 41.6596156, -71.4447248 41.6598738, -71.4449286 41.6599069, -71.4449152 41.659957, -71.4447476 41.659935, -71.4446804 41.6602495, -71.4445897 41.6604577, -71.4445357 41.660459, -71.4445424 41.6608086, -71.4446044 41.6610053, -71.4445981 41.6610969, -71.444818 41.6610641, -71.4448737 41.6612109, -71.4449199 41.6612159, -71.4449065 41.6613427, -71.4449374 41.6614677, -71.4450876 41.661627, -71.4451607 41.6617069, -71.4451862 41.6617783, -71.4451318 41.6617983, -71.4452552 41.6620057, -71.4452773 41.6620019, -71.4452597 41.6622439, -71.4452016 41.6622652, -71.4450983 41.66267, -71.4449401 41.6629243, -71.4449405 41.6631424, -71.4451117 41.6631449, -71.4449562 41.6635917, -71.4448563 41.6640225, -71.4428492 41.664013, -71.4409667 41.6640028, -71.4410086 41.6635043, -71.4409704 41.6630459, -71.4406563 41.6627032, -71.4405231 41.6624939, -71.4405799 41.6621481, -71.4410582 41.6614303, -71.4410348 41.6611838, -71.4411192 41.6609324, -71.441648 41.6604529, -71.441765 41.6602931, -71.4417262 41.6601353, -71.441636 41.6599996, -71.441442 41.6597342, -71.4414015 41.6595652, -71.441427 41.6591274, -71.4415719 41.6588499, -71.4415607 41.6585328, -71.4420302 41.6575009, -71.4421048 41.6572404, -71.4421307 41.6567142, -71.4424257 41.6562739, -71.4432187 41.6557317, -71.4437089 41.65514, -71.4438941 41.6547956, -71.4441515 41.6545408, -71.4442445 41.6543352, -71.4446573 41.654132, -71.4458846 41.653978, -71.4465442 41.6537136, -71.4468603 41.6534841, -71.4470949 41.6532021, -71.4473923 41.6525943, -71.4478523 41.652443, -71.4483847 41.6523728, -71.448728 41.6521774, -71.4489359 41.6519299, -71.4489171 41.6518417, -71.4487246 41.651737, -71.4486033 41.6515712, -71.4486176 41.6513594, -71.4489403 41.6510273, 
-71.4492344 41.6508874, -71.449339 41.6508027, -71.4493249 41.6506844, -71.4492149 41.6503867, -71.4494166 41.6495338, -71.449869 41.6492269, -71.4503998 41.6486283, -71.4504804 41.6483137, -71.4503091 41.6480198, -71.450056 41.6477186, -71.4501647 41.6474817, -71.4502232 41.6471998, -71.4510559 41.6469478, -71.4512425 41.6469841, -71.451467 41.6469178, -71.4515282 41.646955, -71.4515222 41.6477833, -71.4516529 41.6478644, -71.4517596 41.6478784, -71.4518637 41.6478163, -71.4521768 41.6474964, -71.4525273 41.6474225, -71.4530853 41.6470536, -71.4531583 41.6470422, -71.4532339 41.6470851, -71.453267 41.647215, -71.4532408 41.6472952, -71.4532515 41.6473503, -71.4533246 41.6474295, -71.4533823 41.647455, -71.4533743 41.6475001, -71.4535238 41.6476389, -71.4535612 41.6477262, -71.4537196 41.6478033, -71.4540525 41.6477985, -71.4541949 41.6476977, -71.4543566 41.647678, -71.4546696 41.6474009, -71.4553603 41.6472376, -71.4557675 41.6473042, -71.4561148 41.6475294, -71.4563012 41.6476389, -71.4565258 41.6476734, -71.4565082 41.6480817, -71.4563099 41.6482686, -71.4561667 41.6485057, -71.4562227 41.6485987))
--------------------------------------------------------------------------------
/src/test/resources/relation-5448156.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-5448156.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-5448691.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-5448691.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-5612959.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-5612959.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-61315.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-61315.orc
--------------------------------------------------------------------------------
/src/test/resources/relation-61315.wkt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-61315.wkt
--------------------------------------------------------------------------------
/src/test/resources/relation-6710544.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geotrellis/vectorpipe/a68f4cfae070371b5a16d9094c9ec4d87b7978b1/src/test/resources/relation-6710544.orc
--------------------------------------------------------------------------------
/src/test/resources/view/cluster-view.html:
--------------------------------------------------------------------------------
(HTML source not captured in this dump; page title: "Point cluster view")
--------------------------------------------------------------------------------
/src/test/resources/view/layer-test.html:
--------------------------------------------------------------------------------
(HTML source not captured in this dump; page title: "Add a third party vector tile source")
--------------------------------------------------------------------------------
/src/test/scala/vectorpipe/MultiPolygonRelationReconstructionSpec.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe
2 |
3 | import java.sql.Timestamp
4 |
5 | import geotrellis.spark.store.kryo.KryoRegistrator
6 | import geotrellis.vector._
7 | import org.apache.spark.SparkConf
8 | import org.apache.spark.serializer.KryoSerializer
9 | import org.apache.spark.sql._
10 | import org.apache.spark.sql.functions._
11 | import org.scalatest.prop.{TableDrivenPropertyChecks, Tables}
12 | import org.scalatest.{Matchers, PropSpec}
13 | import vectorpipe.model.Member
14 | import org.locationtech.jts.io.WKTReader
15 | import org.locationtech.geomesa.spark.jts._
16 | import vectorpipe.relations.MultiPolygons.build
17 |
18 | import scala.io.Source
19 |
20 | case class Fixture(id: Int, members: DataFrame, wkt: Seq[String])
21 |
22 | trait SparkPoweredTables extends Tables {
23 | def wktReader = new WKTReader()
24 |
25 | val spark: SparkSession = SparkSession
26 | .builder
27 | .config(
28 | /* Settings compatible with both local and EMR execution */
29 | new SparkConf()
30 | .setAppName(getClass.getName)
31 | .setIfMissing("spark.master", "local[*]")
32 | .setIfMissing("spark.serializer", classOf[KryoSerializer].getName)
33 | .setIfMissing("spark.kryo.registrator", classOf[KryoRegistrator].getName)
34 | .setIfMissing("spark.sql.orc.impl", "native")
35 | ).getOrCreate()
36 | spark.withJTS
37 |
38 | def relation(relation: Int): Fixture = Fixture(relation, orc(s"relation-$relation.orc"), readWktFile(s"relation-$relation.wkt"))
39 |
40 | def orc(filename: String): DataFrame = spark.read.orc(getClass.getResource("/" + filename).getPath)
41 |
42 | // osm2pgsql -c -d rhode_island -j -K -l rhode-island-latest.osm.pbf
43 | // select ST_AsText(way) from planet_osm_polygon where osm_id=-333501;
44 |
45 | def readWktFile(filename: String): Seq[String] =
46 | try {
47 | Source.fromInputStream(getClass.getResourceAsStream("/" + filename)).getLines.toSeq match {
48 | case expected if expected.isEmpty =>
49 | Seq()
50 | case expected =>
51 | expected
52 | }
53 | } catch {
54 | case _: Exception => Seq("[not provided]")
55 | }
56 |
57 | def asGeoms(relations: DataFrame): Seq[Geometry] = {
58 | import relations.sparkSession.implicits._
59 |
60 | relations.select('geom).collect.map { row =>
61 | row.getAs[Geometry]("geom")
62 | }
63 | }
64 | }
65 |
66 | // osm2pgsql -c -d rhode_island -j -K -l rhode-island-latest.osm.pbf
67 | // select ST_AsText(way) from planet_osm_polygon where osm_id=-333501;
68 | // to debug / visually validate (geoms won't match exactly), load WKT into geojson.io from Meta → Load WKT String
69 | // https://www.openstreetmap.org/relation/64420
70 | // to find multipolygons: select osm_id from planet_osm_polygon where osm_id < 0 and ST_GeometryType(way) = 'ST_MultiPolygon' order by osm_id desc;
71 | class MultiPolygonRelationExamples extends SparkPoweredTables {
72 | def examples = Table("multipolygon relation",
73 | relation(333501), // unordered, single polygon with 1 hole
74 | relation(393502), // single polygon, multiple outer parts, no holes
75 | relation(1949938), // unordered, single polygon with multiple holes
76 | relation(3105056), // multiple unordered outer parts in varying directions
77 | relation(2580685), // multipolygon: 2 polygons, one with 1 hole
78 | relation(3080946), // multipolygon: many polygons, no holes
79 | relation(5448156), // multipolygon made up of parcels
80 | relation(5448691), // multipolygon made up of parcels
81 | relation(6710544), // complex multipolygon
82 | relation(191199), // 4 segments; 2 are components of another (thus duplicates)
83 | relation(61315), // incomplete member list (sourced from an extract of a neighboring state)
84 | relation(2554903), // boundary w/ admin_centre + label node members
85 | relation(191204), // no members
86 | /* relation(5612959), // pathological case for unioning --- removed test, too pathological (address later?) */
87 | relation(110564) // touching but not dissolve-able
88 | )
89 | }
90 |
91 | class MultiPolygonRelationReconstructionSpec extends PropSpec with TableDrivenPropertyChecks with Matchers {
92 | property("should match expected WKT") {
93 | new MultiPolygonRelationExamples {
94 | forAll(examples) { fixture =>
95 | import fixture.members.sparkSession.implicits._
96 |
97 | // TODO rewrite fixtures with additional columns added below
98 | val actual: Seq[Geometry] = asGeoms(fixture.members
99 | .withColumn("version", lit(1))
100 | .withColumn("minorVersion", lit(0))
101 | .withColumn("updated", lit(Timestamp.valueOf("2001-01-01 00:00:00")))
102 | .withColumn("validUntil", lit(Timestamp.valueOf("2002-01-01 00:00:00")))
103 | .withColumn("geometry", st_geomFromWKB('geom))
104 | .groupByKey { row =>
105 | (row.getAs[Long]("changeset"), row.getAs[Long]("id"), row.getAs[Integer]("version"), row.getAs[Integer]
106 | ("minorVersion"), row.getAs[Timestamp]("updated"), row.getAs[Timestamp]("validUntil"))
107 | }
108 | .mapGroups {
109 | case ((changeset, id, version, minorVersion, updated, validUntil), rows) =>
110 | val members = rows.toVector
111 | // TODO store Bytes as the type in fixtures
112 | val types = members.map { x => Member.typeFromString(x.getAs[String]("type")) }
113 | val roles = members.map(_.getAs[String]("role"))
114 | val geoms = members.map(_.getAs[Geometry]("geometry"))
115 | val mp = build(id, version, updated, types, roles, geoms).orNull
116 |
117 | (changeset, id, version, minorVersion, updated, validUntil, mp)
118 | }
119 | .toDF("changeset", "id", "version", "minorVersion", "updated", "validUntil", "geom")
120 | ).flatMap(Option.apply(_))
121 |
122 | val expected = fixture.wkt.map(wktReader.read)
123 |
124 | try {
125 | actual should ===(expected)
126 | } catch {
127 | case e: Throwable =>
128 | println(s"${fixture.id} actual:")
129 | actual.foreach(println)
130 | println(s"${fixture.id} expected:")
131 | fixture.wkt.foreach(println)
132 |
133 | throw e
134 | }
135 | }
136 | }
137 | }
138 | }
139 |
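Each fixture above pairs a `relation-<id>.orc` file of relation members with the expected geometry in `relation-<id>.wkt`. A minimal sketch for inspecting a single fixture outside the test harness, mirroring `orc` and `readWktFile` from the spec (the local SparkSession settings and the choice of relation 333501 are assumptions):

import org.apache.spark.sql.SparkSession
import org.locationtech.jts.io.WKTReader
import scala.io.Source

object InspectFixture {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.master("local[*]").appName("inspect-fixture").getOrCreate()

    // Relation members as stored in the ORC fixture (type, role, geom, changeset, id, ...).
    val members = spark.read.orc(getClass.getResource("/relation-333501.orc").getPath)
    members.printSchema()
    members.show(truncate = false)

    // Expected geometry from the matching WKT fixture.
    val wkt = Source.fromInputStream(getClass.getResourceAsStream("/relation-333501.wkt")).getLines.mkString
    println(new WKTReader().read(wkt))

    spark.stop()
  }
}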
--------------------------------------------------------------------------------
/src/test/scala/vectorpipe/ProcessOSMTest.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe
2 |
3 | import org.scalatest._
4 | import vectorpipe.{internal => ProcessOSM}
5 |
6 | class ProcessOSMTest extends FunSpec with TestEnvironment with Matchers {
7 | val orcFile = getClass.getResource("/isle-of-man-latest.osm.orc").getPath
8 |
9 | val elements = ss.read.orc(orcFile)
10 | val nodes = ProcessOSM.preprocessNodes(elements).cache
11 | val nodeGeoms = ProcessOSM.constructPointGeometries(nodes).cache
12 | val wayGeoms = ProcessOSM.reconstructWayGeometries(elements, nodes).cache
13 | val relationGeoms = ProcessOSM.reconstructRelationGeometries(elements, wayGeoms).cache
14 |
15 | it("parses isle of man nodes") {
16 | info(s"Nodes: ${nodeGeoms.count}")
17 | }
18 |
19 | it("parses isle of man ways") {
20 | info(s"Ways: ${wayGeoms.count}")
21 | }
22 |
23 | it("parses isle of man relations") {
24 | info(s"Relations: ${relationGeoms.count}")
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src/test/scala/vectorpipe/TestEnvironment.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Azavea
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package vectorpipe
18 |
19 | import org.apache.spark.serializer.KryoSerializer
20 | import org.apache.spark.sql.SparkSession
21 | import org.scalatest._
22 |
23 | object TestEnvironment {
24 | }
25 |
26 | /*
27 | * This set of traits handles the creation and deletion of test directories on the local fs and HDFS.
28 | * It uses commons-io in at least one case (recursive directory deletion).
29 | */
30 | trait TestEnvironment extends BeforeAndAfterAll { self: Suite with BeforeAndAfterAll =>
31 | implicit val ss: SparkSession = SparkSession.builder
32 | .master("local[*]")
33 | .appName("VectorPipe Test")
34 | .config("spark.ui.enabled", "false")
35 | .config("spark.default.parallelism","8")
36 | .config("spark.serializer", classOf[KryoSerializer].getName)
37 | .config("spark.kryo.registrationRequired", "false")
38 | .config("spark.kryoserializer.buffer.max", "500m")
39 | .config("spark.sql.orc.impl", "native")
40 | .getOrCreate()
41 |
42 | // get the name of the class which mixes in this trait
43 | val name = this.getClass.getName
44 |
45 | override def beforeAll() = {
46 | ss.sparkContext.setJobGroup(this.getClass.getName, "test")
47 | }
48 |
49 | override def afterAll() = {
50 | ss.sparkContext.clearJobGroup()
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/src/test/scala/vectorpipe/functions/osm/FunctionSpec.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.functions.osm
2 |
3 | import org.apache.spark.sql.Row
4 | import org.scalatest.{FunSpec, Matchers}
5 | import vectorpipe.TestEnvironment
6 |
7 | class FunctionSpec extends FunSpec with TestEnvironment with Matchers {
8 |
9 | import ss.implicits._
10 |
11 | describe("isArea") {
12 | it("marks 'area=*' appropriately") {
13 | Seq(
14 | Map("area" -> "yes") -> true,
15 | Map("area" -> "YES") -> true,
16 | Map("area" -> "YeS") -> true,
17 | Map("area" -> "1") -> true,
18 | Map("area" -> "true") -> true,
19 | Map("area" -> "True") -> true,
20 | Map("area" -> "no") -> false,
21 | Map("area" -> "no") -> false,
22 | Map("area" -> "0") -> false,
23 | Map("area" -> "something") -> false,
24 | Map("area" -> "yes;no") -> true,
25 | Map("area" -> "yes; no") -> true,
26 | Map("area" -> "yes ; no") -> true,
27 | Map("area" -> "yes ;no") -> true
28 | )
29 | .toDF("tags", "value")
30 | .where(isArea('tags) =!= 'value)
31 | .count should equal(0)
32 | }
33 |
34 | it("respects area-keys") {
35 | Seq(
36 | Map("office" -> "architect") -> true,
37 | Map("waterway" -> "riverbank") -> true,
38 | Map("waterway" -> "canal") -> false,
39 | Map("aeroway" -> "aerodrome;apron") -> true,
40 | Map("aeroway" -> "aerodrome ; runway") -> true,
41 | Map("aeroway" -> "taxiway;runway") -> false
42 | )
43 | .toDF("tags", "value")
44 | .where(isArea('tags) =!= 'value)
45 | .count should equal(0)
46 | }
47 | }
48 |
49 | describe("isMultiPolygon") {
50 | it("marks multipolygons and boundaries appropriately") {
51 | Seq(
52 | Map("type" -> "multipolygon") -> true,
53 | Map("type" -> "boundary") -> true,
54 | Map("type" -> "route") -> false,
55 | Map("type" -> "multipolygon;boundary") -> true,
56 | Map("type" -> "multipolygon ; boundary") -> true
57 | )
58 | .toDF("tags", "value")
59 | .where(isMultiPolygon('tags) =!= 'value)
60 | .count should equal(0)
61 | }
62 | }
63 |
64 | describe("isRoute") {
65 | it("marks routes appropriately") {
66 | Seq(
67 | Map("type" -> "multipolygon") -> false,
68 | Map("type" -> "boundary") -> false,
69 | Map("type" -> "route") -> true,
70 | Map("type" -> "route;boundary") -> true,
71 | Map("type" -> "route ; boundary") -> true
72 | )
73 | .toDF("tags", "value")
74 | .where(isRoute('tags) =!= 'value)
75 | .count should equal(0)
76 | }
77 | }
78 |
79 | describe("isBuilding") {
80 | it("marks buildings appropriately") {
81 | Seq(
82 | Map("building" -> "yes") -> true,
83 | Map("building" -> "no") -> false,
84 | Map("building" -> "false") -> false,
85 | Map("building" -> "farm") -> true,
86 | Map("building" -> "farm;apartments") -> true
87 | )
88 | .toDF("tags", "value")
89 | .where(isBuilding('tags) =!= 'value)
90 | .count should equal(0)
91 | }
92 | }
93 |
94 | describe("isPOI") {
95 | it("marks POIs appropriately") {
96 | Seq(
97 | Map("amenity" -> "cafe") -> true,
98 | Map("shop" -> "bakery") -> true,
99 | Map("craft" -> "bakery") -> true,
100 | Map("office" -> "architect") -> true,
101 | Map("leisure" -> "disc_golf_course") -> true,
102 | Map("aeroway" -> "aerodrome") -> true,
103 | Map("highway" -> "motorway") -> false,
104 | Map("shop" -> "bakery ; dairy") -> true
105 | )
106 | .toDF("tags", "value")
107 | .where(isPOI('tags) =!= 'value)
108 | .count should equal(0)
109 | }
110 | }
111 |
112 | describe("isRoad") {
113 | it("marks roads appropriately") {
114 | Seq(
115 | Map("highway" -> "motorway") -> true,
116 | Map("highway" -> "path") -> true,
117 | Map("highway" -> "path ;footway") -> true,
118 | Map("building" -> "yes") -> false
119 | )
120 | .toDF("tags", "value")
121 | .where(isRoad('tags) =!= 'value)
122 | .count should equal(0)
123 | }
124 | }
125 |
126 | describe("isCoastline") {
127 | it("marks coastline appropriately") {
128 | Seq(
129 | Map("natural" -> "coastline") -> true,
130 | Map("natural" -> "water") -> false,
131 | Map("natural" -> "coastline ; water") -> true
132 | )
133 | .toDF("tags", "value")
134 | .where(isCoastline('tags) =!= 'value)
135 | .count should equal(0)
136 | }
137 | }
138 |
139 | describe("isWaterway") {
140 | it("marks waterways appropriately") {
141 | Seq(
142 | Map("waterway" -> "river") -> true,
143 | Map("waterway" -> "riverbank") -> true,
144 | Map("waterway" -> "canal") -> true,
145 | Map("waterway" -> "stream") -> true,
146 | Map("waterway" -> "brook") -> true,
147 | Map("waterway" -> "drain") -> true,
148 | Map("waterway" -> "ditch") -> true,
149 | Map("waterway" -> "dam") -> true,
150 | Map("waterway" -> "weir") -> true,
151 | Map("waterway" -> "waterfall") -> true,
152 | Map("waterway" -> "pressurised") -> true,
153 | Map("waterway" -> "fuel") -> false,
154 | Map("waterway" -> "canal ; stream") -> true,
155 | Map("waterway" -> "canal ; fuel") -> true
156 | )
157 | .toDF("tags", "value")
158 | .where(isWaterway('tags) =!= 'value)
159 | .count should equal(0)
160 | }
161 | }
162 |
163 | describe("removeUninterestingTags") {
164 | it("drops uninteresting tags") {
165 | Seq(
166 | Map("building" -> "yes", "created_by" -> "JOSM")
167 | )
168 | .toDF("tags")
169 | .withColumn("tags", removeUninterestingTags('tags))
170 | .collect() should equal(Array(Row(Map("building" -> "yes"))))
171 | }
172 |
173 | it("drops uninteresting single tags") {
174 | Seq(
175 | Map("building" -> "yes", "colour" -> "grey"),
176 | Map("colour" -> "grey")
177 | )
178 | .toDF("tags")
179 | .withColumn("tags", removeUninterestingTags('tags))
180 | .collect() should equal(Array(Row(Map("building" -> "yes", "colour" -> "grey")), Row(Map.empty)))
181 | }
182 |
183 | it("drops uninteresting prefixed tags") {
184 | Seq(
185 | Map("highway" -> "motorway", "tiger:reviewed" -> "no"),
186 | Map("building" -> "yes", "CLC:something" -> "something")
187 | )
188 | .toDF("tags")
189 | .withColumn("tags", removeUninterestingTags('tags))
190 | .collect() should equal(Array(Row(Map("highway" -> "motorway")), Row(Map("building" -> "yes"))))
191 | }
192 |
193 | it("drops tags with invalid keys") {
194 | Seq(
195 | Map("highway" -> "motorway", "k=v" -> "value"),
196 | Map("building" -> "yes", "land use" -> "something")
197 | )
198 | .toDF("tags")
199 | .withColumn("tags", removeUninterestingTags('tags))
200 | .collect() should equal(Array(Row(Map("highway" -> "motorway")), Row(Map("building" -> "yes"))))
201 | }
202 | }
203 |
204 | describe("removeSemiInterestingTags") {
205 | it("drops semi-interesting tags") {
206 | Seq(
207 | Map("building" -> "yes", "source" -> "MassGIS")
208 | )
209 | .toDF("tags")
210 | .withColumn("tags", removeSemiInterestingTags('tags))
211 | .collect() should equal(Array(Row(Map("building" -> "yes"))))
212 | }
213 |
214 | it("drops semi-interesting prefixed tags") {
215 | Seq(
216 | Map("highway" -> "motorway", "source:geometry" -> "MassGIS")
217 | )
218 | .toDF("tags")
219 | .withColumn("tags", removeSemiInterestingTags('tags))
220 | .collect() should equal(Array(Row(Map("highway" -> "motorway"))))
221 | }
222 | }
223 |
224 | }
225 |
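A minimal sketch of using the predicates exercised above to split an OSM elements DataFrame by feature class; the `elements` DataFrame (with a `Map[String, String]` column named `tags`) is an assumption:

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.col
import vectorpipe.functions.osm._

// Returns (buildings, roads, waterways) filtered by their tags.
def splitByClass(elements: DataFrame): (DataFrame, DataFrame, DataFrame) =
  (elements.where(isBuilding(col("tags"))),
   elements.where(isRoad(col("tags"))),
   elements.where(isWaterway(col("tags"))))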
--------------------------------------------------------------------------------
/src/test/scala/vectorpipe/sources/AugmentedDiffSourceTest.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.sources
2 |
3 | import geotrellis.vector.Geometry
4 | import org.apache.spark.internal.Logging
5 | import org.scalatest.{FunSpec, Matchers}
6 | import vectorpipe.TestEnvironment
7 | import vectorpipe.model.ElementWithSequence
8 | import vectorpipe.util.RobustFeature
9 |
10 | class AugmentedDiffSourceSpec extends FunSpec with TestEnvironment with Matchers {
11 |
12 | import ss.implicits._
13 |
14 | describe("Timestamp to sequence conversion") {
15 | it("should provide a round trip for simple conversion") {
16 | AugmentedDiffSource.timestampToSequence(AugmentedDiffSource.sequenceToTimestamp(3700047)) should be (3700047)
17 | }
18 |
19 | it("should provide a round trip for column functions") {
20 | val df = ss.createDataset(Seq(3700047)).toDF
21 | (df.select(AugmentedDiffSource.sequenceToTimestamp('value) as 'time)
22 | .select(AugmentedDiffSource.timestampToSequence('time) as 'value)
23 | .first
24 | .getLong(0)) should be (3700047)
25 | }
26 | }
27 |
28 | }
29 |
30 | class LogErrors extends AugmentedDiffSourceErrorHandler with Logging {
31 | override def handle(sequence: Int, feature: RobustFeature[Geometry, ElementWithSequence]) = {
32 | logWarning(s"Error in sequence ${sequence} for feature with metadata: ${feature.data}")
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/src/test/scala/vectorpipe/vectortile/LayerTestPipeline.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.vectortile
2 |
3 | import geotrellis.vector._
4 | import org.apache.spark.sql.DataFrame
5 | import org.apache.spark.sql.functions
6 | import org.apache.spark.sql.functions.when
7 |
8 | import vectorpipe._
9 | import vectorpipe.functions.osm._
10 |
11 | case class LayerTestPipeline(geometryColumn: String, baseOutputURI: java.net.URI) extends Pipeline with Pipeline.Output {
12 | val layerMultiplicity = LayerNamesInColumn("layers")
13 |
14 | override def select(wayGeoms: DataFrame, targetZoom: Int, keyColumn: String): DataFrame = {
15 | import wayGeoms.sparkSession.implicits._
16 |
17 | wayGeoms
18 | .withColumn("layers", when(isBuilding('tags), "buildings").when(isRoad('tags), "roads"))
19 | .where(functions.not(functions.isnull('layers)))
20 | }
21 |
22 | override def clip(geom: Geometry, key: geotrellis.layer.SpatialKey, layoutLevel: geotrellis.layer.LayoutLevel): Geometry =
23 | Clipping.byLayoutCell(geom, key, layoutLevel)
24 | }
25 |
--------------------------------------------------------------------------------
/src/test/scala/vectorpipe/vectortile/PipelineSpec.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.vectortile
2 |
3 | import org.apache.spark.sql.functions
4 | import org.apache.spark.sql.functions.{isnull, lit}
5 | import org.locationtech.geomesa.spark.jts._
6 | import org.scalatest._
7 | import vectorpipe.{TestEnvironment, internal => vp, _}
8 |
9 | class PipelineSpec extends FunSpec with TestEnvironment with Matchers {
10 | import ss.implicits._
11 |
12 | ss.withJTS
13 | val orcFile = getClass.getResource("/isle-of-man-latest.osm.orc").getPath
14 | val df = ss.read.orc(orcFile)
15 |
16 | describe("Vectortile Pipelines") {
17 | val nodes = vp.preprocessNodes(df, None)
18 |
19 | val nodeGeoms = nodes
20 | .filter(functions.not(isnull('lat)))
21 | .withColumn("geometry", st_makePoint('lon, 'lat))
22 | .drop("lat", "lon")
23 | .withColumn("weight", lit(1))
24 | .cache
25 |
26 | val wayGeoms = vp.reconstructWayGeometries(df, nodes).cache
27 |
28 | it("should generate a single zoom level") {
29 | val pipeline = TestPipeline("geometry", new java.net.URI("file:///tmp/iom-tiles"), 16)
30 | VectorPipe(nodeGeoms, pipeline, VectorPipe.Options.forZoom(8))
31 | }
32 |
33 | it("should generate multiple zoom levels") {
34 | val pipeline = TestPipeline("geometry", new java.net.URI("file:///tmp/iom-tiles-pyramid"), 16)
35 | VectorPipe(nodeGeoms, pipeline, VectorPipe.Options.forZoomRange(6, 8))
36 | }
37 |
38 | it("should generate multiple layers") {
39 | val pipeline = LayerTestPipeline("geom", new java.net.URI("file:///tmp/iom-layers"))
40 | VectorPipe(wayGeoms, pipeline, VectorPipe.Options.forZoom(14))
41 | }
42 | }
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/src/test/scala/vectorpipe/vectortile/TestPipeline.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.vectortile
2 |
3 | import geotrellis.raster.RasterExtent
4 | import geotrellis.layer._
5 | import geotrellis.vector._
6 | import geotrellis.vectortile._
7 |
8 | import org.apache.spark.sql.{DataFrame, Row}
9 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
10 | import org.apache.spark.sql.functions
11 | import org.apache.spark.sql.functions.{array, col, explode, sum}
12 |
13 | import vectorpipe._
14 |
15 | case class Bin(x: Int, y: Int)
16 | object Bin {
17 | def apply(tup: (Int, Int)): Bin = Bin(tup._1, tup._2)
18 | }
19 |
20 | case class TestPipeline(geometryColumn: String, baseOutputURI: java.net.URI, gridResolution: Int) extends Pipeline with Pipeline.Output {
21 | val weightedCentroid = new WeightedCentroid
22 |
23 | val layerMultiplicity = SingleLayer("points")
24 |
25 | override def reduce(input: DataFrame, layoutLevel: LayoutLevel, keyColumn: String): DataFrame = {
26 | import input.sparkSession.implicits._
27 |
28 | val layout = layoutLevel.layout
29 | val binOfTile = functions.udf { (g: Geometry, key: GenericRowWithSchema) =>
30 | val pt = g.asInstanceOf[Point]
31 | val k = getSpatialKey(key)
32 | val re = RasterExtent(layout.mapTransform.keyToExtent(k), gridResolution, gridResolution)
33 | val c = pt.getCoordinate
34 | Bin(re.mapToGrid(c.x, c.y))
35 | }
36 |
37 | val st_geomToPoint = functions.udf { g: Geometry => g.asInstanceOf[Point] }
38 |
39 | input.withColumn(keyColumn, explode(col(keyColumn)))
40 | .withColumn("bin", binOfTile(col(geometryColumn), col(keyColumn)))
41 | .groupBy(col(keyColumn), col("bin"))
42 | .agg(sum('weight) as 'weight, weightedCentroid(st_geomToPoint(col(geometryColumn)), 'weight) as geometryColumn)
43 | .drop('bin)
44 | .withColumn(keyColumn, array(col(keyColumn)))
45 | }
46 |
47 | override def pack(row: Row, zoom: Int): VectorTileFeature[Point] = {
48 | val g = row.getAs[Point](geometryColumn)
49 | val weight = row.getAs[Long]("weight")
50 |
51 | Feature(g, Map( "weight" -> VInt64(weight) ))
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/src/test/scala/vectorpipe/vectortile/WeightedCentroid.scala:
--------------------------------------------------------------------------------
1 | package vectorpipe.vectortile
2 |
3 | import geotrellis.vector._
4 | import org.apache.spark.sql.Row
5 | import org.apache.spark.sql.expressions.MutableAggregationBuffer
6 | import org.apache.spark.sql.expressions.UserDefinedAggregateFunction
7 | import org.apache.spark.sql.jts.PointUDT
8 | import org.apache.spark.sql.types._
9 | import org.locationtech.jts.geom.{Coordinate, GeometryFactory}
10 |
11 | class WeightedCentroid extends UserDefinedAggregateFunction {
12 |
13 | // Define the schema of the input data
14 | override def inputSchema: org.apache.spark.sql.types.StructType =
15 | StructType(StructField("point", PointUDT) :: StructField("weight", DoubleType) :: Nil)
16 |
17 | // Define the types of the intermediate data structure
18 | override def bufferSchema: StructType = StructType(
19 | StructField("x", DoubleType) :: StructField("y", DoubleType) :: StructField("weight", DoubleType) :: Nil
20 | )
21 |
22 | // Define the return type
23 | override def dataType: DataType = PointUDT
24 |
25 | // Does the function return the same value for the same input?
26 | override def deterministic: Boolean = true
27 |
28 | // Create a new, empty buffer structure
29 | override def initialize(buffer: MutableAggregationBuffer): Unit = {
30 | buffer(0) = 0.0
31 | buffer(1) = 0.0
32 | buffer(2) = 0.0
33 | }
34 |
35 | // Combine a new input with an existing buffer
36 | override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
37 | val c = input.getAs[Point](0).getCoordinate
38 | val wt = input.getAs[Double](1)
39 | buffer(0) = buffer.getAs[Double](0) + c.x * wt
40 | buffer(1) = buffer.getAs[Double](1) + c.y * wt
41 | buffer(2) = buffer.getAs[Double](2) + wt
42 | }
43 |
44 | // Merge two intermediate buffers
45 | override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
46 | buffer1(0) = buffer1.getAs[Double](0) + buffer2.getAs[Double](0)
47 | buffer1(1) = buffer1.getAs[Double](1) + buffer2.getAs[Double](1)
48 | buffer1(2) = buffer1.getAs[Double](2) + buffer2.getAs[Double](2)
49 | }
50 |
51 | // Produce the final output from a Row encoded with the bufferSchema
52 | override def evaluate(buffer: Row): Any = {
53 | val wx = buffer.getDouble(0)
54 | val wy = buffer.getDouble(1)
55 | val wt = buffer.getDouble(2)
56 | (new GeometryFactory).createPoint(new Coordinate(wx/wt, wy/wt))
57 | }
58 | }
59 |
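A hypothetical usage sketch for this aggregator: collapsing weighted points to one centroid per key, as `TestPipeline.reduce` does above. The `points` DataFrame and its column names (`key`, `geom`, `weight`) are assumptions:

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.col
import vectorpipe.vectortile.WeightedCentroid

// One weighted centroid per group; "geom" holds JTS Points, "weight" is a Double.
def centroidsPerKey(points: DataFrame): DataFrame = {
  val weightedCentroid = new WeightedCentroid
  points
    .groupBy(col("key"))
    .agg(weightedCentroid(col("geom"), col("weight")) as "centroid")
}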
--------------------------------------------------------------------------------