├── .gitignore ├── .travis.yml ├── README.md ├── project ├── build.properties └── plugins.sbt └── src ├── main ├── resources │ ├── log4j.properties │ ├── monkey-patch.sh │ └── run.sh └── scala │ └── spark_etl │ ├── CLI.scala │ ├── CLIOps.scala │ ├── ConfigError.scala │ ├── ExtractReader.scala │ ├── LoadWriter.scala │ ├── model │ ├── Config.scala │ ├── Extract.scala │ ├── Load.scala │ ├── ParametrizedConstructor.scala │ ├── Persist.scala │ ├── RuntimeContext.scala │ └── Transform.scala │ ├── oracle │ ├── OracleLoadAppender.scala │ └── OracleValidator.scala │ ├── parquet │ ├── ParquetExtractReader.scala │ ├── ParquetLoadWriter.scala │ └── PathValidator.scala │ ├── parser │ └── Parser.scala │ └── util │ ├── BAHelper.scala │ ├── DefaultEnv.scala │ ├── DepTree.scala │ ├── Files.scala │ ├── SparkParser.scala │ ├── UriLoader.scala │ └── Validation.scala └── test ├── resources ├── log4j.properties ├── main-utils │ ├── config │ │ └── app.yaml │ └── spark │ │ ├── extract-check │ │ ├── client.sql │ │ ├── item.sql │ │ └── transaction.sql │ │ ├── transform-check │ │ ├── client_spending.sql │ │ ├── item_purchase.sql │ │ └── minor_purchase.sql │ │ └── transform │ │ ├── client_spending.sql │ │ ├── item_purchase.sql │ │ └── minor_purchase.sql ├── parquet-roundtrip │ ├── app.yaml │ └── transform │ │ └── t.sql ├── parquet │ ├── good │ │ ├── _temporary │ │ │ └── .placeholder │ │ └── year=2017 │ │ │ └── .placeholder │ └── with_backup_dir │ │ └── _backup │ │ └── .placeholder ├── runtime-ctx │ └── spark │ │ ├── extract-check │ │ ├── client.sql │ │ ├── item.sql │ │ └── transaction.sql │ │ ├── transform-check │ │ ├── client_spending.sql │ │ ├── item_purchase.sql │ │ └── minor_purchase.sql │ │ └── transform │ │ ├── client_all.sql │ │ ├── client_spending.sql │ │ ├── item_purchase.sql │ │ └── minor_purchase.sql └── uri-loader │ ├── with_bogus_includes │ ├── with_env_vars │ ├── with_includes │ └── without_env_vars └── scala └── spark_etl ├── CLIOpsSpec.scala ├── model ├── ConfigSpec.scala └── RuntimeContextSpec.scala ├── oracle └── OracleLoadAppenderSpec.scala ├── parquet ├── PathValidatorSpec.scala └── WriteReadRoundtripSpec.scala ├── parser └── ParserSpec.scala └── util ├── DeaultEnvSpec.scala ├── DepTreeSpec.scala ├── SparkParserSpec.scala ├── UriLoaderSpec.scala └── ValidationSpec.scala /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/.gitignore -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/.travis.yml -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/README.md -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.9 -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/project/plugins.sbt -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /src/main/resources/monkey-patch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/resources/monkey-patch.sh -------------------------------------------------------------------------------- /src/main/resources/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/resources/run.sh -------------------------------------------------------------------------------- /src/main/scala/spark_etl/CLI.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/CLI.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/CLIOps.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/CLIOps.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/ConfigError.scala: -------------------------------------------------------------------------------- 1 | package spark_etl 2 | 3 | case class ConfigError(msg: String, exc: Option[Throwable] = None) 4 | -------------------------------------------------------------------------------- /src/main/scala/spark_etl/ExtractReader.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/ExtractReader.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/LoadWriter.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/LoadWriter.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/model/Config.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/model/Config.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/model/Extract.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/model/Extract.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/model/Load.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/model/Load.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/model/ParametrizedConstructor.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/model/ParametrizedConstructor.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/model/Persist.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/model/Persist.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/model/RuntimeContext.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/model/RuntimeContext.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/model/Transform.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/model/Transform.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/oracle/OracleLoadAppender.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/oracle/OracleLoadAppender.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/oracle/OracleValidator.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/oracle/OracleValidator.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/parquet/ParquetExtractReader.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/parquet/ParquetExtractReader.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/parquet/ParquetLoadWriter.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/parquet/ParquetLoadWriter.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/parquet/PathValidator.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/parquet/PathValidator.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/parser/Parser.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/parser/Parser.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/util/BAHelper.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/util/BAHelper.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/util/DefaultEnv.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/util/DefaultEnv.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/util/DepTree.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/util/DepTree.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/util/Files.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/util/Files.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/util/SparkParser.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/util/SparkParser.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/util/UriLoader.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/util/UriLoader.scala -------------------------------------------------------------------------------- /src/main/scala/spark_etl/util/Validation.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/main/scala/spark_etl/util/Validation.scala -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/log4j.properties -------------------------------------------------------------------------------- /src/test/resources/main-utils/config/app.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/main-utils/config/app.yaml -------------------------------------------------------------------------------- /src/test/resources/main-utils/spark/extract-check/client.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/main-utils/spark/extract-check/client.sql -------------------------------------------------------------------------------- /src/test/resources/main-utils/spark/extract-check/item.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/main-utils/spark/extract-check/item.sql -------------------------------------------------------------------------------- /src/test/resources/main-utils/spark/extract-check/transaction.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/main-utils/spark/extract-check/transaction.sql -------------------------------------------------------------------------------- /src/test/resources/main-utils/spark/transform-check/client_spending.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/main-utils/spark/transform-check/client_spending.sql -------------------------------------------------------------------------------- /src/test/resources/main-utils/spark/transform-check/item_purchase.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/main-utils/spark/transform-check/item_purchase.sql -------------------------------------------------------------------------------- /src/test/resources/main-utils/spark/transform-check/minor_purchase.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/main-utils/spark/transform-check/minor_purchase.sql -------------------------------------------------------------------------------- /src/test/resources/main-utils/spark/transform/client_spending.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/main-utils/spark/transform/client_spending.sql -------------------------------------------------------------------------------- /src/test/resources/main-utils/spark/transform/item_purchase.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/main-utils/spark/transform/item_purchase.sql -------------------------------------------------------------------------------- /src/test/resources/main-utils/spark/transform/minor_purchase.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/main-utils/spark/transform/minor_purchase.sql -------------------------------------------------------------------------------- /src/test/resources/parquet-roundtrip/app.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/parquet-roundtrip/app.yaml -------------------------------------------------------------------------------- /src/test/resources/parquet-roundtrip/transform/t.sql: -------------------------------------------------------------------------------- 1 | select s from x -------------------------------------------------------------------------------- /src/test/resources/parquet/good/_temporary/.placeholder: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/test/resources/parquet/good/year=2017/.placeholder: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/test/resources/parquet/with_backup_dir/_backup/.placeholder: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/test/resources/runtime-ctx/spark/extract-check/client.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/runtime-ctx/spark/extract-check/client.sql -------------------------------------------------------------------------------- /src/test/resources/runtime-ctx/spark/extract-check/item.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/runtime-ctx/spark/extract-check/item.sql -------------------------------------------------------------------------------- /src/test/resources/runtime-ctx/spark/extract-check/transaction.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/runtime-ctx/spark/extract-check/transaction.sql -------------------------------------------------------------------------------- /src/test/resources/runtime-ctx/spark/transform-check/client_spending.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/runtime-ctx/spark/transform-check/client_spending.sql -------------------------------------------------------------------------------- /src/test/resources/runtime-ctx/spark/transform-check/item_purchase.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/runtime-ctx/spark/transform-check/item_purchase.sql -------------------------------------------------------------------------------- /src/test/resources/runtime-ctx/spark/transform-check/minor_purchase.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/runtime-ctx/spark/transform-check/minor_purchase.sql -------------------------------------------------------------------------------- /src/test/resources/runtime-ctx/spark/transform/client_all.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/runtime-ctx/spark/transform/client_all.sql -------------------------------------------------------------------------------- /src/test/resources/runtime-ctx/spark/transform/client_spending.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/runtime-ctx/spark/transform/client_spending.sql -------------------------------------------------------------------------------- /src/test/resources/runtime-ctx/spark/transform/item_purchase.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/runtime-ctx/spark/transform/item_purchase.sql -------------------------------------------------------------------------------- /src/test/resources/runtime-ctx/spark/transform/minor_purchase.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/runtime-ctx/spark/transform/minor_purchase.sql -------------------------------------------------------------------------------- /src/test/resources/uri-loader/with_bogus_includes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/uri-loader/with_bogus_includes -------------------------------------------------------------------------------- /src/test/resources/uri-loader/with_env_vars: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/uri-loader/with_env_vars -------------------------------------------------------------------------------- /src/test/resources/uri-loader/with_includes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/resources/uri-loader/with_includes -------------------------------------------------------------------------------- /src/test/resources/uri-loader/without_env_vars: -------------------------------------------------------------------------------- 1 | hello there 2 | 123 3 | -------------------------------------------------------------------------------- /src/test/scala/spark_etl/CLIOpsSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/scala/spark_etl/CLIOpsSpec.scala -------------------------------------------------------------------------------- /src/test/scala/spark_etl/model/ConfigSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/scala/spark_etl/model/ConfigSpec.scala -------------------------------------------------------------------------------- /src/test/scala/spark_etl/model/RuntimeContextSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/scala/spark_etl/model/RuntimeContextSpec.scala -------------------------------------------------------------------------------- /src/test/scala/spark_etl/oracle/OracleLoadAppenderSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/scala/spark_etl/oracle/OracleLoadAppenderSpec.scala -------------------------------------------------------------------------------- /src/test/scala/spark_etl/parquet/PathValidatorSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/scala/spark_etl/parquet/PathValidatorSpec.scala -------------------------------------------------------------------------------- /src/test/scala/spark_etl/parquet/WriteReadRoundtripSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/scala/spark_etl/parquet/WriteReadRoundtripSpec.scala -------------------------------------------------------------------------------- /src/test/scala/spark_etl/parser/ParserSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/scala/spark_etl/parser/ParserSpec.scala -------------------------------------------------------------------------------- /src/test/scala/spark_etl/util/DeaultEnvSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/scala/spark_etl/util/DeaultEnvSpec.scala -------------------------------------------------------------------------------- /src/test/scala/spark_etl/util/DepTreeSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/scala/spark_etl/util/DepTreeSpec.scala -------------------------------------------------------------------------------- /src/test/scala/spark_etl/util/SparkParserSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/scala/spark_etl/util/SparkParserSpec.scala -------------------------------------------------------------------------------- /src/test/scala/spark_etl/util/UriLoaderSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/scala/spark_etl/util/UriLoaderSpec.scala -------------------------------------------------------------------------------- /src/test/scala/spark_etl/util/ValidationSpec.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konrads/spark-etl/HEAD/src/test/scala/spark_etl/util/ValidationSpec.scala --------------------------------------------------------------------------------