├── docker ├── .gitignore ├── .gitattributes ├── conf │ ├── system.yml │ └── history-server.yml ├── libexec │ ├── flowman-init.sh │ └── flowman-vars.sh ├── bin │ └── entrypoint.sh └── docker-compose.yml ├── tests ├── emr │ ├── .gitignore │ ├── flow │ │ ├── target │ │ │ ├── database.yml │ │ │ ├── aggregates.yml │ │ │ ├── measurements.yml │ │ │ ├── documentation.yml │ │ │ └── stations.yml │ │ ├── model │ │ │ ├── stations.yml │ │ │ ├── measurements-raw.yml │ │ │ ├── stations-raw.yml │ │ │ └── aggregates.yml │ │ ├── project.yml │ │ ├── config │ │ │ ├── environment.yml │ │ │ └── aws.yml │ │ ├── documentation.yml │ │ └── mapping │ │ │ └── stations.yml │ └── deployment.yml ├── synapse │ ├── .gitignore │ ├── flow │ │ ├── project.yml │ │ ├── config │ │ │ ├── config.yml │ │ │ ├── connections.yml │ │ │ └── environment.yml │ │ ├── target │ │ │ ├── stations.yml │ │ │ ├── documentation.yml │ │ │ ├── aggregates.yml │ │ │ └── measurements.yml │ │ ├── model │ │ │ ├── stations-raw.yml │ │ │ ├── measurements-raw.yml │ │ │ └── stations.yml │ │ ├── documentation.yml │ │ └── mapping │ │ │ └── stations.yml │ ├── README.md │ └── deployment.yml ├── tutorial │ └── .gitignore ├── demo-weather │ └── .gitignore ├── quickstart │ ├── .gitignore │ └── docker-compose.yml ├── archetype-assembly │ ├── .gitignore │ └── run.sh ├── archetype-quickstart │ ├── .gitignore │ └── run.sh ├── mariadb │ ├── migrations │ │ ├── project.yml │ │ └── module │ │ │ ├── connections.yml │ │ │ ├── job.yml │ │ │ └── config.yml │ ├── weather │ │ ├── target │ │ │ ├── aggregates.yml │ │ │ ├── measurements.yml │ │ │ └── stations.yml │ │ ├── config │ │ │ ├── connections.yml │ │ │ ├── environment.yml │ │ │ └── aws.yml │ │ ├── model │ │ │ ├── stations.yml │ │ │ ├── measurements-raw.yml │ │ │ ├── measurements.yml │ │ │ └── stations-raw.yml │ │ ├── mapping │ │ │ ├── aggregates.yml │ │ │ └── stations.yml │ │ ├── job │ │ │ └── main.yml │ │ └── project.yml │ ├── README.md │ ├── bin │ │ └── run-tests.sh │ ├── run.sh │ └── conf │ │ └── default-namespace.yml ├── mysql │ ├── migrations │ │ ├── project.yml │ │ └── module │ │ │ ├── connections.yml │ │ │ ├── job.yml │ │ │ └── config.yml │ ├── weather │ │ ├── target │ │ │ ├── aggregates.yml │ │ │ ├── measurements.yml │ │ │ └── stations.yml │ │ ├── config │ │ │ ├── connections.yml │ │ │ ├── environment.yml │ │ │ └── aws.yml │ │ ├── model │ │ │ ├── stations.yml │ │ │ ├── measurements-raw.yml │ │ │ ├── measurements.yml │ │ │ └── stations-raw.yml │ │ ├── mapping │ │ │ ├── aggregates.yml │ │ │ └── stations.yml │ │ ├── job │ │ │ └── main.yml │ │ └── project.yml │ ├── README.md │ ├── bin │ │ └── run-tests.sh │ ├── docker │ │ └── mysql │ │ │ └── my.cnf │ ├── run.sh │ └── conf │ │ └── default-namespace.yml ├── oracle │ ├── migrations │ │ ├── project.yml │ │ └── module │ │ │ ├── job.yml │ │ │ ├── connections.yml │ │ │ └── config.yml │ ├── weather │ │ ├── project.yml │ │ ├── target │ │ │ ├── aggregates.yml │ │ │ ├── measurements.yml │ │ │ └── stations.yml │ │ ├── config │ │ │ └── connections.yml │ │ ├── model │ │ │ ├── stations.yml │ │ │ ├── measurements.yml │ │ │ ├── stations-raw.yml │ │ │ └── measurements-raw.yml │ │ ├── job │ │ │ └── main.yml │ │ └── mapping │ │ │ └── stations.yml │ ├── bin │ │ └── run-tests.sh │ └── run.sh ├── sqlserver │ ├── docker │ │ ├── entrypoint.sh │ │ └── setup-db.sh │ ├── migrations │ │ ├── project.yml │ │ └── module │ │ │ ├── job.yml │ │ │ ├── connections.yml │ │ │ └── test-timestamp.yml │ ├── twitter │ │ ├── job │ │ │ └── job.yml │ │ ├── project.yml │ │ ├── config │ │ │ └── connections.yml │ │ 
├── mapping │ │ │ └── tweets.yml │ │ └── model │ │ │ └── tweets-raw.yml │ ├── weather │ │ ├── target │ │ │ ├── aggregates.yml │ │ │ ├── measurements.yml │ │ │ └── stations.yml │ │ ├── project.yml │ │ ├── config │ │ │ ├── connections.yml │ │ │ ├── environment.yml │ │ │ └── config.yml │ │ ├── model │ │ │ ├── stations.yml │ │ │ ├── measurements.yml │ │ │ ├── stations-raw.yml │ │ │ └── measurements-raw.yml │ │ ├── job │ │ │ └── main.yml │ │ └── mapping │ │ │ └── stations.yml │ ├── README.md │ ├── bin │ │ └── run-tests.sh │ └── run.sh └── postgresql │ ├── migrations │ ├── project.yml │ └── module │ │ ├── connections.yml │ │ ├── job.yml │ │ └── config.yml │ ├── weather │ ├── project.yml │ ├── target │ │ ├── aggregates.yml │ │ ├── measurements.yml │ │ └── stations.yml │ ├── config │ │ └── connections.yml │ ├── model │ │ ├── stations.yml │ │ ├── measurements.yml │ │ ├── stations-raw.yml │ │ └── measurements-raw.yml │ ├── job │ │ └── main.yml │ └── mapping │ │ └── stations.yml │ ├── README.md │ ├── bin │ └── run-tests.sh │ ├── run.sh │ └── conf │ └── default-namespace.yml ├── flowman-common └── .gitignore ├── flowman-core ├── .gitignore └── src │ ├── test │ └── resources │ │ └── com │ │ └── dimajix │ │ └── flowman │ │ └── some-test-resource.txt │ └── main │ ├── resources │ └── META-INF │ │ └── services │ │ ├── com.dimajix.flowman.spi.LogFilter │ │ ├── com.dimajix.flowman.spi.PluginListener │ │ ├── com.dimajix.flowman.spi.ClassAnnotationHandler │ │ ├── com.dimajix.flowman.spi.ColumnCheckExecutor │ │ ├── com.dimajix.flowman.spi.SchemaCheckExecutor │ │ └── com.dimajix.flowman.jdbc.SqlDialect │ ├── scala │ └── com │ │ └── dimajix │ │ └── flowman │ │ ├── util │ │ └── FixedPoint.scala │ │ └── metric │ │ └── GaugeMetric.scala │ └── java │ └── com │ └── dimajix │ └── flowman │ └── annotation │ └── TemplateObject.java ├── flowman-dist ├── .gitignore ├── conf │ └── system.yml.template ├── bin │ ├── flowshell.cmd │ ├── flowshell2.cmd │ ├── flowexec │ ├── flowshell │ ├── flowman-schema │ ├── flowrexec │ ├── flowrshell │ ├── flowman-kernel │ └── flowman-server └── libexec │ └── flowman-launch.cmd ├── flowman-parent └── .gitignore ├── flowman-server ├── .gitignore └── src │ └── main │ └── properties │ └── flowman-server.properties ├── flowman-spec ├── .gitignore └── src │ ├── test │ └── resources │ │ ├── data │ │ ├── data_1.csv │ │ ├── expected │ │ │ ├── data_3.txt │ │ │ ├── data_1.txt │ │ │ └── data_2.txt │ │ └── actual │ │ │ ├── data_2.txt │ │ │ └── data_1.txt │ │ ├── project-a │ │ ├── relation │ │ │ ├── some-view.sql │ │ │ └── some-view.yml │ │ ├── config │ │ │ └── env.yml │ │ └── project.yml │ │ ├── project-b │ │ ├── relation │ │ │ ├── some-view.sql │ │ │ └── some-view.yml │ │ ├── config │ │ │ └── env.yml │ │ └── project.yaml │ │ ├── project-c │ │ ├── relation │ │ │ ├── some-view.sql │ │ │ └── some-view.yml │ │ ├── config │ │ │ └── env.yml │ │ └── project.yml │ │ ├── project │ │ ├── relation │ │ │ ├── some-view.sql │ │ │ └── some-view.yml │ │ ├── config │ │ │ └── env.yml │ │ └── TestProject.yml │ │ ├── project-imports │ │ ├── relation │ │ │ ├── some-view.sql │ │ │ └── some-view.yml │ │ ├── config │ │ │ └── env.yml │ │ └── project.yml │ │ └── project-with-dups │ │ ├── relation │ │ ├── some-view.sql │ │ ├── some-view.yml │ │ └── some-view-dup.yml │ │ ├── config │ │ └── env.yml │ │ └── TestProject.yml │ └── main │ ├── resources │ ├── com │ │ └── dimajix │ │ │ └── flowman │ │ │ ├── report │ │ │ └── text │ │ │ │ ├── assertion-start.vtl │ │ │ │ ├── target-start.vtl │ │ │ │ ├── target-finish.vtl │ │ │ │ ├── 
assertion-finish.vtl │ │ │ │ ├── job-start.vtl │ │ │ │ ├── lifecycle-finish.vtl │ │ │ │ ├── lifecycle-start.vtl │ │ │ │ └── job-finish.vtl │ │ │ └── documentation │ │ │ ├── html │ │ │ └── template.properties │ │ │ ├── text │ │ │ └── template.properties │ │ │ └── html+css │ │ │ └── template.properties │ └── META-INF │ │ └── services │ │ ├── com.dimajix.flowman.spi.ModuleReader │ │ ├── com.dimajix.flowman.spi.NamespaceReader │ │ ├── com.dimajix.flowman.spi.ProjectReader │ │ ├── com.dimajix.flowman.spi.DocumenterReader │ │ └── com.dimajix.flowman.spi.PluginListener │ └── jackson-2.6 │ └── com │ └── fasterxml │ └── jackson │ └── databind │ └── introspect │ └── AnnotatedClassResolver.java ├── flowman-tools ├── .gitignore └── src │ └── main │ ├── resources │ └── META-INF │ │ └── flowman │ │ └── conf │ │ ├── system.yml.template │ │ └── default-namespace.yml.template │ └── properties │ └── flowman-tools.properties ├── flowman-common-java ├── .gitignore └── src │ ├── test │ └── resources │ │ └── com │ │ └── dimajix │ │ └── flowman │ │ └── globber-test │ │ ├── file-1.txt │ │ ├── excludedir │ │ └── file-1.txt │ │ ├── subdir_1 │ │ └── subdir_2 │ │ │ ├── exclude-1.txt │ │ │ └── file-1.txt │ │ ├── subdir_2 │ │ └── exclude │ │ │ └── file-1.txt │ │ └── .flowman-ignore │ └── main │ └── resources │ └── com │ └── dimajix │ └── flowman │ ├── flowman.properties │ └── flowman-logo.txt ├── flowman-kernel-api ├── .gitignore └── src │ └── main │ └── proto │ ├── documentation.proto │ ├── exception.proto │ └── logging.proto ├── flowman-plugins ├── aws │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ ├── META-INF │ │ └── services │ │ │ └── com.dimajix.flowman.spi.LogFilter │ │ └── plugin.yml ├── avro │ ├── .gitignore │ └── src │ │ ├── main │ │ └── resources │ │ │ └── plugin.yml │ │ └── test │ │ └── resources │ │ └── schema │ │ └── AvroSchema.json ├── azure │ ├── .gitignore │ └── src │ │ └── main │ │ ├── resources │ │ └── plugin.yml │ │ └── scala │ │ └── com │ │ └── microsoft │ │ └── azure │ │ └── synapse │ │ └── tokenlibrary │ │ └── TokenLibrary.scala ├── delta │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ ├── META-INF │ │ └── services │ │ │ └── com.dimajix.flowman.spi.SparkExtension │ │ └── plugin.yml ├── hbase │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ └── plugin.yml ├── impala │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ └── plugin.yml ├── json │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ └── plugin.yml ├── kafka │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ └── plugin.yml ├── mariadb │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ └── plugin.yml ├── mysql │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ └── plugin.yml ├── openapi │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ └── plugin.yml ├── oracle │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ └── plugin.yml ├── parent │ └── .gitignore ├── sftp │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ └── plugin.yml ├── swagger │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ └── plugin.yml ├── trino │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ ├── META-INF │ │ └── services │ │ │ └── com.dimajix.flowman.jdbc.SqlDialect │ │ └── plugin.yml ├── mssqlserver │ ├── .gitignore │ └── src │ │ └── main │ │ └── resources │ │ └── plugin.yml └── postgresql │ ├── .gitignore │ └── src │ └── main │ └── resources │ └── plugin.yml ├── flowman-testing ├── .gitignore └── src │ └── test │ └── 
resources │ └── flows │ ├── config │ ├── environment.yml │ └── test.yml │ ├── project.yml │ └── job │ └── main.yml ├── flowman-yaml-schema ├── .gitignore └── src │ └── assembly │ └── assembly.xml ├── docs ├── spec │ ├── target │ │ ├── getFile.md │ │ ├── putFile.md │ │ ├── template.md │ │ ├── copyFile.md │ │ ├── deleteFile.md │ │ ├── count.md │ │ ├── empty.md │ │ └── blackhole.md │ ├── measure │ │ └── sql.md │ ├── connection │ │ ├── ssh.md │ │ └── index.md │ ├── dataset │ │ ├── file.md │ │ ├── index.md │ │ ├── mapping.md │ │ └── values.md │ ├── metric │ │ ├── index.md │ │ └── console.md │ ├── schema │ │ ├── index.md │ │ ├── spark.md │ │ └── mapping.md │ ├── assertion │ │ ├── index.md │ │ └── unique-key.md │ ├── hooks │ │ └── index.md │ ├── template │ │ ├── measure.md │ │ ├── assertion.md │ │ ├── schema.md │ │ └── dataset.md │ ├── relation │ │ └── template.md │ └── profiles.md ├── .gitignore ├── _static │ ├── flowman.js │ └── fonts │ │ └── RobotoSlab │ │ ├── RobotoSlab-Black.ttf │ │ ├── RobotoSlab-Bold.ttf │ │ ├── RobotoSlab-Light.ttf │ │ ├── RobotoSlab-Medium.ttf │ │ ├── RobotoSlab-Thin.ttf │ │ ├── RobotoSlab-Regular.ttf │ │ ├── RobotoSlab-SemiBold.ttf │ │ ├── RobotoSlab-ExtraBold.ttf │ │ ├── RobotoSlab-ExtraLight.ttf │ │ └── RobotoSlab-VariableFont_wght.ttf ├── images │ ├── console-01.png │ ├── flowman-logo.png │ ├── flowman-kernel.png │ ├── history-server.png │ ├── flowman-entities.png │ ├── flowman-lifecycle.png │ ├── flowman-overview.png │ ├── flowman-workflow.png │ └── flowman-documentation.png ├── cli │ └── flowexec │ │ └── misc.md ├── plugins │ ├── kafka.md │ ├── index.md │ ├── json.md │ ├── avro.md │ ├── openapi.md │ ├── swagger.md │ ├── azure.md │ ├── aws.md │ └── delta.md ├── spelling.txt ├── documenting │ └── targets.md ├── pyproject.toml ├── concepts │ └── index.md └── environment.yaml ├── examples ├── sftp-upload │ ├── data │ │ └── example.csv │ ├── project.yml │ ├── config │ │ ├── connections.yml │ │ └── environment.yml │ ├── README.md │ └── job │ │ └── main.yml └── weather │ ├── .gitignore │ ├── config │ ├── environment.yml │ └── aws.yml │ ├── target │ ├── aggregates.yml │ ├── documentation.yml │ ├── measurements.yml │ └── stations.yml │ ├── mapping │ └── stations.yml │ ├── README.md │ ├── model │ ├── stations.yml │ ├── measurements-raw.yml │ └── stations-raw.yml │ ├── project.yml │ └── documentation.yml ├── flowman-common-jersey └── .gitignore ├── flowman-kernel-common ├── .gitignore └── src │ └── main │ └── java │ └── com │ └── dimajix │ └── flowman │ ├── grpc │ ├── KernelConfiguration.java │ ├── GrpcClient.java │ ├── RemoteException.java │ └── GrpcService.java │ └── kernel │ └── model │ ├── FieldType.java │ └── Operation.java ├── flowman-kernel-server ├── .gitignore └── src │ ├── main │ ├── properties │ │ └── flowman-kernel-server.properties │ └── scala │ │ └── com │ │ └── dimajix │ │ └── flowman │ │ └── kernel │ │ ├── service │ │ └── KernelService.scala │ │ └── grpc │ │ └── ClientWatcher.scala │ └── test │ └── scala │ └── com │ └── dimajix │ └── flowman │ └── kernel │ └── KernelServerTest.scala ├── flowman-kernel-tools ├── .gitignore └── src │ └── main │ ├── properties │ └── flowman-kernel-tools.properties │ └── java │ └── com │ └── dimajix │ └── flowman │ └── tools │ └── rexec │ └── job │ └── BuildCommand.java ├── flowman-scalatest-compat ├── .gitignore └── src │ └── main │ └── scalatest-3.0 │ └── org │ └── scalatest │ ├── flatspec │ └── package.scala │ └── matchers │ └── should │ └── package.scala ├── flowman-spark-extensions ├── .gitignore └── src │ └── main │ ├── 
resources │ └── META-INF │ │ └── services │ │ ├── com.dimajix.spark.sql.local.RelationProvider │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ ├── spark-3.3 │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── catalyst │ │ └── expressions │ │ └── IfNull.scala │ ├── spark-3.4 │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── catalyst │ │ └── expressions │ │ └── IfNull.scala │ ├── spark-3.5 │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── catalyst │ │ └── expressions │ │ └── IfNull.scala │ ├── spark-2.4 │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── catalyst │ │ └── plans │ │ └── logical │ │ └── AnalysisOnlyCommand.scala │ ├── spark-3.0 │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── catalyst │ │ └── plans │ │ └── logical │ │ └── AnalysisOnlyCommand.scala │ ├── spark-3.1 │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── catalyst │ │ └── plans │ │ └── logical │ │ └── AnalysisOnlyCommand.scala │ └── scala │ └── com │ └── dimajix │ └── hadoop │ └── package.scala ├── flowman-spark-testing └── .gitignore ├── flowman-spark-dependencies └── .gitignore ├── flowman-server-ui ├── .browserslistrc ├── babel.config.js ├── postcss.config.js ├── public │ ├── favicon.png │ └── index.html ├── src │ ├── assets │ │ └── flowman-logo.png │ ├── plugins │ │ └── vuetify.js │ ├── views │ │ └── System.vue │ ├── main.js │ ├── components │ │ ├── EnvironmentTable.vue │ │ ├── PhaseSelector.vue │ │ ├── StatusSelector.vue │ │ └── ResourceTable.vue │ └── charts │ │ └── PieChart.js ├── .gitignore ├── vue.config.js ├── README.md └── .eslintrc.js ├── .gitignore ├── AUTHORS ├── .gitattributes ├── licenses ├── LICENSE-json-schema.txt └── LICENSE-swagger.txt ├── devtools ├── create-version.sh └── create-release.sh ├── .readthedocs.yaml └── .editorconfig /docker/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /tests/emr/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-common/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-core/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-dist/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-parent/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-server/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-spec/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-tools/.gitignore: -------------------------------------------------------------------------------- 
1 | /target/ 2 | -------------------------------------------------------------------------------- /tests/synapse/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /tests/tutorial/.gitignore: -------------------------------------------------------------------------------- 1 | tutorial 2 | -------------------------------------------------------------------------------- /docker/.gitattributes: -------------------------------------------------------------------------------- 1 | bin/* text eol=lf 2 | -------------------------------------------------------------------------------- /flowman-common-java/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-kernel-api/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/aws/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-testing/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-yaml-schema/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /docs/spec/target/getFile.md: -------------------------------------------------------------------------------- 1 | # GetFile Target 2 | -------------------------------------------------------------------------------- /examples/sftp-upload/data/example.csv: -------------------------------------------------------------------------------- 1 | 1,2,3 2 | -------------------------------------------------------------------------------- /flowman-common-jersey/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-kernel-common/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-kernel-server/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-kernel-tools/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/avro/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/azure/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/delta/.gitignore: 
-------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/hbase/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/impala/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/json/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/kafka/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/mariadb/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/mysql/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/openapi/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/oracle/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/parent/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/sftp/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/swagger/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/trino/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-scalatest-compat/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-spark-extensions/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-spark-testing/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /tests/demo-weather/.gitignore: -------------------------------------------------------------------------------- 1 | demo-weather 2 | 
-------------------------------------------------------------------------------- /tests/quickstart/.gitignore: -------------------------------------------------------------------------------- 1 | quickstart-test 2 | -------------------------------------------------------------------------------- /docker/conf/system.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | - flowman-impala -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | _site 3 | _build 4 | -------------------------------------------------------------------------------- /docs/spec/target/putFile.md: -------------------------------------------------------------------------------- 1 | # Put File Target 2 | -------------------------------------------------------------------------------- /flowman-plugins/mssqlserver/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-plugins/postgresql/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /flowman-spark-dependencies/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /tests/archetype-assembly/.gitignore: -------------------------------------------------------------------------------- 1 | quickstart-test 2 | -------------------------------------------------------------------------------- /tests/archetype-quickstart/.gitignore: -------------------------------------------------------------------------------- 1 | quickstart-test 2 | -------------------------------------------------------------------------------- /flowman-server-ui/.browserslistrc: -------------------------------------------------------------------------------- 1 | > 1% 2 | last 2 versions 3 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/data/data_1.csv: -------------------------------------------------------------------------------- 1 | "1","2","3" 2 | -------------------------------------------------------------------------------- /docs/_static/flowman.js: -------------------------------------------------------------------------------- 1 | document.body.setAttribute("data-theme", "light"); 2 | -------------------------------------------------------------------------------- /flowman-core/src/test/resources/com/dimajix/flowman/some-test-resource.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/weather/.gitignore: -------------------------------------------------------------------------------- 1 | generated-documentation 2 | generated-report.txt 3 | -------------------------------------------------------------------------------- /flowman-common-java/src/test/resources/com/dimajix/flowman/globber-test/file-1.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/flowman-spec/src/main/resources/com/dimajix/flowman/report/text/assertion-start.vtl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flowman-tools/src/main/resources/META-INF/flowman/conf/system.yml.template: -------------------------------------------------------------------------------- 1 | # Empty 2 | -------------------------------------------------------------------------------- /flowman-common-java/src/test/resources/com/dimajix/flowman/globber-test/excludedir/file-1.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-a/relation/some-view.sql: -------------------------------------------------------------------------------- 1 | SELECT * FROM default.t0 2 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-b/relation/some-view.sql: -------------------------------------------------------------------------------- 1 | SELECT * FROM default.t0 2 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-c/relation/some-view.sql: -------------------------------------------------------------------------------- 1 | SELECT * FROM default.t0 2 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project/relation/some-view.sql: -------------------------------------------------------------------------------- 1 | SELECT * FROM default.t0 2 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-c/config/env.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - project_c_x=y 3 | -------------------------------------------------------------------------------- /docs/images/console-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/images/console-01.png -------------------------------------------------------------------------------- /flowman-common-java/src/test/resources/com/dimajix/flowman/globber-test/subdir_1/subdir_2/exclude-1.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flowman-common-java/src/test/resources/com/dimajix/flowman/globber-test/subdir_1/subdir_2/file-1.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flowman-common-java/src/test/resources/com/dimajix/flowman/globber-test/subdir_2/exclude/file-1.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-imports/relation/some-view.sql: -------------------------------------------------------------------------------- 1 | SELECT * FROM default.t0 2 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-with-dups/relation/some-view.sql: 
-------------------------------------------------------------------------------- 1 | SELECT * FROM default.t0 2 | -------------------------------------------------------------------------------- /docs/images/flowman-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/images/flowman-logo.png -------------------------------------------------------------------------------- /flowman-server-ui/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [ 3 | '@vue/app' 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /flowman-testing/src/test/resources/flows/config/environment.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - text=Hello World 3 | -------------------------------------------------------------------------------- /tests/mariadb/migrations/project.yml: -------------------------------------------------------------------------------- 1 | name: "mariadb" 2 | version: "1.0" 3 | 4 | modules: 5 | - module 6 | -------------------------------------------------------------------------------- /tests/mysql/migrations/project.yml: -------------------------------------------------------------------------------- 1 | name: "mysql" 2 | version: "1.0" 3 | 4 | modules: 5 | - module 6 | -------------------------------------------------------------------------------- /tests/oracle/migrations/project.yml: -------------------------------------------------------------------------------- 1 | name: "oracle" 2 | version: "1.0" 3 | 4 | modules: 5 | - module 6 | -------------------------------------------------------------------------------- /docs/images/flowman-kernel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/images/flowman-kernel.png -------------------------------------------------------------------------------- /docs/images/history-server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/images/history-server.png -------------------------------------------------------------------------------- /flowman-tools/src/main/properties/flowman-tools.properties: -------------------------------------------------------------------------------- 1 | flowman-tools.classpath=${flowman-tools.classpath} 2 | -------------------------------------------------------------------------------- /tests/sqlserver/docker/entrypoint.sh: -------------------------------------------------------------------------------- 1 | /opt/mssql/bin/sqlservr & /opt/flowman-mssql/setup-db.sh && sleep 365d 2 | -------------------------------------------------------------------------------- /tests/sqlserver/migrations/project.yml: -------------------------------------------------------------------------------- 1 | name: "sqlserver" 2 | version: "1.0" 3 | 4 | modules: 5 | - module 6 | -------------------------------------------------------------------------------- /docs/images/flowman-entities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/images/flowman-entities.png -------------------------------------------------------------------------------- /docs/images/flowman-lifecycle.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/images/flowman-lifecycle.png -------------------------------------------------------------------------------- /docs/images/flowman-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/images/flowman-overview.png -------------------------------------------------------------------------------- /docs/images/flowman-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/images/flowman-workflow.png -------------------------------------------------------------------------------- /examples/sftp-upload/project.yml: -------------------------------------------------------------------------------- 1 | name: "sftp-upload" 2 | version: "1.0" 3 | 4 | modules: 5 | - job 6 | - config 7 | -------------------------------------------------------------------------------- /flowman-server-ui/postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | autoprefixer: {} 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /flowman-server/src/main/properties/flowman-server.properties: -------------------------------------------------------------------------------- 1 | flowman-server.classpath=${flowman-server.classpath} 2 | -------------------------------------------------------------------------------- /tests/postgresql/migrations/project.yml: -------------------------------------------------------------------------------- 1 | name: "postgresql" 2 | version: "1.0" 3 | 4 | modules: 5 | - module 6 | -------------------------------------------------------------------------------- /flowman-server-ui/public/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/flowman-server-ui/public/favicon.png -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-a/config/env.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - x=y 3 | config: 4 | - spark.lala=lolo 5 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-b/config/env.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - x=y 3 | config: 4 | - spark.lala=lolo 5 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project/config/env.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - x=y 3 | config: 4 | - spark.lala=lolo 5 | -------------------------------------------------------------------------------- /flowman-tools/src/main/resources/META-INF/flowman/conf/default-namespace.yml.template: -------------------------------------------------------------------------------- 1 | metrics: 2 | - kind: console 3 | -------------------------------------------------------------------------------- /docs/images/flowman-documentation.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/images/flowman-documentation.png -------------------------------------------------------------------------------- /flowman-core/src/main/resources/META-INF/services/com.dimajix.flowman.spi.LogFilter: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.spi.DefaultLogFilter 2 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-imports/config/env.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - x=y 3 | config: 4 | - spark.lala=lolo 5 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/META-INF/services/com.dimajix.flowman.spi.ModuleReader: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.spec.YamlModuleReader 2 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-with-dups/config/env.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - x=y 3 | config: 4 | - spark.lala=lolo 5 | -------------------------------------------------------------------------------- /flowman-plugins/aws/src/main/resources/META-INF/services/com.dimajix.flowman.spi.LogFilter: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.plugin.aws.AwsLogFilter 2 | -------------------------------------------------------------------------------- /flowman-plugins/trino/src/main/resources/META-INF/services/com.dimajix.flowman.jdbc.SqlDialect: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.jdbc.TrinoDialect 2 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/META-INF/services/com.dimajix.flowman.spi.NamespaceReader: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.spec.YamlNamespaceReader 2 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/META-INF/services/com.dimajix.flowman.spi.ProjectReader: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.spec.YamlProjectReader 2 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-a/project.yml: -------------------------------------------------------------------------------- 1 | name: project-a 2 | version: 1.0 3 | 4 | modules: 5 | - config 6 | - relation 7 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project/TestProject.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | version: 1.0 3 | 4 | modules: 5 | - config 6 | - relation 7 | -------------------------------------------------------------------------------- /flowman-kernel-tools/src/main/properties/flowman-kernel-tools.properties: -------------------------------------------------------------------------------- 1 | flowman-kernel-tools.classpath=${flowman-kernel-tools.classpath} 2 | -------------------------------------------------------------------------------- /flowman-server-ui/src/assets/flowman-logo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/flowman-server-ui/src/assets/flowman-logo.png -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/META-INF/services/com.dimajix.flowman.spi.DocumenterReader: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.spec.YamlDocumenterReader 2 | -------------------------------------------------------------------------------- /flowman-testing/src/test/resources/flows/project.yml: -------------------------------------------------------------------------------- 1 | name: "resource-test" 2 | version: "1.0" 3 | 4 | modules: 5 | - job 6 | - config 7 | -------------------------------------------------------------------------------- /flowman-kernel-server/src/main/properties/flowman-kernel-server.properties: -------------------------------------------------------------------------------- 1 | flowman-kernel-server.classpath=${flowman-kernel-server.classpath} 2 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/META-INF/services/com.dimajix.flowman.spi.PluginListener: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.spec.ObjectMapperPluginListener 2 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-with-dups/TestProject.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | version: 1.0 3 | 4 | modules: 5 | - config 6 | - relation 7 | -------------------------------------------------------------------------------- /flowman-testing/src/test/resources/flows/config/test.yml: -------------------------------------------------------------------------------- 1 | profiles: 2 | test: 3 | environment: 4 | - text=Hello World (test profile) 5 | -------------------------------------------------------------------------------- /docker/libexec/flowman-init.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Setup environment 5 | source /opt/docker/libexec/flowman-vars.sh 6 | -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/RobotoSlab-Black.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/_static/fonts/RobotoSlab/RobotoSlab-Black.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/RobotoSlab-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/_static/fonts/RobotoSlab/RobotoSlab-Bold.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/RobotoSlab-Light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/_static/fonts/RobotoSlab/RobotoSlab-Light.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/RobotoSlab-Medium.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/_static/fonts/RobotoSlab/RobotoSlab-Medium.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/RobotoSlab-Thin.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/_static/fonts/RobotoSlab/RobotoSlab-Thin.ttf -------------------------------------------------------------------------------- /flowman-core/src/main/resources/META-INF/services/com.dimajix.flowman.spi.PluginListener: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.spi.ClassAnnotationScannerPluginListener 2 | -------------------------------------------------------------------------------- /tests/sqlserver/twitter/job/job.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | main: 3 | targets: 4 | - tweets-mssql 5 | - tweets-index 6 | - fulltext-catalog 7 | -------------------------------------------------------------------------------- /tests/sqlserver/twitter/project.yml: -------------------------------------------------------------------------------- 1 | name: "twitter" 2 | version: "1.0" 3 | 4 | modules: 5 | - config 6 | - job 7 | - mapping 8 | - model 9 | -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/RobotoSlab-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/_static/fonts/RobotoSlab/RobotoSlab-Regular.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/RobotoSlab-SemiBold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/_static/fonts/RobotoSlab/RobotoSlab-SemiBold.ttf -------------------------------------------------------------------------------- /flowman-core/src/main/resources/META-INF/services/com.dimajix.flowman.spi.ClassAnnotationHandler: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.templating.TemplateObjectHandler 2 | 3 | -------------------------------------------------------------------------------- /flowman-core/src/main/resources/META-INF/services/com.dimajix.flowman.spi.ColumnCheckExecutor: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.documentation.DefaultColumnCheckExecutor 2 | -------------------------------------------------------------------------------- /flowman-core/src/main/resources/META-INF/services/com.dimajix.flowman.spi.SchemaCheckExecutor: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.documentation.DefaultSchemaCheckExecutor 2 | -------------------------------------------------------------------------------- /flowman-plugins/delta/src/main/resources/META-INF/services/com.dimajix.flowman.spi.SparkExtension: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.spark.sql.delta.DeltaSparkExtension 2 | -------------------------------------------------------------------------------- /flowman-spark-extensions/src/main/resources/META-INF/services/com.dimajix.spark.sql.local.RelationProvider: 
-------------------------------------------------------------------------------- 1 | com.dimajix.spark.sql.local.csv.CsvFileFormat 2 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/com/dimajix/flowman/report/text/target-start.vtl: -------------------------------------------------------------------------------- 1 | --- Start $phase target '${target.identifier}' at ${Timestamp.now()} --- 2 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/target/aggregates.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | aggregates: 3 | kind: relation 4 | mapping: aggregates 5 | relation: aggregates 6 | -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/RobotoSlab-ExtraBold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/_static/fonts/RobotoSlab/RobotoSlab-ExtraBold.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/RobotoSlab-ExtraLight.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/_static/fonts/RobotoSlab/RobotoSlab-ExtraLight.ttf -------------------------------------------------------------------------------- /docker/bin/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Setup environment 5 | source /opt/docker/libexec/flowman-init.sh 6 | 7 | exec "$@" 8 | -------------------------------------------------------------------------------- /tests/emr/flow/target/database.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | database: 3 | kind: hiveDatabase 4 | database: weather 5 | location: s3://flowman-test/glue/weather.db 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.iml 3 | .flattened-pom.xml 4 | velocity.log* 5 | derby.log 6 | dependency-reduced-pom.xml 7 | metastore_db/ 8 | /target/ 9 | release/ 10 | -------------------------------------------------------------------------------- /flowman-common-java/src/test/resources/com/dimajix/flowman/globber-test/.flowman-ignore: -------------------------------------------------------------------------------- 1 | # This is a comment 2 | */exclude 3 | **/exclude-* 4 | no_such_dir 5 | excludedir 6 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/com/dimajix/flowman/documentation/html/template.properties: -------------------------------------------------------------------------------- 1 | template.project.input=project.vtl 2 | template.project.output=project.html 3 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/com/dimajix/flowman/documentation/text/template.properties: -------------------------------------------------------------------------------- 1 | template.project.input=project.vtl 2 | template.project.output=project.txt 3 | -------------------------------------------------------------------------------- /tests/oracle/weather/project.yml: 
-------------------------------------------------------------------------------- 1 | name: "weather" 2 | version: "1.0" 3 | 4 | modules: 5 | - model 6 | - config 7 | - mapping 8 | - job 9 | - target 10 | -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/RobotoSlab-VariableFont_wght.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimajix/flowman/HEAD/docs/_static/fonts/RobotoSlab/RobotoSlab-VariableFont_wght.ttf -------------------------------------------------------------------------------- /tests/postgresql/weather/project.yml: -------------------------------------------------------------------------------- 1 | name: "weather" 2 | version: "1.0" 3 | 4 | modules: 5 | - config 6 | - job 7 | - target 8 | - mapping 9 | - model 10 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/project.yml: -------------------------------------------------------------------------------- 1 | name: "weather" 2 | version: "1.0" 3 | 4 | modules: 5 | - config 6 | - job 7 | - target 8 | - mapping 9 | - model 10 | -------------------------------------------------------------------------------- /examples/weather/config/environment.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - basedir=file:///tmp/weather 3 | - srcdir=$System.getenv('WEATHER_SRCDIR', 's3a://dimajix-training/data/weather') 4 | -------------------------------------------------------------------------------- /tests/emr/flow/target/aggregates.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | aggregates: 3 | kind: relation 4 | mapping: aggregates 5 | relation: aggregates 6 | partition: 7 | year: $year 8 | -------------------------------------------------------------------------------- /flowman-plugins/azure/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: [${plugin.classpath}] 6 | -------------------------------------------------------------------------------- /flowman-plugins/json/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: 6 | - ${plugin.jar} 7 | -------------------------------------------------------------------------------- /tests/mysql/weather/target/aggregates.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | aggregates: 3 | kind: relation 4 | mapping: aggregates 5 | relation: aggregates 6 | partition: 7 | year: $year 8 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project/relation/some-view.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | some_view: 3 | kind: hiveView 4 | view: some_view 5 | file: ${project.basedir}/relation/some-view.sql 6 | -------------------------------------------------------------------------------- /tests/mariadb/weather/target/aggregates.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | aggregates: 3 | kind: relation 4 | mapping: 
aggregates 5 | relation: aggregates 6 | partition: 7 | year: $year 8 | -------------------------------------------------------------------------------- /tests/mysql/weather/config/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | mysql: 3 | driver: "$jdbc_driver" 4 | url: "$jdbc_url" 5 | username: "$jdbc_username" 6 | password: "$jdbc_password" 7 | -------------------------------------------------------------------------------- /tests/postgresql/weather/target/aggregates.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | aggregates: 3 | kind: relation 4 | mapping: aggregates 5 | relation: aggregates 6 | partition: 7 | year: $year 8 | -------------------------------------------------------------------------------- /flowman-scalatest-compat/src/main/scalatest-3.0/org/scalatest/flatspec/package.scala: -------------------------------------------------------------------------------- 1 | package org.scalatest 2 | 3 | package object flatspec { 4 | type AnyFlatSpec = org.scalatest.FlatSpec 5 | } 6 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-a/relation/some-view.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | some_view: 3 | kind: hiveView 4 | view: some_view 5 | file: ${project.basedir}/relation/some-view.sql 6 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-b/relation/some-view.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | some_view: 3 | kind: hiveView 4 | view: some_view 5 | file: ${project.basedir}/relation/some-view.sql 6 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-c/relation/some-view.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | some_view: 3 | kind: hiveView 4 | view: some_view 5 | file: ${project.basedir}/relation/some-view.sql 6 | -------------------------------------------------------------------------------- /tests/mariadb/weather/config/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | mariadb: 3 | driver: "$jdbc_driver" 4 | url: "$jdbc_url" 5 | username: "$jdbc_username" 6 | password: "$jdbc_password" 7 | -------------------------------------------------------------------------------- /flowman-plugins/avro/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: [${plugin.jar},${plugin.classpath}] 6 | -------------------------------------------------------------------------------- /flowman-plugins/aws/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: [${plugin.jar},${plugin.classpath}] 6 | -------------------------------------------------------------------------------- /flowman-plugins/delta/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: 
${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: [${plugin.jar},${plugin.classpath}] 6 | -------------------------------------------------------------------------------- /flowman-plugins/impala/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: [${plugin.jar},${plugin.classpath}] 6 | -------------------------------------------------------------------------------- /flowman-plugins/kafka/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: [${plugin.jar},${plugin.classpath}] 6 | -------------------------------------------------------------------------------- /flowman-plugins/oracle/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: 6 | - ojdbc8-${oracle.version}.jar 7 | -------------------------------------------------------------------------------- /flowman-plugins/sftp/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: [${plugin.jar},${plugin.classpath}] 6 | -------------------------------------------------------------------------------- /flowman-plugins/trino/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: [${plugin.jar},${plugin.classpath}] 6 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/com/dimajix/flowman/report/text/target-finish.vtl: -------------------------------------------------------------------------------- 1 | --> ${result.status} ${result.phase} target '${target.identifier}' at ${result.endTime} (${result.duration}) )<-- 2 | 3 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-imports/relation/some-view.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | some_view: 3 | kind: hiveView 4 | view: some_view 5 | file: ${project.basedir}/relation/some-view.sql 6 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-with-dups/relation/some-view.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | some_view: 3 | kind: hiveView 4 | view: some_view 5 | file: ${project.basedir}/relation/some-view.sql 6 | -------------------------------------------------------------------------------- /tests/emr/flow/target/measurements.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | measurements: 3 | kind: relation 4 | mapping: measurements_extracted 5 | relation: measurements 6 | partition: 7 | year: $year 8 | -------------------------------------------------------------------------------- 
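The relation targets in the surrounding test projects all leave the partition value open as the placeholder `$year`, which is bound by a job parameter at execution time. As a rough illustration only — the `parameters` block below is an assumption sketched from typical Flowman job definitions, not copied from this repository's actual `weather/job/main.yml` files — such a job could look like this:

```yaml
# Hypothetical sketch, not a file from this repository: shows how the `$year`
# placeholder used by the partitioned relation targets above is typically
# bound by a job parameter.
jobs:
  main:
    description: "Build stations, measurements and aggregates for a single year"
    parameters:
      - name: year
        type: Integer
        default: 2013
    targets:
      - stations
      - measurements
      - aggregates
```

When such a job is run (for example via the `flowexec -f weather job build main --force` calls in the `run-tests.sh` scripts further down), the parameter value selects which `year` partition of the `measurements` and `aggregates` relations is written.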
/tests/oracle/weather/target/aggregates.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | aggregates: 3 | kind: relation 4 | mapping: aggregates_conformed 5 | relation: aggregates 6 | partition: 7 | year: $year 8 | -------------------------------------------------------------------------------- /flowman-plugins/mssqlserver/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: [${plugin.jar},${plugin.classpath}] 6 | -------------------------------------------------------------------------------- /flowman-plugins/openapi/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: [${plugin.jar},${plugin.classpath}] 6 | -------------------------------------------------------------------------------- /flowman-plugins/swagger/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: [${plugin.jar},${plugin.classpath}] 6 | -------------------------------------------------------------------------------- /flowman-scalatest-compat/src/main/scalatest-3.0/org/scalatest/matchers/should/package.scala: -------------------------------------------------------------------------------- 1 | package org.scalatest.matchers 2 | 3 | package object should { 4 | type Matchers = org.scalatest.Matchers 5 | } 6 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-with-dups/relation/some-view-dup.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | some_view: 3 | kind: hiveView 4 | view: some_view 5 | file: ${project.basedir}/relation/some-view.sql 6 | -------------------------------------------------------------------------------- /flowman-common-java/src/main/resources/com/dimajix/flowman/flowman.properties: -------------------------------------------------------------------------------- 1 | version=${project.version} 2 | spark_version=${spark.version} 3 | hadoop_version=${hadoop.version} 4 | scala_version=${scala.version} 5 | -------------------------------------------------------------------------------- /tests/mariadb/weather/target/measurements.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | measurements: 3 | kind: relation 4 | mapping: measurements_deduplicated 5 | relation: measurements 6 | partition: 7 | year: $year 8 | -------------------------------------------------------------------------------- /tests/mysql/migrations/module/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | jdbcConnection: 3 | driver: "$jdbc_driver" 4 | url: "$jdbc_url/flowman" 5 | username: "$jdbc_username" 6 | password: "$jdbc_password" 7 | -------------------------------------------------------------------------------- /tests/mysql/weather/target/measurements.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | measurements: 3 | kind: relation 4 | mapping: 
measurements_deduplicated 5 | relation: measurements 6 | partition: 7 | year: $year 8 | -------------------------------------------------------------------------------- /tests/oracle/weather/target/measurements.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | measurements: 3 | kind: relation 4 | mapping: measurements_deduplicated 5 | relation: measurements 6 | partition: 7 | year: $year 8 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/target/measurements.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | measurements: 3 | kind: relation 4 | mapping: measurements_deduplicated 5 | relation: measurements 6 | partition: 7 | year: $year 8 | -------------------------------------------------------------------------------- /flowman-plugins/postgresql/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: 6 | - postgresql-${postgresql.version}.jar 7 | -------------------------------------------------------------------------------- /flowman-testing/src/test/resources/flows/job/main.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | null: 3 | kind: empty 4 | 5 | jobs: 6 | main: 7 | description: "Simply print 'Hello World'" 8 | targets: 9 | - "null" 10 | -------------------------------------------------------------------------------- /tests/mariadb/migrations/module/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | jdbcConnection: 3 | driver: "$jdbc_driver" 4 | url: "$jdbc_url/flowman" 5 | username: "$jdbc_username" 6 | password: "$jdbc_password" 7 | -------------------------------------------------------------------------------- /tests/postgresql/migrations/module/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | jdbcConnection: 3 | driver: "$jdbc_driver" 4 | url: "$jdbc_url/flowman" 5 | username: "$jdbc_username" 6 | password: "$jdbc_password" 7 | -------------------------------------------------------------------------------- /tests/postgresql/weather/config/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | jdbcConnection: 3 | driver: "$jdbc_driver" 4 | url: "$jdbc_url/flowman" 5 | username: "$jdbc_username" 6 | password: "$jdbc_password" 7 | -------------------------------------------------------------------------------- /tests/postgresql/weather/target/measurements.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | measurements: 3 | kind: relation 4 | mapping: measurements_deduplicated 5 | relation: measurements 6 | partition: 7 | year: $year 8 | -------------------------------------------------------------------------------- /flowman-plugins/mysql/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: 6 | - mysql-connector-j-${mysql-connector-j.version}.jar 7 | -------------------------------------------------------------------------------- 
/flowman-spec/src/test/resources/project-c/project.yml: -------------------------------------------------------------------------------- 1 | name: project-c 2 | version: 1.0 3 | 4 | modules: 5 | - config 6 | - relation 7 | 8 | 9 | imports: 10 | - project: project-a 11 | location: ../project-a 12 | -------------------------------------------------------------------------------- /flowman-plugins/mariadb/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: 6 | - mariadb-java-client-${mariadb-java-client.version}.jar 7 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/com/dimajix/flowman/documentation/html+css/template.properties: -------------------------------------------------------------------------------- 1 | template.project.input=project.vtl 2 | template.project.output=project.html 3 | template.css.input=project.css 4 | template.css.output=project.css 5 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/data/expected/data_3.txt: -------------------------------------------------------------------------------- 1 | magna aliquyam erat, sed diam voluptua. At vero eos et accusam et 2 | justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. 3 | -------------------------------------------------------------------------------- /tests/emr/flow/model/stations.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations: 3 | kind: hiveTable 4 | database: weather 5 | table: stations 6 | format: parquet 7 | schema: 8 | kind: mapping 9 | mapping: stations_raw 10 | -------------------------------------------------------------------------------- /examples/weather/target/aggregates.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | aggregates: 3 | kind: relation 4 | description: "Write aggregated measurements per year" 5 | mapping: aggregates 6 | relation: aggregates 7 | partition: 8 | year: $year 9 | -------------------------------------------------------------------------------- /tests/synapse/flow/project.yml: -------------------------------------------------------------------------------- 1 | name: "@project.artifactId@" 2 | version: "@project.version@" 3 | description: "@project.description@" 4 | 5 | modules: 6 | - config 7 | - job 8 | - target 9 | - mapping 10 | - model 11 | - test 12 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | The following people have contributed to Flowman and are referred to as "The Flowman Authors": 2 | 3 | * Kaya Kupferschmidt 4 | * Siegfried Weber 5 | * Jacek Kmiecik 6 | * Lukas Senicourt 7 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/data/expected/data_1.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut 2 | labore et dolore magna aliquyam erat, sed diam voluptua. 
At vero eos et accusam et justo duo dolores et 3 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/data/expected/data_2.txt: -------------------------------------------------------------------------------- 1 | ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum 2 | dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore 3 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-b/project.yaml: -------------------------------------------------------------------------------- 1 | name: project-b 2 | version: 1.0 3 | 4 | modules: 5 | - config 6 | - relation 7 | - no_such_directory 8 | 9 | imports: 10 | - project: project-a 11 | location: ${project.basedir}/../project-a 12 | -------------------------------------------------------------------------------- /tests/synapse/flow/config/config.yml: -------------------------------------------------------------------------------- 1 | # Here go all project specific configuration options for Spark and Flowman 2 | config: 3 | # Use anonymous access to S3 4 | - spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider 5 | -------------------------------------------------------------------------------- /tests/synapse/flow/target/stations.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | stations: 3 | kind: relation 4 | mapping: stations_conformed 5 | relation: stations 6 | 7 | stations_jdbc: 8 | kind: relation 9 | mapping: stations 10 | relation: stations_jdbc 11 | -------------------------------------------------------------------------------- /examples/weather/target/documentation.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | # This target will create a documentation in the VERIFY phase 3 | documentation: 4 | kind: documentation 5 | # We do not specify any additional configuration, so the project's documentation.yml file will be used 6 | -------------------------------------------------------------------------------- /tests/emr/flow/target/documentation.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | # This target will create a documentation in the VERIFY phase 3 | documentation: 4 | kind: documentation 5 | # We do not specify any additional configuration, so the project's documentation.yml file will be used 6 | -------------------------------------------------------------------------------- /flowman-spark-extensions/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | com.dimajix.spark.sql.sources.sequencefile.SequenceFileFormat 2 | com.dimajix.spark.sql.sources.fixedwidth.FixedWidthFormat 3 | com.dimajix.spark.sql.sources.empty.NullFormat 4 | -------------------------------------------------------------------------------- /tests/synapse/flow/target/documentation.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | # This target will create a documentation in the VERIFY phase 3 | documentation: 4 | kind: documentation 5 | # We do not specify any additional configuration, so the project's documentation.yml file will be used 6 | 
-------------------------------------------------------------------------------- /docs/cli/flowexec/misc.md: -------------------------------------------------------------------------------- 1 | # Miscellaneous Commands 2 | 3 | 4 | ## `info` Command 5 | As a small debugging utility, Flowman also provides an `info` command, which simply shows all environment variables 6 | and configuration settings. 7 | ```shell 8 | flowexec info 9 | ``` 10 | 11 | -------------------------------------------------------------------------------- /tests/mariadb/README.md: -------------------------------------------------------------------------------- 1 | # Test Suite for MariaDB 2 | 3 | ## Start Docker Containers 4 | ```shell 5 | docker-compose up -d mariadb 6 | docker-compose run flowman 7 | ``` 8 | 9 | ## Execute project 10 | ``` 11 | job create v1 12 | job create v1 13 | 14 | job create v2 15 | ``` 16 | -------------------------------------------------------------------------------- /tests/mysql/README.md: -------------------------------------------------------------------------------- 1 | # Test Suite for MySQL Server 2 | 3 | ## Start Docker Containers 4 | ```shell 5 | docker-compose up -d mysql 6 | docker-compose run flowman 7 | ``` 8 | 9 | ## Execute project 10 | ``` 11 | job create v1 12 | job create v1 13 | 14 | job create v2 15 | ``` 16 | -------------------------------------------------------------------------------- /flowman-dist/conf/system.yml.template: -------------------------------------------------------------------------------- 1 | # The system configuration loads plugins before namespaces are instantiated. The Impala plugin may already be required 2 | # within a namespace to define an external catalog, therefore it needs to be loaded in advance. 3 | plugins: 4 | - flowman-impala 5 | -------------------------------------------------------------------------------- /tests/postgresql/README.md: -------------------------------------------------------------------------------- 1 | # Test Suite for PostgreSQL 2 | 3 | ## Start Docker Containers 4 | ```shell 5 | docker-compose up -d postgresql 6 | docker-compose run flowman 7 | ``` 8 | 9 | ## Execute project 10 | ``` 11 | job create v1 12 | job create v1 13 | 14 | job create v2 15 | ``` 16 | -------------------------------------------------------------------------------- /tests/sqlserver/README.md: -------------------------------------------------------------------------------- 1 | # Test Suite for MS SQL Server 2 | 3 | ## Start Docker Containers 4 | ```shell 5 | docker-compose up -d sqlserver 6 | docker-compose run flowman 7 | ``` 8 | 9 | ## Execute project 10 | ``` 11 | job create v1 12 | job create v1 13 | 14 | job create v2 15 | ``` 16 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/com/dimajix/flowman/report/text/assertion-finish.vtl: -------------------------------------------------------------------------------- 1 | #if($result.success) ✓ #else ✘ #end${result.status} assertion '${assertion.name}' (${assertion.description}) 2 | #foreach($test in ${result.children}) #if($test.success) ✓ passed:#else ✘ failed:#end ${test.name} 3 | #end 4 | -------------------------------------------------------------------------------- /tests/mariadb/migrations/module/job.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | v1: 3 | targets: 4 | - tgt_v1 5 | 6 | v2: 7 | targets: 8 | - tgt_v2 9 | 10 | 11 | targets: 12 | tgt_v1: 13 | kind: relation 14 | 
relation: rel_v1 15 | 16 | tgt_v2: 17 | kind: relation 18 | relation: rel_v2 19 | -------------------------------------------------------------------------------- /tests/mysql/migrations/module/job.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | v1: 3 | targets: 4 | - tgt_v1 5 | 6 | v2: 7 | targets: 8 | - tgt_v2 9 | 10 | 11 | targets: 12 | tgt_v1: 13 | kind: relation 14 | relation: rel_v1 15 | 16 | tgt_v2: 17 | kind: relation 18 | relation: rel_v2 19 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/project-imports/project.yml: -------------------------------------------------------------------------------- 1 | name: project-imports 2 | version: 1.0 3 | 4 | modules: 5 | - config 6 | - relation 7 | 8 | 9 | imports: 10 | - project: project-b 11 | location: ${project.basedir}/../project-b 12 | - project: project-c 13 | location: ../project-c 14 | -------------------------------------------------------------------------------- /flowman-common-java/src/main/resources/com/dimajix/flowman/flowman-logo.txt: -------------------------------------------------------------------------------- 1 | ______ _ 2 | | ___|| | 3 | | |_ | | ___ __ __ _ __ ___ __ _ _ __ 4 | | _| | | / _ \\ \ /\ / /| '_ ` _ \ / _` || '_ \ 5 | | | | || (_) |\ V V / | | | | | || (_| || | | | 6 | \_| |_| \___/ \_/\_/ |_| |_| |_| \__,_||_| |_| 7 | -------------------------------------------------------------------------------- /tests/sqlserver/migrations/module/job.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | v1: 3 | targets: 4 | - tgt_v1 5 | 6 | v2: 7 | targets: 8 | - tgt_v2 9 | 10 | 11 | targets: 12 | tgt_v1: 13 | kind: relation 14 | relation: rel_v1 15 | 16 | tgt_v2: 17 | kind: relation 18 | relation: rel_v2 19 | 20 | -------------------------------------------------------------------------------- /examples/sftp-upload/config/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | sftp: 3 | kind: sftp 4 | host: "${sftp_host}" 5 | port: ${sftp_port} 6 | username: "${sftp_username}" 7 | password: "${sftp_password}" 8 | keyFile: "${sftp_keyfile}" 9 | knownHosts: "$System.getProperty('user.home')/.ssh/known_hosts" 10 | -------------------------------------------------------------------------------- /examples/sftp-upload/README.md: -------------------------------------------------------------------------------- 1 | # Preparing the Environment 2 | 3 | SFTP_USERNAME= 4 | SFTP_PASSWORD= 5 | SFTP_KEYFILE= 6 | SFTP_HOST= 7 | SFTP_TARGET= 8 | 9 | 10 | # Using flowman 11 | 12 | flowexec -f examples/sftp-upload project run 13 | -------------------------------------------------------------------------------- /examples/sftp-upload/job/main.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | upload: 3 | kind: sftpUpload 4 | connection: sftp 5 | source: "${project.basedir}/data/example.csv" 6 | target: "${sftp_target}/example.csv" 7 | overwrite: true 8 | 9 | 10 | jobs: 11 | main: 12 | description: "Upload File via SFTP" 13 | targets: upload 14 | -------------------------------------------------------------------------------- /tests/mysql/weather/model/stations.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations: 3 | kind: jdbcTable 4 | description: "The 'stations' table contains meta data on all weather 
stations" 5 | connection: mysql 6 | database: $jdbc_db 7 | table: stations 8 | schema: 9 | kind: mapping 10 | mapping: stations_conformed 11 | -------------------------------------------------------------------------------- /docs/plugins/kafka.md: -------------------------------------------------------------------------------- 1 | # Kafka Plugin 2 | 3 | ## Provided Entities 4 | * [`kafka` relation](../spec/relation/kafka.md) 5 | 6 | 7 | ## Activation 8 | 9 | The plugin can be easily activated by adding the following section to the [default-namespace.yml](../spec/namespace.md) 10 | ```yaml 11 | plugins: 12 | - flowman-kafka 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/spec/measure/sql.md: -------------------------------------------------------------------------------- 1 | # SQL Measure 2 | 3 | ## Example 4 | 5 | ```yaml 6 | targets: 7 | my_measures: 8 | kind: measure 9 | measures: 10 | nulls: 11 | kind: sql 12 | query: " 13 | SELECT 14 | SUM(col IS NULL) AS col_nulls 15 | FROM some_mapping 16 | " 17 | ``` 18 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/data/actual/data_2.txt: -------------------------------------------------------------------------------- 1 | dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore 2 | magna aliquyam erat, sed diam voluptua. At vero eos et accusam et 3 | justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. 4 | -------------------------------------------------------------------------------- /tests/emr/flow/project.yml: -------------------------------------------------------------------------------- 1 | name: "@project.artifactId@" 2 | version: "@project.version@" 3 | description: "@project.description@" 4 | 5 | # The following modules simply contain a list of subdirectories containing the specification files 6 | modules: 7 | - model 8 | - mapping 9 | - target 10 | - job 11 | - config 12 | - test 13 | -------------------------------------------------------------------------------- /tests/mariadb/weather/model/stations.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations: 3 | kind: jdbcTable 4 | description: "The 'stations' table contains meta data on all weather stations" 5 | connection: mariadb 6 | database: $jdbc_db 7 | table: stations 8 | schema: 9 | kind: mapping 10 | mapping: stations_conformed 11 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Declare files that will always have CRLF line endings on checkout. 2 | *.sh text eol=lf 3 | *.yml text eol=lf 4 | *.scala text eol=lf 5 | *.java text eol=lf 6 | *.txt text eol=lf 7 | *.md text eol=lf 8 | *.xml text eol=lf 9 | LICENSE text eol=lf 10 | NOTICE text eol=lf 11 | Dockerfile text eol=lf 12 | Jenkinsfile text eol=lf 13 | -------------------------------------------------------------------------------- /flowman-plugins/azure/src/main/scala/com/microsoft/azure/synapse/tokenlibrary/TokenLibrary.scala: -------------------------------------------------------------------------------- 1 | package com.microsoft.azure.synapse.tokenlibrary 2 | 3 | 4 | object TokenLibrary { 5 | def getSecret(vault: String, secretName:String) : String = ??? 
 6 | def getSecret(vault: String, secretName:String, linkedService: String) : String = ??? 7 | } 8 | -------------------------------------------------------------------------------- /tests/oracle/migrations/module/job.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | v1: 3 | targets: 4 | - aggregates_v1 5 | 6 | v2: 7 | targets: 8 | - aggregates_v2 9 | 10 | 11 | targets: 12 | aggregates_v1: 13 | kind: relation 14 | relation: aggregates_v1 15 | 16 | aggregates_v2: 17 | kind: relation 18 | relation: aggregates_v2 19 | -------------------------------------------------------------------------------- /tests/postgresql/migrations/module/job.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | v1: 3 | targets: 4 | - aggregates_v1 5 | 6 | v2: 7 | targets: 8 | - aggregates_v2 9 | 10 | 11 | targets: 12 | aggregates_v1: 13 | kind: relation 14 | relation: aggregates_v1 15 | 16 | aggregates_v2: 17 | kind: relation 18 | relation: aggregates_v2 19 | -------------------------------------------------------------------------------- /flowman-dist/bin/flowshell.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem This is the entry point for running the Flowman shell. To avoid polluting the 4 | rem environment, it just launches a new cmd to do the real work. 5 | 6 | rem The outermost quotes are used to prevent Windows command line parse error when there are some quotes in parameters 7 | cmd /V /E /C ""%~dp0flowshell2.cmd" %*" 8 | -------------------------------------------------------------------------------- /flowman-server-ui/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | dist 4 | target 5 | 6 | # local env files 7 | .env.local 8 | .env.*.local 9 | 10 | # Log files 11 | npm-debug.log* 12 | yarn-debug.log* 13 | yarn-error.log* 14 | 15 | # Editor directories and files 16 | .idea 17 | .vscode 18 | *.suo 19 | *.ntvs* 20 | *.njsproj 21 | *.sln 22 | *.sw? 23 | *.iml 24 | -------------------------------------------------------------------------------- /tests/emr/flow/target/stations.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | # Define a build target "stations"... 3 | stations: 4 | # ... which builds a relation 5 | kind: relation 6 | # ... by reading the result from the mapping "stations_raw" 7 | mapping: stations_raw 8 | # ... and by writing the records to the relation "stations" 9 | relation: stations 10 | -------------------------------------------------------------------------------- /flowman-spec/src/test/resources/data/actual/data_1.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut 2 | labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et 3 | ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. 
Lorem ipsum 4 | -------------------------------------------------------------------------------- /tests/mariadb/bin/run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Check migrations 4 | flowexec -f migrations job build v1 5 | flowexec -f migrations job build v1 6 | flowexec -f migrations job build v2 7 | flowexec -f migrations job build v2 8 | flowexec -f migrations job build v1 9 | 10 | 11 | # Run weather example 12 | flowexec -f weather job build main --force 13 | -------------------------------------------------------------------------------- /tests/mysql/weather/target/stations.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | # Define a build target "stations"... 3 | stations: 4 | # ... which builds a relation 5 | kind: relation 6 | # ... by reading the result from the mapping "stations_raw" 7 | mapping: stations_conformed 8 | # ... and by writing the records to the relation "stations" 9 | relation: stations 10 | -------------------------------------------------------------------------------- /tests/oracle/weather/target/stations.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | # Define a build target "stations"... 3 | stations: 4 | # ... which builds a relation 5 | kind: relation 6 | # ... by reading the result from the mapping "stations_raw" 7 | mapping: stations_conformed 8 | # ... and by writing the records to the relation "stations" 9 | relation: stations 10 | -------------------------------------------------------------------------------- /tests/sqlserver/twitter/config/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | sql_server: 3 | driver: "$jdbc_driver" 4 | url: "$jdbc_url" 5 | username: "$jdbc_username" 6 | password: "$jdbc_password" 7 | properties: 8 | # We need to specify the database name already as part of connection, otherwise the login won't work 9 | databaseName: "$jdbc_db" 10 | -------------------------------------------------------------------------------- /docs/spec/connection/ssh.md: -------------------------------------------------------------------------------- 1 | # SSH Connection 2 | 3 | ## Example 4 | ```yaml 5 | connections: 6 | sftp-server: 7 | kind: sftp 8 | host: "sftp.server.dimajix.net" 9 | port: "22" 10 | username: "testuser" 11 | password: "12345678" 12 | keyFile: "/home/user/private_key" 13 | knownHosts: 14 | ``` 15 | 16 | ## Fields 17 | 18 | ## Description 19 | -------------------------------------------------------------------------------- /flowman-dist/bin/flowshell2.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | call "%~dp0../libexec/flowman-common.cmd" 4 | 5 | SET APP_NAME=flowman-tools 6 | SET APP_VERSION=@project.version@ 7 | SET APP_MAIN=com.dimajix.flowman.tools.shell.Shell 8 | 9 | SET APP_JAR=%APP_NAME%-%APP_VERSION%.jar 10 | SET LIB_JARS="@flowman-tools.classpath@" 11 | 12 | call "%~dp0../libexec/flowman-launch.cmd" %* 13 | -------------------------------------------------------------------------------- /tests/mariadb/weather/target/stations.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | # Define a build target "stations"... 3 | stations: 4 | # ... which builds a relation 5 | kind: relation 6 | # ... 
by reading the result from the mapping "stations_raw" 7 | mapping: stations_conformed 8 | # ... and by writing the records to the relation "stations" 9 | relation: stations 10 | -------------------------------------------------------------------------------- /tests/postgresql/weather/target/stations.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | # Define a build target "stations"... 3 | stations: 4 | # ... which builds a relation 5 | kind: relation 6 | # ... by reading the result from the mapping "stations_raw" 7 | mapping: stations_conformed 8 | # ... and by writing the records to the relation "stations" 9 | relation: stations 10 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/target/stations.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | # Define a build target "stations"... 3 | stations: 4 | # ... which builds a relation 5 | kind: relation 6 | # ... by reading the result from the mapping "stations_raw" 7 | mapping: stations_conformed 8 | # ... and by writing the records to the relation "stations" 9 | relation: stations 10 | -------------------------------------------------------------------------------- /docs/spec/dataset/file.md: -------------------------------------------------------------------------------- 1 | # File Dataset 2 | 3 | The *file dataset* can be used for reading data from a shared file system 4 | 5 | ## Example 6 | ```yaml 7 | kind: file 8 | format: json 9 | location: "${project.basedir}/test/data/results/${relation}/data.json" 10 | schema: 11 | kind: spark 12 | file: "${project.basedir}/test/data/results/${relation}/schema.json" 13 | ``` 14 | -------------------------------------------------------------------------------- /tests/oracle/weather/config/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | jdbcConnection: 3 | driver: "$jdbc_driver" 4 | url: "$jdbc_url/$jdbc_db" 5 | username: "$jdbc_username" 6 | password: "$jdbc_password" 7 | properties: 8 | # We need to specify the database name already as part of connection, otherwise the login won't work 9 | databaseName: "$jdbc_db" 10 | -------------------------------------------------------------------------------- /examples/sftp-upload/config/environment.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - sftp_username=$System.getenv("SFTP_USERNAME") 3 | - sftp_password=$System.getenv("SFTP_PASSWORD") 4 | - sftp_keyfile=$System.getenv("SFTP_KEYFILE") 5 | - sftp_host=$System.getenv("SFTP_HOST") 6 | - sftp_port=$System.getenv("SFTP_PORT", "22") 7 | - sftp_target=$System.getenv("SFTP_TARGET", "/tmp/flowman-sftp-target") 8 | -------------------------------------------------------------------------------- /examples/weather/mapping/stations.yml: -------------------------------------------------------------------------------- 1 | mappings: 2 | # This mapping refers to the "raw" relation and reads in data from the source in S3 3 | stations_raw: 4 | kind: relation 5 | relation: stations_raw 6 | 7 | # This mapping refers to the Parquet relation and reads in data from the local file system 8 | stations: 9 | kind: relation 10 | relation: stations 11 | -------------------------------------------------------------------------------- /tests/mysql/bin/run-tests.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Check migrations 6 | flowexec -f migrations job build v1 7 | flowexec -f migrations job build v1 8 | flowexec -f migrations job build v2 9 | flowexec -f migrations job build v2 10 | flowexec -f migrations job build v1 11 | 12 | 13 | # Run weather example 14 | flowexec -f weather job build main --force 15 | -------------------------------------------------------------------------------- /tests/oracle/bin/run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Check migrations 6 | flowexec -f migrations job build v1 7 | flowexec -f migrations job build v1 8 | flowexec -f migrations job build v2 9 | flowexec -f migrations job build v2 10 | flowexec -f migrations job build v1 11 | 12 | 13 | # Run weather example 14 | flowexec -f weather job build main --force 15 | -------------------------------------------------------------------------------- /tests/oracle/migrations/module/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | jdbcConnection: 3 | driver: "$jdbc_driver" 4 | url: "$jdbc_url/$jdbc_db" 5 | username: "$jdbc_username" 6 | password: "$jdbc_password" 7 | properties: 8 | # We need to specify the database name already as part of connection, otherwise the login won't work 9 | databaseName: "$jdbc_db" 10 | -------------------------------------------------------------------------------- /flowman-plugins/avro/src/test/resources/schema/AvroSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "namespace": "", 4 | "name": "test_schema", 5 | "fields": [ 6 | { 7 | "doc": "AccessDateTime as a string", 8 | "type": ["string", "null"], 9 | "name": "AccessDateTime", 10 | "order": "ignore" 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /tests/postgresql/bin/run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Check migrations 6 | flowexec -f migrations job build v1 7 | flowexec -f migrations job build v1 8 | flowexec -f migrations job build v2 9 | flowexec -f migrations job build v2 10 | flowexec -f migrations job build v1 11 | 12 | 13 | # Run weather example 14 | flowexec -f weather job build main --force 15 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/config/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | sql_server: 3 | kind: jdbc 4 | driver: "$jdbc_driver" 5 | url: "$jdbc_url" 6 | username: "$jdbc_username" 7 | password: "$jdbc_password" 8 | properties: 9 | # We need to specify the database name already as part of connection, otherwise the login won't work 10 | databaseName: "$jdbc_db" 11 | -------------------------------------------------------------------------------- /tests/sqlserver/migrations/module/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | sql_server: 3 | kind: jdbc 4 | driver: "$jdbc_driver" 5 | url: "$jdbc_url" 6 | username: "$jdbc_username" 7 | password: "$jdbc_password" 8 | properties: 9 | # We need to specify the database name already as part of connection, otherwise the login 
won't work 10 | databaseName: "$jdbc_db" 11 | -------------------------------------------------------------------------------- /docs/spec/metric/index.md: -------------------------------------------------------------------------------- 1 | # Execution Metrics 2 | 3 | Flowman can push metrics to external metric collectors, for example to Prometheus. The push will be performed after 4 | each build phase of a job. More details are described in [runtime metrics](../../concepts/metrics.md) 5 | 6 | ## Sink Types 7 | 8 | ```eval_rst 9 | .. toctree:: 10 | :maxdepth: 1 11 | :glob: 12 | 13 | * 14 | ``` 15 | -------------------------------------------------------------------------------- /flowman-plugins/hbase/src/main/resources/plugin.yml: -------------------------------------------------------------------------------- 1 | name: ${plugin.name} 2 | description: ${project.name} 3 | version: ${plugin.version} 4 | isolation: false 5 | jars: 6 | - ${plugin.jar} 7 | - hbase-client-${hbase.version}.jar 8 | - hbase-common-${hbase.version}.jar 9 | - hbase-protocol-${hbase.version}.jar 10 | - htrace-core-3.2.0-incubating.jar 11 | - htrace-core4-4.0.1-incubating.jar 12 | -------------------------------------------------------------------------------- /tests/synapse/flow/model/stations-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations_raw: 3 | kind: file 4 | format: csv 5 | location: "${srcdir}/isd-history/" 6 | options: 7 | sep: "," 8 | encoding: "UTF-8" 9 | quote: "\"" 10 | header: "true" 11 | dateFormat: "yyyyMMdd" 12 | schema: 13 | kind: avro 14 | file: "${project.basedir}/schema/stations.avsc" 15 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/com/dimajix/flowman/report/text/job-start.vtl: -------------------------------------------------------------------------------- 1 | ====================================================================================================================== 2 | Start $phase of job '${job.identifier}' (${job.description}) at ${Timestamp.now()} 3 | ====================================================================================================================== 4 | 5 | -------------------------------------------------------------------------------- /tests/synapse/flow/target/aggregates.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | aggregates: 3 | kind: relation 4 | mapping: aggregates 5 | relation: aggregates 6 | 7 | aggregates_jdbc: 8 | kind: relation 9 | mapping: aggregates 10 | relation: aggregates_jdbc 11 | 12 | aggregates_delta: 13 | kind: relation 14 | mapping: aggregates 15 | relation: aggregates_delta 16 | mode: update 17 | -------------------------------------------------------------------------------- /flowman-server-ui/vue.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | outputDir: 'target/classes/META-INF/resources/webjars/flowman-server-ui', 3 | transpileDependencies: [ 4 | 'vuetify' 5 | ], 6 | devServer: { 7 | port: 8088, 8 | proxy: { 9 | '^/api': { 10 | target: 'http://localhost:8080', 11 | ws: true, 12 | changeOrigin: true 13 | } 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /docs/plugins/index.md: -------------------------------------------------------------------------------- 1 | # Plugins 2 | 3 | This part of the documentation contains 
a description of the plugins which are provided directly as part of Flowman. 4 | You need to enable each plugin by adding it to the `namespace.yml` file as described in the 5 | [namespace documentation](../spec/namespace.md). 6 | 7 | 8 | ```eval_rst 9 | .. toctree:: 10 | :maxdepth: 1 11 | :glob: 12 | 13 | * 14 | ``` 15 | -------------------------------------------------------------------------------- /flowman-core/src/main/resources/META-INF/services/com.dimajix.flowman.jdbc.SqlDialect: -------------------------------------------------------------------------------- 1 | com.dimajix.flowman.jdbc.DerbyDialect 2 | com.dimajix.flowman.jdbc.H2Dialect 3 | com.dimajix.flowman.jdbc.HiveDialect 4 | com.dimajix.flowman.jdbc.MariaDialect 5 | com.dimajix.flowman.jdbc.MySQLDialect 6 | com.dimajix.flowman.jdbc.OracleDialect 7 | com.dimajix.flowman.jdbc.PostgresDialect 8 | com.dimajix.flowman.jdbc.SqlServerDialect 9 | -------------------------------------------------------------------------------- /flowman-dist/bin/flowexec: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | basedir=$(readlink -f "$(dirname "$0")"/..) 4 | source "$basedir"/libexec/flowman-common.sh 5 | 6 | APP_NAME="flowman-tools" 7 | APP_VERSION="@project.version@" 8 | APP_MAIN="com.dimajix.flowman.tools.exec.Driver" 9 | 10 | APP_JAR="$APP_NAME-$APP_VERSION.jar" 11 | LIB_JARS="@flowman-tools.classpath@" 12 | 13 | run_spark $APP_JAR $LIB_JARS $APP_MAIN "$@" 14 | -------------------------------------------------------------------------------- /flowman-dist/bin/flowshell: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | basedir=$(readlink -f "$(dirname "$0")"/..) 4 | source "$basedir"/libexec/flowman-common.sh 5 | 6 | APP_NAME="flowman-tools" 7 | APP_VERSION="@project.version@" 8 | APP_MAIN="com.dimajix.flowman.tools.shell.Shell" 9 | 10 | APP_JAR="$APP_NAME-$APP_VERSION.jar" 11 | LIB_JARS="@flowman-tools.classpath@" 12 | 13 | run_spark $APP_JAR $LIB_JARS $APP_MAIN "$@" 14 | -------------------------------------------------------------------------------- /tests/synapse/README.md: -------------------------------------------------------------------------------- 1 | # Test Suite for Azure Synapse 2 | 3 | export AZURE_TENANT_ID= 4 | export AZURE_CLIENT_ID= 5 | export AZURE_USERNAME= 6 | export AZURE_PASSWORD= 7 | 8 | az account get-access-token 9 | 10 | Jar File: abfss://flowman@dimajixspark.dfs.core.windows.net/synapse-test-1.0-SNAPSHOT.jar 11 | Main class: com.dimajix.flowman.tools.exec.Driver 12 | Arguments: -B -f flow job build main --force 13 | -------------------------------------------------------------------------------- /docs/plugins/json.md: -------------------------------------------------------------------------------- 1 | # JSON Plugin 2 | 3 | The JSON plugin provides compatibility with JSON schema definition files. 4 | 5 | 6 | ## Provided Entities 7 | * [`json` schema](../spec/schema/json.md) 8 | 9 | 10 | ## Activation 11 | 12 | The plugin can be easily activated by adding the following section to the [default-namespace.yml](../spec/namespace.md) 13 | ```yaml 14 | plugins: 15 | - flowman-json 16 | ``` 17 | -------------------------------------------------------------------------------- /flowman-dist/bin/flowman-schema: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | basedir=$(readlink -f "$(dirname "$0")"/..) 
4 | source "$basedir"/libexec/flowman-common.sh 5 | 6 | APP_NAME="flowman-tools" 7 | APP_VERSION="@project.version@" 8 | APP_MAIN="com.dimajix.flowman.tools.schema.Driver" 9 | 10 | APP_JAR="$APP_NAME-$APP_VERSION.jar" 11 | LIB_JARS="@flowman-tools.classpath@" 12 | 13 | run_spark $APP_JAR $LIB_JARS $APP_MAIN "$@" 14 | -------------------------------------------------------------------------------- /docs/plugins/avro.md: -------------------------------------------------------------------------------- 1 | # Avro Plugin 2 | 3 | ## Provided Entities 4 | * Implements [`avro` schema](../spec/schema/avro.md) 5 | * Adds support for Avro file format in [`file` relation](../spec/relation/file.md) 6 | 7 | 8 | ## Activation 9 | 10 | The plugin can be easily activated by adding the following section to the [default-namespace.yml](../spec/namespace.md) 11 | ```yaml 12 | plugins: 13 | - flowman-avro 14 | ``` 15 | -------------------------------------------------------------------------------- /flowman-dist/bin/flowrexec: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | basedir=$(readlink -f "$(dirname "$0")"/..) 4 | source "$basedir"/libexec/flowman-common.sh 5 | 6 | APP_NAME="flowman-kernel-tools" 7 | APP_VERSION="@project.version@" 8 | APP_MAIN="com.dimajix.flowman.tools.rexec.Driver" 9 | 10 | APP_JAR="$APP_NAME-$APP_VERSION.jar" 11 | LIB_JARS="@flowman-kernel-tools.classpath@" 12 | 13 | run_java $APP_JAR $LIB_JARS $APP_MAIN "$@" 14 | -------------------------------------------------------------------------------- /flowman-dist/bin/flowrshell: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | basedir=$(readlink -f "$(dirname "$0")"/..) 4 | source "$basedir"/libexec/flowman-common.sh 5 | 6 | APP_NAME="flowman-kernel-tools" 7 | APP_VERSION="@project.version@" 8 | APP_MAIN="com.dimajix.flowman.tools.rshell.Shell" 9 | 10 | APP_JAR="$APP_NAME-$APP_VERSION.jar" 11 | LIB_JARS="@flowman-kernel-tools.classpath@" 12 | 13 | run_java $APP_JAR $LIB_JARS $APP_MAIN "$@" 14 | -------------------------------------------------------------------------------- /tests/oracle/weather/model/stations.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations: 3 | kind: jdbcTable 4 | description: "The 'stations' table contains meta data on all weather stations" 5 | connection: jdbcConnection 6 | table: stations 7 | schema: 8 | kind: mapping 9 | mapping: stations_conformed 10 | indexes: 11 | - name: "stations_idx" 12 | columns: [usaf, wban] 13 | clustered: true 14 | -------------------------------------------------------------------------------- /tests/postgresql/weather/model/stations.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations: 3 | kind: jdbcTable 4 | description: "The 'stations' table contains meta data on all weather stations" 5 | connection: jdbcConnection 6 | table: stations 7 | schema: 8 | kind: mapping 9 | mapping: stations_conformed 10 | indexes: 11 | - name: "stations_idx" 12 | columns: [usaf, wban] 13 | clustered: true 14 | -------------------------------------------------------------------------------- /docs/plugins/openapi.md: -------------------------------------------------------------------------------- 1 | # OpenAPI Plugin 2 | 3 | The OpenAPI plugin provides compatibility with OpenAPI schema definition files. 
4 | 5 | 6 | ## Provided Entities 7 | * [`openApi` schema](../spec/schema/open-api.md) 8 | 9 | 10 | ## Activation 11 | 12 | The plugin can be easily activated by adding the following section to the [default-namespace.yml](../spec/namespace.md) 13 | ```yaml 14 | plugins: 15 | - flowman-openapi 16 | ``` 17 | -------------------------------------------------------------------------------- /docs/plugins/swagger.md: -------------------------------------------------------------------------------- 1 | # Swagger Plugin 2 | 3 | The Swagger plugin provides compatibility with Swagger schema definition files. 4 | 5 | 6 | ## Provided Entities 7 | * [`swagger` schema](../spec/schema/swagger.md) 8 | 9 | 10 | ## Activation 11 | 12 | The plugin can be easily activated by adding the following section to the [default-namespace.yml](../spec/namespace.md) 13 | ```yaml 14 | plugins: 15 | - flowman-swagger 16 | ``` 17 | -------------------------------------------------------------------------------- /docs/spec/schema/index.md: -------------------------------------------------------------------------------- 1 | # Schemas 2 | 3 | Flowman uses schema definitions at various places, most commonly where relations (data sources and sinks) 4 | are defined. Flowman does not only support inline schema definitions, but also supports various external schema 5 | definitions like Avro, Swagger and JSON Schema. 6 | 7 | 8 | ## Schema Types 9 | ```eval_rst 10 | .. toctree:: 11 | :maxdepth: 1 12 | :glob: 13 | 14 | * 15 | ``` 16 | -------------------------------------------------------------------------------- /tests/sqlserver/bin/run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Check migrations 6 | flowexec -f migrations job build v1 7 | flowexec -f migrations job build v1 8 | flowexec -f migrations job build v2 9 | flowexec -f migrations job build v2 10 | flowexec -f migrations job build v1 11 | 12 | flowexec -f migrations job build test_timestamp 13 | 14 | 15 | # Run weather example 16 | flowexec -f weather job build main --force 17 | -------------------------------------------------------------------------------- /docs/spec/target/template.md: -------------------------------------------------------------------------------- 1 | # Template Target 2 | 3 | ## Example 4 | 5 | ```yaml 6 | targets: 7 | structured_macro: 8 | kind: relation 9 | relation: ${table} 10 | mode: OVERWRITE 11 | 12 | fee: 13 | kind: template 14 | relation: structured_macro 15 | environment: 16 | - table=fee 17 | ``` 18 | 19 | ## Supported Execution Phases 20 | 21 | The supported execution phases are determined by the referenced target. 
22 | -------------------------------------------------------------------------------- /flowman-server-ui/src/plugins/vuetify.js: -------------------------------------------------------------------------------- 1 | import Vue from 'vue'; 2 | import Vuetify from 'vuetify/lib'; 3 | 4 | Vue.use(Vuetify); 5 | 6 | 7 | const vuetify = new Vuetify({ 8 | theme: { 9 | dark: false, 10 | themes: { 11 | light: { 12 | 'brand-blue': '#071f4e', 13 | 'brand-green': '#0C6545', 14 | 'brand-light-blue': '#008AFC' 15 | }, 16 | }, 17 | }, 18 | }); 19 | 20 | export default vuetify; 21 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/model/stations.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations: 3 | kind: sqlserver 4 | description: "The 'stations' table contains meta data on all weather stations" 5 | connection: sql_server 6 | database: dbo 7 | table: stations 8 | schema: 9 | kind: mapping 10 | mapping: stations_conformed 11 | indexes: 12 | - name: "stations_idx" 13 | columns: [usaf, wban] 14 | clustered: true 15 | -------------------------------------------------------------------------------- /examples/weather/README.md: -------------------------------------------------------------------------------- 1 | # Preparing the Environment 2 | 3 | Since we will read from S3, you need some valid S3 credentials 4 | AWS_ACCESS_KEY_ID=your_aws_key 5 | AWS_SECRET_ACCESS_KEY=your_aws_secret 6 | AWS_PROXY_HOST= 7 | AWS_PROXY_PORT= 8 | 9 | # Using flowman 10 | 11 | ## Running the whole project 12 | 13 | flowexec -f examples/weather project run 14 | 15 | ## Executing outputs 16 | 17 | flowexec -f examples/weather target build 18 | -------------------------------------------------------------------------------- /docs/spec/dataset/index.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | Similar to [schemas](../schema/index.md), datasets in Flowman are not primary entities but used at various places 4 | to abstract from specific implementations (like files, Hive tables or even mappings). Depending on the specific kind 5 | the dataset abstraction can support reading and/or writing data. 6 | 7 | ## Dataset Types 8 | ```eval_rst 9 | .. 
toctree:: 10 | :maxdepth: 1 11 | :glob: 12 | 13 | * 14 | ``` 15 | -------------------------------------------------------------------------------- /tests/emr/flow/config/environment.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - basedir=file:///tmp/weather 3 | - srcdir=$System.getenv('WEATHER_SRCDIR', 's3a://dimajix-training/data/weather') 4 | 5 | - secretsManagerValue=$AwsSecretsManager.getSecret('my-test-secret', 'eu-central-1') 6 | - secretsManagerUsername=$AwsSecretsManager.getSecret('my-test-secret', 'username', 'eu-central-1') 7 | - secretsManagerPassword=$AwsSecretsManager.getSecret('my-test-secret', 'password', 'eu-central-1') 8 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/com/dimajix/flowman/report/text/lifecycle-finish.vtl: -------------------------------------------------------------------------------- 1 | ≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡ 2 | ${result.status} lifecycle ${lifecycle} of job '${job.identifier}' (${job.description}) 3 | Total time: ${result.duration} 4 | Finished at: ${result.endTime} 5 | ≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡ 6 | 7 | -------------------------------------------------------------------------------- /tests/quickstart/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | mariadb: 5 | image: mariadb:10.9.3 6 | environment: 7 | - MARIADB_USER=weather 8 | - MARIADB_PASSWORD=weather 9 | - MARIADB_ROOT_PASSWORD=yourStrong(!)Password 10 | - MARIADB_DATABASE=weather 11 | command: 12 | - --sql-mode=STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION,ANSI_QUOTES 13 | ports: 14 | - "3306:3306" 15 | -------------------------------------------------------------------------------- /docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | flowman: 5 | image: dimajix/flowman:${project.version}${flowman.dist.suffix} 6 | command: bash 7 | environment: 8 | - http_proxy=${http_proxy} 9 | - https_proxy=${https_proxy} 10 | - SPARK_MASTER=local[10] 11 | #- AWS_ACCESS_KEY_ID= 12 | #- AWS_SECRET_ACCESS_KEY= 13 | #- S3_ENDPOINT=s3.eu-central-1.amazonaws.com 14 | #- S3_PROXY_HOST= 15 | #- S3_PROXY_PORT=-1 16 | -------------------------------------------------------------------------------- /docs/spec/dataset/mapping.md: -------------------------------------------------------------------------------- 1 | # Mapping Dataset 2 | 3 | A *mapping dataset* represents the records as produced by a named mapping. 
Note that this dataset only supports read 4 | operations, since mapping cannot perform any write operations 5 | 6 | ## Example 7 | ```yaml 8 | kind: mapping 9 | mapping: ${mapping} 10 | ``` 11 | 12 | ## Fields 13 | 14 | * `kind` **(mandatory)** *(type: string)*: `relation` 15 | * `mapping` **(mandatory)** *(type: string)*: Name of the mapping output 16 | -------------------------------------------------------------------------------- /flowman-server-ui/src/views/System.vue: -------------------------------------------------------------------------------- 1 | 15 | 16 | 22 | 23 | 26 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/com/dimajix/flowman/report/text/lifecycle-start.vtl: -------------------------------------------------------------------------------- 1 | ≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡ 2 | Project '${project.name}' version ${project.version} 3 | Lifecycle ${lifecycle} of job '${job.identifier}' (${job.description}) 4 | Started at ${Timestamp.now()} 5 | ≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡ 6 | 7 | -------------------------------------------------------------------------------- /flowman-core/src/main/scala/com/dimajix/flowman/util/FixedPoint.scala: -------------------------------------------------------------------------------- 1 | package com.dimajix.flowman.util 2 | 3 | import scala.annotation.tailrec 4 | 5 | object FixedPoint { 6 | def fix[T <: AnyRef](iter:T => T, stop:(T,T) => Boolean) : T => T = { 7 | @tailrec 8 | def recurse(x:T) : T = { 9 | val r = iter(x) 10 | if (stop(r, x)) 11 | r 12 | else 13 | recurse(r) 14 | } 15 | recurse 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /tests/emr/flow/model/measurements-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements_raw: 3 | kind: file 4 | format: text 5 | location: "${srcdir}/" 6 | pattern: "${year}" 7 | partitions: 8 | - name: year 9 | type: integer 10 | granularity: 1 11 | description: "The year when the measurement was made" 12 | schema: 13 | kind: inline 14 | fields: 15 | - name: raw_data 16 | type: string 17 | description: "Raw measurement data" 18 | -------------------------------------------------------------------------------- /examples/weather/target/measurements.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | # Define build target for measurements 3 | measurements: 4 | # Again, the target is of type "relation" 5 | kind: relation 6 | description: "Write extracted measurements per year" 7 | # Read records from mapping 8 | mapping: measurements_extracted 9 | # ... 
and write them into the relation "measurements" 10 | relation: measurements 11 | # Specify the data partition to be written 12 | partition: 13 | year: $year 14 | -------------------------------------------------------------------------------- /tests/mariadb/weather/model/measurements-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements_raw: 3 | kind: file 4 | format: text 5 | location: "${srcdir}/" 6 | pattern: "${year}" 7 | partitions: 8 | - name: year 9 | type: integer 10 | granularity: 1 11 | description: "The year when the measurement was made" 12 | schema: 13 | kind: inline 14 | fields: 15 | - name: raw_data 16 | type: string 17 | description: "Raw measurement data" 18 | -------------------------------------------------------------------------------- /tests/mysql/weather/model/measurements-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements_raw: 3 | kind: file 4 | format: text 5 | location: "${srcdir}/" 6 | pattern: "${year}" 7 | partitions: 8 | - name: year 9 | type: integer 10 | granularity: 1 11 | description: "The year when the measurement was made" 12 | schema: 13 | kind: inline 14 | fields: 15 | - name: raw_data 16 | type: string 17 | description: "Raw measurement data" 18 | -------------------------------------------------------------------------------- /tests/synapse/flow/model/measurements-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements_raw: 3 | kind: file 4 | format: text 5 | location: "${srcdir}/" 6 | pattern: "${year}" 7 | partitions: 8 | - name: year 9 | type: integer 10 | granularity: 1 11 | description: "The year when the measurement was made" 12 | schema: 13 | kind: inline 14 | fields: 15 | - name: raw_data 16 | type: string 17 | description: "Raw measurement data" 18 | -------------------------------------------------------------------------------- /docker/libexec/flowman-vars.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | export FLOWMAN_HOME=${FLOWMAN_HOME=/opt/flowman} 5 | export FLOWMAN_CONF_DIR=${FLOWMAN_CONF_DIR=$FLOWMAN_HOME/conf} 6 | 7 | export FLOWMAN_LOGDB_DRIVER=${FLOWMAN_LOGDB_DRIVER="org.apache.derby.jdbc.EmbeddedDriver"} 8 | export FLOWMAN_LOGDB_URL=${FLOWMAN_LOGDB_URL="jdbc:derby:${FLOWMAN_HOME}/var/history;create=true"} 9 | export FLOWMAN_LOGDB_USER=${FLOWMAN_LOGDB_USER=""} 10 | export FLOWMAN_LOGDB_PASSWORD=${FLOWMAN_LOGDB_PASSWORD=""} 11 | -------------------------------------------------------------------------------- /flowman-server-ui/README.md: -------------------------------------------------------------------------------- 1 | # petapp 2 | 3 | ## Project setup 4 | ``` 5 | npm install 6 | ``` 7 | 8 | ### Compiles and hot-reloads for development 9 | ``` 10 | npm run serve 11 | ``` 12 | 13 | ### Compiles and minifies for production 14 | ``` 15 | npm run build 16 | ``` 17 | 18 | ### Run your tests 19 | ``` 20 | npm run test 21 | ``` 22 | 23 | ### Lints and fixes files 24 | ``` 25 | npm run lint 26 | ``` 27 | 28 | ### Customize configuration 29 | See [Configuration Reference](https://cli.vuejs.org/config/). 
30 | -------------------------------------------------------------------------------- /flowman-server-ui/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | root: true, 3 | env: { 4 | node: true 5 | }, 6 | 'extends': [ 7 | 'plugin:vue/essential', 8 | 'eslint:recommended' 9 | ], 10 | rules: { 11 | 'no-console': process.env.NODE_ENV === 'production' ? 'error' : 'off', 12 | 'no-debugger': process.env.NODE_ENV === 'production' ? 'error' : 'off', 13 | 'vue/valid-v-slot': ['error', { allowModifiers: true }], 14 | }, 15 | parserOptions: { 16 | parser: 'babel-eslint' 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /flowman-server-ui/src/main.js: -------------------------------------------------------------------------------- 1 | import Vue from 'vue' 2 | import '@/plugins/axios' 3 | import vuetify from '@/plugins/vuetify' 4 | import Api from '@/services/api' 5 | import App from '@/App.vue' 6 | import VueApexCharts from 'vue-apexcharts' 7 | import router from '@/router' 8 | 9 | Vue.config.productionTip = false 10 | 11 | Vue.use(Api) 12 | Vue.use(VueApexCharts) 13 | 14 | Vue.component('apexchart', VueApexCharts) 15 | 16 | new Vue({ 17 | vuetify, 18 | router, 19 | render: h => h(App) 20 | }).$mount('#app') 21 | -------------------------------------------------------------------------------- /tests/sqlserver/docker/setup-db.sh: -------------------------------------------------------------------------------- 1 | #run the setup script to create the DB and the schema in the DB 2 | #do this in a loop because the timing for when the SQL instance is ready is indeterminate 3 | for i in {1..50}; 4 | do 5 | /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P $SA_PASSWORD -d master -Q "CREATE DATABASE $DB_NAME" 6 | if [ $? -eq 0 ] 7 | then 8 | echo "setup.sql completed" 9 | break 10 | else 11 | echo "not ready yet..." 12 | sleep 1 13 | fi 14 | done 15 | -------------------------------------------------------------------------------- /examples/weather/target/stations.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | # Define a build target "stations"... 3 | stations: 4 | # ... which builds a relation 5 | kind: relation 6 | # ... by reading the result from the mapping "stations_raw" 7 | mapping: stations_raw 8 | # ... 
and by writing the records to the relation "stations" 9 | relation: stations 10 | 11 | # Add some documentation to the build target 12 | documentation: 13 | description: "This build target is used to write the weather stations" 14 | -------------------------------------------------------------------------------- /tests/mysql/weather/config/environment.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - basedir=file:///tmp/weather 3 | - srcdir=$System.getenv('WEATHER_SRCDIR', 's3a://dimajix-training/data/weather') 4 | 5 | - jdbc_driver=$System.getenv('JDBC_DRIVER', 'com.mysql.cj.jdbc.Driver') 6 | - jdbc_url=$System.getenv('JDBC_URL', 'jdbc:mysql://localhost') 7 | - jdbc_db=$System.getenv('JDBC_DB', 'weather') 8 | - jdbc_username=$System.getenv('JDBC_USERNAME', 'flowman') 9 | - jdbc_password=$System.getenv('JDBC_PASSWORD' ,'yourStrong(!)Password') 10 | -------------------------------------------------------------------------------- /docs/spec/target/copyFile.md: -------------------------------------------------------------------------------- 1 | # CopyFile Target 2 | 3 | ## Fields 4 | 5 | * `kind` **(mandatory)** *(type: string)*: `copyFile` 6 | 7 | * `description` **(optional)** *(type: string)*: 8 | Optional descriptive text of the build target 9 | 10 | * `source` **(mandatory)** *(type: string)*: 11 | * `target` **(mandatory)** *(type: string)*: 12 | 13 | 14 | ## Supported Execution Phases 15 | * `BUILD` 16 | * `VERIFY` 17 | * `TRUNCATE` 18 | * `DESTROY` 19 | 20 | Read more about [execution phases](../../concepts/lifecycle.md). 21 | -------------------------------------------------------------------------------- /examples/weather/model/stations.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations: 3 | kind: file 4 | description: "The 'stations' table contains meta data on all weather stations" 5 | format: parquet 6 | location: "$basedir/stations/" 7 | # Use an explicit schema stored in an external file 8 | schema: 9 | kind: avro 10 | file: "${project.basedir}/schema/stations.avsc" 11 | 12 | documentation: 13 | checks: 14 | kind: primaryKey 15 | columns: 16 | - usaf 17 | - wban 18 | -------------------------------------------------------------------------------- /tests/mariadb/weather/config/environment.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - basedir=file:///tmp/weather 3 | - srcdir=$System.getenv('WEATHER_SRCDIR', 's3a://dimajix-training/data/weather') 4 | 5 | - jdbc_driver=$System.getenv('JDBC_DRIVER', 'org.mariadb.jdbc.Driver') 6 | - jdbc_url=$System.getenv('JDBC_URL', 'jdbc:mariadb://localhost') 7 | - jdbc_db=$System.getenv('JDBC_DB', 'weather') 8 | - jdbc_username=$System.getenv('JDBC_USERNAME', 'flowman') 9 | - jdbc_password=$System.getenv('JDBC_PASSWORD' ,'yourStrong(!)Password') 10 | -------------------------------------------------------------------------------- /tests/emr/flow/documentation.yml: -------------------------------------------------------------------------------- 1 | collectors: 2 | # Collect documentation of relations 3 | - kind: relations 4 | # Collect documentation of mappings 5 | - kind: mappings 6 | # Collect documentation of build targets 7 | - kind: targets 8 | # Collect lineage of all columns 9 | - kind: lineage 10 | # Execute all checks 11 | - kind: checks 12 | 13 | generators: 14 | # Create an output file in the project directory 15 | - kind: file 16 | location: 
${project.basedir}/generated-documentation 17 | template: html 18 | -------------------------------------------------------------------------------- /tests/synapse/flow/documentation.yml: -------------------------------------------------------------------------------- 1 | collectors: 2 | # Collect documentation of relations 3 | - kind: relations 4 | # Collect documentation of mappings 5 | - kind: mappings 6 | # Collect documentation of build targets 7 | - kind: targets 8 | # Collect lineage of all columns 9 | - kind: lineage 10 | # Execute all checks 11 | - kind: checks 12 | 13 | generators: 14 | # Create an output file in the project directory 15 | - kind: file 16 | location: ${project.basedir}/generated-documentation 17 | template: html 18 | -------------------------------------------------------------------------------- /tests/mariadb/weather/mapping/aggregates.yml: -------------------------------------------------------------------------------- 1 | mappings: 2 | # Create some aggregates containing min/max/avg metrics of wind speed and temperature 3 | aggregates: 4 | kind: aggregate 5 | input: facts 6 | dimensions: 7 | - country 8 | aggregations: 9 | min_wind_speed: "MIN(wind_speed)" 10 | max_wind_speed: "MAX(wind_speed)" 11 | avg_wind_speed: "AVG(wind_speed)" 12 | min_temperature: "MIN(air_temperature)" 13 | max_temperature: "MAX(air_temperature)" 14 | avg_temperature: "AVG(air_temperature)" 15 | -------------------------------------------------------------------------------- /tests/mysql/weather/mapping/aggregates.yml: -------------------------------------------------------------------------------- 1 | mappings: 2 | # Create some aggregates containing min/max/avg metrics of wind speed and temperature 3 | aggregates: 4 | kind: aggregate 5 | input: facts 6 | dimensions: 7 | - country 8 | aggregations: 9 | min_wind_speed: "MIN(wind_speed)" 10 | max_wind_speed: "MAX(wind_speed)" 11 | avg_wind_speed: "AVG(wind_speed)" 12 | min_temperature: "MIN(air_temperature)" 13 | max_temperature: "MAX(air_temperature)" 14 | avg_temperature: "AVG(air_temperature)" 15 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/config/environment.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - basedir=file:///tmp/weather 3 | - srcdir=$System.getenv('WEATHER_SRCDIR', 's3a://dimajix-training/data/weather') 4 | 5 | - jdbc_driver=$System.getenv('JDBC_DRIVER', 'com.microsoft.sqlserver.jdbc.SQLServerDriver') 6 | - jdbc_url=$System.getenv('JDBC_URL', 'jdbc:sqlserver://localhost:1433') 7 | - jdbc_db=$System.getenv('JDBC_DB', 'flowman') 8 | - jdbc_username=$System.getenv('JDBC_USERNAME', 'sa') 9 | - jdbc_password=$System.getenv('JDBC_PASSWORD' ,'yourStrong(!)Password') 10 | -------------------------------------------------------------------------------- /flowman-kernel-api/src/main/proto/documentation.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | option java_multiple_files = true; 4 | option java_package = "com.dimajix.flowman.kernel.proto.documentation"; 5 | option java_outer_classname = "DocumentationProto"; 6 | 7 | package com.dimajix.flowman.kernel.documentation; 8 | 9 | 10 | import "common.proto"; 11 | 12 | 13 | message GenerateDocumentationRequest { 14 | string sessionId = 1; 15 | JobIdentifier job = 2; 16 | map arguments = 3; 17 | } 18 | message GenerateDocumentationResponse { 19 | } 20 | 
-------------------------------------------------------------------------------- /docs/spec/assertion/index.md: -------------------------------------------------------------------------------- 1 | # Assertions 2 | 3 | Assertions are an important building block of test cases and validation/verification steps. Assertions are not a top 4 | level entity like [targets](../target/index.md), [mappings](../mapping/index.md), [job](../job/index.md), etc... Instead, 5 | assertions are used as part of [tests](../test/index.md) and [validation](../target/validate.md) and 6 | [verification](../target/verify.md) targets. 7 | 8 | ## Assertion Types 9 | ```eval_rst 10 | .. toctree:: 11 | :maxdepth: 1 12 | :glob: 13 | 14 | * 15 | ``` 16 | -------------------------------------------------------------------------------- /tests/synapse/flow/config/connections.yml: -------------------------------------------------------------------------------- 1 | connections: 2 | mariadb: 3 | driver: "$jdbc_driver" 4 | url: "$jdbc_url" 5 | username: "$jdbc_username" 6 | password: "$jdbc_password" 7 | properties: 8 | # We need to specify the database name already as part of connection, otherwise the login won't work 9 | databaseName: "$jdbc_db" 10 | encrypt : "true" 11 | trustServerCertificate: "false" 12 | hostNameInCertificate: "*.sql.azuresynapse.net" 13 | loginTimeout: "30" 14 | Authentication: "ActiveDirectoryIntegrated" 15 | -------------------------------------------------------------------------------- /flowman-core/src/main/java/com/dimajix/flowman/annotation/TemplateObject.java: -------------------------------------------------------------------------------- 1 | package com.dimajix.flowman.annotation; 2 | 3 | import java.lang.annotation.ElementType; 4 | import java.lang.annotation.Retention; 5 | import java.lang.annotation.RetentionPolicy; 6 | import java.lang.annotation.Target; 7 | 8 | @Retention(RetentionPolicy.RUNTIME) 9 | @Target({ElementType.TYPE}) 10 | public @interface TemplateObject { 11 | /** 12 | * Specifies the name under which the object will be available in template scripts 13 | * @return 14 | */ 15 | String name(); 16 | } 17 | -------------------------------------------------------------------------------- /docs/plugins/azure.md: -------------------------------------------------------------------------------- 1 | # Azure Plugin 2 | 3 | The Azure plugin mainly provides the ADLS2 (Azure Data Lake file system) and ABS (Azure Blob Storage) to be used 4 | as the storage layer. 5 | 6 | 7 | ## Provided Template Functions 8 | Since Flowman version 1.1.0, the plugin also provides the templating function `AzureKeyVault.getSecret` to access Azure 9 | Key Vaults. 
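As an illustration of how this templating function can be used, secrets can be pulled into the project environment at evaluation time. The sketch below follows the synapse test configuration found elsewhere in this repository; the vault and secret names are placeholders:

```yaml
environment:
  # Read a secret from an Azure Key Vault (arguments: vault name, secret name)
  - my_secret=$AzureKeyVault.getSecret('my-vault', 'my-secret')
```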
10 | 11 | 12 | ## Activation 13 | 14 | The plugin can be easily activated by adding the following section to the [default-namespace.yml](../spec/namespace.md) 15 | ```yaml 16 | plugins: 17 | - flowman-azure 18 | ``` 19 | -------------------------------------------------------------------------------- /tests/synapse/flow/target/measurements.yml: -------------------------------------------------------------------------------- 1 | targets: 2 | measurements: 3 | kind: relation 4 | mapping: measurements_extracted 5 | relation: measurements 6 | partition: 7 | year: $year 8 | 9 | measurements_jdbc: 10 | kind: relation 11 | mapping: measurements_deduplicated 12 | relation: measurements_jdbc 13 | partition: 14 | year: $year 15 | 16 | measurements_delta: 17 | kind: merge 18 | mapping: measurements_deduplicated 19 | relation: measurements_delta 20 | clauses: 21 | - action: insert 22 | - action: update 23 | -------------------------------------------------------------------------------- /docs/spelling.txt: -------------------------------------------------------------------------------- 1 | Avro 2 | blackhole 3 | cloudera 4 | CopyFile 5 | DataFrame 6 | de 7 | deduplicate 8 | DTDs 9 | emr 10 | Flowman 11 | flowman-mariadb 12 | flowman-mssqlserver 13 | flowman-mysql 14 | flowman-oracle 15 | flowman-postgresql 16 | GetFile 17 | HDFS 18 | JAAS 19 | JARs 20 | kerberized 21 | keytab 22 | kv 23 | lang 24 | Liquibase 25 | metastore 26 | migratable 27 | nonmigratable 28 | nullability 29 | OLAP 30 | OpenAPI 31 | postconditions 32 | repartition 33 | repartitioned 34 | scoverage 35 | Schuhe 36 | subgraph 37 | transactionally 38 | Trino 39 | upsert 40 | WBAN 41 | Winutils 42 | -------------------------------------------------------------------------------- /flowman-spec/src/main/jackson-2.6/com/fasterxml/jackson/databind/introspect/AnnotatedClassResolver.java: -------------------------------------------------------------------------------- 1 | package com.fasterxml.jackson.databind.introspect; 2 | 3 | import com.fasterxml.jackson.databind.JavaType; 4 | import com.fasterxml.jackson.databind.cfg.MapperConfig; 5 | import scala.NotImplementedError; 6 | 7 | 8 | public class AnnotatedClassResolver { 9 | public static AnnotatedClass resolve(MapperConfig config, JavaType forType, 10 | ClassIntrospector.MixInResolver r) { 11 | throw new NotImplementedError(); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/model/measurements.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements: 3 | kind: sqlserver 4 | connection: sql_server 5 | database: dbo 6 | table: measurement 7 | schema: 8 | kind: spark 9 | file: "${project.basedir}/schema/measurements.json" 10 | # Add partition column 11 | partitions: 12 | - name: year 13 | type: integer 14 | granularity: 1 15 | primaryKey: 16 | - usaf 17 | - wban 18 | - date 19 | - time 20 | indexes: 21 | - name: "measurement_idx0" 22 | columns: [usaf, wban] 23 | -------------------------------------------------------------------------------- /tests/mysql/weather/job/main.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | # Define the 'main' job, which implicitly is used whenever you build the whole project 3 | main: 4 | # Add a parameter for selecting the year to process. 
This will create an environment variable `$year` which 5 | # can be accessed from within other entities like mappings, relations, etc 6 | parameters: 7 | - name: year 8 | type: Integer 9 | default: 2013 10 | # List all targets which should be built as part of the `main` job 11 | targets: 12 | - measurements 13 | - stations 14 | - aggregates 15 | -------------------------------------------------------------------------------- /tests/oracle/weather/job/main.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | # Define the 'main' job, which implicitly is used whenever you build the whole project 3 | main: 4 | # Add a parameter for selecting the year to process. This will create an environment variable `$year` which 5 | # can be accessed from within other entities like mappings, relations, etc 6 | parameters: 7 | - name: year 8 | type: Integer 9 | default: 2013 10 | # List all targets which should be built as part of the `main` job 11 | targets: 12 | - measurements 13 | - stations 14 | - aggregates 15 | -------------------------------------------------------------------------------- /tests/mariadb/weather/job/main.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | # Define the 'main' job, which implicitly is used whenever you build the whole project 3 | main: 4 | # Add a parameter for selecting the year to process. This will create an environment variable `$year` which 5 | # can be accessed from within other entities like mappings, relations, etc 6 | parameters: 7 | - name: year 8 | type: Integer 9 | default: 2013 10 | # List all targets which should be built as part of the `main` job 11 | targets: 12 | - measurements 13 | - stations 14 | - aggregates 15 | -------------------------------------------------------------------------------- /tests/postgresql/weather/job/main.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | # Define the 'main' job, which implicitly is used whenever you build the whole project 3 | main: 4 | # Add a parameter for selecting the year to process. This will create an environment variable `$year` which 5 | # can be accessed from within other entities like mappings, relations, etc 6 | parameters: 7 | - name: year 8 | type: Integer 9 | default: 2013 10 | # List all targets which should be built as part of the `main` job 11 | targets: 12 | - measurements 13 | - stations 14 | - aggregates 15 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/job/main.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | # Define the 'main' job, which implicitly is used whenever you build the whole project 3 | main: 4 | # Add a parameter for selecting the year to process. 
This will create an environment variable `$year` which 5 | # can be accessed from within other entities like mappings, relations, etc 6 | parameters: 7 | - name: year 8 | type: Integer 9 | default: 2013 10 | # List all targets which should be built as part of the `main` job 11 | targets: 12 | - measurements 13 | - stations 14 | - aggregates 15 | -------------------------------------------------------------------------------- /docs/spec/assertion/unique-key.md: -------------------------------------------------------------------------------- 1 | # Unique Key Assertion 2 | 3 | ## Example: 4 | 5 | ```yaml 6 | kind: uniqueKey 7 | mapping: some_mapping 8 | key: id 9 | ``` 10 | 11 | ```yaml 12 | kind: uniqueKey 13 | mapping: exchange_rates 14 | key: 15 | - date 16 | - from_currency 17 | - to_currency 18 | ``` 19 | 20 | ## Fields 21 | 22 | * `kind` **(mandatory)** *(type: string)*: `uniqueKey` 23 | 24 | * `description` **(optional)** *(type: string)*: 25 | A textual description of the assertion 26 | 27 | * `mapping` **(required)** *(type: string)*: 28 | The name of the mapping which is to be tested. 29 | -------------------------------------------------------------------------------- /flowman-spec/src/main/resources/com/dimajix/flowman/report/text/job-finish.vtl: -------------------------------------------------------------------------------- 1 | ====================================================================================================================== 2 | ${result.status} ${result.phase} of job '${job.identifier}' (${job.description}) 3 | Execution Metrics: 4 | #foreach($metric in ${metrics}) ${metric.name} ${metric.labels} ${metric.value} 5 | #end 6 | 7 | Total time: ${result.duration} 8 | Finished at: ${result.endTime} 9 | ====================================================================================================================== 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/oracle/weather/model/measurements.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements: 3 | kind: jdbcTable 4 | connection: jdbcConnection 5 | table: measurement 6 | stagingTable: measurement_staging 7 | schema: 8 | kind: spark 9 | file: "${project.basedir}/schema/measurements.json" 10 | # Add partition column 11 | partitions: 12 | - name: year 13 | type: integer 14 | granularity: 1 15 | primaryKey: 16 | - usaf 17 | - wban 18 | - date 19 | - time 20 | indexes: 21 | - name: "measurement_idx0" 22 | columns: [usaf, wban] 23 | -------------------------------------------------------------------------------- /docs/documenting/targets.md: -------------------------------------------------------------------------------- 1 | # Documenting Targets 2 | 3 | Flowman also supports documenting build targets. There aren't many options or properties, since targets do not represent 4 | any data or transformations themselves. Documenting them mainly serves to complete a technical reference for 5 | developers. 
6 | 7 | ## Example 8 | 9 | ```yaml 10 | targets: 11 | stations: 12 | kind: relation 13 | description: "Write stations" 14 | mapping: stations_raw 15 | relation: stations 16 | 17 | documentation: 18 | description: "This build target is used to write the weather stations" 19 | ``` 20 | -------------------------------------------------------------------------------- /tests/postgresql/weather/model/measurements.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements: 3 | kind: jdbcTable 4 | connection: jdbcConnection 5 | table: measurement 6 | stagingTable: measurement_staging 7 | schema: 8 | kind: spark 9 | file: "${project.basedir}/schema/measurements.json" 10 | # Add partition column 11 | partitions: 12 | - name: year 13 | type: integer 14 | granularity: 1 15 | primaryKey: 16 | - usaf 17 | - wban 18 | - date 19 | - time 20 | indexes: 21 | - name: "measurement_idx0" 22 | columns: [usaf, wban] 23 | -------------------------------------------------------------------------------- /tests/mysql/docker/mysql/my.cnf: -------------------------------------------------------------------------------- 1 | [mysqld] 2 | key_buffer_size = 32768M 3 | #max_allowed_packet = 2M 4 | #table_open_cache = 128 5 | #sort_buffer_size = 1024K 6 | #net_buffer_length = 64K 7 | #read_buffer_size = 1024K 8 | #read_rnd_buffer_size = 1024K 9 | 10 | interactive_timeout = 300 11 | wait_timeout = 300 12 | 13 | # Added values after load testing 14 | thread_cache_size = 8 15 | #tmp_table_size = 256M 16 | #max_heap_table_size = 256M 17 | #table_cache = 512 18 | #join_buffer_size = 512 19 | 20 | innodb_buffer_pool_size=1024M 21 | #innodb_file_per_table 22 | #innodb_log_file_size=250M 23 | innodb_log_buffer_size=16M 24 | -------------------------------------------------------------------------------- /licenses/LICENSE-json-schema.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2011 Everit Kft. (http://www.everit.org) 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | 7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 8 | -------------------------------------------------------------------------------- /docs/plugins/aws.md: -------------------------------------------------------------------------------- 1 | # AWS Plugin 2 | 3 | The AWS plugin does not provide new entity types to Flowman, but will provide compatibility with the S3 object 4 | store to be usable as a data source or sink via the `s3a` file system. 5 | 6 | 7 | ## Provided Template Functions 8 | Since Flowman version 1.1.0, the plugin also provides the templating function `AwsSecretsManager.getSecret` to access the 9 | AWS Secrets Manager. 
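For illustration, the function can be referenced when defining environment variables, mirroring the EMR test configuration found elsewhere in this repository; the secret name, key and region below are placeholders:

```yaml
environment:
  # Read the complete secret value (arguments: secret name, region)
  - db_secret=$AwsSecretsManager.getSecret('my-secret', 'eu-central-1')
  # Read a single key of the secret (arguments: secret name, key, region)
  - db_password=$AwsSecretsManager.getSecret('my-secret', 'password', 'eu-central-1')
```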
10 | 11 | 12 | ## Activation 13 | 14 | The plugin can be easily activated by adding the following section to the [default-namespace.yml](../spec/namespace.md) 15 | ```yaml 16 | plugins: 17 | - flowman-aws 18 | ``` 19 | -------------------------------------------------------------------------------- /tests/mysql/weather/model/measurements.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements: 3 | kind: jdbcTable 4 | connection: mysql 5 | database: $jdbc_db 6 | table: measurement 7 | stagingTable: measurement_staging 8 | schema: 9 | kind: spark 10 | file: "${project.basedir}/schema/measurements.json" 11 | # Add partition column 12 | partitions: 13 | - name: year 14 | type: integer 15 | granularity: 1 16 | primaryKey: 17 | - usaf 18 | - wban 19 | - date 20 | - time 21 | indexes: 22 | - name: "measurement_idx0" 23 | columns: [usaf, wban] 24 | -------------------------------------------------------------------------------- /docs/spec/target/deleteFile.md: -------------------------------------------------------------------------------- 1 | # Delete File Target 2 | 3 | ## Example 4 | 5 | ```yaml 6 | targets: 7 | deleteTempFiles: 8 | kind: deleteFile 9 | location: hdfs:///tmp/my-location 10 | ``` 11 | 12 | ## Fields 13 | 14 | * `kind` **(mandatory)** *(type: string)*: `deleteFile` 15 | 16 | * `description` **(optional)** *(type: string)*: 17 | Optional descriptive text of the build target 18 | 19 | * `location` **(mandatory)** *(type: string)*: 20 | 21 | 22 | ## Supported Execution Phases 23 | * `BUILD` - This will remove the specified location 24 | 25 | Read more about [execution phases](../../concepts/lifecycle.md). 26 | -------------------------------------------------------------------------------- /tests/emr/flow/mapping/stations.yml: -------------------------------------------------------------------------------- 1 | mappings: 2 | # This mapping refers to the "raw" relation and reads in data from the source in S3 3 | stations_raw: 4 | kind: relation 5 | relation: stations_raw 6 | 7 | stations_conformed: 8 | kind: cast 9 | input: stations_raw 10 | columns: 11 | usaf: CHAR(6) 12 | wban: CHAR(5) 13 | name: VARCHAR(64) 14 | country: CHAR(2) 15 | state: VARCHAR(32) 16 | icao: VARCHAR(32) 17 | 18 | # This mapping refers to the Parquet relation and reads in data from the local file system 19 | stations: 20 | kind: relation 21 | relation: stations 22 | -------------------------------------------------------------------------------- /tests/mariadb/weather/model/measurements.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements: 3 | kind: jdbcTable 4 | connection: mariadb 5 | database: $jdbc_db 6 | table: measurement 7 | stagingTable: measurement_staging 8 | schema: 9 | kind: spark 10 | file: "${project.basedir}/schema/measurements.json" 11 | # Add partition column 12 | partitions: 13 | - name: year 14 | type: integer 15 | granularity: 1 16 | primaryKey: 17 | - usaf 18 | - wban 19 | - date 20 | - time 21 | indexes: 22 | - name: "measurement_idx0" 23 | columns: [usaf, wban] 24 | -------------------------------------------------------------------------------- /docs/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "flowman-docs" 3 | version = "1.0.0" 4 | description = "Documentation of Flowman" 5 | authors = ["The Flowman Authors "] 6 | license = "Apache License 2.0" 7 | package-mode = false 8 | 9 | 
[tool.poetry.dependencies] 10 | furo = "^2024.1.29" 11 | myst-parser = "^2.0.0" 12 | python = "^3.10" 13 | recommonmark = "^0.7.1" 14 | Sphinx = "^7.2.6" 15 | sphinx-markdown-tables = "^0.0.17" 16 | sphinxcontrib-mermaid = "^0.9.2" 17 | 18 | [tool.poetry2conda] 19 | name = "flowman-docs" 20 | 21 | [build-system] 22 | requires = ["poetry-core"] 23 | build-backend = "poetry.core.masonry.api" 24 | -------------------------------------------------------------------------------- /flowman-spark-extensions/src/main/spark-3.3/org/apache/spark/sql/catalyst/expressions/IfNull.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.catalyst.expressions 2 | 3 | case class IfNull(left: Expression, right: Expression, replacement: Expression) 4 | extends RuntimeReplaceable with InheritAnalysisRules { 5 | 6 | def this(left: Expression, right: Expression) = { 7 | this(left, right, Coalesce(Seq(left, right))) 8 | } 9 | 10 | override def parameters: Seq[Expression] = Seq(left, right) 11 | 12 | override protected def withNewChildInternal(newChild: Expression): IfNull = 13 | copy(replacement = newChild) 14 | } 15 | -------------------------------------------------------------------------------- /flowman-spark-extensions/src/main/spark-3.4/org/apache/spark/sql/catalyst/expressions/IfNull.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.catalyst.expressions 2 | 3 | case class IfNull(left: Expression, right: Expression, replacement: Expression) 4 | extends RuntimeReplaceable with InheritAnalysisRules { 5 | 6 | def this(left: Expression, right: Expression) = { 7 | this(left, right, Coalesce(Seq(left, right))) 8 | } 9 | 10 | override def parameters: Seq[Expression] = Seq(left, right) 11 | 12 | override protected def withNewChildInternal(newChild: Expression): IfNull = 13 | copy(replacement = newChild) 14 | } 15 | -------------------------------------------------------------------------------- /flowman-spark-extensions/src/main/spark-3.5/org/apache/spark/sql/catalyst/expressions/IfNull.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.catalyst.expressions 2 | 3 | case class IfNull(left: Expression, right: Expression, replacement: Expression) 4 | extends RuntimeReplaceable with InheritAnalysisRules { 5 | 6 | def this(left: Expression, right: Expression) = { 7 | this(left, right, Coalesce(Seq(left, right))) 8 | } 9 | 10 | override def parameters: Seq[Expression] = Seq(left, right) 11 | 12 | override protected def withNewChildInternal(newChild: Expression): IfNull = 13 | copy(replacement = newChild) 14 | } 15 | -------------------------------------------------------------------------------- /tests/synapse/flow/mapping/stations.yml: -------------------------------------------------------------------------------- 1 | mappings: 2 | # This mapping refers to the "raw" relation and reads in data from the source in S3 3 | stations_raw: 4 | kind: relation 5 | relation: stations_raw 6 | 7 | stations_conformed: 8 | kind: cast 9 | input: stations_raw 10 | columns: 11 | usaf: CHAR(6) 12 | wban: CHAR(5) 13 | name: VARCHAR(64) 14 | country: CHAR(2) 15 | state: VARCHAR(32) 16 | icao: VARCHAR(32) 17 | 18 | # This mapping refers to the Parquet relation and reads in data from the local file system 19 | stations: 20 | kind: relation 21 | relation: stations 22 | 
-------------------------------------------------------------------------------- /tests/mariadb/weather/mapping/stations.yml: -------------------------------------------------------------------------------- 1 | mappings: 2 | # This mapping refers to the "raw" relation and reads in data from the source in S3 3 | stations_raw: 4 | kind: relation 5 | relation: stations_raw 6 | 7 | stations_conformed: 8 | kind: cast 9 | input: stations_raw 10 | columns: 11 | usaf: CHAR(6) 12 | wban: CHAR(5) 13 | name: VARCHAR(64) 14 | country: CHAR(2) 15 | state: VARCHAR(32) 16 | icao: VARCHAR(32) 17 | 18 | # This mapping refers to the Parquet relation and reads in data from the local file system 19 | stations: 20 | kind: relation 21 | relation: stations 22 | -------------------------------------------------------------------------------- /tests/mysql/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Get current Flowman version 6 | FLOWMAN_TAG=$(mvn -f ../.. -q -N help:evaluate -Dexpression=flowman.dist.label -DforceStdout) 7 | docker image tag dimajix/flowman:$FLOWMAN_TAG flowman-it-mysql:latest 8 | 9 | # Make projects world readable 10 | find . -type f | xargs chmod a+r 11 | find . -type d | xargs chmod a+rx 12 | find bin -type f | xargs chmod a+rx 13 | 14 | # Start database 15 | docker-compose up -d mysql 16 | 17 | # Run tests 18 | docker-compose run --rm flowman bin/run-tests.sh 19 | 20 | # Clean up 21 | docker-compose down 22 | docker image rm flowman-it-mysql:latest 23 | -------------------------------------------------------------------------------- /tests/mysql/weather/mapping/stations.yml: -------------------------------------------------------------------------------- 1 | mappings: 2 | # This mapping refers to the "raw" relation and reads in data from the source in S3 3 | stations_raw: 4 | kind: relation 5 | relation: stations_raw 6 | 7 | stations_conformed: 8 | kind: cast 9 | input: stations_raw 10 | columns: 11 | usaf: CHAR(6) 12 | wban: CHAR(5) 13 | name: VARCHAR(64) 14 | country: CHAR(2) 15 | state: VARCHAR(32) 16 | icao: VARCHAR(32) 17 | 18 | # This mapping refers to the Parquet relation and reads in data from the local file system 19 | stations: 20 | kind: relation 21 | relation: stations 22 | -------------------------------------------------------------------------------- /tests/oracle/weather/mapping/stations.yml: -------------------------------------------------------------------------------- 1 | mappings: 2 | # This mapping refers to the "raw" relation and reads in data from the source in S3 3 | stations_raw: 4 | kind: relation 5 | relation: stations_raw 6 | 7 | stations_conformed: 8 | kind: cast 9 | input: stations_raw 10 | columns: 11 | usaf: CHAR(6) 12 | wban: CHAR(5) 13 | name: VARCHAR(64) 14 | country: CHAR(2) 15 | state: VARCHAR(32) 16 | icao: VARCHAR(32) 17 | 18 | # This mapping refers to the Parquet relation and reads in data from the local file system 19 | stations: 20 | kind: relation 21 | relation: stations 22 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/mapping/stations.yml: -------------------------------------------------------------------------------- 1 | mappings: 2 | # This mapping refers to the "raw" relation and reads in data from the source in S3 3 | stations_raw: 4 | kind: relation 5 | relation: stations_raw 6 | 7 | stations_conformed: 8 | kind: cast 9 | input: stations_raw 10 | columns: 11 | usaf: CHAR(6) 12 | wban: 
CHAR(5) 13 | name: VARCHAR(64) 14 | country: CHAR(2) 15 | state: VARCHAR(32) 16 | icao: VARCHAR(32) 17 | 18 | # This mapping refers to the Parquet relation and reads in data from the local file system 19 | stations: 20 | kind: relation 21 | relation: stations 22 | -------------------------------------------------------------------------------- /licenses/LICENSE-swagger.txt: -------------------------------------------------------------------------------- 1 | Copyright 2017 SmartBear Software 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at [apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0) 6 | 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | -------------------------------------------------------------------------------- /tests/oracle/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Get current Flowman version 6 | FLOWMAN_TAG=$(mvn -f ../.. -q -N help:evaluate -Dexpression=flowman.dist.label -DforceStdout) 7 | docker image tag dimajix/flowman:$FLOWMAN_TAG flowman-it-oracle:latest 8 | 9 | # Make projects world readable 10 | find . -type f | xargs chmod a+r 11 | find . -type d | xargs chmod a+rx 12 | find bin -type f | xargs chmod a+rx 13 | 14 | # Start database 15 | docker-compose up -d oracle 16 | 17 | # Run tests 18 | docker-compose run --rm flowman bin/run-tests.sh 19 | 20 | # Clean up 21 | docker-compose down 22 | docker image rm flowman-it-oracle:latest 23 | -------------------------------------------------------------------------------- /tests/postgresql/weather/mapping/stations.yml: -------------------------------------------------------------------------------- 1 | mappings: 2 | # This mapping refers to the "raw" relation and reads in data from the source in S3 3 | stations_raw: 4 | kind: relation 5 | relation: stations_raw 6 | 7 | stations_conformed: 8 | kind: cast 9 | input: stations_raw 10 | columns: 11 | usaf: CHAR(6) 12 | wban: CHAR(5) 13 | name: VARCHAR(64) 14 | country: CHAR(2) 15 | state: VARCHAR(32) 16 | icao: VARCHAR(32) 17 | 18 | # This mapping refers to the Parquet relation and reads in data from the local file system 19 | stations: 20 | kind: relation 21 | relation: stations 22 | -------------------------------------------------------------------------------- /docs/spec/schema/spark.md: -------------------------------------------------------------------------------- 1 | # Spark Schema 2 | 3 | The *Spark schema* refers to a schema produced by Apache Spark. 4 | 5 | ## Example 6 | ```yaml 7 | kind: spark 8 | file: "${project.basedir}/test/data/results/${relation}/schema.json" 9 | ``` 10 | 11 | ## Fields 12 | * `kind` **(mandatory)** *(type: string)*: `spark` 13 | * `file` **(optional)** *(type: string)*: 14 | Specifies the path of a schema file. 15 | * `url` **(optional)** *(type: string)*: 16 | Specifies the URL of a schema. 17 | * `spec` **(optional)** *(type: string)*: 18 | Specifies the schema itself as an embedded string 19 | 20 | Note that you can only use one of `file`, `url` or `spec`. 
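The example above uses `file`; as a sketch of one alternative, the same schema document can be referenced by URL (the address below is purely illustrative), while `spec` would instead embed that JSON document directly as a string:

```yaml
kind: spark
# Load the Spark schema JSON from a URL instead of a local file (illustrative address)
url: "https://example.com/schemas/measurements.json"
```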
21 | -------------------------------------------------------------------------------- /tests/mariadb/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Get current Flowman version 6 | FLOWMAN_TAG=$(mvn -f ../.. -q -N help:evaluate -Dexpression=flowman.dist.label -DforceStdout) 7 | docker image tag dimajix/flowman:$FLOWMAN_TAG flowman-it-mariadb:latest 8 | 9 | # Make projects world readable 10 | find . -type f | xargs chmod a+r 11 | find . -type d | xargs chmod a+rx 12 | find bin -type f | xargs chmod a+rx 13 | 14 | # Start database 15 | docker-compose up -d mariadb 16 | 17 | # Run tests 18 | docker-compose run --rm flowman bin/run-tests.sh 19 | 20 | # Clean up 21 | docker-compose down 22 | docker image rm flowman-it-mariadb:latest 23 | -------------------------------------------------------------------------------- /docs/spec/target/count.md: -------------------------------------------------------------------------------- 1 | # Count Target 2 | 3 | ## Example 4 | ``` 5 | targets: 6 | measurements-count: 7 | kind: count 8 | mapping: measurements 9 | ``` 10 | 11 | ## Fields 12 | * `kind` **(mandatory)** *(string)*: `count` 13 | * `description` **(optional)** *(type: string)*: 14 | Optional descriptive text of the build target 15 | * `mapping` **(mandatory)** *(string)*: 16 | Specifies the name of the input mapping to be counted 17 | 18 | 19 | ## Supported Execution Phases 20 | * `BUILD` - Counting records of a mapping will be executed as part of the `BUILD` phase 21 | 22 | Read more about [execution phases](../../concepts/lifecycle.md). 23 | -------------------------------------------------------------------------------- /flowman-dist/libexec/flowman-launch.cmd: -------------------------------------------------------------------------------- 1 | rem Collect all libraries 2 | 3 | SETLOCAL 4 | 5 | 6 | SET APP_JAR="%FLOWMAN_HOME%\lib\%APP_JAR%" 7 | 8 | set r=%LIB_JARS% 9 | set LIB_JARS= 10 | :loop 11 | for /F "tokens=1* delims=," %%a in (%r%) do ( 12 | if "x%LIB_JARS%" == "x" ( 13 | set LIB_JARS="%FLOWMAN_HOME%\%%a" 14 | ) else ( 15 | set LIB_JARS=%LIB_JARS%,"%FLOWMAN_HOME%\%%a" 16 | ) 17 | set r="%%b" 18 | ) 19 | if not %r% == "" goto :loop 20 | 21 | 22 | %SPARK_SUBMIT% --driver-java-options "%SPARK_DRIVER_JAVA_OPTS%" --conf spark.executor.extraJavaOptions="%SPARK_EXECUTOR_JAVA_OPTS%" --class %APP_MAIN% %SPARK_OPTS% --jars %LIB_JARS% %APP_JAR% %* 23 | -------------------------------------------------------------------------------- /tests/sqlserver/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Get current Flowman version 6 | FLOWMAN_TAG=$(mvn -f ../.. -q -N help:evaluate -Dexpression=flowman.dist.label -DforceStdout) 7 | docker image tag dimajix/flowman:$FLOWMAN_TAG flowman-it-sqlserver:latest 8 | 9 | # Make projects world readable 10 | find . -type f | xargs chmod a+r 11 | find . 
-type d | xargs chmod a+rx 12 | find bin -type f | xargs chmod a+rx 13 | 14 | # Start database 15 | docker-compose up -d sqlserver 16 | 17 | # Run tests 18 | docker-compose run --rm flowman bin/run-tests.sh 19 | 20 | # Clean up 21 | docker-compose down 22 | docker image rm flowman-it-sqlserver:latest 23 | -------------------------------------------------------------------------------- /tests/postgresql/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Get current Flowman version 6 | FLOWMAN_TAG=$(mvn -f ../.. -q -N help:evaluate -Dexpression=flowman.dist.label -DforceStdout) 7 | docker image tag dimajix/flowman:$FLOWMAN_TAG flowman-it-postgresql:latest 8 | 9 | # Make projects world readable 10 | find . -type f | xargs chmod a+r 11 | find . -type d | xargs chmod a+rx 12 | find bin -type f | xargs chmod a+rx 13 | 14 | # Start database 15 | docker-compose up -d postgresql 16 | 17 | # Run tests 18 | docker-compose run --rm flowman bin/run-tests.sh 19 | 20 | # Clean up 21 | docker-compose down 22 | docker image rm flowman-it-postgresql:latest 23 | -------------------------------------------------------------------------------- /devtools/create-version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | SELF=$(cd $(dirname $0) && pwd) 4 | . "$SELF/release-utils.sh" 5 | 6 | set -e 7 | set -o pipefail 8 | 9 | while getopts ":v:n" opt; do 10 | case $opt in 11 | v) NEXT_VERSION=$OPTARG ;; 12 | n) DRY_RUN=1 ;; 13 | \?) error "Invalid option: $OPTARG" ;; 14 | esac 15 | done 16 | 17 | check_clean_directory 18 | get_branch_info 19 | 20 | git checkout main 21 | git pull origin main 22 | git checkout -b $GIT_BRANCH 23 | git push origin 24 | 25 | git checkout main 26 | mvn versions:set -DnewVersion=$NEXT_VERSION 27 | git commit -a -m "Set version to next main version to $NEXT_VERSION" 28 | git push origin 29 | -------------------------------------------------------------------------------- /tests/mariadb/weather/project.yml: -------------------------------------------------------------------------------- 1 | name: "weather" 2 | version: "1.0" 3 | description: " 4 | This is a simple but very comprehensive example project for Flowman using publicly available weather data. 5 | The project will demonstrate many features of Flowman, like reading and writing data, performing data transformations, 6 | joining, filtering and aggregations. The project will also create a meaningful documentation containing data quality 7 | tests. 8 | " 9 | 10 | # The following modules simply contain a list of subdirectories containing the specification files 11 | modules: 12 | - model 13 | - mapping 14 | - target 15 | - job 16 | - config 17 | -------------------------------------------------------------------------------- /tests/mysql/weather/project.yml: -------------------------------------------------------------------------------- 1 | name: "weather" 2 | version: "1.0" 3 | description: " 4 | This is a simple but very comprehensive example project for Flowman using publicly available weather data. 5 | The project will demonstrate many features of Flowman, like reading and writing data, performing data transformations, 6 | joining, filtering and aggregations. The project will also create a meaningful documentation containing data quality 7 | tests. 
8 | " 9 | 10 | # The following modules simply contain a list of subdirectories containing the specification files 11 | modules: 12 | - model 13 | - mapping 14 | - target 15 | - job 16 | - config 17 | -------------------------------------------------------------------------------- /tests/synapse/flow/config/environment.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | - basedir=file:///tmp/weather 3 | - srcdir=$System.getenv('WEATHER_SRCDIR', 's3a://dimajix-training/data/weather') 4 | 5 | - jdbc_driver=$System.getenv('JDBC_DRIVER', 'com.microsoft.sqlserver.jdbc.SQLServerDriver') 6 | - jdbc_url=$System.getenv('JDBC_URL', 'jdbc:sqlserver://flowman-ondemand.sql.azuresynapse.net:1433') 7 | - jdbc_db=$System.getenv('JDBC_DB', 'weather') 8 | - jdbc_username=$System.getenv('JDBC_USERNAME', 'sqladminuser@flowman') 9 | - jdbc_password=$System.getenv('JDBC_PASSWORD' ,'yourStrong(!)Password') 10 | 11 | - keyVaultValue=$AzureKeyVault.getSecret('flowman','my-test-secret') 12 | -------------------------------------------------------------------------------- /examples/weather/project.yml: -------------------------------------------------------------------------------- 1 | name: "weather" 2 | version: "1.0" 3 | description: " 4 | This is a simple but very comprehensive example project for Flowman using publicly available weather data. 5 | The project will demonstrate many features of Flowman, like reading and writing data, performing data transformations, 6 | joining, filtering and aggregations. The project will also create a meaningful documentation containing data quality 7 | tests. 8 | " 9 | 10 | # The following modules simply contain a list of subdirectories containing the specification files 11 | modules: 12 | - model 13 | - mapping 14 | - target 15 | - job 16 | - config 17 | - test 18 | -------------------------------------------------------------------------------- /tests/emr/flow/model/stations-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations_raw: 3 | kind: file 4 | format: csv 5 | location: "${srcdir}/isd-history/" 6 | options: 7 | sep: "," 8 | encoding: "UTF-8" 9 | quote: "\"" 10 | header: "true" 11 | dateFormat: "yyyyMMdd" 12 | schema: 13 | kind: avro 14 | file: "${project.basedir}/schema/stations.avsc" 15 | 16 | 17 | targets: 18 | validate_stations_raw: 19 | kind: validate 20 | assertions: 21 | check_primary_key: 22 | kind: sql 23 | query: "SELECT usaf,wban,COUNT(*) FROM stations_raw GROUP BY usaf,wban HAVING COUNT(*) > 1" 24 | expected: [ ] 25 | -------------------------------------------------------------------------------- /tests/mysql/migrations/module/config.yml: -------------------------------------------------------------------------------- 1 | config: 2 | # Default migration policy for relations (default: RELAXED) 3 | - flowman.default.relation.migrationPolicy=STRICT 4 | # Default migration strategy for relations (default: ALTER) 5 | - flowman.default.relation.migrationStrategy=ALTER_REPLACE 6 | 7 | 8 | environment: 9 | - jdbc_driver=$System.getenv('JDBC_DRIVER', 'com.mysql.cj.jdbc.Driver') 10 | - jdbc_url=$System.getenv('JDBC_URL', 'jdbc:mysql://localhost') 11 | - jdbc_db=$System.getenv('JDBC_DB', 'flowman') 12 | - jdbc_username=$System.getenv('JDBC_USERNAME', 'flowman') 13 | - jdbc_password=$System.getenv('JDBC_PASSWORD' ,'yourStrong(!)Password') 14 | 15 | -------------------------------------------------------------------------------- 
/tests/mariadb/migrations/module/config.yml: -------------------------------------------------------------------------------- 1 | config: 2 | # Default migration policy for relations (default: RELAXED) 3 | - flowman.default.relation.migrationPolicy=STRICT 4 | # Default migration strategy for relations (default: ALTER) 5 | - flowman.default.relation.migrationStrategy=ALTER_REPLACE 6 | 7 | 8 | environment: 9 | - jdbc_driver=$System.getenv('JDBC_DRIVER', 'org.mariadb.jdbc.Driver') 10 | - jdbc_url=$System.getenv('JDBC_URL', 'jdbc:mariadb://localhost') 11 | - jdbc_db=$System.getenv('JDBC_DB', 'flowman') 12 | - jdbc_username=$System.getenv('JDBC_USERNAME', 'flowman') 13 | - jdbc_password=$System.getenv('JDBC_PASSWORD' ,'yourStrong(!)Password') 14 | 15 | -------------------------------------------------------------------------------- /tests/mariadb/weather/model/stations-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations_raw: 3 | kind: file 4 | format: csv 5 | location: "${srcdir}/isd-history/" 6 | options: 7 | sep: "," 8 | encoding: "UTF-8" 9 | quote: "\"" 10 | header: "true" 11 | dateFormat: "yyyyMMdd" 12 | schema: 13 | kind: avro 14 | file: "${project.basedir}/schema/stations.avsc" 15 | 16 | 17 | targets: 18 | validate_stations_raw: 19 | kind: validate 20 | assertions: 21 | check_primary_key: 22 | kind: sql 23 | query: "SELECT usaf,wban,COUNT(*) FROM stations_raw GROUP BY usaf,wban HAVING COUNT(*) > 1" 24 | expected: [ ] 25 | -------------------------------------------------------------------------------- /tests/mysql/weather/model/stations-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations_raw: 3 | kind: file 4 | format: csv 5 | location: "${srcdir}/isd-history/" 6 | options: 7 | sep: "," 8 | encoding: "UTF-8" 9 | quote: "\"" 10 | header: "true" 11 | dateFormat: "yyyyMMdd" 12 | schema: 13 | kind: avro 14 | file: "${project.basedir}/schema/stations.avsc" 15 | 16 | 17 | targets: 18 | validate_stations_raw: 19 | kind: validate 20 | assertions: 21 | check_primary_key: 22 | kind: sql 23 | query: "SELECT usaf,wban,COUNT(*) FROM stations_raw GROUP BY usaf,wban HAVING COUNT(*) > 1" 24 | expected: [ ] 25 | -------------------------------------------------------------------------------- /tests/oracle/migrations/module/config.yml: -------------------------------------------------------------------------------- 1 | config: 2 | # Default migration policy for relations (default: RELAXED) 3 | - flowman.default.relation.migrationPolicy=STRICT 4 | # Default migration strategy for relations (default: ALTER) 5 | - flowman.default.relation.migrationStrategy=ALTER_REPLACE 6 | 7 | 8 | environment: 9 | - jdbc_driver=$System.getenv('JDBC_DRIVER', 'oracle.jdbc.OracleDriver') 10 | - jdbc_url=$System.getenv('JDBC_URL', 'jdbc:oracle:thin:@localhost:1521') 11 | - jdbc_db=$System.getenv('JDBC_DB', 'flowman') 12 | - jdbc_username=$System.getenv('JDBC_USERNAME', 'flowman') 13 | - jdbc_password=$System.getenv('JDBC_PASSWORD' ,'yourStrong(!)Password') 14 | 15 | -------------------------------------------------------------------------------- /tests/oracle/weather/model/stations-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations_raw: 3 | # The relation is of type "file" 4 | kind: file 5 | # ... 
and it uses CSV as file format 6 | format: csv 7 | # data itself is stored at the following location 8 | location: "s3a://dimajix-training/data/weather/isd-history/" 9 | # Specify some CSV-specific options 10 | options: 11 | sep: "," 12 | encoding: "UTF-8" 13 | quote: "\"" 14 | header: "true" 15 | dateFormat: "yyyyMMdd" 16 | # Specify the schema (which is stored in an external file) 17 | schema: 18 | kind: avro 19 | file: "${project.basedir}/schema/stations.avsc" 20 | 21 | -------------------------------------------------------------------------------- /tests/postgresql/migrations/module/config.yml: -------------------------------------------------------------------------------- 1 | config: 2 | # Default migration policy for relations (default: RELAXED) 3 | - flowman.default.relation.migrationPolicy=STRICT 4 | # Default migration strategy for relations (default: ALTER) 5 | - flowman.default.relation.migrationStrategy=ALTER_REPLACE 6 | 7 | 8 | environment: 9 | - jdbc_driver=$System.getenv('JDBC_DRIVER', 'org.postgresql.Driver') 10 | - jdbc_url=$System.getenv('JDBC_URL', 'jdbc:postgresql://localhost:5432') 11 | - jdbc_db=$System.getenv('JDBC_DB', 'flowman') 12 | - jdbc_username=$System.getenv('JDBC_USERNAME', 'flowman') 13 | - jdbc_password=$System.getenv('JDBC_PASSWORD' ,'yourStrong(!)Password') 14 | 15 | -------------------------------------------------------------------------------- /tests/postgresql/weather/model/stations-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations_raw: 3 | # The relation is of type "file" 4 | kind: file 5 | # ... and it uses CSV as file format 6 | format: csv 7 | # data itself is stored at the following location 8 | location: "s3a://dimajix-training/data/weather/isd-history/" 9 | # Specify some CSV-specific options 10 | options: 11 | sep: "," 12 | encoding: "UTF-8" 13 | quote: "\"" 14 | header: "true" 15 | dateFormat: "yyyyMMdd" 16 | # Specify the schema (which is stored in an external file) 17 | schema: 18 | kind: avro 19 | file: "${project.basedir}/schema/stations.avsc" 20 | 21 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/model/stations-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations_raw: 3 | # The relation is of type "file" 4 | kind: file 5 | # ... and it uses CSV as file format 6 | format: csv 7 | # data itself is stored at the following location 8 | location: "s3a://dimajix-training/data/weather/isd-history/" 9 | # Specify some CSV-specific options 10 | options: 11 | sep: "," 12 | encoding: "UTF-8" 13 | quote: "\"" 14 | header: "true" 15 | dateFormat: "yyyyMMdd" 16 | # Specify the schema (which is stored in an external file) 17 | schema: 18 | kind: avro 19 | file: "${project.basedir}/schema/stations.avsc" 20 | 21 | -------------------------------------------------------------------------------- /examples/weather/model/measurements-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements_raw: 3 | kind: file 4 | format: text 5 | location: "$srcdir/" 6 | # Define the pattern to be used for partitions 7 | pattern: "${year}" 8 | # Define data partitions. Each year is stored in a separate sub directory 9 | partitions: 10 | - name: year 11 | type: integer 12 | granularity: 1 13 | description: "The year when the measurement was made" 14 | schema: 15 | # Specify the (single) column via an embedded schema. 
16 | kind: inline 17 | fields: 18 | - name: raw_data 19 | type: string 20 | description: "Raw measurement data" 21 | -------------------------------------------------------------------------------- /docs/spec/hooks/index.md: -------------------------------------------------------------------------------- 1 | # Execution Hooks 2 | 3 | Flowman provides the ability to specify so-called *hooks*, which are called during lifecycle execution for every job 4 | and target. For example by using the `web` hook, you can inform an external system about successful processing of 5 | jobs and targets. 6 | 7 | Hooks can be specified both on a global [namespace](../namespace.md) level and on a [job](../job/index.md) level. 8 | 9 | 10 | ## Hook Types 11 | 12 | Flowman supports different kinds of hooks, the following list gives you an exhaustive overview of all hooks implemented 13 | by Flowman 14 | 15 | ```eval_rst 16 | .. toctree:: 17 | :maxdepth: 1 18 | :glob: 19 | 20 | * 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /docs/spec/template/measure.md: -------------------------------------------------------------------------------- 1 | # Measure Template 2 | 3 | ## Example 4 | 5 | ```yaml 6 | templates: 7 | user: 8 | kind: measure 9 | parameters: 10 | - name: $table 11 | type: string 12 | - name: p1 13 | type: int 14 | default: 12 15 | template: 16 | kind: sql 17 | query: "SELECT * FROM $table WHERE x = $p1" 18 | 19 | targets: 20 | measure_1: 21 | kind: measure 22 | measures: 23 | m1: 24 | kind: template/user 25 | table: some_table 26 | 27 | measure_2: 28 | kind: measure 29 | measures: 30 | m1: 31 | kind: template/user 32 | table: some_other_table 33 | p1: 27 34 | ``` 35 | -------------------------------------------------------------------------------- /docs/spec/template/assertion.md: -------------------------------------------------------------------------------- 1 | # Assertion Template 2 | 3 | ## Example 4 | ```yaml 5 | templates: 6 | user: 7 | kind: assertion 8 | parameters: 9 | - name: table 10 | type: string 11 | - name: p1 12 | type: int 13 | default: 12 14 | template: 15 | kind: sql 16 | query: "SELECT * FROM $table WHERE idx = $p1" 17 | 18 | targets: 19 | validate_1: 20 | kind: validate 21 | assertions: 22 | a1: 23 | kind: template/user 24 | table: some_table 25 | p1: 13 26 | verify_1: 27 | kind: verify 28 | assertions: 29 | a1: 30 | kind: template/user 31 | table: some_other_value 32 | p1: 27 33 | ``` 34 | -------------------------------------------------------------------------------- /tests/archetype-quickstart/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Clean previous project 6 | rm -rf quickstart-test 7 | 8 | # Get current Flowman version 9 | FLOWMAN_VERSION=$(mvn -f ../.. -q -N help:evaluate -Dexpression=project.version -DforceStdout) 10 | 11 | mvn archetype:generate \ 12 | -B \ 13 | -DarchetypeGroupId=com.dimajix.flowman.maven \ 14 | -DarchetypeArtifactId=flowman-archetype-quickstart \ 15 | -DarchetypeVersion=0.4.0 \ 16 | -DgroupId=test \ 17 | -DartifactId=quickstart-test \ 18 | -Dversion=1.0-SNAPSHOT \ 19 | -DflowmanVersion=$FLOWMAN_VERSION 20 | 21 | cd quickstart-test || exit 22 | mvn clean install 23 | 24 | cd .. 
25 | 26 | # Clean up 27 | rm -rf quickstart-test 28 | -------------------------------------------------------------------------------- /tests/oracle/weather/model/measurements-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements_raw: 3 | kind: file 4 | format: text 5 | location: "s3a://dimajix-training/data/weather/" 6 | # Define the pattern to be used for partitions 7 | pattern: "${year}" 8 | # Define data partitions. Each year is stored in a separate sub directory 9 | partitions: 10 | - name: year 11 | type: integer 12 | granularity: 1 13 | description: "The year when the measurement was made" 14 | schema: 15 | # Specify the (single) column via an embedded schema. 16 | kind: inline 17 | fields: 18 | - name: raw_data 19 | type: string 20 | description: "Raw measurement data" 21 | -------------------------------------------------------------------------------- /tests/postgresql/weather/model/measurements-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements_raw: 3 | kind: file 4 | format: text 5 | location: "s3a://dimajix-training/data/weather/" 6 | # Define the pattern to be used for partitions 7 | pattern: "${year}" 8 | # Define data partitions. Each year is stored in a separate sub directory 9 | partitions: 10 | - name: year 11 | type: integer 12 | granularity: 1 13 | description: "The year when the measurement was made" 14 | schema: 15 | # Specify the (single) column via an embedded schema. 16 | kind: inline 17 | fields: 18 | - name: raw_data 19 | type: string 20 | description: "Raw measurement data" 21 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/model/measurements-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | measurements_raw: 3 | kind: file 4 | format: text 5 | location: "s3a://dimajix-training/data/weather/" 6 | # Define the pattern to be used for partitions 7 | pattern: "${year}" 8 | # Define data partitions. Each year is stored in a separate sub directory 9 | partitions: 10 | - name: year 11 | type: integer 12 | granularity: 1 13 | description: "The year when the measurement was made" 14 | schema: 15 | # Specify the (single) column via an embedded schema. 
16 | kind: inline 17 | fields: 18 | - name: raw_data 19 | type: string 20 | description: "Raw measurement data" 21 | -------------------------------------------------------------------------------- /tests/synapse/flow/model/stations.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations: 3 | kind: hiveTable 4 | description: "The 'stations' table contains meta data on all weather stations" 5 | database: weather 6 | table: stations 7 | format: parquet 8 | schema: 9 | kind: mapping 10 | mapping: stations_conformed 11 | 12 | stations_jdbc: 13 | kind: sqlserver 14 | description: "The 'stations' table contains meta data on all weather stations" 15 | connection: sql_server 16 | database: dbo 17 | table: stations 18 | schema: 19 | kind: mapping 20 | mapping: stations_conformed 21 | indexes: 22 | - name: "stations_idx" 23 | columns: [usaf, wban] 24 | clustered: true 25 | -------------------------------------------------------------------------------- /tests/sqlserver/twitter/mapping/tweets.yml: -------------------------------------------------------------------------------- 1 | mappings: 2 | tweets-raw: 3 | kind: relation 4 | relation: tweets-raw 5 | filter: id IS NOT NULL 6 | 7 | tweets-flattened: 8 | kind: flatten 9 | input: tweets-raw 10 | 11 | tweets-extended: 12 | kind: extend 13 | input: tweets-flattened 14 | columns: 15 | # Make column non-nullable, since we require a non-nullable unique index 16 | id: "COALESCE(id, 1)" 17 | timestamp: "from_unixtime(CAST(timestamp_ms AS LONG)/1000)" 18 | 19 | tweets-deduplicated: 20 | kind: deduplicate 21 | input: tweets-extended 22 | columns: id 23 | 24 | tweets: 25 | kind: drop 26 | input: tweets-deduplicated 27 | columns: 28 | - timestamp_ms 29 | -------------------------------------------------------------------------------- /flowman-spark-extensions/src/main/spark-2.4/org/apache/spark/sql/catalyst/plans/logical/AnalysisOnlyCommand.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.QueryPlan 4 | 5 | 6 | /** 7 | * A logical node that can be used for a command that requires its children to be only analyzed, 8 | * but not optimized. 9 | */ 10 | trait AnalysisOnlyCommand extends Command { 11 | val isAnalyzed: Boolean 12 | def childrenToAnalyze: Seq[LogicalPlan] 13 | override final def children: Seq[LogicalPlan] = if (isAnalyzed) Nil else childrenToAnalyze 14 | override def innerChildren: Seq[QueryPlan[_]] = if (isAnalyzed) childrenToAnalyze else Nil 15 | def markAsAnalyzed(): LogicalPlan 16 | } 17 | -------------------------------------------------------------------------------- /flowman-spark-extensions/src/main/spark-3.0/org/apache/spark/sql/catalyst/plans/logical/AnalysisOnlyCommand.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.QueryPlan 4 | 5 | 6 | /** 7 | * A logical node that can be used for a command that requires its children to be only analyzed, 8 | * but not optimized. 
9 | */ 10 | trait AnalysisOnlyCommand extends Command { 11 | val isAnalyzed: Boolean 12 | def childrenToAnalyze: Seq[LogicalPlan] 13 | override final def children: Seq[LogicalPlan] = if (isAnalyzed) Nil else childrenToAnalyze 14 | override def innerChildren: Seq[QueryPlan[_]] = if (isAnalyzed) childrenToAnalyze else Nil 15 | def markAsAnalyzed(): LogicalPlan 16 | } 17 | -------------------------------------------------------------------------------- /flowman-spark-extensions/src/main/spark-3.1/org/apache/spark/sql/catalyst/plans/logical/AnalysisOnlyCommand.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.QueryPlan 4 | 5 | 6 | /** 7 | * A logical node that can be used for a command that requires its children to be only analyzed, 8 | * but not optimized. 9 | */ 10 | trait AnalysisOnlyCommand extends Command { 11 | val isAnalyzed: Boolean 12 | def childrenToAnalyze: Seq[LogicalPlan] 13 | override final def children: Seq[LogicalPlan] = if (isAnalyzed) Nil else childrenToAnalyze 14 | override def innerChildren: Seq[QueryPlan[_]] = if (isAnalyzed) childrenToAnalyze else Nil 15 | def markAsAnalyzed(): LogicalPlan 16 | } 17 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-22.04" 5 | tools: 6 | python: "3.10" 7 | jobs: 8 | post_create_environment: 9 | # Install poetry 10 | # https://python-poetry.org/docs/#installing-manually 11 | - pip install poetry 12 | # Tell poetry to not use a virtual environment 13 | - poetry config virtualenvs.create false 14 | post_install: 15 | # Install dependencies 16 | # VIRTUAL_ENV needs to be set manually for now. 17 | # See https://github.com/readthedocs/readthedocs.org/pull/11152/ 18 | - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --directory=docs 19 | 20 | # Build from the docs/ directory with Sphinx 21 | sphinx: 22 | configuration: docs/conf.py 23 | -------------------------------------------------------------------------------- /docs/spec/template/schema.md: -------------------------------------------------------------------------------- 1 | # Schema Template 2 | 3 | ## Example 4 | ```yaml 5 | # All template definitions (independent of their kind) go into the templates section 6 | templates: 7 | default_schema: 8 | kind: schema 9 | parameters: 10 | - name: schemaName 11 | type: string 12 | template: 13 | kind: avro 14 | file: "${project.basedir}/schema/${schemaName}.avsc" 15 | 16 | relations: 17 | measurements: 18 | kind: file 19 | format: parquet 20 | location: "$basedir/measurements/" 21 | pattern: "${year}" 22 | partitions: 23 | - name: year 24 | type: integer 25 | granularity: 1 26 | schema: 27 | kind: template/default_schema 28 | schemaName: measurements 29 | ``` 30 | -------------------------------------------------------------------------------- /devtools/create-release.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | SELF=$(cd $(dirname $0) && pwd) 4 | . "$SELF/release-utils.sh" 5 | 6 | set -e 7 | set -o pipefail 8 | 9 | while getopts ":b:n" opt; do 10 | case $opt in 11 | b) GIT_BRANCH=$OPTARG ;; 12 | n) DRY_RUN=1 ;; 13 | \?) 
error "Invalid option: $OPTARG" ;; 14 | esac 15 | done 16 | 17 | 18 | check_clean_directory 19 | get_release_info 20 | 21 | git checkout $GIT_BRANCH 22 | git pull origin $GIT_BRANCH 23 | mvn versions:set -DnewVersion=$RELEASE_VERSION 24 | git commit -a -m "Create release version $RELEASE_VERSION" 25 | git tag $RELEASE_TAG 26 | mvn versions:set -DnewVersion=$NEXT_VERSION 27 | git commit -a -m "Set version to next SNAPSHOT $NEXT_VERSION" 28 | 29 | git push origin 30 | git push origin --tags 31 | -------------------------------------------------------------------------------- /docs/spec/metric/console.md: -------------------------------------------------------------------------------- 1 | # Console Metric Sink 2 | 3 | The `console` metric sink is the simplest possible way to publish [execution metrics](../../cookbook/execution-metrics.md) and 4 | [data quality measures](../../cookbook/data-quality.md) as simple logging output on the console. Even if you 5 | publish your metrics to a metric collector like [Prometheus](prometheus.md), it is a good idea to also add the 6 | `console` metric sink, so you can see all metrics together with other log output 7 | 8 | 9 | ## Example 10 | The following example configures a console sink in a namespace. You would need to include this snippet 11 | for example in the `default-namespace.yml` in the Flowman configuration directory 12 | 13 | ```yaml 14 | metrics: 15 | kind: console 16 | ``` 17 | -------------------------------------------------------------------------------- /docs/spec/schema/mapping.md: -------------------------------------------------------------------------------- 1 | # Mapping Schema 2 | 3 | The `mapping` schema is used to infer a schema from a given mapping. This way, the schema of outgoing relations can 4 | be implicitly specified by referencing the mapping that will be written to the relation. 5 | 6 | ## Example 7 | 8 | ```yaml 9 | relations: 10 | output: 11 | kind: hiveTable 12 | database: "crm" 13 | table: "customers" 14 | format: parquet 15 | partitions: 16 | - name: landing_date 17 | type: string 18 | schema: 19 | kind: mapping 20 | mapping: customers 21 | ``` 22 | 23 | ## Fields 24 | * `kind` **(mandatory)** *(type: string)*: `mapping` 25 | * `mapping` **(mandatory)** *(type: string)*: 26 | Specifies the name of mapping of which the schema should be used. 
27 | -------------------------------------------------------------------------------- /examples/weather/documentation.yml: -------------------------------------------------------------------------------- 1 | collectors: 2 | # Collect documentation of relations 3 | - kind: relations 4 | # Collect documentation of mappings 5 | - kind: mappings 6 | # Collect documentation of build targets 7 | - kind: targets 8 | # Collect lineage of all columns 9 | - kind: lineage 10 | # Execute all checks 11 | - kind: checks 12 | 13 | generators: 14 | # Create an output file in the project directory 15 | - kind: file 16 | location: ${project.basedir}/generated-documentation 17 | template: html+css 18 | excludeRelations: 19 | # You can either specify a name (without the project) 20 | - "some_unwanted_relation" 21 | # Or can also explicitly specify a name with the project 22 | - ".*/another_internal_relation" 23 | -------------------------------------------------------------------------------- /tests/emr/deployment.yml: -------------------------------------------------------------------------------- 1 | flowman: 2 | version: ${flowman.version} 3 | distribution: com.dimajix.flowman:flowman-dist:bin:${flowman.version} 4 | plugins: 5 | - flowman-avro 6 | - flowman-aws 7 | 8 | 9 | # List of subdirectories containing Flowman projects 10 | projects: 11 | - flow 12 | 13 | 14 | # List of packages to be built 15 | packages: 16 | emr: 17 | kind: fatjar 18 | flowman: 19 | config: 20 | # Optional: Use Glue as Metastore 21 | - spark.hadoop.hive.metastore.client.factory.class=com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory 22 | 23 | 24 | # List of deployments 25 | deployments: 26 | aws: 27 | kind: copy 28 | package: emr 29 | location: s3://flowman-test/integration-tests/apps 30 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig helps developers define and maintain consistent 2 | # coding styles between different editors and IDEs 3 | # editorconfig.org 4 | 5 | root = true 6 | 7 | [*] 8 | 9 | # Change these settings to your own preference 10 | indent_style = space 11 | indent_size = 4 12 | 13 | # We recommend you to keep these unchanged 14 | end_of_line = lf 15 | charset = utf-8 16 | trim_trailing_whitespace = true 17 | insert_final_newline = true 18 | 19 | [*.scala] 20 | indent_size = 4 21 | max_line_length = 120 22 | 23 | [*.yml] 24 | indent_size = 2 25 | 26 | [*.js] 27 | indent_size = 2 28 | 29 | [*.json] 30 | indent_size = 2 31 | 32 | [*.vue] 33 | indent_size = 2 34 | 35 | [*.md] 36 | trim_trailing_whitespace = false 37 | indent_size = 2 38 | max_line_length = 120 39 | -------------------------------------------------------------------------------- /docs/spec/relation/template.md: -------------------------------------------------------------------------------- 1 | # Template Relation 2 | 3 | ## Example 4 | 5 | ```yaml 6 | relations: 7 | # First define the template relation itself 8 | structured_macro: 9 | kind: hiveUnionTable 10 | viewDatabase: "dqm" 11 | view: "${table}" 12 | tableDatabase: "dqm" 13 | tablePrefix: "zz_${table}" 14 | locationPrefix: "$hdfs_structured_dir/dqm/zz_${table}" 15 | external: true 16 | format: parquet 17 | partitions: 18 | - name: landing_date 19 | type: string 20 | schema: 21 | kind: mapping 22 | mapping: ${schema} 23 | 24 | # Now use the template and replace some of the used variables 25 | fee: 26 | kind: template 27 | 
relation: structured_macro 28 | environment: 29 | - table=fee 30 | - schema=fee 31 | ``` 32 | -------------------------------------------------------------------------------- /docs/spec/template/dataset.md: -------------------------------------------------------------------------------- 1 | # Dataset Template 2 | 3 | ## Example 4 | 5 | ```yaml 6 | templates: 7 | user: 8 | kind: dataset 9 | parameters: 10 | - name: p0 11 | type: string 12 | - name: p1 13 | type: int 14 | default: 12 15 | template: 16 | kind: values 17 | records: 18 | - ["$p0",$p1] 19 | schema: 20 | kind: inline 21 | fields: 22 | - name: str_col 23 | type: string 24 | - name: int_col 25 | type: integer 26 | 27 | targets: 28 | dump_1: 29 | kind: console 30 | input: 31 | kind: template/user 32 | p1: 13 33 | dump_2: 34 | kind: console 35 | input: 36 | kind: template/user 37 | p0: some_value 38 | p1: 27 39 | ``` 40 | -------------------------------------------------------------------------------- /flowman-dist/bin/flowman-kernel: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | basedir=$(readlink -f "$(dirname "$0")"/..) 4 | source "$basedir"/libexec/flowman-common.sh 5 | 6 | # Add log4j config 7 | if [ -f "$FLOWMAN_CONF_DIR/log4j-kernel.properties" ]; then 8 | SPARK_DRIVER_LOGGING_OPTS="-Dlog4j.configuration=$FLOWMAN_CONF_DIR/log4j-kernel.properties" 9 | fi 10 | if [ -f "$FLOWMAN_CONF_DIR/log4j2-kernel.properties" ]; then 11 | SPARK_DRIVER_LOGGING_OPTS="-Dlog4j.configurationFile=$FLOWMAN_CONF_DIR/log4j2-kernel.properties" 12 | fi 13 | 14 | 15 | APP_NAME="flowman-kernel-server" 16 | APP_VERSION="@project.version@" 17 | APP_MAIN="com.dimajix.flowman.tools.kernel.Kernel" 18 | 19 | APP_JAR="$APP_NAME-$APP_VERSION.jar" 20 | LIB_JARS="@flowman-kernel-server.classpath@" 21 | 22 | run_spark $APP_JAR $LIB_JARS $APP_MAIN "$@" 23 | -------------------------------------------------------------------------------- /examples/weather/config/aws.yml: -------------------------------------------------------------------------------- 1 | config: 2 | # Inject AWS Credentials 3 | - spark.hadoop.fs.s3a.access.key=$System.getenv('AWS_ACCESS_KEY_ID') 4 | - spark.hadoop.fs.s3a.secret.key=$System.getenv('AWS_SECRET_ACCESS_KEY') 5 | # Use anonymous access to S3 6 | - spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider 7 | # Inject proxy for accessing S3 8 | - spark.hadoop.fs.s3a.proxy.host=$System.getenv('S3_PROXY_HOST', $System.getenv('AWS_PROXY_HOST')) 9 | - spark.hadoop.fs.s3a.proxy.port=$System.getenv('S3_PROXY_PORT', $System.getenv('AWS_PROXY_PORT' ,'-1')) 10 | - spark.hadoop.fs.s3a.proxy.username= 11 | - spark.hadoop.fs.s3a.proxy.password= 12 | - spark.hadoop.fs.s3a.endpoint=s3.eu-central-1.amazonaws.com 13 | - spark.hadoop.fs.s3a.signature_version=s3v4 14 | -------------------------------------------------------------------------------- /tests/mariadb/weather/config/aws.yml: -------------------------------------------------------------------------------- 1 | config: 2 | # Inject AWS Credentials 3 | - spark.hadoop.fs.s3a.access.key=$System.getenv('AWS_ACCESS_KEY_ID') 4 | - spark.hadoop.fs.s3a.secret.key=$System.getenv('AWS_SECRET_ACCESS_KEY') 5 | # Use anonymous access to S3 6 | - spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider 7 | # Inject proxy for accessing S3 8 | - spark.hadoop.fs.s3a.proxy.host=$System.getenv('S3_PROXY_HOST', $System.getenv('AWS_PROXY_HOST')) 9 | - 
spark.hadoop.fs.s3a.proxy.port=$System.getenv('S3_PROXY_PORT', $System.getenv('AWS_PROXY_PORT' ,'-1')) 10 | - spark.hadoop.fs.s3a.proxy.username= 11 | - spark.hadoop.fs.s3a.proxy.password= 12 | - spark.hadoop.fs.s3a.endpoint=s3.eu-central-1.amazonaws.com 13 | - spark.hadoop.fs.s3a.signature_version=s3v4 14 | -------------------------------------------------------------------------------- /tests/mysql/weather/config/aws.yml: -------------------------------------------------------------------------------- 1 | config: 2 | # Inject AWS Credentials 3 | - spark.hadoop.fs.s3a.access.key=$System.getenv('AWS_ACCESS_KEY_ID') 4 | - spark.hadoop.fs.s3a.secret.key=$System.getenv('AWS_SECRET_ACCESS_KEY') 5 | # Use anonymous access to S3 6 | - spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider 7 | # Inject proxy for accessing S3 8 | - spark.hadoop.fs.s3a.proxy.host=$System.getenv('S3_PROXY_HOST', $System.getenv('AWS_PROXY_HOST')) 9 | - spark.hadoop.fs.s3a.proxy.port=$System.getenv('S3_PROXY_PORT', $System.getenv('AWS_PROXY_PORT' ,'-1')) 10 | - spark.hadoop.fs.s3a.proxy.username= 11 | - spark.hadoop.fs.s3a.proxy.password= 12 | - spark.hadoop.fs.s3a.endpoint=s3.eu-central-1.amazonaws.com 13 | - spark.hadoop.fs.s3a.signature_version=s3v4 14 | -------------------------------------------------------------------------------- /docs/spec/profiles.md: -------------------------------------------------------------------------------- 1 | # Profiles 2 | 3 | Flowman supports so-called *profiles* which contain again some specifications like environment 4 | variables, Spark configurations and connections. But in contrast to a module definition, these 5 | entities are not used per default, instead a profile needs to be *activated* on the command 6 | line. 7 | 8 | Using profiles allows defining environment-specific settings (like for the test and for the 9 | prod environment, which may have different database names, host names, credentials and so on). 10 | Any active profile will override any entity within a project with the same name (for example 11 | a profiles Spark configuration properties will override the ones in a modules `config` 12 | section) 13 | 14 | ## Defining Profiles 15 | 16 | TBD 17 | 18 | ## Activating Profiles 19 | 20 | TBD 21 | -------------------------------------------------------------------------------- /flowman-dist/bin/flowman-server: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | basedir=$(readlink -f "$(dirname "$0")"/..) 
4 | source "$basedir"/libexec/flowman-common.sh 5 | 6 | # Add log4j config 7 | if [ -f "$FLOWMAN_CONF_DIR/log4j-history-server.properties" ]; then 8 | SPARK_DRIVER_LOGGING_OPTS="-Dlog4j.configuration=$FLOWMAN_CONF_DIR/log4j-history-server.properties" 9 | fi 10 | if [ -f "$FLOWMAN_CONF_DIR/log4j2-history-server.properties" ]; then 11 | SPARK_DRIVER_LOGGING_OPTS="-Dlog4j.configurationFile=$FLOWMAN_CONF_DIR/log4j2-history-server.properties" 12 | fi 13 | 14 | 15 | APP_NAME="flowman-server" 16 | APP_VERSION="@project.version@" 17 | APP_MAIN="com.dimajix.flowman.server.Application" 18 | 19 | APP_JAR="$APP_NAME-$APP_VERSION.jar" 20 | LIB_JARS="@flowman-server.classpath@" 21 | 22 | run_spark $APP_JAR $LIB_JARS $APP_MAIN "$@" 23 | -------------------------------------------------------------------------------- /flowman-kernel-server/src/test/scala/com/dimajix/flowman/kernel/KernelServerTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2018 The Flowman Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.dimajix.flowman.kernel 18 | 19 | class KernelServerTest { 20 | 21 | } 22 | -------------------------------------------------------------------------------- /flowman-kernel-common/src/main/java/com/dimajix/flowman/grpc/KernelConfiguration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 The Flowman Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.dimajix.flowman.grpc; 18 | 19 | public class KernelConfiguration { 20 | } 21 | -------------------------------------------------------------------------------- /flowman-kernel-server/src/main/scala/com/dimajix/flowman/kernel/service/KernelService.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 The Flowman Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.dimajix.flowman.kernel.service 18 | 19 | class KernelService { 20 | 21 | } 22 | -------------------------------------------------------------------------------- /tests/sqlserver/migrations/module/test-timestamp.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | test_timestamp: 3 | kind: sqlserver 4 | connection: sql_server 5 | table: test_timestamp 6 | schema: 7 | kind: inline 8 | fields: 9 | - name: ts 10 | type: timestamp 11 | 12 | 13 | mappings: 14 | test_timestamp: 15 | kind: values 16 | columns: 17 | ts: string 18 | records: 19 | - ['2022-01-01 00:00:00'] 20 | - ['2022-01-01 00:00:00.123'] 21 | - ['2022-01-01 00:00:00.123456'] 22 | - ['2022-01-01 00:00:00.123456789'] 23 | - ['2022-01-01 00:00:00.123456789123'] 24 | 25 | targets: 26 | test_timestamp: 27 | kind: relation 28 | relation: test_timestamp 29 | mapping: test_timestamp 30 | 31 | 32 | jobs: 33 | test_timestamp: 34 | targets: 35 | - test_timestamp 36 | -------------------------------------------------------------------------------- /flowman-kernel-api/src/main/proto/exception.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | option java_multiple_files = true; 4 | option java_package = "com.dimajix.flowman.kernel.proto"; 5 | option java_outer_classname = "ExceptionProto"; 6 | 7 | package com.dimajix.flowman.kernel; 8 | 9 | 10 | message StackTraceElement { 11 | optional string classLoaderName = 1; 12 | optional string moduleName = 2; 13 | optional string moduleVersion = 3; 14 | string declaringClass = 4; 15 | string methodName = 5; 16 | optional string fileName = 6; 17 | optional int32 lineNumber = 7; 18 | } 19 | message Exception { 20 | string className = 1; 21 | string fqName = 2; 22 | optional string message = 3; 23 | repeated StackTraceElement stackTrace = 4; 24 | repeated Exception suppressed = 5; 25 | optional Exception cause = 6; 26 | } 27 | -------------------------------------------------------------------------------- /docs/concepts/index.md: -------------------------------------------------------------------------------- 1 | # Core Concepts 2 | 3 | Flowman reduces the development efforts for creating robust and scalable data processing applications. At the heart 4 | of Flowman are some basic concepts which provide the simple building blocks which can be used to build even complex 5 | data transformations. 6 | 7 | In order to appreciate the elegance and power of Flowman, it is important to understand the [core entities](entities.md), 8 | which are used to model all the aspects of a data flow, like relations (which describe 9 | data sources and sinks), mappings (which describe data transformations) and targets (which describe the actual work 10 | to be performed). 11 | 12 | In addition to understanding the core entities, it is also important to understand Flowman's execution model which is 13 | described in the [lifecycle documentation](lifecycle.md). 
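As a purely illustrative sketch (not taken from the example project), the core entity types typically appear together like this — a relation describing a data source, a mapping reading and transforming it, and a target materializing the result into another relation:

```yaml
relations:
  measurements_raw:          # describes a data source
    kind: file
    format: csv
    location: "$srcdir/measurements/"

mappings:
  measurements:              # describes a transformation (here it simply reads the relation)
    kind: relation
    relation: measurements_raw

targets:
  measurements:              # describes the work to perform: write the mapping into a sink
    kind: relation
    relation: measurements_db   # hypothetical output relation, defined elsewhere
    mapping: measurements
```

The execution phases described in the lifecycle documentation then determine when each target is created, built, verified or destroyed.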
14 | -------------------------------------------------------------------------------- /flowman-yaml-schema/src/assembly/assembly.xml: -------------------------------------------------------------------------------- 1 | 5 | bin 6 | 7 | tar.gz 8 | 9 | false 10 | 11 | 12 | ${project.build.outputDirectory} 13 | 14 | 0644 15 | 0755 16 | 17 | yaml-schema/* 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /flowman-core/src/main/scala/com/dimajix/flowman/metric/GaugeMetric.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The Flowman Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.dimajix.flowman.metric 18 | 19 | abstract class GaugeMetric extends Metric { 20 | def value : Double 21 | } 22 | -------------------------------------------------------------------------------- /flowman-server-ui/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Flowman History Server 9 | 10 | 11 | 12 | 13 | 16 |
17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /flowman-kernel-common/src/main/java/com/dimajix/flowman/kernel/model/FieldType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2018 The Flowman Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.dimajix.flowman.kernel.model; 18 | 19 | 20 | abstract class FieldType { 21 | abstract String getTypeName(); 22 | } 23 | -------------------------------------------------------------------------------- /docker/conf/history-server.yml: -------------------------------------------------------------------------------- 1 | # The following definition provides a "run history" stored in a database. If nothing else is specified, the database 2 | # is stored locally as a Derby database. If you do not want to use the history, you can simply remove the whole 3 | # 'history' block from this file. 4 | history: 5 | kind: jdbc 6 | connection: flowman_state 7 | retries: 3 8 | timeout: 1000 9 | 10 | connections: 11 | flowman_state: 12 | driver: $System.getenv('FLOWMAN_LOGDB_DRIVER', 'org.apache.derby.jdbc.EmbeddedDriver') 13 | url: $System.getenv('FLOWMAN_LOGDB_URL', $String.concat('jdbc:derby:', $System.getenv('FLOWMAN_HOME'), '/logdb;create=true')) 14 | username: $System.getenv('FLOWMAN_LOGDB_USER', '') 15 | password: $System.getenv('FLOWMAN_LOGDB_PASSWORD', '') 16 | 17 | plugins: 18 | - flowman-mariadb 19 | - flowman-mysql 20 | - flowman-mssqlserver 21 | -------------------------------------------------------------------------------- /flowman-kernel-common/src/main/java/com/dimajix/flowman/kernel/model/Operation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2018 The Flowman Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.dimajix.flowman.kernel.model; 18 | 19 | 20 | public enum Operation { 21 | CREATE, 22 | READ, 23 | WRITE 24 | } 25 | -------------------------------------------------------------------------------- /docs/spec/target/empty.md: -------------------------------------------------------------------------------- 1 | # Empty Target 2 | 3 | The `empty` target is a dummy target, mainly used for testing purposes. 
In contrast to the 4 | [Blackhole Target](blackhole.md), the `empty` target does not provide an input mapping and supports all build phases, 5 | but the target is never *dirty*. This means that the target will only be executed when the `--force` option is specified. 6 | 7 | ## Example 8 | ```yaml 9 | targets: 10 | dummy: 11 | kind: empty 12 | ``` 13 | 14 | 15 | ## Fields 16 | 17 | * `kind` **(mandatory)** *(type: string)*: `empty` 18 | 19 | * `description` **(optional)** *(type: string)*: 20 | Optional descriptive text of the build target 21 | 22 | 23 | ## Supported Execution Phases 24 | * `CREATE` 25 | * `MIGRATE` 26 | * `BUILD` 27 | * `VERIFY` 28 | * `TRUNCATE` 29 | * `DESTROY` 30 | 31 | Read more about [execution phases](../../concepts/lifecycle.md). 32 | -------------------------------------------------------------------------------- /flowman-spark-extensions/src/main/scala/com/dimajix/hadoop/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 The Flowman Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.dimajix 18 | 19 | 20 | package object hadoop { 21 | final val HADOOP_VERSION = org.apache.hadoop.util.VersionInfo.getVersion 22 | } 23 | -------------------------------------------------------------------------------- /flowman-server-ui/src/components/EnvironmentTable.vue: -------------------------------------------------------------------------------- 1 | 26 | 27 | 28 | 40 | -------------------------------------------------------------------------------- /tests/sqlserver/weather/config/config.yml: -------------------------------------------------------------------------------- 1 | config: 2 | # Default migration policy for relations (default: RELAXED) 3 | - flowman.default.relation.migrationPolicy=STRICT 4 | # Default migration strategy for relations (default: ALTER) 5 | - flowman.default.relation.migrationStrategy=ALTER_REPLACE 6 | # Use anonymous access to S3 7 | - spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider 8 | # Inject proxy for accessing S3 9 | - spark.hadoop.fs.s3a.proxy.host=$System.getenv('S3_PROXY_HOST', $System.getenv('AWS_PROXY_HOST')) 10 | - spark.hadoop.fs.s3a.proxy.port=$System.getenv('S3_PROXY_PORT', $System.getenv('AWS_PROXY_PORT' ,'-1')) 11 | - spark.hadoop.fs.s3a.proxy.username= 12 | - spark.hadoop.fs.s3a.proxy.password= 13 | - spark.hadoop.fs.s3a.endpoint=s3.eu-central-1.amazonaws.com 14 | - spark.hadoop.fs.s3a.signature_version=s3v4 15 | -------------------------------------------------------------------------------- /tests/synapse/deployment.yml: -------------------------------------------------------------------------------- 1 | flowman: 2 | version: ${flowman.version} 3 | distribution: com.dimajix.flowman:flowman-dist:bin:${flowman.version} 4 | plugins: 5 | - flowman-avro 6 | - flowman-aws 7 | - flowman-azure 8 | - flowman-delta 9 | - flowman-mssqlserver 10 | 
11 | 12 | # List of subdirectories containing Flowman projects 13 | projects: 14 | - flow 15 | 16 | 17 | # List of packages to be built 18 | packages: 19 | synapse: 20 | kind: fatjar 21 | 22 | execution: 23 | javaOptions: 24 | - -Dhttp.proxyHost=${http.proxyHost} 25 | - -Dhttp.proxyPort=${http.proxyPort} 26 | - -Dhttps.proxyHost=${https.proxyHost} 27 | - -Dhttps.proxyPort=${https.proxyPort} 28 | 29 | # List of deployments 30 | deployments: 31 | synapse: 32 | kind: copy 33 | package: synapse 34 | location: abfs://flowman@dimajixspark.dfs.core.windows.net/integration-tests 35 | -------------------------------------------------------------------------------- /docs/environment.yaml: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # NOTE: This file has been auto-generated by poetry2conda 3 | # poetry2conda version = 0.3.0 4 | # date: Wed May 31 15:29:15 2023 5 | ############################################################################### 6 | # If you want to change the contents of this file, you should probably change 7 | # the pyproject.toml file and then use poetry2conda again to update this file. 8 | # Alternatively, stop using (ana)conda. 9 | ############################################################################### 10 | name: flowman-docs 11 | dependencies: 12 | - furo>=2023.5.20,<2024.0.0 13 | - myst-parser>=1.0.0,<2.0.0 14 | - python>=3.10,<4.0 15 | - recommonmark>=0.7.1,<0.8.0 16 | - Sphinx>=6.2.1,<7.0.0 17 | - sphinx-markdown-tables>=0.0.17,<0.0.18 18 | - sphinxcontrib-mermaid>=0.9.2,<0.10.0 19 | -------------------------------------------------------------------------------- /flowman-server-ui/src/charts/PieChart.js: -------------------------------------------------------------------------------- 1 | import { Doughnut, mixins } from "vue-chartjs"; 2 | 3 | export default { 4 | extends: Doughnut, 5 | mixins: [ mixins.reactiveProp ], 6 | 7 | props: { 8 | titlePosition: { 9 | type: String, 10 | default: () => 'top' 11 | } 12 | }, 13 | 14 | mounted() { 15 | // this.chartData is created in the mixin. 16 | // If you want to pass options please create a local options object 17 | let options = { 18 | responsive: true, 19 | title: { 20 | display: true, 21 | position: this.titlePosition, 22 | text: this.chartData.title, 23 | fontSize: 24 24 | }, 25 | maintainAspectRatio: true, 26 | hoverBorderWidth: 20, 27 | legend: { 28 | position: 'right', 29 | align: 'center' 30 | }, 31 | } 32 | this.renderChart(this.chartData, options); 33 | } 34 | }; 35 | -------------------------------------------------------------------------------- /tests/archetype-assembly/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Clean previous project 6 | rm -rf quickstart-test 7 | 8 | # Get current Flowman version 9 | FLOWMAN_VERSION=$(mvn -f ../.. 
-q -N help:evaluate -Dexpression=project.version -DforceStdout) 10 | 11 | mvn archetype:generate \ 12 | -B \ 13 | -DarchetypeGroupId=com.dimajix.flowman.maven \ 14 | -DarchetypeArtifactId=flowman-archetype-assembly \ 15 | -DarchetypeVersion=0.4.0 \ 16 | -DgroupId=test \ 17 | -DartifactId=quickstart-test \ 18 | -Dversion=1.0-SNAPSHOT 19 | 20 | # Replace Flowman version 21 | xmlstarlet ed \ 22 | --inplace \ 23 | -N x=http://maven.apache.org/POM/4.0.0 \ 24 | --update /x:project/x:parent/x:version \ 25 | --value "$FLOWMAN_VERSION" \ 26 | quickstart-test/pom.xml 27 | 28 | cd quickstart-test || exit 29 | mvn clean install 30 | 31 | cd .. 32 | 33 | # Clean up 34 | rm -rf quickstart-test 35 | -------------------------------------------------------------------------------- /examples/weather/model/stations-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | stations_raw: 3 | # The relation is of type "file" 4 | kind: file 5 | # ... and it uses CSV as file format 6 | format: csv 7 | # data itself is stored at the following location 8 | location: "$srcdir/isd-history/" 9 | # Specify some CSV-specific options 10 | options: 11 | sep: "," 12 | encoding: "UTF-8" 13 | quote: "\"" 14 | header: "true" 15 | dateFormat: "yyyyMMdd" 16 | # Specify the schema (which is stored in an external file) 17 | schema: 18 | kind: avro 19 | file: "${project.basedir}/schema/stations.avsc" 20 | 21 | 22 | targets: 23 | validate_stations_raw: 24 | kind: validate 25 | assertions: 26 | check_primary_key: 27 | kind: sql 28 | query: "SELECT usaf,wban,COUNT(*) FROM stations_raw GROUP BY usaf,wban HAVING COUNT(*) > 1" 29 | expected: [ ] 30 | -------------------------------------------------------------------------------- /flowman-kernel-common/src/main/java/com/dimajix/flowman/grpc/GrpcClient.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 The Flowman Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.dimajix.flowman.grpc; 18 | 19 | import io.grpc.ManagedChannel; 20 | 21 | 22 | abstract public class GrpcClient { 23 | abstract public ManagedChannel getChannel(); 24 | } 25 | -------------------------------------------------------------------------------- /docs/spec/target/blackhole.md: -------------------------------------------------------------------------------- 1 | # Blackhole Target 2 | 3 | A *blackhole target* simply materializes all records of a mapping, but immediately discards them. This can be 4 | useful for some test scenarios but probably is not worth much in a real production environment. 
5 | 6 | 7 | ## Example 8 | 9 | ```yaml 10 | targets: 11 | blackhole: 12 | kind: blackhole 13 | mapping: some_mapping 14 | ``` 15 | 16 | ## Fields 17 | 18 | * `kind` **(mandatory)** *(type: string)*: `blackhole` 19 | 20 | * `description` **(optional)** *(type: string)*: 21 | Optional descriptive text of the build target 22 | 23 | * `mapping` **(mandatory)** *(type: string)*: 24 | Specifies the name of the mapping output to be materialized 25 | 26 | 27 | ## Supported Execution Phases 28 | * `BUILD` - In the build phase, all records of the specified mapping will be materialized 29 | 30 | Read more about [execution phases](../../concepts/lifecycle.md). 31 | -------------------------------------------------------------------------------- /flowman-kernel-common/src/main/java/com/dimajix/flowman/grpc/RemoteException.java: -------------------------------------------------------------------------------- 1 | package com.dimajix.flowman.grpc; 2 | 3 | public class RemoteException extends RuntimeException { 4 | private final String declaredClass; 5 | 6 | public RemoteException(String declaredClass, String message) { 7 | super(message, null, true, true); 8 | 9 | this.declaredClass = declaredClass; 10 | } 11 | 12 | public RemoteException(String declaredClass, String message, Throwable cause) { 13 | super(message, cause, true, true); 14 | 15 | this.declaredClass = declaredClass; 16 | } 17 | 18 | public String getDeclaredClass() { 19 | return declaredClass; 20 | } 21 | 22 | @Override 23 | public String toString() { 24 | String s = declaredClass; 25 | String message = this.getLocalizedMessage(); 26 | return message != null ? s + ": " + message : s; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /flowman-server-ui/src/components/PhaseSelector.vue: -------------------------------------------------------------------------------- 1 | 22 | 23 | 39 | 40 | 42 | -------------------------------------------------------------------------------- /docs/plugins/delta.md: -------------------------------------------------------------------------------- 1 | # Delta Lake Plugin 2 | 3 | The `delta` plugin provide read/write support for [Delta Lake](https://delta.io) tables. 4 | 5 | Note that this plugin is only provided for Spark 3.0+. For Spark 2.4, the plugin does not contain the Flowman relations 6 | below, you might still be able to use it using a [`generic` relation](../spec/relation/generic.md) or 7 | [`file` relation](../spec/relation/file.md), but this is not officially supported, and we strongly recommend moving 8 | to Spark 3.0+. 
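Once the plugin is activated (see below), the additional relation kinds can be used like any other relation. The following is only a hypothetical sketch, assuming the `deltaTable` relation accepts `database` and `table` fields analogous to the `hiveTable` relation; consult the relation documentation linked in the next section for the authoritative field list:

```yaml
relations:
  measurements_delta:
    kind: deltaTable     # provided by the flowman-delta plugin
    database: weather
    table: measurements
```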
9 | 10 | 11 | ## Provided Entities 12 | * [`deltaTable` relation](../spec/relation/deltaTable.md) 13 | * [`deltaFile` relation](../spec/relation/deltaFile.md) 14 | * [`deltaVacuum` target](../spec/target/deltaVacuum.md) 15 | 16 | 17 | ## Activation 18 | 19 | The plugin can be easily activated by adding the following section to the [default-namespace.yml](../spec/namespace.md) 20 | ```yaml 21 | plugins: 22 | - flowman-delta 23 | ``` 24 | -------------------------------------------------------------------------------- /flowman-server-ui/src/components/StatusSelector.vue: -------------------------------------------------------------------------------- 1 | 22 | 23 | 39 | 40 | 42 | -------------------------------------------------------------------------------- /tests/mysql/conf/default-namespace.yml: -------------------------------------------------------------------------------- 1 | name: "default" 2 | 3 | history: 4 | kind: jdbc 5 | retries: 3 6 | timeout: 1000 7 | connection: 8 | url: "jdbc:mysql://mysql:3306/flowman" 9 | driver: "com.mysql.cj.jdbc.Driver" 10 | username: "flowman" 11 | password: "yourStrong(!)Password" 12 | 13 | metrics: 14 | - kind: console 15 | - kind: jdbc 16 | labels: 17 | project: ${project.name} 18 | version: ${project.version} 19 | phase: ${phase} 20 | connection: 21 | kind: jdbc 22 | url: "jdbc:mysql://mysql:3306/flowman" 23 | driver: "com.mysql.cj.jdbc.Driver" 24 | username: "flowman" 25 | password: "yourStrong(!)Password" 26 | 27 | config: 28 | - spark.executor.cores=$System.getenv('SPARK_EXECUTOR_CORES', '8') 29 | - spark.executor.memory=$System.getenv('SPARK_EXECUTOR_MEMORY', '16g') 30 | 31 | plugins: 32 | - flowman-avro 33 | - flowman-aws 34 | - flowman-mysql 35 | -------------------------------------------------------------------------------- /tests/mariadb/conf/default-namespace.yml: -------------------------------------------------------------------------------- 1 | name: "default" 2 | 3 | history: 4 | kind: jdbc 5 | retries: 3 6 | timeout: 1000 7 | connection: 8 | url: "jdbc:mariadb://mariadb:3306/flowman" 9 | driver: "org.mariadb.jdbc.Driver" 10 | username: "flowman" 11 | password: "yourStrong(!)Password" 12 | 13 | metrics: 14 | - kind: console 15 | - kind: jdbc 16 | labels: 17 | project: ${project.name} 18 | version: ${project.version} 19 | phase: ${phase} 20 | connection: 21 | kind: jdbc 22 | url: "jdbc:mariadb://mariadb:3306/flowman" 23 | driver: "org.mariadb.jdbc.Driver" 24 | username: "flowman" 25 | password: "yourStrong(!)Password" 26 | 27 | config: 28 | - spark.executor.cores=$System.getenv('SPARK_EXECUTOR_CORES', '8') 29 | - spark.executor.memory=$System.getenv('SPARK_EXECUTOR_MEMORY', '16g') 30 | 31 | plugins: 32 | - flowman-avro 33 | - flowman-aws 34 | - flowman-mariadb 35 | -------------------------------------------------------------------------------- /tests/emr/flow/model/aggregates.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | aggregates: 3 | kind: hiveTable 4 | database: weather 5 | table: aggregates 6 | format: parquet 7 | # Add partition column 8 | partitions: 9 | - name: year 10 | type: integer 11 | granularity: 1 12 | # Specify an optional schema here. It is always recommended to explicitly specify a schema for every relation 13 | # and not just let data flow from a mapping into a target. 
14 | schema: 15 | kind: inline 16 | fields: 17 | - name: country 18 | type: STRING 19 | - name: min_wind_speed 20 | type: FLOAT 21 | - name: max_wind_speed 22 | type: FLOAT 23 | - name: avg_wind_speed 24 | type: FLOAT 25 | - name: min_temperature 26 | type: FLOAT 27 | - name: max_temperature 28 | type: FLOAT 29 | - name: avg_temperature 30 | type: FLOAT 31 | -------------------------------------------------------------------------------- /flowman-kernel-api/src/main/proto/logging.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | option java_multiple_files = true; 4 | option java_package = "com.dimajix.flowman.kernel.proto"; 5 | option java_outer_classname = "LoggingProto"; 6 | 7 | package com.dimajix.flowman.kernel; 8 | 9 | import "common.proto"; 10 | import "exception.proto"; 11 | 12 | 13 | enum LogLevel { 14 | ALL = 0; 15 | FATAL = 10; 16 | ERROR = 20; 17 | WARN = 30; 18 | INFO = 40; 19 | DEBUG = 50; 20 | TRACE = 60; 21 | } 22 | message LogLocation { 23 | optional string file = 1; 24 | optional string declaringClass = 2; 25 | optional string methodName = 3; 26 | optional string fileName = 4; 27 | optional int32 lineNumber = 5; 28 | } 29 | message LogEvent { 30 | string logger = 1; 31 | Timestamp timestamp = 2; 32 | LogLevel level = 3; 33 | string message = 4; 34 | optional LogLocation location = 5; 35 | optional Exception exception = 6; 36 | } 37 | -------------------------------------------------------------------------------- /tests/postgresql/conf/default-namespace.yml: -------------------------------------------------------------------------------- 1 | name: "default" 2 | 3 | history: 4 | kind: jdbc 5 | retries: 3 6 | timeout: 1000 7 | connection: 8 | url: "jdbc:postgresql://postgresql:5432/flowman" 9 | driver: "org.postgresql.Driver" 10 | username: "flowman" 11 | password: "yourStrong(!)Password" 12 | 13 | metrics: 14 | - kind: console 15 | - kind: jdbc 16 | labels: 17 | project: ${project.name} 18 | version: ${project.version} 19 | phase: ${phase} 20 | connection: 21 | kind: jdbc 22 | url: "jdbc:postgresql://postgresql:5432/flowman" 23 | driver: "org.postgresql.Driver" 24 | username: "flowman" 25 | password: "yourStrong(!)Password" 26 | 27 | config: 28 | - spark.executor.cores=$System.getenv('SPARK_EXECUTOR_CORES', '8') 29 | - spark.executor.memory=$System.getenv('SPARK_EXECUTOR_MEMORY', '16g') 30 | 31 | plugins: 32 | - flowman-avro 33 | - flowman-aws 34 | - flowman-postgresql 35 | -------------------------------------------------------------------------------- /tests/sqlserver/twitter/model/tweets-raw.yml: -------------------------------------------------------------------------------- 1 | relations: 2 | tweets-raw: 3 | kind: file 4 | location: s3a://dimajix-training/data/twitter-sample/ 5 | format: json 6 | schema: 7 | kind: inline 8 | fields: 9 | - name: id 10 | type: long 11 | # Reserve a little bit more space because of Unicode hiccups (🔥 is 5 bytes in UTF-8, 2 chars in MS SQL, and 1 char in Spark) 12 | - name: text 13 | type: varchar(320) 14 | - name: lang 15 | type: char(2) 16 | - name: timestamp_ms 17 | type: string 18 | - name: user 19 | type: 20 | kind: struct 21 | fields: 22 | - name: id 23 | type: long 24 | - name: screen_name 25 | type: varchar(140) 26 | - name: description 27 | type: varchar(140) 28 | - name: lang 29 | type: char(2) 30 | -------------------------------------------------------------------------------- /docs/spec/connection/index.md: 
-------------------------------------------------------------------------------- 1 | # Connections 2 | 3 | Some operations require specifying *connections* to other systems. Common examples are 4 | JDBC connections to relational databases or SSH connections to SFTP servers. Flowman provides the 5 | capability to specify generic connection information (like hostname, URL, username, password and 6 | so on) inside a `Connection` object, which can be reused in multiple places. 7 | 8 | This simplifies working with external systems, for example when multiple tables inside a 9 | relational database are required for reading and/or writing. Using connections, you only need 10 | to specify the generic parameters once. Moreover, connections can also be part of profiles, 11 | which makes it easy to specify different connection parameters for different environments 12 | (like dev and test). 13 | 14 | 15 | ## Connection Types 16 | 17 | ```eval_rst 18 | .. toctree:: 19 | :maxdepth: 1 20 | :glob: 21 | 22 | * 23 | ``` 24 | -------------------------------------------------------------------------------- /docs/spec/dataset/values.md: -------------------------------------------------------------------------------- 1 | # Values Dataset 2 | 3 | ## Example 4 | 5 | ```yaml 6 | kind: values 7 | schema: 8 | kind: inline 9 | fields: 10 | - name: int_col 11 | type: integer 12 | - name: str_col 13 | type: string 14 | records: 15 | - [1,"some_string"] 16 | - [2,"cat"] 17 | ``` 18 | 19 | ```yaml 20 | kind: values 21 | columns: 22 | int_col: integer 23 | str_col: string 24 | records: 25 | - [1,"some_string"] 26 | - [2,"cat"] 27 | ``` 28 | 29 | ## Fields 30 | 31 | * `kind` **(mandatory)** *(type: string)*: `values` or `const` 32 | 33 | * `records` **(optional)** *(type: list:array)* *(default: empty)*: 34 | An optional list of records to be returned. 35 | 36 | * `columns` **(optional)** *(type: map:string)*: 37 | Specifies the list of column names (key) with their type (value) 38 | 39 | * `schema` **(optional)** *(type: schema)*: 40 | As an alternative to specifying a list of columns, you can also directly specify a schema. 41 | -------------------------------------------------------------------------------- /flowman-kernel-server/src/main/scala/com/dimajix/flowman/kernel/grpc/ClientWatcher.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 The Flowman Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package com.dimajix.flowman.kernel.grpc 18 | 19 | import java.util.UUID 20 | 21 | 22 | trait ClientWatcher { 23 | def clientConnected(clientId:UUID) : Unit = {} 24 | def clientDisconnected(clientId:UUID) : Unit = {} 25 | } 26 | -------------------------------------------------------------------------------- /tests/emr/flow/config/aws.yml: -------------------------------------------------------------------------------- 1 | config: 2 | # Inject AWS Credentials 3 | - spark.hadoop.fs.s3a.access.key=$System.getenv('AWS_ACCESS_KEY_ID') 4 | - spark.hadoop.fs.s3a.secret.key=$System.getenv('AWS_SECRET_ACCESS_KEY') 5 | # Use anonymous access to S3 6 | - spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider 7 | # Inject proxy for accessing S3 8 | - spark.hadoop.fs.s3a.proxy.host=$System.getenv('S3_PROXY_HOST', $System.getenv('AWS_PROXY_HOST')) 9 | - spark.hadoop.fs.s3a.proxy.port=$System.getenv('S3_PROXY_PORT', $System.getenv('AWS_PROXY_PORT' ,'-1')) 10 | - spark.hadoop.fs.s3a.proxy.username= 11 | - spark.hadoop.fs.s3a.proxy.password= 12 | - spark.hadoop.fs.s3a.endpoint=s3.eu-central-1.amazonaws.com 13 | - spark.hadoop.fs.s3a.signature_version=s3v4 14 | # Optional: Use Glue as Metastore 15 | - spark.hadoop.hive.metastore.client.factory.class=com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory 16 | -------------------------------------------------------------------------------- /flowman-kernel-common/src/main/java/com/dimajix/flowman/grpc/GrpcService.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2018 The Flowman Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.dimajix.flowman.grpc; 18 | 19 | 20 | /** 21 | * This is an alias interface which is not shaded, and therefore avoids working with the shaded gRPC core classes 22 | */ 23 | public interface GrpcService extends io.grpc.BindableService { 24 | } 25 | -------------------------------------------------------------------------------- /flowman-kernel-tools/src/main/java/com/dimajix/flowman/tools/rexec/job/BuildCommand.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2018 The Flowman Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.dimajix.flowman.tools.rexec.job; 18 | 19 | import com.dimajix.flowman.kernel.model.Phase; 20 | 21 | 22 | public class BuildCommand extends PhaseCommand { 23 | public BuildCommand() { 24 | super(Phase.BUILD); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /flowman-server-ui/src/components/ResourceTable.vue: -------------------------------------------------------------------------------- --------------------------------------------------------------------------------