├── .gitignore ├── LICENSE ├── README.md ├── project └── plugins.sbt └── src ├── main ├── resources │ └── com │ │ └── bloomberg │ │ └── sparkflow │ │ └── log4j-defaults.properties └── scala │ ├── com │ └── bloomberg │ │ └── sparkflow │ │ ├── components │ │ ├── Bundle.scala │ │ └── Component.scala │ │ ├── dc │ │ ├── DC.scala │ │ ├── DR.scala │ │ ├── DRImpl.scala │ │ ├── DataFrameFunctions.scala │ │ ├── DataframeSourceDC.scala │ │ ├── DatasetTransformDC.scala │ │ ├── Dependency.scala │ │ ├── DoubleDCFunctions.scala │ │ ├── GroupedTransformDC.scala │ │ ├── KeyValueGroupedDC.scala │ │ ├── KeyValueGroupedDCImpl.scala │ │ ├── MultiDatasetTransformDC.scala │ │ ├── MultiGroupedTransformDC.scala │ │ ├── MultiInputDC.scala │ │ ├── MultiInputPairDC.scala │ │ ├── PairDCFunctions.scala │ │ ├── ParallelCollectionDC.scala │ │ ├── RDDTransformDC.scala │ │ ├── ResultDepDC.scala │ │ ├── SecondaryPairDCFunctions.scala │ │ ├── SourceDC.scala │ │ ├── UnionDC.scala │ │ └── Util.scala │ │ ├── graphs │ │ └── Util.scala │ │ ├── internal │ │ └── Logging.scala │ │ ├── package.scala │ │ ├── partitioning │ │ └── SecondarySortPartioner.scala │ │ ├── serialization │ │ ├── ClassExploration.scala │ │ ├── DependencyPrinter.scala │ │ └── Hashing.scala │ │ └── util │ │ └── Utils.scala │ └── org │ └── apache │ └── spark │ └── sql │ ├── DCDataFrameReader.scala │ └── EncoderUtil.scala └── test ├── resources ├── Film_Locations_in_San_Francisco.csv ├── cars.csv └── test.json └── scala └── com └── bloomberg └── sparkflow ├── CaseClasses.scala ├── components └── ComponentTest.scala ├── dc ├── DCTest.scala ├── DRTest.scala ├── DataFrameDCFunctionsTest.scala ├── DataFrameDCSuite.scala ├── DatasetDCSuite.scala ├── DoubleDCFunctionsTest.scala ├── MapDCTest.scala ├── PairDCFunctionsTest.scala └── SecondaryPairDCFunctionsTest.scala ├── example ├── FilmsPipeline.scala └── FilmsTest.scala └── serialization ├── ClassExplorationTest.scala ├── HashingSample.scala ├── HashingTest.scala └── SomeFunctions.scala /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/README.md -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/project/plugins.sbt -------------------------------------------------------------------------------- /src/main/resources/com/bloomberg/sparkflow/log4j-defaults.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/resources/com/bloomberg/sparkflow/log4j-defaults.properties -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/components/Bundle.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/components/Bundle.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/components/Component.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/components/Component.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/DC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/DC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/DR.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/DR.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/DRImpl.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/DRImpl.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/DataFrameFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/DataFrameFunctions.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/DataframeSourceDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/DataframeSourceDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/DatasetTransformDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/DatasetTransformDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/Dependency.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/Dependency.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/DoubleDCFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/DoubleDCFunctions.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/GroupedTransformDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/GroupedTransformDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/KeyValueGroupedDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/KeyValueGroupedDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/KeyValueGroupedDCImpl.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/KeyValueGroupedDCImpl.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/MultiDatasetTransformDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/MultiDatasetTransformDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/MultiGroupedTransformDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/MultiGroupedTransformDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/MultiInputDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/MultiInputDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/MultiInputPairDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/MultiInputPairDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/PairDCFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/PairDCFunctions.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/ParallelCollectionDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/ParallelCollectionDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/RDDTransformDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/RDDTransformDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/ResultDepDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/ResultDepDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/SecondaryPairDCFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/SecondaryPairDCFunctions.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/SourceDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/SourceDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/UnionDC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/UnionDC.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/dc/Util.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/dc/Util.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/graphs/Util.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/graphs/Util.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/internal/Logging.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/internal/Logging.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/package.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/partitioning/SecondarySortPartioner.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/partitioning/SecondarySortPartioner.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/serialization/ClassExploration.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/serialization/ClassExploration.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/serialization/DependencyPrinter.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/serialization/DependencyPrinter.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/serialization/Hashing.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/serialization/Hashing.scala -------------------------------------------------------------------------------- /src/main/scala/com/bloomberg/sparkflow/util/Utils.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/com/bloomberg/sparkflow/util/Utils.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/DCDataFrameReader.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/org/apache/spark/sql/DCDataFrameReader.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/EncoderUtil.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/main/scala/org/apache/spark/sql/EncoderUtil.scala -------------------------------------------------------------------------------- /src/test/resources/Film_Locations_in_San_Francisco.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/resources/Film_Locations_in_San_Francisco.csv -------------------------------------------------------------------------------- /src/test/resources/cars.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/resources/cars.csv -------------------------------------------------------------------------------- /src/test/resources/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/resources/test.json -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/CaseClasses.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/CaseClasses.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/components/ComponentTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/components/ComponentTest.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/dc/DCTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/dc/DCTest.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/dc/DRTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/dc/DRTest.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/dc/DataFrameDCFunctionsTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/dc/DataFrameDCFunctionsTest.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/dc/DataFrameDCSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/dc/DataFrameDCSuite.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/dc/DatasetDCSuite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/dc/DatasetDCSuite.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/dc/DoubleDCFunctionsTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/dc/DoubleDCFunctionsTest.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/dc/MapDCTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/dc/MapDCTest.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/dc/PairDCFunctionsTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/dc/PairDCFunctionsTest.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/dc/SecondaryPairDCFunctionsTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/dc/SecondaryPairDCFunctionsTest.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/example/FilmsPipeline.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/example/FilmsPipeline.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/example/FilmsTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/example/FilmsTest.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/serialization/ClassExplorationTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/serialization/ClassExplorationTest.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/serialization/HashingSample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/serialization/HashingSample.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/serialization/HashingTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/serialization/HashingTest.scala -------------------------------------------------------------------------------- /src/test/scala/com/bloomberg/sparkflow/serialization/SomeFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bloomberg/spark-flow/HEAD/src/test/scala/com/bloomberg/sparkflow/serialization/SomeFunctions.scala --------------------------------------------------------------------------------