├── .gitignore ├── CODEOWNERS ├── README.md ├── accumulators-reliability ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── AccumulatorsFailureTest.scala │ └── package.scala ├── broadcast-join-structured-streaming ├── broker │ ├── docker-compose.yaml │ └── log4j.properties ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── BroadcastJoinDemo.scala │ ├── BroadcastVariableDemo.scala │ ├── delta │ ├── DeltaTableCreator.scala │ ├── DeltaTableUpdater.scala │ ├── RateStreamApp.scala │ └── package.scala │ ├── extrafiles │ ├── ItemToWrite.scala │ ├── README.md │ ├── delta │ │ ├── DeltaLakeJoinApp.scala │ │ ├── StaticDataWriterV1.scala │ │ └── StaticDataWriterV2.scala │ └── json │ │ ├── NotWorkingBatchApp.scala │ │ ├── StaticDataWriterV1.scala │ │ ├── StaticDataWriterV2.scala │ │ └── TemporaryViewApp.scala │ └── table │ ├── JsonTableCreator.scala │ ├── JsonTableUpdater.scala │ ├── RateStreamApp.scala │ ├── RateStreamAppWithForeachBatch.scala │ └── package.scala ├── dqx-demo ├── databricks │ ├── accounts_validation.jobs.yml │ └── validate_account.py └── requirements.txt ├── file-sink-metadata-oom-issue ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── FileSinkCompactDemo.scala │ └── WorkaroundsForeachBatch.scala ├── file-source-metadata-oom-issue ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ └── FileSourceCompactDemo.scala ├── pyspark-arbitrary-stateful-processing ├── arbitrary_stateful_processing.py ├── arbitrary_stateful_processing_refactored.py ├── output_handlers.py ├── requirements.txt ├── state_handlers.py ├── stateful_mapper.py └── stateful_mapper_refactored.py ├── pyspark-deduplication ├── deduplicator.py ├── deduplicator_custom_logic.py ├── deduplicator_scoped.py ├── in_memory_data_source.py └── requirements.txt ├── pyspark-generators ├── README.md ├── collect_spark_code_generator_approach.py ├── collect_spark_code_list_approach.py ├── count_spark_code_generator_approach.py ├── count_spark_code_list_approach.py ├── debug │ ├── __init__.py │ ├── serializers.py │ └── worker.py ├── python_generators_example.py └── requirements.txt ├── pyspark-join-null ├── join_null_column.py ├── join_null_column_considering_nulls.py ├── join_null_column_extra_rows.py └── requirements.txt ├── pyspark-jvm ├── inference_from_one_column.py ├── report.html └── requirements.txt ├── pyspark-others-null-handling ├── arithmetic_with_nulls.py ├── count_with_nulls.py ├── filtering_with_nulls.py ├── in_vs_exists_null_column.py ├── requirements.txt └── sort_with_nulls.py ├── pyspark-schema-inference ├── inference_from_one_column.py └── requirements.txt ├── pyspark-serializers ├── arrow_stream_pandas_serializer.py ├── arrow_stream_pandas_udf_serializer.py ├── arrow_stream_serializer_example.py ├── cartesian_deserializer_example.py ├── cogroup_udf_serializer.py ├── flattened_values_serializer_example.py ├── marshal_serializer_example.py ├── requirements.txt ├── serializers │ ├── pyspark │ │ └── serializers.py │ └── sql │ │ └── pandas │ │ └── serializers.py └── utf8_deserializer_example.py ├── pyspark-typed-proxy-pattern ├── __init__.py ├── filters.py ├── mappers.py ├── requirements.txt ├── schemas_declarations.py ├── typed_schema_job_example.py ├── typed_schemas.py └── untyped_schema_job_example.py ├── pyspark-vectorized-udf ├── pyspark_udf.py ├── pyspark_vectorized_udf.py └── requirements.txt ├── shuffle-readers-iterators ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── AggregatorExample.scala │ ├── GroupByKeyVsDistinct.scala │ └── WrappedIterators.scala ├── shuffle-writers ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── User.scala │ ├── bypassmerge │ ├── BypassMergeAndMapSideCombineExample.scala │ ├── BypassMergeAndRddWithoutMapSideCombineExample.scala │ └── BypassMergeShuffleWriterExample.scala │ ├── sortshuffle │ ├── SortShuffleBlockIdExample.scala │ ├── SortShuffleWriterExample.scala │ ├── SortShuffleWriterLessPartitionsThanBypassMergeThresholdExample.scala │ ├── SortShuffleWriterNotUsedSerializedInputExample.scala │ └── SortShuffleWriterSpillingExample.scala │ └── unsafewriter │ ├── MapSideCombineWithSerializedShuffleExample.scala │ └── UnsafeShuffleWriterExample.scala ├── spark-3.0-features ├── broker │ ├── docker-compose.yaml │ └── log4j.properties ├── pom.xml └── src │ ├── main │ ├── resources │ │ └── log4j.properties │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── AdaptiveQueryExecutionDemoteBroadcastHashJoin.scala │ │ ├── AdaptiveQueryExecutionDemoteBroadcastHashJoinBothSides.scala │ │ ├── AdaptiveQueryExecutionJoinSkewDemo.scala │ │ ├── AdaptiveQueryExecutionLocalShuffleReader.scala │ │ ├── AdaptiveQueryExecutionReuseAdaptiveSubqueryDemo.scala │ │ ├── AdaptiveQueryExecutionShuffePartitionCoalesceDemo.scala │ │ ├── ContinuousModeWithQueueSize.scala │ │ ├── KafkaHeaderDemo.scala │ │ ├── PredicatePushdownNestedFieldsDemo.scala │ │ ├── TestEntryKV.scala │ │ ├── dynamicpartitionpruning │ │ ├── DatasetsInstaller.scala │ │ ├── DynamicPartitionPruningEnabledDisabledDemo.scala │ │ ├── DynamicPartitionPruningJoinNotOnPartitionKeysDemo.scala │ │ ├── DynamicPartitionPruningMissingRatioDemo.scala │ │ └── DynamicPartitionPruningNoExchangeReuseDemo.scala │ │ └── source │ │ └── file │ │ ├── ArchiveStrategy.scala │ │ ├── DeleteStrategy.scala │ │ └── FilesGenerator.scala │ └── test │ ├── resources │ ├── binary_files │ │ └── apache_spark_logo.png │ ├── log4j.properties │ └── parquet-spark-2.4.0 │ │ ├── ._SUCCESS.crc │ │ ├── .part-00000-60ef372c-8cf7-427c-8722-65eefe1e5fc1-c000.snappy.parquet.crc │ │ ├── _SUCCESS │ │ └── part-00000-60ef372c-8cf7-427c-8722-65eefe1e5fc1-c000.snappy.parquet │ └── scala │ └── com │ └── waitingforcode │ ├── BinaryDataSourceTest.scala │ ├── DateTimeBackwardCompatibilityTest.scala │ ├── DateTimeRebaseTest.scala │ ├── DateTimeTest.scala │ ├── DeleteUpdateMergeTest.scala │ ├── NewFunctionsTest.scala │ ├── PostgreSQLCompatibilityAnsiEnabledTest.scala │ └── PostgreSQLCompatibilitySyntaxChangesTest.scala ├── spark-3.0-vs-2.4.5 ├── broker │ ├── docker-compose.yaml │ └── log4j.properties ├── pom.xml ├── spark-2.4.5 │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── BatchConsumer245.scala │ │ ├── FailedStreamingLeftOuterJoin245.scala │ │ ├── KafkaConsumer245.scala │ │ └── StreamingAggregationLimit245.scala └── spark-3.0.0 │ ├── pom.xml │ └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── BatchConsumer300.scala │ ├── FailedStreamingLeftOuterJoin300.scala │ ├── KafkaConsumer300.scala │ ├── Producer300.scala │ └── StreamingAggregationLimit300.scala ├── spark-3.1.1-vs-3.0.1 ├── broker │ ├── docker-compose.yaml │ └── log4j.properties ├── pyspark-3.0.1 │ ├── exception_message.py │ ├── hints_example.py │ └── requirements.txt ├── pyspark-3.1.1 │ ├── exception_message.py │ ├── hints_example.py │ └── requirements.txt ├── spark-3.0.1 │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── BucketCoalesceJoin.scala │ │ ├── JoinGroupByOptimization301.scala │ │ └── ShuffleJoinForFullOuterJoin301.scala └── spark-3.1.1 │ ├── pom.xml │ └── src │ ├── main │ ├── resources │ │ └── log4j.properties │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── BucketCoalesceJoin.scala │ │ ├── BucketCoalesceNotAppliedOutputPartitionExpressionJoin.scala │ │ ├── EventLog.scala │ │ ├── FullOuterJoinStructuredStreaming311.scala │ │ ├── GlobalWatermarkConsistencyIssueStructuredStreaming311.scala │ │ ├── JoinGroupByOptimization311.scala │ │ ├── JsonDataSourceV2Example.scala │ │ ├── LeftSemiJoinStructuredStreaming311.scala │ │ ├── PredicatePushdownJson.scala │ │ ├── ShuffleJoinForFullOuterJoin311.scala │ │ ├── StateStoreMetrics.scala │ │ ├── StructuredStreamingTableReader.scala │ │ └── StructuredStreamingTableWriter.scala │ └── test │ └── scala │ └── com │ └── waitingforcode │ └── NewFunctionsTest.scala ├── spark-3.2.0-features ├── avro_parquet │ ├── pom.xml │ ├── spark245-input │ │ ├── ._SUCCESS.crc │ │ ├── .part-00000-cd4350d9-61e8-4f01-adaf-bf99d0284420-c000.snappy.parquet.crc │ │ ├── _SUCCESS │ │ └── part-00000-cd4350d9-61e8-4f01-adaf-bf99d0284420-c000.snappy.parquet │ └── src │ │ ├── main │ │ └── scala │ │ │ └── com │ │ │ └── waitingforcode │ │ │ ├── GenerateParquetData.scala │ │ │ └── ReadParquetData.scala │ │ └── test │ │ ├── resources │ │ ├── log4j.properties │ │ ├── user_avro_schema.avsc │ │ └── user_avro_schema_invalid.avsc │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── AvroSchemaTest.scala │ │ ├── ParquetInPredicatePushdownTest.scala │ │ └── ReadParquetDataTest.scala ├── broker │ ├── docker-compose.yaml │ └── log4j.properties ├── misc_changes │ ├── pom.xml │ └── src │ │ └── test │ │ ├── resources │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── SqlCodesTest.scala │ │ └── UserDefinedTypeTest.scala ├── pandas_on_pyspark │ ├── categorical_dtype_demo.py │ ├── pandas_on_pyspark_demo.py │ └── requirements.txt ├── performance_improvements │ ├── pom.xml │ └── src │ │ └── test │ │ ├── resources │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── PreservedOutputPartitioningWithAggregatesTest.scala │ │ └── RemoveRedundantAggregatesTest.scala ├── session_window │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── SessionWindow.scala │ │ └── SessionWindowBatchDemo.scala ├── sql_functions │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── GroupingSetsFunctions.scala │ │ ├── IntervalFunctions.scala │ │ ├── StringFunctions.scala │ │ ├── TryCastFunctions.scala │ │ └── WindowFunctions.scala ├── structured_streaming_kafka_misc │ ├── README.md │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── DefaultStateArbitraryStatefulProcessingDemo.scala │ │ ├── KafkaChangesDataLossConfigurationDemo.scala │ │ ├── KafkaChangesMinOffsetsMaxTriggerDemo.scala │ │ ├── KafkaChangesStartingTimestampDemo.scala │ │ └── KafkaLatencyMetricsDemo.scala └── structured_streaming_rocksdb │ ├── README.md │ ├── pom.xml │ └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ └── RocksDbStateStoreDemo.scala ├── spark-3.3.0-features ├── joins │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ └── log4j2.properties │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── RowLevelRuntimeFilterBloomFilterAggregateApp.scala │ │ └── RowLevelRuntimeFilterSemiJoinReductionApp.scala ├── pyspark │ ├── new_features.py │ └── requirements.txt ├── sql_functions │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ └── log4j2.properties │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── AnsiAggregationFunctions.scala │ │ ├── ComplexTypesFunctions.scala │ │ ├── DateTimeFunctions.scala │ │ ├── ErrorHandlingFunctions.scala │ │ ├── MiscFunctions.scala │ │ └── StringFunctions.scala ├── structured_streaming │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ └── log4j2.properties │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── RatePerMicroBatchSourceApp.scala │ │ ├── RateSourceApp.scala │ │ ├── TriggerAvailableNowApp.scala │ │ └── TriggerOnceApp.scala └── structured_streaming_correctness_issue │ ├── pom.xml │ └── src │ └── main │ ├── resources │ ├── log4j.properties │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ └── StatefulCorrectnessIssueApp.scala ├── spark-3.4.0-features ├── pyspark │ ├── apply_in_pandas_with_state_example.py │ ├── requirements.txt │ └── spark_connect_example.py ├── structured_streaming │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ ├── log4j2.properties │ │ └── protobuf.pb │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── AsyncProgressTrackingWithBigInterval.scala │ │ ├── CorrectnessIssueFixFor3_4_0.scala │ │ ├── CorrectnessIssueJoinStillPresent.scala │ │ └── ProtobufSupport.scala └── structured_streaming_3.3.0 │ ├── pom.xml │ └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── CorrectnessIssueJoinStillPresent.scala │ ├── CorrectnessIssueQuery.scala │ └── GlobalWatermarkCorrectnessIssue.scala ├── spark-3.5.0-features └── structured_streaming │ ├── pom.xml │ └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── DropDuplicatesWithEventTime.scala │ ├── DropDuplicatesWithinWatermark.scala │ ├── OnQueryIdleListener.scala │ └── WatermarkPropagation.scala ├── spark-4-features └── src │ └── main │ └── scala │ └── com │ └── waitingforcode │ └── UserDefinedFunctionSQLExample.scala ├── spark-4-structured-streaming-new-state-api ├── build.sbt ├── project │ ├── build.properties │ └── plugins.sbt └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── Models.scala │ ├── batch │ ├── TransformWithStateBatch.scala │ ├── TransformWithStateBatchIncremental.scala │ └── package.scala │ ├── package.scala │ ├── state_init │ └── TransformWithStateInitStateWithReconciliation.scala │ ├── ttl_list │ └── TransformWithStateAndTtlListExample.scala │ └── ttl_state │ └── TransformWithStateAndTtlExample.scala ├── spark-coalesce-vs-repartition ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── CoalesceDemo.scala │ ├── RepartitionDemo.scala │ └── StaticLocationsRdd.scala ├── spark-commands ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ └── AlterTableCommandExample.scala ├── spark-delta-unified-data-management-patterns ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── AcidGuaranteeExample.scala │ ├── ChangeDataCaptureDeltaLakeExample.scala │ ├── InPlaceOperationsExample.scala │ └── SchemaManagementExample.scala ├── spark-expectations-demo ├── databricks │ ├── accounts.jobs.yml │ └── refresh_accounts_job.py ├── local │ ├── data_quality_rules_builders.py │ └── spark_expectations_demo.py └── requirements.txt ├── spark-filter-accumulator ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── FilterAccumulatorDemo1.scala │ ├── FilterAccumulatorWithTemporaryErrorDemo2.scala │ └── package.scala ├── spark-kubernetes-concepts ├── Dockerfile ├── README.md ├── persistent_volume_definition.yaml ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── StageLevelSchedulingApp.scala │ └── StaticApp.scala ├── spark-listeners ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── QueryExecutionListenerExample.scala │ ├── SparkListenerExample.scala │ └── StreamingQueryListenerExample.scala ├── spark-observe ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── BatchDatasetObservation.scala │ └── BatchDatasetObservationWithGet.scala ├── spark-predicate-pushdown ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── GenerateTestData.scala │ ├── Letter.scala │ ├── ReadTestDataWithPushdown.scala │ ├── ReadTestDataWithoutPushdown.scala │ └── package.scala ├── spark-show ├── build.sbt ├── project │ ├── build.properties │ └── plugins.sbt └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── ShowDataFrameExample.scala │ └── package.scala ├── spark-sql-checkpoint ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── IterativeExecutionNoCheckpoint.scala │ └── IterativeExecutionWithCheckpoint.scala ├── spark-sql-group-by-distinct ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ └── GroupByKeyVsDistinct.scala ├── spark-sql-hints ├── pom.xml └── src │ └── main │ └── scala │ └── com │ └── waitingforcode │ └── JoinHintsExamples.scala ├── spark-sql-insertinto-trap ├── build.sbt ├── project │ └── build.properties └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── InsertIntoByNameJob.scala │ └── InsertIntoTrapJob.scala ├── spark-sql-jit-compilation ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ └── JitCompilation.scala ├── spark-sql-lateral-aliases ├── spark-3.3.0 │ ├── build.sbt │ ├── project │ │ └── build.properties │ └── src │ │ └── main │ │ ├── resources │ │ └── log4j2.properties │ │ └── scala │ │ └── com │ │ └── waitingforcode │ │ └── LateralAliasReferenceMissingExample.scala └── spark-3.4.0 │ ├── build.sbt │ ├── project │ └── build.properties │ └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ └── LateralAliasReferenceMissingExample.scala ├── spark-sql-lateral-subquery ├── build.sbt ├── postgresql_example │ ├── README.md │ ├── docker-compose.yaml │ └── init.sql ├── project │ └── build.properties └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── LateralSubqueryExamples.scala │ └── LateralViewExamples.scala ├── spark-sql-mapgroupswithstate ├── README.md ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── FlatMapGroupsWithStateAndInitBatch.scala │ ├── FlatMapGroupsWithStateBatch.scala │ ├── StatefulMappingFunction.scala │ └── TimestampedEvent.scala ├── spark-sql-not-in-vs-not-exists ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── NotInVsNotExistsSingleColumn.scala │ ├── NotInVsNotExistsTwoColumns.scala │ └── User.scala ├── spark-sql-outers ├── explode_on_array.py ├── explode_outer_on_array.py ├── explodes_on_map.py └── requirements.txt ├── spark-sql-overwrite-partition ├── build.sbt ├── project │ └── build.properties └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── DeltaLakeDynamicPartitionOverwriteExample.scala │ ├── DeltaLakeReplaceWhereExample.scala │ ├── DynamicPartitionOverwriteExample.scala │ ├── SaveAsTableDynamicPartitionOverwriteExample.scala │ ├── SaveAsTableStaticPartitionOverwriteExample.scala │ ├── StaticPartitionOverwriteExample.scala │ ├── StaticPartitionOverwriteWithPartitionSpecExample.scala │ └── solutions │ ├── DeltaLakeReplaceWhereSaveAsTableExample.scala │ └── InsertByNameExample.scala ├── spark-sql-recursive-cte └── recursive_query_checkpoints_cache.py ├── spark-sql-saveastable ├── build.sbt ├── project │ └── build.properties └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── SaveAsTableExample.scala │ └── SaveAsTableExampleForHive.scala ├── spark-sql-wildcard ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── Demo1DataGeneration.scala │ ├── Demo2DataReadingNoWildcard.scala │ └── Demo3DataReadingWithWildcard.scala ├── spark-sql ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── BucketingJoinExample.scala │ ├── BucketingJoinWithPartitioningExample.scala │ ├── PartitionWiseSimulationExample.scala │ ├── PivotExample.scala │ ├── SelectImpactOnDropDemo.scala │ └── StackExample.scala ├── spark-stage-scheduling ├── Dockerfile ├── README.md ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ └── StageLevelSchedulingDemo.scala ├── spark-tables ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── ExternalTableExample1_CreateApp.scala │ ├── ExternalTableExample2_ReadApp.scala │ ├── ExternalTableExample3_ReadAfterRemoveApp.scala │ ├── InternalTableExample1_CreateApp.scala │ ├── InternalTableExample2_ReadApp.scala │ ├── InternalTableExample3_ReadAfterRemoveApp.scala │ ├── Letter.scala │ └── package.scala ├── spark-vs-beam ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── waitingforcode │ │ ├── BeamExample.java │ │ └── LabelWithSum.java │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ └── SparkExample.scala ├── spark-withcolumn-problem ├── README.md ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── CleanserFunctionsStudy.scala │ ├── DataDispatcherIfElseStudy.scala │ ├── EventLog.scala │ └── VersionsToRun.scala ├── structured-streaming-2-sinks └── src │ └── main │ └── scala │ └── com │ └── waitingforcode │ ├── JobWith2Sinks.scala │ └── JobWith2SinksWithSleep.scala ├── structured-streaming-dynamic-resource-allocation ├── Dockerfile ├── README.md ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ └── AutoscaledRateStreamReader.scala ├── structured-streaming-event-skew-watermark ├── build.sbt ├── docker │ └── docker-compose.yaml ├── project │ └── build.properties └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── DropDuplicatesWithinWatermarkForKafka.scala │ └── DropDuplicatesWithinWatermarkForMemoryStream.scala ├── structured-streaming-first-micro-batch-state-expiration ├── README.md ├── docker │ └── docker-compose.yaml ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── becomedataengineer │ ├── FirstStateBrokenExpiration.scala │ ├── FirstStateBrokenMapper.scala │ ├── RateMicroBatchAugmented.scala │ ├── UserWithVisits.scala │ ├── VisitTimeAndPage.scala │ ├── eventtime │ ├── CounterWithMaxEventTime.scala │ ├── EventTimeStateMapper.scala │ └── StatefulJobWithEventTimePattern.scala │ └── temporarystate │ ├── CounterWithFlag.scala │ ├── FlagStateMapper.scala │ └── StatefulJobWithFlagPattern.scala ├── structured-streaming-initial-state └── src │ └── main │ └── resources │ └── log4j2.properties ├── structured-streaming-integration-tests ├── pom.xml └── src │ ├── main │ ├── resources │ │ └── log4j.properties │ └── scala │ │ └── com │ │ └── waitingforcode │ │ ├── DataDispatcher.scala │ │ └── DataDispatcherConfig.scala │ └── test │ └── scala │ └── com │ └── waitingforcode │ └── DataDispatcherTest.scala ├── structured-streaming-listeners ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── FailingListener.scala │ └── FailingListeners.scala ├── structured-streaming-maxoffsetspertrigger ├── README.md ├── broker │ └── docker-compose.yaml ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── AnotherLessPerfectUseCaseConsolePrinterFromKafka.scala │ ├── LessPerfectUseCaseConsolePrinterFromKafka.scala │ ├── PerfectUseCaseConsolePrinterFromKafka.scala │ ├── TestDataGenerator.scala │ └── package.scala ├── structured-streaming-minoffsets-availablenow-kafka ├── broker │ ├── docker-compose.yaml │ └── log4j.properties ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ └── MinOffsetsForAvailableNow.scala ├── structured-streaming-outputmodes ├── README.md ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ ├── AppendModeForAggregatesDemo.scala │ ├── CompleteModeForAggregatesDemo.scala │ ├── TimestampedEvent.scala │ └── UpdateModeForAggregatesDemo.scala ├── structured-streaming-retries ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── RetriesContinuousTriggerDemo.scala │ └── RetriesMicroBatchTriggerDemo.scala ├── structured-streaming-schema-registry ├── README.md ├── broker │ └── docker-compose.yaml ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── waitingforcode │ │ └── Order.java │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── AbrisSchemaRegistryDemo.scala │ ├── ProducerAppV1.scala │ ├── ProducerAppV2.scala │ └── package.scala ├── structured-streaming-spark-metadata ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── becomedataengineer │ ├── SparkMetadataIssueGenerator.scala │ └── SparkMetadataWithRetentionGenerator.scala ├── structured-streaming-state-metrics ├── broker │ ├── docker-compose.yaml │ └── log4j.properties ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ └── WindowsWithWatermarkDemo.scala ├── structured-streaming-stop-job ├── README.md ├── docker │ ├── docker-compose.yaml │ └── generation_configuration.yaml ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j2.properties │ └── scala │ └── com │ └── waitingforcode │ └── VisitsCounterInWindows.scala ├── structured-streaming-temporary-view-based-processing ├── broker │ ├── docker-compose.yaml │ └── log4j.properties ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── ApiBasedProcessing.scala │ ├── DataProducer.scala │ └── SqlBasedProcessing.scala ├── structured-streaming-transactional-kafka ├── README.md ├── broker │ └── docker-compose.yaml ├── pom.xml └── src │ └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── waitingforcode │ ├── CommittedTransactionsStore.scala │ ├── ForeachKafkaNonTransactionalDemo.scala │ ├── ForeachKafkaTransactionalDemo.scala │ ├── ForeachKafkaTransactionalWriter.scala │ ├── TestDataGenerator.scala │ └── package.scala ├── structured-streaming-ui-patterns └── src │ └── main │ └── scala │ └── com │ └── waitingforcode │ └── package.scala └── structured-streaming-window ├── broker ├── docker-compose.yaml └── log4j.properties ├── pom.xml └── src └── main ├── resources └── log4j.properties └── scala └── com └── waitingforcode └── WindowsWithWatermarkDemo.scala /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/.gitignore -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @bartosz25 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/README.md -------------------------------------------------------------------------------- /accumulators-reliability/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/accumulators-reliability/pom.xml -------------------------------------------------------------------------------- /accumulators-reliability/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/accumulators-reliability/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /accumulators-reliability/src/main/scala/com/waitingforcode/AccumulatorsFailureTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/accumulators-reliability/src/main/scala/com/waitingforcode/AccumulatorsFailureTest.scala -------------------------------------------------------------------------------- /accumulators-reliability/src/main/scala/com/waitingforcode/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/accumulators-reliability/src/main/scala/com/waitingforcode/package.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/broker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/broker/docker-compose.yaml -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/broker/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/broker/log4j.properties -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/pom.xml -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/BroadcastJoinDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/BroadcastJoinDemo.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/BroadcastVariableDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/BroadcastVariableDemo.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/delta/DeltaTableCreator.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/delta/DeltaTableCreator.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/delta/DeltaTableUpdater.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/delta/DeltaTableUpdater.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/delta/RateStreamApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/delta/RateStreamApp.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/delta/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/delta/package.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/ItemToWrite.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/ItemToWrite.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/README.md -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/delta/DeltaLakeJoinApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/delta/DeltaLakeJoinApp.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/delta/StaticDataWriterV1.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/delta/StaticDataWriterV1.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/delta/StaticDataWriterV2.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/delta/StaticDataWriterV2.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/json/NotWorkingBatchApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/json/NotWorkingBatchApp.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/json/StaticDataWriterV1.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/json/StaticDataWriterV1.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/json/StaticDataWriterV2.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/json/StaticDataWriterV2.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/json/TemporaryViewApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/extrafiles/json/TemporaryViewApp.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/table/JsonTableCreator.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/table/JsonTableCreator.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/table/JsonTableUpdater.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/table/JsonTableUpdater.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/table/RateStreamApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/table/RateStreamApp.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/table/RateStreamAppWithForeachBatch.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/table/RateStreamAppWithForeachBatch.scala -------------------------------------------------------------------------------- /broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/table/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/broadcast-join-structured-streaming/src/main/scala/com/waitingforcode/table/package.scala -------------------------------------------------------------------------------- /dqx-demo/databricks/accounts_validation.jobs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/dqx-demo/databricks/accounts_validation.jobs.yml -------------------------------------------------------------------------------- /dqx-demo/databricks/validate_account.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/dqx-demo/databricks/validate_account.py -------------------------------------------------------------------------------- /dqx-demo/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/dqx-demo/requirements.txt -------------------------------------------------------------------------------- /file-sink-metadata-oom-issue/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/file-sink-metadata-oom-issue/pom.xml -------------------------------------------------------------------------------- /file-sink-metadata-oom-issue/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/file-sink-metadata-oom-issue/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /file-sink-metadata-oom-issue/src/main/scala/com/waitingforcode/FileSinkCompactDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/file-sink-metadata-oom-issue/src/main/scala/com/waitingforcode/FileSinkCompactDemo.scala -------------------------------------------------------------------------------- /file-sink-metadata-oom-issue/src/main/scala/com/waitingforcode/WorkaroundsForeachBatch.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/file-sink-metadata-oom-issue/src/main/scala/com/waitingforcode/WorkaroundsForeachBatch.scala -------------------------------------------------------------------------------- /file-source-metadata-oom-issue/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/file-source-metadata-oom-issue/pom.xml -------------------------------------------------------------------------------- /file-source-metadata-oom-issue/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/file-source-metadata-oom-issue/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /file-source-metadata-oom-issue/src/main/scala/com/waitingforcode/FileSourceCompactDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/file-source-metadata-oom-issue/src/main/scala/com/waitingforcode/FileSourceCompactDemo.scala -------------------------------------------------------------------------------- /pyspark-arbitrary-stateful-processing/arbitrary_stateful_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-arbitrary-stateful-processing/arbitrary_stateful_processing.py -------------------------------------------------------------------------------- /pyspark-arbitrary-stateful-processing/arbitrary_stateful_processing_refactored.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-arbitrary-stateful-processing/arbitrary_stateful_processing_refactored.py -------------------------------------------------------------------------------- /pyspark-arbitrary-stateful-processing/output_handlers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-arbitrary-stateful-processing/output_handlers.py -------------------------------------------------------------------------------- /pyspark-arbitrary-stateful-processing/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-arbitrary-stateful-processing/requirements.txt -------------------------------------------------------------------------------- /pyspark-arbitrary-stateful-processing/state_handlers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-arbitrary-stateful-processing/state_handlers.py -------------------------------------------------------------------------------- /pyspark-arbitrary-stateful-processing/stateful_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-arbitrary-stateful-processing/stateful_mapper.py -------------------------------------------------------------------------------- /pyspark-arbitrary-stateful-processing/stateful_mapper_refactored.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-arbitrary-stateful-processing/stateful_mapper_refactored.py -------------------------------------------------------------------------------- /pyspark-deduplication/deduplicator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-deduplication/deduplicator.py -------------------------------------------------------------------------------- /pyspark-deduplication/deduplicator_custom_logic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-deduplication/deduplicator_custom_logic.py -------------------------------------------------------------------------------- /pyspark-deduplication/deduplicator_scoped.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-deduplication/deduplicator_scoped.py -------------------------------------------------------------------------------- /pyspark-deduplication/in_memory_data_source.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-deduplication/in_memory_data_source.py -------------------------------------------------------------------------------- /pyspark-deduplication/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-deduplication/requirements.txt -------------------------------------------------------------------------------- /pyspark-generators/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-generators/README.md -------------------------------------------------------------------------------- /pyspark-generators/collect_spark_code_generator_approach.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-generators/collect_spark_code_generator_approach.py -------------------------------------------------------------------------------- /pyspark-generators/collect_spark_code_list_approach.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-generators/collect_spark_code_list_approach.py -------------------------------------------------------------------------------- /pyspark-generators/count_spark_code_generator_approach.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-generators/count_spark_code_generator_approach.py -------------------------------------------------------------------------------- /pyspark-generators/count_spark_code_list_approach.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-generators/count_spark_code_list_approach.py -------------------------------------------------------------------------------- /pyspark-generators/debug/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyspark-generators/debug/serializers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-generators/debug/serializers.py -------------------------------------------------------------------------------- /pyspark-generators/debug/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-generators/debug/worker.py -------------------------------------------------------------------------------- /pyspark-generators/python_generators_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-generators/python_generators_example.py -------------------------------------------------------------------------------- /pyspark-generators/requirements.txt: -------------------------------------------------------------------------------- 1 | pyspark==3.2.1 2 | -------------------------------------------------------------------------------- /pyspark-join-null/join_null_column.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-join-null/join_null_column.py -------------------------------------------------------------------------------- /pyspark-join-null/join_null_column_considering_nulls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-join-null/join_null_column_considering_nulls.py -------------------------------------------------------------------------------- /pyspark-join-null/join_null_column_extra_rows.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-join-null/join_null_column_extra_rows.py -------------------------------------------------------------------------------- /pyspark-join-null/requirements.txt: -------------------------------------------------------------------------------- 1 | pyspark==3.5.0 -------------------------------------------------------------------------------- /pyspark-jvm/inference_from_one_column.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-jvm/inference_from_one_column.py -------------------------------------------------------------------------------- /pyspark-jvm/report.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-jvm/report.html -------------------------------------------------------------------------------- /pyspark-jvm/requirements.txt: -------------------------------------------------------------------------------- 1 | pyspark==3.2.1 2 | -------------------------------------------------------------------------------- /pyspark-others-null-handling/arithmetic_with_nulls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-others-null-handling/arithmetic_with_nulls.py -------------------------------------------------------------------------------- /pyspark-others-null-handling/count_with_nulls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-others-null-handling/count_with_nulls.py -------------------------------------------------------------------------------- /pyspark-others-null-handling/filtering_with_nulls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-others-null-handling/filtering_with_nulls.py -------------------------------------------------------------------------------- /pyspark-others-null-handling/in_vs_exists_null_column.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-others-null-handling/in_vs_exists_null_column.py -------------------------------------------------------------------------------- /pyspark-others-null-handling/requirements.txt: -------------------------------------------------------------------------------- 1 | pyspark==3.5.0 -------------------------------------------------------------------------------- /pyspark-others-null-handling/sort_with_nulls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-others-null-handling/sort_with_nulls.py -------------------------------------------------------------------------------- /pyspark-schema-inference/inference_from_one_column.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-schema-inference/inference_from_one_column.py -------------------------------------------------------------------------------- /pyspark-schema-inference/requirements.txt: -------------------------------------------------------------------------------- 1 | pyspark==3.0.1 2 | -------------------------------------------------------------------------------- /pyspark-serializers/arrow_stream_pandas_serializer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-serializers/arrow_stream_pandas_serializer.py -------------------------------------------------------------------------------- /pyspark-serializers/arrow_stream_pandas_udf_serializer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-serializers/arrow_stream_pandas_udf_serializer.py -------------------------------------------------------------------------------- /pyspark-serializers/arrow_stream_serializer_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-serializers/arrow_stream_serializer_example.py -------------------------------------------------------------------------------- /pyspark-serializers/cartesian_deserializer_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-serializers/cartesian_deserializer_example.py -------------------------------------------------------------------------------- /pyspark-serializers/cogroup_udf_serializer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-serializers/cogroup_udf_serializer.py -------------------------------------------------------------------------------- /pyspark-serializers/flattened_values_serializer_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-serializers/flattened_values_serializer_example.py -------------------------------------------------------------------------------- /pyspark-serializers/marshal_serializer_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-serializers/marshal_serializer_example.py -------------------------------------------------------------------------------- /pyspark-serializers/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-serializers/requirements.txt -------------------------------------------------------------------------------- /pyspark-serializers/serializers/pyspark/serializers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-serializers/serializers/pyspark/serializers.py -------------------------------------------------------------------------------- /pyspark-serializers/serializers/sql/pandas/serializers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-serializers/serializers/sql/pandas/serializers.py -------------------------------------------------------------------------------- /pyspark-serializers/utf8_deserializer_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-serializers/utf8_deserializer_example.py -------------------------------------------------------------------------------- /pyspark-typed-proxy-pattern/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-typed-proxy-pattern/__init__.py -------------------------------------------------------------------------------- /pyspark-typed-proxy-pattern/filters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-typed-proxy-pattern/filters.py -------------------------------------------------------------------------------- /pyspark-typed-proxy-pattern/mappers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-typed-proxy-pattern/mappers.py -------------------------------------------------------------------------------- /pyspark-typed-proxy-pattern/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-typed-proxy-pattern/requirements.txt -------------------------------------------------------------------------------- /pyspark-typed-proxy-pattern/schemas_declarations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-typed-proxy-pattern/schemas_declarations.py -------------------------------------------------------------------------------- /pyspark-typed-proxy-pattern/typed_schema_job_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-typed-proxy-pattern/typed_schema_job_example.py -------------------------------------------------------------------------------- /pyspark-typed-proxy-pattern/typed_schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-typed-proxy-pattern/typed_schemas.py -------------------------------------------------------------------------------- /pyspark-typed-proxy-pattern/untyped_schema_job_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-typed-proxy-pattern/untyped_schema_job_example.py -------------------------------------------------------------------------------- /pyspark-vectorized-udf/pyspark_udf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-vectorized-udf/pyspark_udf.py -------------------------------------------------------------------------------- /pyspark-vectorized-udf/pyspark_vectorized_udf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-vectorized-udf/pyspark_vectorized_udf.py -------------------------------------------------------------------------------- /pyspark-vectorized-udf/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/pyspark-vectorized-udf/requirements.txt -------------------------------------------------------------------------------- /shuffle-readers-iterators/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-readers-iterators/pom.xml -------------------------------------------------------------------------------- /shuffle-readers-iterators/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-readers-iterators/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /shuffle-readers-iterators/src/main/scala/com/waitingforcode/AggregatorExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-readers-iterators/src/main/scala/com/waitingforcode/AggregatorExample.scala -------------------------------------------------------------------------------- /shuffle-readers-iterators/src/main/scala/com/waitingforcode/GroupByKeyVsDistinct.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-readers-iterators/src/main/scala/com/waitingforcode/GroupByKeyVsDistinct.scala -------------------------------------------------------------------------------- /shuffle-readers-iterators/src/main/scala/com/waitingforcode/WrappedIterators.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-readers-iterators/src/main/scala/com/waitingforcode/WrappedIterators.scala -------------------------------------------------------------------------------- /shuffle-writers/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/pom.xml -------------------------------------------------------------------------------- /shuffle-writers/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /shuffle-writers/src/main/scala/com/waitingforcode/User.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/src/main/scala/com/waitingforcode/User.scala -------------------------------------------------------------------------------- /shuffle-writers/src/main/scala/com/waitingforcode/bypassmerge/BypassMergeAndMapSideCombineExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/src/main/scala/com/waitingforcode/bypassmerge/BypassMergeAndMapSideCombineExample.scala -------------------------------------------------------------------------------- /shuffle-writers/src/main/scala/com/waitingforcode/bypassmerge/BypassMergeAndRddWithoutMapSideCombineExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/src/main/scala/com/waitingforcode/bypassmerge/BypassMergeAndRddWithoutMapSideCombineExample.scala -------------------------------------------------------------------------------- /shuffle-writers/src/main/scala/com/waitingforcode/bypassmerge/BypassMergeShuffleWriterExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/src/main/scala/com/waitingforcode/bypassmerge/BypassMergeShuffleWriterExample.scala -------------------------------------------------------------------------------- /shuffle-writers/src/main/scala/com/waitingforcode/sortshuffle/SortShuffleBlockIdExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/src/main/scala/com/waitingforcode/sortshuffle/SortShuffleBlockIdExample.scala -------------------------------------------------------------------------------- /shuffle-writers/src/main/scala/com/waitingforcode/sortshuffle/SortShuffleWriterExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/src/main/scala/com/waitingforcode/sortshuffle/SortShuffleWriterExample.scala -------------------------------------------------------------------------------- /shuffle-writers/src/main/scala/com/waitingforcode/sortshuffle/SortShuffleWriterLessPartitionsThanBypassMergeThresholdExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/src/main/scala/com/waitingforcode/sortshuffle/SortShuffleWriterLessPartitionsThanBypassMergeThresholdExample.scala -------------------------------------------------------------------------------- /shuffle-writers/src/main/scala/com/waitingforcode/sortshuffle/SortShuffleWriterNotUsedSerializedInputExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/src/main/scala/com/waitingforcode/sortshuffle/SortShuffleWriterNotUsedSerializedInputExample.scala -------------------------------------------------------------------------------- /shuffle-writers/src/main/scala/com/waitingforcode/sortshuffle/SortShuffleWriterSpillingExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/src/main/scala/com/waitingforcode/sortshuffle/SortShuffleWriterSpillingExample.scala -------------------------------------------------------------------------------- /shuffle-writers/src/main/scala/com/waitingforcode/unsafewriter/MapSideCombineWithSerializedShuffleExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/src/main/scala/com/waitingforcode/unsafewriter/MapSideCombineWithSerializedShuffleExample.scala -------------------------------------------------------------------------------- /shuffle-writers/src/main/scala/com/waitingforcode/unsafewriter/UnsafeShuffleWriterExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/shuffle-writers/src/main/scala/com/waitingforcode/unsafewriter/UnsafeShuffleWriterExample.scala -------------------------------------------------------------------------------- /spark-3.0-features/broker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/broker/docker-compose.yaml -------------------------------------------------------------------------------- /spark-3.0-features/broker/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/broker/log4j.properties -------------------------------------------------------------------------------- /spark-3.0-features/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/pom.xml -------------------------------------------------------------------------------- /spark-3.0-features/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/AdaptiveQueryExecutionDemoteBroadcastHashJoin.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/AdaptiveQueryExecutionDemoteBroadcastHashJoin.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/AdaptiveQueryExecutionDemoteBroadcastHashJoinBothSides.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/AdaptiveQueryExecutionDemoteBroadcastHashJoinBothSides.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/AdaptiveQueryExecutionJoinSkewDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/AdaptiveQueryExecutionJoinSkewDemo.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/AdaptiveQueryExecutionLocalShuffleReader.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/AdaptiveQueryExecutionLocalShuffleReader.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/AdaptiveQueryExecutionReuseAdaptiveSubqueryDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/AdaptiveQueryExecutionReuseAdaptiveSubqueryDemo.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/AdaptiveQueryExecutionShuffePartitionCoalesceDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/AdaptiveQueryExecutionShuffePartitionCoalesceDemo.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/ContinuousModeWithQueueSize.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/ContinuousModeWithQueueSize.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/KafkaHeaderDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/KafkaHeaderDemo.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/PredicatePushdownNestedFieldsDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/PredicatePushdownNestedFieldsDemo.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/TestEntryKV.scala: -------------------------------------------------------------------------------- 1 | package com.waitingforcode 2 | 3 | case class TestEntryKV(key: Int, value: String) 4 | -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/dynamicpartitionpruning/DatasetsInstaller.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/dynamicpartitionpruning/DatasetsInstaller.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/dynamicpartitionpruning/DynamicPartitionPruningEnabledDisabledDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/dynamicpartitionpruning/DynamicPartitionPruningEnabledDisabledDemo.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/dynamicpartitionpruning/DynamicPartitionPruningJoinNotOnPartitionKeysDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/dynamicpartitionpruning/DynamicPartitionPruningJoinNotOnPartitionKeysDemo.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/dynamicpartitionpruning/DynamicPartitionPruningMissingRatioDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/dynamicpartitionpruning/DynamicPartitionPruningMissingRatioDemo.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/dynamicpartitionpruning/DynamicPartitionPruningNoExchangeReuseDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/dynamicpartitionpruning/DynamicPartitionPruningNoExchangeReuseDemo.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/source/file/ArchiveStrategy.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/source/file/ArchiveStrategy.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/source/file/DeleteStrategy.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/source/file/DeleteStrategy.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/main/scala/com/waitingforcode/source/file/FilesGenerator.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/main/scala/com/waitingforcode/source/file/FilesGenerator.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/test/resources/binary_files/apache_spark_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/test/resources/binary_files/apache_spark_logo.png -------------------------------------------------------------------------------- /spark-3.0-features/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/test/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.0-features/src/test/resources/parquet-spark-2.4.0/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /spark-3.0-features/src/test/resources/parquet-spark-2.4.0/.part-00000-60ef372c-8cf7-427c-8722-65eefe1e5fc1-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/test/resources/parquet-spark-2.4.0/.part-00000-60ef372c-8cf7-427c-8722-65eefe1e5fc1-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /spark-3.0-features/src/test/resources/parquet-spark-2.4.0/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spark-3.0-features/src/test/resources/parquet-spark-2.4.0/part-00000-60ef372c-8cf7-427c-8722-65eefe1e5fc1-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/test/resources/parquet-spark-2.4.0/part-00000-60ef372c-8cf7-427c-8722-65eefe1e5fc1-c000.snappy.parquet -------------------------------------------------------------------------------- /spark-3.0-features/src/test/scala/com/waitingforcode/BinaryDataSourceTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/test/scala/com/waitingforcode/BinaryDataSourceTest.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/test/scala/com/waitingforcode/DateTimeBackwardCompatibilityTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/test/scala/com/waitingforcode/DateTimeBackwardCompatibilityTest.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/test/scala/com/waitingforcode/DateTimeRebaseTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/test/scala/com/waitingforcode/DateTimeRebaseTest.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/test/scala/com/waitingforcode/DateTimeTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/test/scala/com/waitingforcode/DateTimeTest.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/test/scala/com/waitingforcode/DeleteUpdateMergeTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/test/scala/com/waitingforcode/DeleteUpdateMergeTest.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/test/scala/com/waitingforcode/NewFunctionsTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/test/scala/com/waitingforcode/NewFunctionsTest.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/test/scala/com/waitingforcode/PostgreSQLCompatibilityAnsiEnabledTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/test/scala/com/waitingforcode/PostgreSQLCompatibilityAnsiEnabledTest.scala -------------------------------------------------------------------------------- /spark-3.0-features/src/test/scala/com/waitingforcode/PostgreSQLCompatibilitySyntaxChangesTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-features/src/test/scala/com/waitingforcode/PostgreSQLCompatibilitySyntaxChangesTest.scala -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/broker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/broker/docker-compose.yaml -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/broker/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/broker/log4j.properties -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/pom.xml -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-2.4.5/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-2.4.5/pom.xml -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-2.4.5/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-2.4.5/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-2.4.5/src/main/scala/com/waitingforcode/BatchConsumer245.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-2.4.5/src/main/scala/com/waitingforcode/BatchConsumer245.scala -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-2.4.5/src/main/scala/com/waitingforcode/FailedStreamingLeftOuterJoin245.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-2.4.5/src/main/scala/com/waitingforcode/FailedStreamingLeftOuterJoin245.scala -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-2.4.5/src/main/scala/com/waitingforcode/KafkaConsumer245.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-2.4.5/src/main/scala/com/waitingforcode/KafkaConsumer245.scala -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-2.4.5/src/main/scala/com/waitingforcode/StreamingAggregationLimit245.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-2.4.5/src/main/scala/com/waitingforcode/StreamingAggregationLimit245.scala -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-3.0.0/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-3.0.0/pom.xml -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-3.0.0/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-3.0.0/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-3.0.0/src/main/scala/com/waitingforcode/BatchConsumer300.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-3.0.0/src/main/scala/com/waitingforcode/BatchConsumer300.scala -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-3.0.0/src/main/scala/com/waitingforcode/FailedStreamingLeftOuterJoin300.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-3.0.0/src/main/scala/com/waitingforcode/FailedStreamingLeftOuterJoin300.scala -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-3.0.0/src/main/scala/com/waitingforcode/KafkaConsumer300.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-3.0.0/src/main/scala/com/waitingforcode/KafkaConsumer300.scala -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-3.0.0/src/main/scala/com/waitingforcode/Producer300.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-3.0.0/src/main/scala/com/waitingforcode/Producer300.scala -------------------------------------------------------------------------------- /spark-3.0-vs-2.4.5/spark-3.0.0/src/main/scala/com/waitingforcode/StreamingAggregationLimit300.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.0-vs-2.4.5/spark-3.0.0/src/main/scala/com/waitingforcode/StreamingAggregationLimit300.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/broker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/broker/docker-compose.yaml -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/broker/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/broker/log4j.properties -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/pyspark-3.0.1/exception_message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/pyspark-3.0.1/exception_message.py -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/pyspark-3.0.1/hints_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/pyspark-3.0.1/hints_example.py -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/pyspark-3.0.1/requirements.txt: -------------------------------------------------------------------------------- 1 | pyspark==3.0.1 2 | -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/pyspark-3.1.1/exception_message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/pyspark-3.1.1/exception_message.py -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/pyspark-3.1.1/hints_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/pyspark-3.1.1/hints_example.py -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/pyspark-3.1.1/requirements.txt: -------------------------------------------------------------------------------- 1 | pyspark==3.1.1 2 | -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.0.1/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.0.1/pom.xml -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.0.1/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.0.1/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.0.1/src/main/scala/com/waitingforcode/BucketCoalesceJoin.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.0.1/src/main/scala/com/waitingforcode/BucketCoalesceJoin.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.0.1/src/main/scala/com/waitingforcode/JoinGroupByOptimization301.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.0.1/src/main/scala/com/waitingforcode/JoinGroupByOptimization301.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.0.1/src/main/scala/com/waitingforcode/ShuffleJoinForFullOuterJoin301.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.0.1/src/main/scala/com/waitingforcode/ShuffleJoinForFullOuterJoin301.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/pom.xml -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/BucketCoalesceJoin.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/BucketCoalesceJoin.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/BucketCoalesceNotAppliedOutputPartitionExpressionJoin.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/BucketCoalesceNotAppliedOutputPartitionExpressionJoin.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/EventLog.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/EventLog.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/FullOuterJoinStructuredStreaming311.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/FullOuterJoinStructuredStreaming311.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/GlobalWatermarkConsistencyIssueStructuredStreaming311.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/GlobalWatermarkConsistencyIssueStructuredStreaming311.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/JoinGroupByOptimization311.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/JoinGroupByOptimization311.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/JsonDataSourceV2Example.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/JsonDataSourceV2Example.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/LeftSemiJoinStructuredStreaming311.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/LeftSemiJoinStructuredStreaming311.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/PredicatePushdownJson.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/PredicatePushdownJson.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/ShuffleJoinForFullOuterJoin311.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/ShuffleJoinForFullOuterJoin311.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/StateStoreMetrics.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/StateStoreMetrics.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/StructuredStreamingTableReader.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/StructuredStreamingTableReader.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/StructuredStreamingTableWriter.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/main/scala/com/waitingforcode/StructuredStreamingTableWriter.scala -------------------------------------------------------------------------------- /spark-3.1.1-vs-3.0.1/spark-3.1.1/src/test/scala/com/waitingforcode/NewFunctionsTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.1.1-vs-3.0.1/spark-3.1.1/src/test/scala/com/waitingforcode/NewFunctionsTest.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/avro_parquet/pom.xml -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/spark245-input/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/spark245-input/.part-00000-cd4350d9-61e8-4f01-adaf-bf99d0284420-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/avro_parquet/spark245-input/.part-00000-cd4350d9-61e8-4f01-adaf-bf99d0284420-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/spark245-input/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/spark245-input/part-00000-cd4350d9-61e8-4f01-adaf-bf99d0284420-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/avro_parquet/spark245-input/part-00000-cd4350d9-61e8-4f01-adaf-bf99d0284420-c000.snappy.parquet -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/src/main/scala/com/waitingforcode/GenerateParquetData.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/avro_parquet/src/main/scala/com/waitingforcode/GenerateParquetData.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/src/main/scala/com/waitingforcode/ReadParquetData.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/avro_parquet/src/main/scala/com/waitingforcode/ReadParquetData.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/avro_parquet/src/test/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/src/test/resources/user_avro_schema.avsc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/avro_parquet/src/test/resources/user_avro_schema.avsc -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/src/test/resources/user_avro_schema_invalid.avsc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/avro_parquet/src/test/resources/user_avro_schema_invalid.avsc -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/src/test/scala/com/waitingforcode/AvroSchemaTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/avro_parquet/src/test/scala/com/waitingforcode/AvroSchemaTest.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/src/test/scala/com/waitingforcode/ParquetInPredicatePushdownTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/avro_parquet/src/test/scala/com/waitingforcode/ParquetInPredicatePushdownTest.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/avro_parquet/src/test/scala/com/waitingforcode/ReadParquetDataTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/avro_parquet/src/test/scala/com/waitingforcode/ReadParquetDataTest.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/broker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/broker/docker-compose.yaml -------------------------------------------------------------------------------- /spark-3.2.0-features/broker/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/broker/log4j.properties -------------------------------------------------------------------------------- /spark-3.2.0-features/misc_changes/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/misc_changes/pom.xml -------------------------------------------------------------------------------- /spark-3.2.0-features/misc_changes/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/misc_changes/src/test/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.2.0-features/misc_changes/src/test/scala/com/waitingforcode/SqlCodesTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/misc_changes/src/test/scala/com/waitingforcode/SqlCodesTest.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/misc_changes/src/test/scala/com/waitingforcode/UserDefinedTypeTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/misc_changes/src/test/scala/com/waitingforcode/UserDefinedTypeTest.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/pandas_on_pyspark/categorical_dtype_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/pandas_on_pyspark/categorical_dtype_demo.py -------------------------------------------------------------------------------- /spark-3.2.0-features/pandas_on_pyspark/pandas_on_pyspark_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/pandas_on_pyspark/pandas_on_pyspark_demo.py -------------------------------------------------------------------------------- /spark-3.2.0-features/pandas_on_pyspark/requirements.txt: -------------------------------------------------------------------------------- 1 | pyspark==3.2.0 2 | pandas==1.3.4 3 | pyarrow==6.0.1 -------------------------------------------------------------------------------- /spark-3.2.0-features/performance_improvements/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/performance_improvements/pom.xml -------------------------------------------------------------------------------- /spark-3.2.0-features/performance_improvements/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/performance_improvements/src/test/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.2.0-features/performance_improvements/src/test/scala/com/waitingforcode/PreservedOutputPartitioningWithAggregatesTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/performance_improvements/src/test/scala/com/waitingforcode/PreservedOutputPartitioningWithAggregatesTest.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/performance_improvements/src/test/scala/com/waitingforcode/RemoveRedundantAggregatesTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/performance_improvements/src/test/scala/com/waitingforcode/RemoveRedundantAggregatesTest.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/session_window/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/session_window/pom.xml -------------------------------------------------------------------------------- /spark-3.2.0-features/session_window/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/session_window/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.2.0-features/session_window/src/main/scala/com/waitingforcode/SessionWindow.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/session_window/src/main/scala/com/waitingforcode/SessionWindow.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/session_window/src/main/scala/com/waitingforcode/SessionWindowBatchDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/session_window/src/main/scala/com/waitingforcode/SessionWindowBatchDemo.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/sql_functions/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/sql_functions/pom.xml -------------------------------------------------------------------------------- /spark-3.2.0-features/sql_functions/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/sql_functions/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.2.0-features/sql_functions/src/main/scala/com/waitingforcode/GroupingSetsFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/sql_functions/src/main/scala/com/waitingforcode/GroupingSetsFunctions.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/sql_functions/src/main/scala/com/waitingforcode/IntervalFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/sql_functions/src/main/scala/com/waitingforcode/IntervalFunctions.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/sql_functions/src/main/scala/com/waitingforcode/StringFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/sql_functions/src/main/scala/com/waitingforcode/StringFunctions.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/sql_functions/src/main/scala/com/waitingforcode/TryCastFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/sql_functions/src/main/scala/com/waitingforcode/TryCastFunctions.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/sql_functions/src/main/scala/com/waitingforcode/WindowFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/sql_functions/src/main/scala/com/waitingforcode/WindowFunctions.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/structured_streaming_kafka_misc/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/structured_streaming_kafka_misc/README.md -------------------------------------------------------------------------------- /spark-3.2.0-features/structured_streaming_kafka_misc/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/structured_streaming_kafka_misc/pom.xml -------------------------------------------------------------------------------- /spark-3.2.0-features/structured_streaming_kafka_misc/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/structured_streaming_kafka_misc/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.2.0-features/structured_streaming_kafka_misc/src/main/scala/com/waitingforcode/DefaultStateArbitraryStatefulProcessingDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/structured_streaming_kafka_misc/src/main/scala/com/waitingforcode/DefaultStateArbitraryStatefulProcessingDemo.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/structured_streaming_kafka_misc/src/main/scala/com/waitingforcode/KafkaChangesDataLossConfigurationDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/structured_streaming_kafka_misc/src/main/scala/com/waitingforcode/KafkaChangesDataLossConfigurationDemo.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/structured_streaming_kafka_misc/src/main/scala/com/waitingforcode/KafkaChangesMinOffsetsMaxTriggerDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/structured_streaming_kafka_misc/src/main/scala/com/waitingforcode/KafkaChangesMinOffsetsMaxTriggerDemo.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/structured_streaming_kafka_misc/src/main/scala/com/waitingforcode/KafkaChangesStartingTimestampDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/structured_streaming_kafka_misc/src/main/scala/com/waitingforcode/KafkaChangesStartingTimestampDemo.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/structured_streaming_kafka_misc/src/main/scala/com/waitingforcode/KafkaLatencyMetricsDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/structured_streaming_kafka_misc/src/main/scala/com/waitingforcode/KafkaLatencyMetricsDemo.scala -------------------------------------------------------------------------------- /spark-3.2.0-features/structured_streaming_rocksdb/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/structured_streaming_rocksdb/README.md -------------------------------------------------------------------------------- /spark-3.2.0-features/structured_streaming_rocksdb/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/structured_streaming_rocksdb/pom.xml -------------------------------------------------------------------------------- /spark-3.2.0-features/structured_streaming_rocksdb/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/structured_streaming_rocksdb/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.2.0-features/structured_streaming_rocksdb/src/main/scala/com/waitingforcode/RocksDbStateStoreDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.2.0-features/structured_streaming_rocksdb/src/main/scala/com/waitingforcode/RocksDbStateStoreDemo.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/joins/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/joins/pom.xml -------------------------------------------------------------------------------- /spark-3.3.0-features/joins/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/joins/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-3.3.0-features/joins/src/main/scala/com/waitingforcode/RowLevelRuntimeFilterBloomFilterAggregateApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/joins/src/main/scala/com/waitingforcode/RowLevelRuntimeFilterBloomFilterAggregateApp.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/joins/src/main/scala/com/waitingforcode/RowLevelRuntimeFilterSemiJoinReductionApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/joins/src/main/scala/com/waitingforcode/RowLevelRuntimeFilterSemiJoinReductionApp.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/pyspark/new_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/pyspark/new_features.py -------------------------------------------------------------------------------- /spark-3.3.0-features/pyspark/requirements.txt: -------------------------------------------------------------------------------- 1 | pyspark==3.3.0 2 | pyarrow==6.0.1 3 | -------------------------------------------------------------------------------- /spark-3.3.0-features/sql_functions/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/sql_functions/pom.xml -------------------------------------------------------------------------------- /spark-3.3.0-features/sql_functions/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/sql_functions/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-3.3.0-features/sql_functions/src/main/scala/com/waitingforcode/AnsiAggregationFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/sql_functions/src/main/scala/com/waitingforcode/AnsiAggregationFunctions.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/sql_functions/src/main/scala/com/waitingforcode/ComplexTypesFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/sql_functions/src/main/scala/com/waitingforcode/ComplexTypesFunctions.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/sql_functions/src/main/scala/com/waitingforcode/DateTimeFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/sql_functions/src/main/scala/com/waitingforcode/DateTimeFunctions.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/sql_functions/src/main/scala/com/waitingforcode/ErrorHandlingFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/sql_functions/src/main/scala/com/waitingforcode/ErrorHandlingFunctions.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/sql_functions/src/main/scala/com/waitingforcode/MiscFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/sql_functions/src/main/scala/com/waitingforcode/MiscFunctions.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/sql_functions/src/main/scala/com/waitingforcode/StringFunctions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/sql_functions/src/main/scala/com/waitingforcode/StringFunctions.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/structured_streaming/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/structured_streaming/pom.xml -------------------------------------------------------------------------------- /spark-3.3.0-features/structured_streaming/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/structured_streaming/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-3.3.0-features/structured_streaming/src/main/scala/com/waitingforcode/RatePerMicroBatchSourceApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/structured_streaming/src/main/scala/com/waitingforcode/RatePerMicroBatchSourceApp.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/structured_streaming/src/main/scala/com/waitingforcode/RateSourceApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/structured_streaming/src/main/scala/com/waitingforcode/RateSourceApp.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/structured_streaming/src/main/scala/com/waitingforcode/TriggerAvailableNowApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/structured_streaming/src/main/scala/com/waitingforcode/TriggerAvailableNowApp.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/structured_streaming/src/main/scala/com/waitingforcode/TriggerOnceApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/structured_streaming/src/main/scala/com/waitingforcode/TriggerOnceApp.scala -------------------------------------------------------------------------------- /spark-3.3.0-features/structured_streaming_correctness_issue/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/structured_streaming_correctness_issue/pom.xml -------------------------------------------------------------------------------- /spark-3.3.0-features/structured_streaming_correctness_issue/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/structured_streaming_correctness_issue/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-3.3.0-features/structured_streaming_correctness_issue/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/structured_streaming_correctness_issue/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-3.3.0-features/structured_streaming_correctness_issue/src/main/scala/com/waitingforcode/StatefulCorrectnessIssueApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.3.0-features/structured_streaming_correctness_issue/src/main/scala/com/waitingforcode/StatefulCorrectnessIssueApp.scala -------------------------------------------------------------------------------- /spark-3.4.0-features/pyspark/apply_in_pandas_with_state_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/pyspark/apply_in_pandas_with_state_example.py -------------------------------------------------------------------------------- /spark-3.4.0-features/pyspark/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/pyspark/requirements.txt -------------------------------------------------------------------------------- /spark-3.4.0-features/pyspark/spark_connect_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/pyspark/spark_connect_example.py -------------------------------------------------------------------------------- /spark-3.4.0-features/structured_streaming/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/structured_streaming/pom.xml -------------------------------------------------------------------------------- /spark-3.4.0-features/structured_streaming/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/structured_streaming/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-3.4.0-features/structured_streaming/src/main/resources/protobuf.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/structured_streaming/src/main/resources/protobuf.pb -------------------------------------------------------------------------------- /spark-3.4.0-features/structured_streaming/src/main/scala/com/waitingforcode/AsyncProgressTrackingWithBigInterval.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/structured_streaming/src/main/scala/com/waitingforcode/AsyncProgressTrackingWithBigInterval.scala -------------------------------------------------------------------------------- /spark-3.4.0-features/structured_streaming/src/main/scala/com/waitingforcode/CorrectnessIssueFixFor3_4_0.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/structured_streaming/src/main/scala/com/waitingforcode/CorrectnessIssueFixFor3_4_0.scala -------------------------------------------------------------------------------- /spark-3.4.0-features/structured_streaming/src/main/scala/com/waitingforcode/CorrectnessIssueJoinStillPresent.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/structured_streaming/src/main/scala/com/waitingforcode/CorrectnessIssueJoinStillPresent.scala -------------------------------------------------------------------------------- /spark-3.4.0-features/structured_streaming/src/main/scala/com/waitingforcode/ProtobufSupport.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/structured_streaming/src/main/scala/com/waitingforcode/ProtobufSupport.scala -------------------------------------------------------------------------------- /spark-3.4.0-features/structured_streaming_3.3.0/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/structured_streaming_3.3.0/pom.xml -------------------------------------------------------------------------------- /spark-3.4.0-features/structured_streaming_3.3.0/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/structured_streaming_3.3.0/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-3.4.0-features/structured_streaming_3.3.0/src/main/scala/com/waitingforcode/CorrectnessIssueJoinStillPresent.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/structured_streaming_3.3.0/src/main/scala/com/waitingforcode/CorrectnessIssueJoinStillPresent.scala -------------------------------------------------------------------------------- /spark-3.4.0-features/structured_streaming_3.3.0/src/main/scala/com/waitingforcode/CorrectnessIssueQuery.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/structured_streaming_3.3.0/src/main/scala/com/waitingforcode/CorrectnessIssueQuery.scala -------------------------------------------------------------------------------- /spark-3.4.0-features/structured_streaming_3.3.0/src/main/scala/com/waitingforcode/GlobalWatermarkCorrectnessIssue.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.4.0-features/structured_streaming_3.3.0/src/main/scala/com/waitingforcode/GlobalWatermarkCorrectnessIssue.scala -------------------------------------------------------------------------------- /spark-3.5.0-features/structured_streaming/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.5.0-features/structured_streaming/pom.xml -------------------------------------------------------------------------------- /spark-3.5.0-features/structured_streaming/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.5.0-features/structured_streaming/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-3.5.0-features/structured_streaming/src/main/scala/com/waitingforcode/DropDuplicatesWithEventTime.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.5.0-features/structured_streaming/src/main/scala/com/waitingforcode/DropDuplicatesWithEventTime.scala -------------------------------------------------------------------------------- /spark-3.5.0-features/structured_streaming/src/main/scala/com/waitingforcode/DropDuplicatesWithinWatermark.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.5.0-features/structured_streaming/src/main/scala/com/waitingforcode/DropDuplicatesWithinWatermark.scala -------------------------------------------------------------------------------- /spark-3.5.0-features/structured_streaming/src/main/scala/com/waitingforcode/OnQueryIdleListener.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.5.0-features/structured_streaming/src/main/scala/com/waitingforcode/OnQueryIdleListener.scala -------------------------------------------------------------------------------- /spark-3.5.0-features/structured_streaming/src/main/scala/com/waitingforcode/WatermarkPropagation.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-3.5.0-features/structured_streaming/src/main/scala/com/waitingforcode/WatermarkPropagation.scala -------------------------------------------------------------------------------- /spark-4-features/src/main/scala/com/waitingforcode/UserDefinedFunctionSQLExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-4-features/src/main/scala/com/waitingforcode/UserDefinedFunctionSQLExample.scala -------------------------------------------------------------------------------- /spark-4-structured-streaming-new-state-api/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-4-structured-streaming-new-state-api/build.sbt -------------------------------------------------------------------------------- /spark-4-structured-streaming-new-state-api/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.1 2 | -------------------------------------------------------------------------------- /spark-4-structured-streaming-new-state-api/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spark-4-structured-streaming-new-state-api/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-4-structured-streaming-new-state-api/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/Models.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/Models.scala -------------------------------------------------------------------------------- /spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/batch/TransformWithStateBatch.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/batch/TransformWithStateBatch.scala -------------------------------------------------------------------------------- /spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/batch/TransformWithStateBatchIncremental.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/batch/TransformWithStateBatchIncremental.scala -------------------------------------------------------------------------------- /spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/batch/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/batch/package.scala -------------------------------------------------------------------------------- /spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/package.scala -------------------------------------------------------------------------------- /spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/state_init/TransformWithStateInitStateWithReconciliation.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/state_init/TransformWithStateInitStateWithReconciliation.scala -------------------------------------------------------------------------------- /spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/ttl_list/TransformWithStateAndTtlListExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/ttl_list/TransformWithStateAndTtlListExample.scala -------------------------------------------------------------------------------- /spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/ttl_state/TransformWithStateAndTtlExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-4-structured-streaming-new-state-api/src/main/scala/com/waitingforcode/ttl_state/TransformWithStateAndTtlExample.scala -------------------------------------------------------------------------------- /spark-coalesce-vs-repartition/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-coalesce-vs-repartition/pom.xml -------------------------------------------------------------------------------- /spark-coalesce-vs-repartition/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-coalesce-vs-repartition/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-coalesce-vs-repartition/src/main/scala/com/waitingforcode/CoalesceDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-coalesce-vs-repartition/src/main/scala/com/waitingforcode/CoalesceDemo.scala -------------------------------------------------------------------------------- /spark-coalesce-vs-repartition/src/main/scala/com/waitingforcode/RepartitionDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-coalesce-vs-repartition/src/main/scala/com/waitingforcode/RepartitionDemo.scala -------------------------------------------------------------------------------- /spark-coalesce-vs-repartition/src/main/scala/com/waitingforcode/StaticLocationsRdd.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-coalesce-vs-repartition/src/main/scala/com/waitingforcode/StaticLocationsRdd.scala -------------------------------------------------------------------------------- /spark-commands/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-commands/pom.xml -------------------------------------------------------------------------------- /spark-commands/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-commands/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-commands/src/main/scala/com/waitingforcode/AlterTableCommandExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-commands/src/main/scala/com/waitingforcode/AlterTableCommandExample.scala -------------------------------------------------------------------------------- /spark-delta-unified-data-management-patterns/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-delta-unified-data-management-patterns/pom.xml -------------------------------------------------------------------------------- /spark-delta-unified-data-management-patterns/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-delta-unified-data-management-patterns/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-delta-unified-data-management-patterns/src/main/scala/com/waitingforcode/AcidGuaranteeExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-delta-unified-data-management-patterns/src/main/scala/com/waitingforcode/AcidGuaranteeExample.scala -------------------------------------------------------------------------------- /spark-delta-unified-data-management-patterns/src/main/scala/com/waitingforcode/ChangeDataCaptureDeltaLakeExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-delta-unified-data-management-patterns/src/main/scala/com/waitingforcode/ChangeDataCaptureDeltaLakeExample.scala -------------------------------------------------------------------------------- /spark-delta-unified-data-management-patterns/src/main/scala/com/waitingforcode/InPlaceOperationsExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-delta-unified-data-management-patterns/src/main/scala/com/waitingforcode/InPlaceOperationsExample.scala -------------------------------------------------------------------------------- /spark-delta-unified-data-management-patterns/src/main/scala/com/waitingforcode/SchemaManagementExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-delta-unified-data-management-patterns/src/main/scala/com/waitingforcode/SchemaManagementExample.scala -------------------------------------------------------------------------------- /spark-expectations-demo/databricks/accounts.jobs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-expectations-demo/databricks/accounts.jobs.yml -------------------------------------------------------------------------------- /spark-expectations-demo/databricks/refresh_accounts_job.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-expectations-demo/databricks/refresh_accounts_job.py -------------------------------------------------------------------------------- /spark-expectations-demo/local/data_quality_rules_builders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-expectations-demo/local/data_quality_rules_builders.py -------------------------------------------------------------------------------- /spark-expectations-demo/local/spark_expectations_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-expectations-demo/local/spark_expectations_demo.py -------------------------------------------------------------------------------- /spark-expectations-demo/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-expectations-demo/requirements.txt -------------------------------------------------------------------------------- /spark-filter-accumulator/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-filter-accumulator/pom.xml -------------------------------------------------------------------------------- /spark-filter-accumulator/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-filter-accumulator/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-filter-accumulator/src/main/scala/com/waitingforcode/FilterAccumulatorDemo1.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-filter-accumulator/src/main/scala/com/waitingforcode/FilterAccumulatorDemo1.scala -------------------------------------------------------------------------------- /spark-filter-accumulator/src/main/scala/com/waitingforcode/FilterAccumulatorWithTemporaryErrorDemo2.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-filter-accumulator/src/main/scala/com/waitingforcode/FilterAccumulatorWithTemporaryErrorDemo2.scala -------------------------------------------------------------------------------- /spark-filter-accumulator/src/main/scala/com/waitingforcode/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-filter-accumulator/src/main/scala/com/waitingforcode/package.scala -------------------------------------------------------------------------------- /spark-kubernetes-concepts/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-kubernetes-concepts/Dockerfile -------------------------------------------------------------------------------- /spark-kubernetes-concepts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-kubernetes-concepts/README.md -------------------------------------------------------------------------------- /spark-kubernetes-concepts/persistent_volume_definition.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-kubernetes-concepts/persistent_volume_definition.yaml -------------------------------------------------------------------------------- /spark-kubernetes-concepts/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-kubernetes-concepts/pom.xml -------------------------------------------------------------------------------- /spark-kubernetes-concepts/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-kubernetes-concepts/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-kubernetes-concepts/src/main/scala/com/waitingforcode/StageLevelSchedulingApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-kubernetes-concepts/src/main/scala/com/waitingforcode/StageLevelSchedulingApp.scala -------------------------------------------------------------------------------- /spark-kubernetes-concepts/src/main/scala/com/waitingforcode/StaticApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-kubernetes-concepts/src/main/scala/com/waitingforcode/StaticApp.scala -------------------------------------------------------------------------------- /spark-listeners/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-listeners/pom.xml -------------------------------------------------------------------------------- /spark-listeners/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-listeners/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-listeners/src/main/scala/com/waitingforcode/QueryExecutionListenerExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-listeners/src/main/scala/com/waitingforcode/QueryExecutionListenerExample.scala -------------------------------------------------------------------------------- /spark-listeners/src/main/scala/com/waitingforcode/SparkListenerExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-listeners/src/main/scala/com/waitingforcode/SparkListenerExample.scala -------------------------------------------------------------------------------- /spark-listeners/src/main/scala/com/waitingforcode/StreamingQueryListenerExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-listeners/src/main/scala/com/waitingforcode/StreamingQueryListenerExample.scala -------------------------------------------------------------------------------- /spark-observe/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-observe/pom.xml -------------------------------------------------------------------------------- /spark-observe/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-observe/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-observe/src/main/scala/com/waitingforcode/BatchDatasetObservation.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-observe/src/main/scala/com/waitingforcode/BatchDatasetObservation.scala -------------------------------------------------------------------------------- /spark-observe/src/main/scala/com/waitingforcode/BatchDatasetObservationWithGet.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-observe/src/main/scala/com/waitingforcode/BatchDatasetObservationWithGet.scala -------------------------------------------------------------------------------- /spark-predicate-pushdown/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-predicate-pushdown/pom.xml -------------------------------------------------------------------------------- /spark-predicate-pushdown/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-predicate-pushdown/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-predicate-pushdown/src/main/scala/com/waitingforcode/GenerateTestData.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-predicate-pushdown/src/main/scala/com/waitingforcode/GenerateTestData.scala -------------------------------------------------------------------------------- /spark-predicate-pushdown/src/main/scala/com/waitingforcode/Letter.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-predicate-pushdown/src/main/scala/com/waitingforcode/Letter.scala -------------------------------------------------------------------------------- /spark-predicate-pushdown/src/main/scala/com/waitingforcode/ReadTestDataWithPushdown.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-predicate-pushdown/src/main/scala/com/waitingforcode/ReadTestDataWithPushdown.scala -------------------------------------------------------------------------------- /spark-predicate-pushdown/src/main/scala/com/waitingforcode/ReadTestDataWithoutPushdown.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-predicate-pushdown/src/main/scala/com/waitingforcode/ReadTestDataWithoutPushdown.scala -------------------------------------------------------------------------------- /spark-predicate-pushdown/src/main/scala/com/waitingforcode/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-predicate-pushdown/src/main/scala/com/waitingforcode/package.scala -------------------------------------------------------------------------------- /spark-show/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-show/build.sbt -------------------------------------------------------------------------------- /spark-show/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.1 2 | -------------------------------------------------------------------------------- /spark-show/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spark-show/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-show/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-show/src/main/scala/com/waitingforcode/ShowDataFrameExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-show/src/main/scala/com/waitingforcode/ShowDataFrameExample.scala -------------------------------------------------------------------------------- /spark-show/src/main/scala/com/waitingforcode/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-show/src/main/scala/com/waitingforcode/package.scala -------------------------------------------------------------------------------- /spark-sql-checkpoint/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-checkpoint/pom.xml -------------------------------------------------------------------------------- /spark-sql-checkpoint/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-checkpoint/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-sql-checkpoint/src/main/scala/com/waitingforcode/IterativeExecutionNoCheckpoint.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-checkpoint/src/main/scala/com/waitingforcode/IterativeExecutionNoCheckpoint.scala -------------------------------------------------------------------------------- /spark-sql-checkpoint/src/main/scala/com/waitingforcode/IterativeExecutionWithCheckpoint.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-checkpoint/src/main/scala/com/waitingforcode/IterativeExecutionWithCheckpoint.scala -------------------------------------------------------------------------------- /spark-sql-group-by-distinct/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-group-by-distinct/pom.xml -------------------------------------------------------------------------------- /spark-sql-group-by-distinct/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-group-by-distinct/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-sql-group-by-distinct/src/main/scala/com/waitingforcode/GroupByKeyVsDistinct.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-group-by-distinct/src/main/scala/com/waitingforcode/GroupByKeyVsDistinct.scala -------------------------------------------------------------------------------- /spark-sql-hints/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-hints/pom.xml -------------------------------------------------------------------------------- /spark-sql-hints/src/main/scala/com/waitingforcode/JoinHintsExamples.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-hints/src/main/scala/com/waitingforcode/JoinHintsExamples.scala -------------------------------------------------------------------------------- /spark-sql-insertinto-trap/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-insertinto-trap/build.sbt -------------------------------------------------------------------------------- /spark-sql-insertinto-trap/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.1 2 | -------------------------------------------------------------------------------- /spark-sql-insertinto-trap/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-insertinto-trap/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-sql-insertinto-trap/src/main/scala/com/waitingforcode/InsertIntoByNameJob.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-insertinto-trap/src/main/scala/com/waitingforcode/InsertIntoByNameJob.scala -------------------------------------------------------------------------------- /spark-sql-insertinto-trap/src/main/scala/com/waitingforcode/InsertIntoTrapJob.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-insertinto-trap/src/main/scala/com/waitingforcode/InsertIntoTrapJob.scala -------------------------------------------------------------------------------- /spark-sql-jit-compilation/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-jit-compilation/pom.xml -------------------------------------------------------------------------------- /spark-sql-jit-compilation/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-jit-compilation/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-sql-jit-compilation/src/main/scala/com/waitingforcode/JitCompilation.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-jit-compilation/src/main/scala/com/waitingforcode/JitCompilation.scala -------------------------------------------------------------------------------- /spark-sql-lateral-aliases/spark-3.3.0/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-aliases/spark-3.3.0/build.sbt -------------------------------------------------------------------------------- /spark-sql-lateral-aliases/spark-3.3.0/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.1 2 | -------------------------------------------------------------------------------- /spark-sql-lateral-aliases/spark-3.3.0/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-aliases/spark-3.3.0/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-sql-lateral-aliases/spark-3.3.0/src/main/scala/com/waitingforcode/LateralAliasReferenceMissingExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-aliases/spark-3.3.0/src/main/scala/com/waitingforcode/LateralAliasReferenceMissingExample.scala -------------------------------------------------------------------------------- /spark-sql-lateral-aliases/spark-3.4.0/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-aliases/spark-3.4.0/build.sbt -------------------------------------------------------------------------------- /spark-sql-lateral-aliases/spark-3.4.0/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.1 2 | -------------------------------------------------------------------------------- /spark-sql-lateral-aliases/spark-3.4.0/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-aliases/spark-3.4.0/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-sql-lateral-aliases/spark-3.4.0/src/main/scala/com/waitingforcode/LateralAliasReferenceMissingExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-aliases/spark-3.4.0/src/main/scala/com/waitingforcode/LateralAliasReferenceMissingExample.scala -------------------------------------------------------------------------------- /spark-sql-lateral-subquery/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-subquery/build.sbt -------------------------------------------------------------------------------- /spark-sql-lateral-subquery/postgresql_example/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-subquery/postgresql_example/README.md -------------------------------------------------------------------------------- /spark-sql-lateral-subquery/postgresql_example/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-subquery/postgresql_example/docker-compose.yaml -------------------------------------------------------------------------------- /spark-sql-lateral-subquery/postgresql_example/init.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-subquery/postgresql_example/init.sql -------------------------------------------------------------------------------- /spark-sql-lateral-subquery/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.1 2 | -------------------------------------------------------------------------------- /spark-sql-lateral-subquery/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-subquery/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-sql-lateral-subquery/src/main/scala/com/waitingforcode/LateralSubqueryExamples.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-subquery/src/main/scala/com/waitingforcode/LateralSubqueryExamples.scala -------------------------------------------------------------------------------- /spark-sql-lateral-subquery/src/main/scala/com/waitingforcode/LateralViewExamples.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-lateral-subquery/src/main/scala/com/waitingforcode/LateralViewExamples.scala -------------------------------------------------------------------------------- /spark-sql-mapgroupswithstate/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-mapgroupswithstate/README.md -------------------------------------------------------------------------------- /spark-sql-mapgroupswithstate/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-mapgroupswithstate/pom.xml -------------------------------------------------------------------------------- /spark-sql-mapgroupswithstate/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-mapgroupswithstate/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-sql-mapgroupswithstate/src/main/scala/com/waitingforcode/FlatMapGroupsWithStateAndInitBatch.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-mapgroupswithstate/src/main/scala/com/waitingforcode/FlatMapGroupsWithStateAndInitBatch.scala -------------------------------------------------------------------------------- /spark-sql-mapgroupswithstate/src/main/scala/com/waitingforcode/FlatMapGroupsWithStateBatch.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-mapgroupswithstate/src/main/scala/com/waitingforcode/FlatMapGroupsWithStateBatch.scala -------------------------------------------------------------------------------- /spark-sql-mapgroupswithstate/src/main/scala/com/waitingforcode/StatefulMappingFunction.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-mapgroupswithstate/src/main/scala/com/waitingforcode/StatefulMappingFunction.scala -------------------------------------------------------------------------------- /spark-sql-mapgroupswithstate/src/main/scala/com/waitingforcode/TimestampedEvent.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-mapgroupswithstate/src/main/scala/com/waitingforcode/TimestampedEvent.scala -------------------------------------------------------------------------------- /spark-sql-not-in-vs-not-exists/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-not-in-vs-not-exists/pom.xml -------------------------------------------------------------------------------- /spark-sql-not-in-vs-not-exists/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-not-in-vs-not-exists/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-sql-not-in-vs-not-exists/src/main/scala/com/waitingforcode/NotInVsNotExistsSingleColumn.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-not-in-vs-not-exists/src/main/scala/com/waitingforcode/NotInVsNotExistsSingleColumn.scala -------------------------------------------------------------------------------- /spark-sql-not-in-vs-not-exists/src/main/scala/com/waitingforcode/NotInVsNotExistsTwoColumns.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-not-in-vs-not-exists/src/main/scala/com/waitingforcode/NotInVsNotExistsTwoColumns.scala -------------------------------------------------------------------------------- /spark-sql-not-in-vs-not-exists/src/main/scala/com/waitingforcode/User.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-not-in-vs-not-exists/src/main/scala/com/waitingforcode/User.scala -------------------------------------------------------------------------------- /spark-sql-outers/explode_on_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-outers/explode_on_array.py -------------------------------------------------------------------------------- /spark-sql-outers/explode_outer_on_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-outers/explode_outer_on_array.py -------------------------------------------------------------------------------- /spark-sql-outers/explodes_on_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-outers/explodes_on_map.py -------------------------------------------------------------------------------- /spark-sql-outers/requirements.txt: -------------------------------------------------------------------------------- 1 | pyspark==4.0.0 2 | -------------------------------------------------------------------------------- /spark-sql-overwrite-partition/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-overwrite-partition/build.sbt -------------------------------------------------------------------------------- /spark-sql-overwrite-partition/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.1 2 | -------------------------------------------------------------------------------- /spark-sql-overwrite-partition/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-overwrite-partition/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/DeltaLakeDynamicPartitionOverwriteExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/DeltaLakeDynamicPartitionOverwriteExample.scala -------------------------------------------------------------------------------- /spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/DeltaLakeReplaceWhereExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/DeltaLakeReplaceWhereExample.scala -------------------------------------------------------------------------------- /spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/DynamicPartitionOverwriteExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/DynamicPartitionOverwriteExample.scala -------------------------------------------------------------------------------- /spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/SaveAsTableDynamicPartitionOverwriteExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/SaveAsTableDynamicPartitionOverwriteExample.scala -------------------------------------------------------------------------------- /spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/SaveAsTableStaticPartitionOverwriteExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/SaveAsTableStaticPartitionOverwriteExample.scala -------------------------------------------------------------------------------- /spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/StaticPartitionOverwriteExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/StaticPartitionOverwriteExample.scala -------------------------------------------------------------------------------- /spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/StaticPartitionOverwriteWithPartitionSpecExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/StaticPartitionOverwriteWithPartitionSpecExample.scala -------------------------------------------------------------------------------- /spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/solutions/DeltaLakeReplaceWhereSaveAsTableExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/solutions/DeltaLakeReplaceWhereSaveAsTableExample.scala -------------------------------------------------------------------------------- /spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/solutions/InsertByNameExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-overwrite-partition/src/main/scala/com/waitingforcode/solutions/InsertByNameExample.scala -------------------------------------------------------------------------------- /spark-sql-recursive-cte/recursive_query_checkpoints_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-recursive-cte/recursive_query_checkpoints_cache.py -------------------------------------------------------------------------------- /spark-sql-saveastable/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-saveastable/build.sbt -------------------------------------------------------------------------------- /spark-sql-saveastable/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.1 2 | -------------------------------------------------------------------------------- /spark-sql-saveastable/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-saveastable/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-sql-saveastable/src/main/scala/com/waitingforcode/SaveAsTableExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-saveastable/src/main/scala/com/waitingforcode/SaveAsTableExample.scala -------------------------------------------------------------------------------- /spark-sql-saveastable/src/main/scala/com/waitingforcode/SaveAsTableExampleForHive.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-saveastable/src/main/scala/com/waitingforcode/SaveAsTableExampleForHive.scala -------------------------------------------------------------------------------- /spark-sql-wildcard/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-wildcard/pom.xml -------------------------------------------------------------------------------- /spark-sql-wildcard/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-wildcard/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /spark-sql-wildcard/src/main/scala/com/waitingforcode/Demo1DataGeneration.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-wildcard/src/main/scala/com/waitingforcode/Demo1DataGeneration.scala -------------------------------------------------------------------------------- /spark-sql-wildcard/src/main/scala/com/waitingforcode/Demo2DataReadingNoWildcard.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-wildcard/src/main/scala/com/waitingforcode/Demo2DataReadingNoWildcard.scala -------------------------------------------------------------------------------- /spark-sql-wildcard/src/main/scala/com/waitingforcode/Demo3DataReadingWithWildcard.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql-wildcard/src/main/scala/com/waitingforcode/Demo3DataReadingWithWildcard.scala -------------------------------------------------------------------------------- /spark-sql/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql/pom.xml -------------------------------------------------------------------------------- /spark-sql/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-sql/src/main/scala/com/waitingforcode/BucketingJoinExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql/src/main/scala/com/waitingforcode/BucketingJoinExample.scala -------------------------------------------------------------------------------- /spark-sql/src/main/scala/com/waitingforcode/BucketingJoinWithPartitioningExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql/src/main/scala/com/waitingforcode/BucketingJoinWithPartitioningExample.scala -------------------------------------------------------------------------------- /spark-sql/src/main/scala/com/waitingforcode/PartitionWiseSimulationExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql/src/main/scala/com/waitingforcode/PartitionWiseSimulationExample.scala -------------------------------------------------------------------------------- /spark-sql/src/main/scala/com/waitingforcode/PivotExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql/src/main/scala/com/waitingforcode/PivotExample.scala -------------------------------------------------------------------------------- /spark-sql/src/main/scala/com/waitingforcode/SelectImpactOnDropDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql/src/main/scala/com/waitingforcode/SelectImpactOnDropDemo.scala -------------------------------------------------------------------------------- /spark-sql/src/main/scala/com/waitingforcode/StackExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-sql/src/main/scala/com/waitingforcode/StackExample.scala -------------------------------------------------------------------------------- /spark-stage-scheduling/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-stage-scheduling/Dockerfile -------------------------------------------------------------------------------- /spark-stage-scheduling/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-stage-scheduling/README.md -------------------------------------------------------------------------------- /spark-stage-scheduling/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-stage-scheduling/pom.xml -------------------------------------------------------------------------------- /spark-stage-scheduling/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-stage-scheduling/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-stage-scheduling/src/main/scala/com/waitingforcode/StageLevelSchedulingDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-stage-scheduling/src/main/scala/com/waitingforcode/StageLevelSchedulingDemo.scala -------------------------------------------------------------------------------- /spark-tables/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-tables/pom.xml -------------------------------------------------------------------------------- /spark-tables/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-tables/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-tables/src/main/scala/com/waitingforcode/ExternalTableExample1_CreateApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-tables/src/main/scala/com/waitingforcode/ExternalTableExample1_CreateApp.scala -------------------------------------------------------------------------------- /spark-tables/src/main/scala/com/waitingforcode/ExternalTableExample2_ReadApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-tables/src/main/scala/com/waitingforcode/ExternalTableExample2_ReadApp.scala -------------------------------------------------------------------------------- /spark-tables/src/main/scala/com/waitingforcode/ExternalTableExample3_ReadAfterRemoveApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-tables/src/main/scala/com/waitingforcode/ExternalTableExample3_ReadAfterRemoveApp.scala -------------------------------------------------------------------------------- /spark-tables/src/main/scala/com/waitingforcode/InternalTableExample1_CreateApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-tables/src/main/scala/com/waitingforcode/InternalTableExample1_CreateApp.scala -------------------------------------------------------------------------------- /spark-tables/src/main/scala/com/waitingforcode/InternalTableExample2_ReadApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-tables/src/main/scala/com/waitingforcode/InternalTableExample2_ReadApp.scala -------------------------------------------------------------------------------- /spark-tables/src/main/scala/com/waitingforcode/InternalTableExample3_ReadAfterRemoveApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-tables/src/main/scala/com/waitingforcode/InternalTableExample3_ReadAfterRemoveApp.scala -------------------------------------------------------------------------------- /spark-tables/src/main/scala/com/waitingforcode/Letter.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-tables/src/main/scala/com/waitingforcode/Letter.scala -------------------------------------------------------------------------------- /spark-tables/src/main/scala/com/waitingforcode/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-tables/src/main/scala/com/waitingforcode/package.scala -------------------------------------------------------------------------------- /spark-vs-beam/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-vs-beam/pom.xml -------------------------------------------------------------------------------- /spark-vs-beam/src/main/java/com/waitingforcode/BeamExample.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-vs-beam/src/main/java/com/waitingforcode/BeamExample.java -------------------------------------------------------------------------------- /spark-vs-beam/src/main/java/com/waitingforcode/LabelWithSum.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-vs-beam/src/main/java/com/waitingforcode/LabelWithSum.java -------------------------------------------------------------------------------- /spark-vs-beam/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-vs-beam/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-vs-beam/src/main/scala/com/waitingforcode/SparkExample.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-vs-beam/src/main/scala/com/waitingforcode/SparkExample.scala -------------------------------------------------------------------------------- /spark-withcolumn-problem/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-withcolumn-problem/README.md -------------------------------------------------------------------------------- /spark-withcolumn-problem/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-withcolumn-problem/pom.xml -------------------------------------------------------------------------------- /spark-withcolumn-problem/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-withcolumn-problem/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /spark-withcolumn-problem/src/main/scala/com/waitingforcode/CleanserFunctionsStudy.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-withcolumn-problem/src/main/scala/com/waitingforcode/CleanserFunctionsStudy.scala -------------------------------------------------------------------------------- /spark-withcolumn-problem/src/main/scala/com/waitingforcode/DataDispatcherIfElseStudy.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-withcolumn-problem/src/main/scala/com/waitingforcode/DataDispatcherIfElseStudy.scala -------------------------------------------------------------------------------- /spark-withcolumn-problem/src/main/scala/com/waitingforcode/EventLog.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-withcolumn-problem/src/main/scala/com/waitingforcode/EventLog.scala -------------------------------------------------------------------------------- /spark-withcolumn-problem/src/main/scala/com/waitingforcode/VersionsToRun.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/spark-withcolumn-problem/src/main/scala/com/waitingforcode/VersionsToRun.scala -------------------------------------------------------------------------------- /structured-streaming-2-sinks/src/main/scala/com/waitingforcode/JobWith2Sinks.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-2-sinks/src/main/scala/com/waitingforcode/JobWith2Sinks.scala -------------------------------------------------------------------------------- /structured-streaming-2-sinks/src/main/scala/com/waitingforcode/JobWith2SinksWithSleep.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-2-sinks/src/main/scala/com/waitingforcode/JobWith2SinksWithSleep.scala -------------------------------------------------------------------------------- /structured-streaming-dynamic-resource-allocation/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-dynamic-resource-allocation/Dockerfile -------------------------------------------------------------------------------- /structured-streaming-dynamic-resource-allocation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-dynamic-resource-allocation/README.md -------------------------------------------------------------------------------- /structured-streaming-dynamic-resource-allocation/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-dynamic-resource-allocation/pom.xml -------------------------------------------------------------------------------- /structured-streaming-dynamic-resource-allocation/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-dynamic-resource-allocation/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-dynamic-resource-allocation/src/main/scala/com/waitingforcode/AutoscaledRateStreamReader.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-dynamic-resource-allocation/src/main/scala/com/waitingforcode/AutoscaledRateStreamReader.scala -------------------------------------------------------------------------------- /structured-streaming-event-skew-watermark/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-event-skew-watermark/build.sbt -------------------------------------------------------------------------------- /structured-streaming-event-skew-watermark/docker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-event-skew-watermark/docker/docker-compose.yaml -------------------------------------------------------------------------------- /structured-streaming-event-skew-watermark/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.1 2 | -------------------------------------------------------------------------------- /structured-streaming-event-skew-watermark/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-event-skew-watermark/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /structured-streaming-event-skew-watermark/src/main/scala/com/waitingforcode/DropDuplicatesWithinWatermarkForKafka.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-event-skew-watermark/src/main/scala/com/waitingforcode/DropDuplicatesWithinWatermarkForKafka.scala -------------------------------------------------------------------------------- /structured-streaming-event-skew-watermark/src/main/scala/com/waitingforcode/DropDuplicatesWithinWatermarkForMemoryStream.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-event-skew-watermark/src/main/scala/com/waitingforcode/DropDuplicatesWithinWatermarkForMemoryStream.scala -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/README.md -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/docker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/docker/docker-compose.yaml -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/pom.xml -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/FirstStateBrokenExpiration.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/FirstStateBrokenExpiration.scala -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/FirstStateBrokenMapper.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/FirstStateBrokenMapper.scala -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/RateMicroBatchAugmented.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/RateMicroBatchAugmented.scala -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/UserWithVisits.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/UserWithVisits.scala -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/VisitTimeAndPage.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/VisitTimeAndPage.scala -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/eventtime/CounterWithMaxEventTime.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/eventtime/CounterWithMaxEventTime.scala -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/eventtime/EventTimeStateMapper.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/eventtime/EventTimeStateMapper.scala -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/eventtime/StatefulJobWithEventTimePattern.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/eventtime/StatefulJobWithEventTimePattern.scala -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/temporarystate/CounterWithFlag.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/temporarystate/CounterWithFlag.scala -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/temporarystate/FlagStateMapper.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/temporarystate/FlagStateMapper.scala -------------------------------------------------------------------------------- /structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/temporarystate/StatefulJobWithFlagPattern.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-first-micro-batch-state-expiration/src/main/scala/com/becomedataengineer/temporarystate/StatefulJobWithFlagPattern.scala -------------------------------------------------------------------------------- /structured-streaming-initial-state/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-initial-state/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /structured-streaming-integration-tests/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-integration-tests/pom.xml -------------------------------------------------------------------------------- /structured-streaming-integration-tests/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-integration-tests/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-integration-tests/src/main/scala/com/waitingforcode/DataDispatcher.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-integration-tests/src/main/scala/com/waitingforcode/DataDispatcher.scala -------------------------------------------------------------------------------- /structured-streaming-integration-tests/src/main/scala/com/waitingforcode/DataDispatcherConfig.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-integration-tests/src/main/scala/com/waitingforcode/DataDispatcherConfig.scala -------------------------------------------------------------------------------- /structured-streaming-integration-tests/src/test/scala/com/waitingforcode/DataDispatcherTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-integration-tests/src/test/scala/com/waitingforcode/DataDispatcherTest.scala -------------------------------------------------------------------------------- /structured-streaming-listeners/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-listeners/pom.xml -------------------------------------------------------------------------------- /structured-streaming-listeners/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-listeners/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /structured-streaming-listeners/src/main/scala/com/waitingforcode/FailingListener.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-listeners/src/main/scala/com/waitingforcode/FailingListener.scala -------------------------------------------------------------------------------- /structured-streaming-listeners/src/main/scala/com/waitingforcode/FailingListeners.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-listeners/src/main/scala/com/waitingforcode/FailingListeners.scala -------------------------------------------------------------------------------- /structured-streaming-maxoffsetspertrigger/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-maxoffsetspertrigger/README.md -------------------------------------------------------------------------------- /structured-streaming-maxoffsetspertrigger/broker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-maxoffsetspertrigger/broker/docker-compose.yaml -------------------------------------------------------------------------------- /structured-streaming-maxoffsetspertrigger/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-maxoffsetspertrigger/pom.xml -------------------------------------------------------------------------------- /structured-streaming-maxoffsetspertrigger/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-maxoffsetspertrigger/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-maxoffsetspertrigger/src/main/scala/com/waitingforcode/AnotherLessPerfectUseCaseConsolePrinterFromKafka.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-maxoffsetspertrigger/src/main/scala/com/waitingforcode/AnotherLessPerfectUseCaseConsolePrinterFromKafka.scala -------------------------------------------------------------------------------- /structured-streaming-maxoffsetspertrigger/src/main/scala/com/waitingforcode/LessPerfectUseCaseConsolePrinterFromKafka.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-maxoffsetspertrigger/src/main/scala/com/waitingforcode/LessPerfectUseCaseConsolePrinterFromKafka.scala -------------------------------------------------------------------------------- /structured-streaming-maxoffsetspertrigger/src/main/scala/com/waitingforcode/PerfectUseCaseConsolePrinterFromKafka.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-maxoffsetspertrigger/src/main/scala/com/waitingforcode/PerfectUseCaseConsolePrinterFromKafka.scala -------------------------------------------------------------------------------- /structured-streaming-maxoffsetspertrigger/src/main/scala/com/waitingforcode/TestDataGenerator.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-maxoffsetspertrigger/src/main/scala/com/waitingforcode/TestDataGenerator.scala -------------------------------------------------------------------------------- /structured-streaming-maxoffsetspertrigger/src/main/scala/com/waitingforcode/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-maxoffsetspertrigger/src/main/scala/com/waitingforcode/package.scala -------------------------------------------------------------------------------- /structured-streaming-minoffsets-availablenow-kafka/broker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-minoffsets-availablenow-kafka/broker/docker-compose.yaml -------------------------------------------------------------------------------- /structured-streaming-minoffsets-availablenow-kafka/broker/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-minoffsets-availablenow-kafka/broker/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-minoffsets-availablenow-kafka/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-minoffsets-availablenow-kafka/pom.xml -------------------------------------------------------------------------------- /structured-streaming-minoffsets-availablenow-kafka/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-minoffsets-availablenow-kafka/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /structured-streaming-minoffsets-availablenow-kafka/src/main/scala/com/waitingforcode/MinOffsetsForAvailableNow.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-minoffsets-availablenow-kafka/src/main/scala/com/waitingforcode/MinOffsetsForAvailableNow.scala -------------------------------------------------------------------------------- /structured-streaming-outputmodes/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-outputmodes/README.md -------------------------------------------------------------------------------- /structured-streaming-outputmodes/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-outputmodes/pom.xml -------------------------------------------------------------------------------- /structured-streaming-outputmodes/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-outputmodes/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /structured-streaming-outputmodes/src/main/scala/com/waitingforcode/AppendModeForAggregatesDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-outputmodes/src/main/scala/com/waitingforcode/AppendModeForAggregatesDemo.scala -------------------------------------------------------------------------------- /structured-streaming-outputmodes/src/main/scala/com/waitingforcode/CompleteModeForAggregatesDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-outputmodes/src/main/scala/com/waitingforcode/CompleteModeForAggregatesDemo.scala -------------------------------------------------------------------------------- /structured-streaming-outputmodes/src/main/scala/com/waitingforcode/TimestampedEvent.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-outputmodes/src/main/scala/com/waitingforcode/TimestampedEvent.scala -------------------------------------------------------------------------------- /structured-streaming-outputmodes/src/main/scala/com/waitingforcode/UpdateModeForAggregatesDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-outputmodes/src/main/scala/com/waitingforcode/UpdateModeForAggregatesDemo.scala -------------------------------------------------------------------------------- /structured-streaming-retries/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-retries/pom.xml -------------------------------------------------------------------------------- /structured-streaming-retries/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-retries/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-retries/src/main/scala/com/waitingforcode/RetriesContinuousTriggerDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-retries/src/main/scala/com/waitingforcode/RetriesContinuousTriggerDemo.scala -------------------------------------------------------------------------------- /structured-streaming-retries/src/main/scala/com/waitingforcode/RetriesMicroBatchTriggerDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-retries/src/main/scala/com/waitingforcode/RetriesMicroBatchTriggerDemo.scala -------------------------------------------------------------------------------- /structured-streaming-schema-registry/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-schema-registry/README.md -------------------------------------------------------------------------------- /structured-streaming-schema-registry/broker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-schema-registry/broker/docker-compose.yaml -------------------------------------------------------------------------------- /structured-streaming-schema-registry/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-schema-registry/pom.xml -------------------------------------------------------------------------------- /structured-streaming-schema-registry/src/main/java/com/waitingforcode/Order.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-schema-registry/src/main/java/com/waitingforcode/Order.java -------------------------------------------------------------------------------- /structured-streaming-schema-registry/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-schema-registry/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-schema-registry/src/main/scala/com/waitingforcode/AbrisSchemaRegistryDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-schema-registry/src/main/scala/com/waitingforcode/AbrisSchemaRegistryDemo.scala -------------------------------------------------------------------------------- /structured-streaming-schema-registry/src/main/scala/com/waitingforcode/ProducerAppV1.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-schema-registry/src/main/scala/com/waitingforcode/ProducerAppV1.scala -------------------------------------------------------------------------------- /structured-streaming-schema-registry/src/main/scala/com/waitingforcode/ProducerAppV2.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-schema-registry/src/main/scala/com/waitingforcode/ProducerAppV2.scala -------------------------------------------------------------------------------- /structured-streaming-schema-registry/src/main/scala/com/waitingforcode/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-schema-registry/src/main/scala/com/waitingforcode/package.scala -------------------------------------------------------------------------------- /structured-streaming-spark-metadata/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-spark-metadata/pom.xml -------------------------------------------------------------------------------- /structured-streaming-spark-metadata/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-spark-metadata/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /structured-streaming-spark-metadata/src/main/scala/com/becomedataengineer/SparkMetadataIssueGenerator.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-spark-metadata/src/main/scala/com/becomedataengineer/SparkMetadataIssueGenerator.scala -------------------------------------------------------------------------------- /structured-streaming-spark-metadata/src/main/scala/com/becomedataengineer/SparkMetadataWithRetentionGenerator.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-spark-metadata/src/main/scala/com/becomedataengineer/SparkMetadataWithRetentionGenerator.scala -------------------------------------------------------------------------------- /structured-streaming-state-metrics/broker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-state-metrics/broker/docker-compose.yaml -------------------------------------------------------------------------------- /structured-streaming-state-metrics/broker/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-state-metrics/broker/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-state-metrics/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-state-metrics/pom.xml -------------------------------------------------------------------------------- /structured-streaming-state-metrics/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-state-metrics/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-state-metrics/src/main/scala/com/waitingforcode/WindowsWithWatermarkDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-state-metrics/src/main/scala/com/waitingforcode/WindowsWithWatermarkDemo.scala -------------------------------------------------------------------------------- /structured-streaming-stop-job/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-stop-job/README.md -------------------------------------------------------------------------------- /structured-streaming-stop-job/docker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-stop-job/docker/docker-compose.yaml -------------------------------------------------------------------------------- /structured-streaming-stop-job/docker/generation_configuration.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-stop-job/docker/generation_configuration.yaml -------------------------------------------------------------------------------- /structured-streaming-stop-job/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-stop-job/pom.xml -------------------------------------------------------------------------------- /structured-streaming-stop-job/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-stop-job/src/main/resources/log4j2.properties -------------------------------------------------------------------------------- /structured-streaming-stop-job/src/main/scala/com/waitingforcode/VisitsCounterInWindows.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-stop-job/src/main/scala/com/waitingforcode/VisitsCounterInWindows.scala -------------------------------------------------------------------------------- /structured-streaming-temporary-view-based-processing/broker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-temporary-view-based-processing/broker/docker-compose.yaml -------------------------------------------------------------------------------- /structured-streaming-temporary-view-based-processing/broker/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-temporary-view-based-processing/broker/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-temporary-view-based-processing/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-temporary-view-based-processing/pom.xml -------------------------------------------------------------------------------- /structured-streaming-temporary-view-based-processing/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-temporary-view-based-processing/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-temporary-view-based-processing/src/main/scala/com/waitingforcode/ApiBasedProcessing.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-temporary-view-based-processing/src/main/scala/com/waitingforcode/ApiBasedProcessing.scala -------------------------------------------------------------------------------- /structured-streaming-temporary-view-based-processing/src/main/scala/com/waitingforcode/DataProducer.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-temporary-view-based-processing/src/main/scala/com/waitingforcode/DataProducer.scala -------------------------------------------------------------------------------- /structured-streaming-temporary-view-based-processing/src/main/scala/com/waitingforcode/SqlBasedProcessing.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-temporary-view-based-processing/src/main/scala/com/waitingforcode/SqlBasedProcessing.scala -------------------------------------------------------------------------------- /structured-streaming-transactional-kafka/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-transactional-kafka/README.md -------------------------------------------------------------------------------- /structured-streaming-transactional-kafka/broker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-transactional-kafka/broker/docker-compose.yaml -------------------------------------------------------------------------------- /structured-streaming-transactional-kafka/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-transactional-kafka/pom.xml -------------------------------------------------------------------------------- /structured-streaming-transactional-kafka/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-transactional-kafka/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-transactional-kafka/src/main/scala/com/waitingforcode/CommittedTransactionsStore.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-transactional-kafka/src/main/scala/com/waitingforcode/CommittedTransactionsStore.scala -------------------------------------------------------------------------------- /structured-streaming-transactional-kafka/src/main/scala/com/waitingforcode/ForeachKafkaNonTransactionalDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-transactional-kafka/src/main/scala/com/waitingforcode/ForeachKafkaNonTransactionalDemo.scala -------------------------------------------------------------------------------- /structured-streaming-transactional-kafka/src/main/scala/com/waitingforcode/ForeachKafkaTransactionalDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-transactional-kafka/src/main/scala/com/waitingforcode/ForeachKafkaTransactionalDemo.scala -------------------------------------------------------------------------------- /structured-streaming-transactional-kafka/src/main/scala/com/waitingforcode/ForeachKafkaTransactionalWriter.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-transactional-kafka/src/main/scala/com/waitingforcode/ForeachKafkaTransactionalWriter.scala -------------------------------------------------------------------------------- /structured-streaming-transactional-kafka/src/main/scala/com/waitingforcode/TestDataGenerator.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-transactional-kafka/src/main/scala/com/waitingforcode/TestDataGenerator.scala -------------------------------------------------------------------------------- /structured-streaming-transactional-kafka/src/main/scala/com/waitingforcode/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-transactional-kafka/src/main/scala/com/waitingforcode/package.scala -------------------------------------------------------------------------------- /structured-streaming-ui-patterns/src/main/scala/com/waitingforcode/package.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-ui-patterns/src/main/scala/com/waitingforcode/package.scala -------------------------------------------------------------------------------- /structured-streaming-window/broker/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-window/broker/docker-compose.yaml -------------------------------------------------------------------------------- /structured-streaming-window/broker/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-window/broker/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-window/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-window/pom.xml -------------------------------------------------------------------------------- /structured-streaming-window/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-window/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /structured-streaming-window/src/main/scala/com/waitingforcode/WindowsWithWatermarkDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bartosz25/spark-playground/HEAD/structured-streaming-window/src/main/scala/com/waitingforcode/WindowsWithWatermarkDemo.scala --------------------------------------------------------------------------------