├── .github └── workflows │ └── ci.yml ├── .gitignore ├── README.md ├── docs ├── details │ ├── .read-partition-issue.md.swp │ ├── SchemaColumnConvertNotSupportedException.md │ ├── best-pratice-collect.md │ ├── big-broadcast-join.md │ ├── broadcast-with-disable.md │ ├── class-or-method-not-found.md │ ├── container-oom.md │ ├── correlated-column-not-allowed.md │ ├── driver-max-result-size.md │ ├── error-driver-max-result-size.md │ ├── error-driver-out-of-memory.md │ ├── error-driver-stack-overflow.md │ ├── error-executor-out-of-disk.md │ ├── error-executor-out-of-memory.md │ ├── error-invalid-file.md │ ├── error-job.md │ ├── error-memory.md │ ├── error-other.md │ ├── error-shuffle.md │ ├── error-sql-analysis.md │ ├── even_partitioning_still_slow.md │ ├── failed-to-read-non-parquet-file.md │ ├── failure-executor-large-record.md │ ├── forced-computations.md │ ├── key-skew.md │ ├── max_serialized_task_size.md │ ├── notenoughexecs.md │ ├── partial_aggregates.md │ ├── pyudfoom.md │ ├── read-partition-issue.md │ ├── revise-bad_partitioning.md │ ├── revise-even_partitioning_still_slow.md │ ├── shuffle_exchange_loses_exec_reg.md │ ├── shuffle_fetch_corrupted.md │ ├── singlepartition.md │ ├── slow-executor.md │ ├── slow-job-slow-cluster.md │ ├── slow-job.md │ ├── slow-map.md │ ├── slow-partition_filter_pushdown.md │ ├── slow-reduce.md │ ├── slow-regex-tips.md │ ├── slow-skewed-join.md │ ├── slow-skewed-write.md │ ├── slow-stage.md │ ├── slow-writes-s3.md │ ├── slow-writes-too-many-files.md │ ├── slow-writes.md │ ├── toobigdag.md │ ├── toofew_tasks.md │ ├── toolargejar.md │ ├── toomany_tasks.md │ ├── udfslow.md │ └── write-fails.md ├── flowchart │ ├── error.md │ ├── index.md │ ├── shared.md │ └── slow.md ├── imgs │ ├── identify-slow-stage.png │ ├── spark-driver-max-result-size-error.png │ ├── spark-filter-ignored.png │ ├── spark-filter-pushdown-success.png │ ├── spark-salted.png │ └── spark-skewed.png └── index.md ├── mkdocs.yml ├── requirements.txt └── tools └── export_external.sh /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .DS_Store 3 | site/ 4 | .python-version 5 | .*~ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/README.md -------------------------------------------------------------------------------- /docs/details/.read-partition-issue.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/.read-partition-issue.md.swp -------------------------------------------------------------------------------- /docs/details/SchemaColumnConvertNotSupportedException.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/SchemaColumnConvertNotSupportedException.md -------------------------------------------------------------------------------- /docs/details/best-pratice-collect.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/best-pratice-collect.md -------------------------------------------------------------------------------- /docs/details/big-broadcast-join.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/big-broadcast-join.md -------------------------------------------------------------------------------- /docs/details/broadcast-with-disable.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/broadcast-with-disable.md -------------------------------------------------------------------------------- /docs/details/class-or-method-not-found.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/class-or-method-not-found.md -------------------------------------------------------------------------------- /docs/details/container-oom.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/container-oom.md -------------------------------------------------------------------------------- /docs/details/correlated-column-not-allowed.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/correlated-column-not-allowed.md -------------------------------------------------------------------------------- /docs/details/driver-max-result-size.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/driver-max-result-size.md -------------------------------------------------------------------------------- /docs/details/error-driver-max-result-size.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/error-driver-max-result-size.md -------------------------------------------------------------------------------- /docs/details/error-driver-out-of-memory.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/error-driver-out-of-memory.md -------------------------------------------------------------------------------- /docs/details/error-driver-stack-overflow.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/error-driver-stack-overflow.md -------------------------------------------------------------------------------- /docs/details/error-executor-out-of-disk.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/error-executor-out-of-disk.md -------------------------------------------------------------------------------- /docs/details/error-executor-out-of-memory.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/error-executor-out-of-memory.md -------------------------------------------------------------------------------- /docs/details/error-invalid-file.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/error-invalid-file.md -------------------------------------------------------------------------------- /docs/details/error-job.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/error-job.md -------------------------------------------------------------------------------- /docs/details/error-memory.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/error-memory.md -------------------------------------------------------------------------------- /docs/details/error-other.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/error-other.md -------------------------------------------------------------------------------- /docs/details/error-shuffle.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/error-shuffle.md -------------------------------------------------------------------------------- /docs/details/error-sql-analysis.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/error-sql-analysis.md -------------------------------------------------------------------------------- /docs/details/even_partitioning_still_slow.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/even_partitioning_still_slow.md -------------------------------------------------------------------------------- /docs/details/failed-to-read-non-parquet-file.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/failed-to-read-non-parquet-file.md -------------------------------------------------------------------------------- /docs/details/failure-executor-large-record.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/failure-executor-large-record.md -------------------------------------------------------------------------------- /docs/details/forced-computations.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/forced-computations.md -------------------------------------------------------------------------------- /docs/details/key-skew.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/key-skew.md -------------------------------------------------------------------------------- /docs/details/max_serialized_task_size.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/max_serialized_task_size.md -------------------------------------------------------------------------------- /docs/details/notenoughexecs.md: -------------------------------------------------------------------------------- 1 | ### Not enough execs 2 | -------------------------------------------------------------------------------- /docs/details/partial_aggregates.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/partial_aggregates.md -------------------------------------------------------------------------------- /docs/details/pyudfoom.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/pyudfoom.md -------------------------------------------------------------------------------- /docs/details/read-partition-issue.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/read-partition-issue.md -------------------------------------------------------------------------------- /docs/details/revise-bad_partitioning.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/revise-bad_partitioning.md -------------------------------------------------------------------------------- /docs/details/revise-even_partitioning_still_slow.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/revise-even_partitioning_still_slow.md -------------------------------------------------------------------------------- /docs/details/shuffle_exchange_loses_exec_reg.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/shuffle_exchange_loses_exec_reg.md -------------------------------------------------------------------------------- /docs/details/shuffle_fetch_corrupted.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/shuffle_fetch_corrupted.md -------------------------------------------------------------------------------- /docs/details/singlepartition.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/singlepartition.md -------------------------------------------------------------------------------- /docs/details/slow-executor.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-executor.md -------------------------------------------------------------------------------- /docs/details/slow-job-slow-cluster.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-job-slow-cluster.md -------------------------------------------------------------------------------- /docs/details/slow-job.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-job.md -------------------------------------------------------------------------------- /docs/details/slow-map.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-map.md -------------------------------------------------------------------------------- /docs/details/slow-partition_filter_pushdown.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-partition_filter_pushdown.md -------------------------------------------------------------------------------- /docs/details/slow-reduce.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-reduce.md -------------------------------------------------------------------------------- /docs/details/slow-regex-tips.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-regex-tips.md -------------------------------------------------------------------------------- /docs/details/slow-skewed-join.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-skewed-join.md -------------------------------------------------------------------------------- /docs/details/slow-skewed-write.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-skewed-write.md -------------------------------------------------------------------------------- /docs/details/slow-stage.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-stage.md -------------------------------------------------------------------------------- /docs/details/slow-writes-s3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-writes-s3.md -------------------------------------------------------------------------------- /docs/details/slow-writes-too-many-files.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-writes-too-many-files.md -------------------------------------------------------------------------------- /docs/details/slow-writes.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/slow-writes.md -------------------------------------------------------------------------------- /docs/details/toobigdag.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/toobigdag.md -------------------------------------------------------------------------------- /docs/details/toofew_tasks.md: -------------------------------------------------------------------------------- 1 | ### Too few tasks 2 | -------------------------------------------------------------------------------- /docs/details/toolargejar.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/toolargejar.md -------------------------------------------------------------------------------- /docs/details/toomany_tasks.md: -------------------------------------------------------------------------------- 1 | ### Too many tasks 2 | -------------------------------------------------------------------------------- /docs/details/udfslow.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/udfslow.md -------------------------------------------------------------------------------- /docs/details/write-fails.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/details/write-fails.md -------------------------------------------------------------------------------- /docs/flowchart/error.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/flowchart/error.md -------------------------------------------------------------------------------- /docs/flowchart/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/flowchart/index.md -------------------------------------------------------------------------------- /docs/flowchart/shared.md: -------------------------------------------------------------------------------- 1 | OHNOES[Contact support] 2 | -------------------------------------------------------------------------------- /docs/flowchart/slow.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/flowchart/slow.md -------------------------------------------------------------------------------- /docs/imgs/identify-slow-stage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/imgs/identify-slow-stage.png -------------------------------------------------------------------------------- /docs/imgs/spark-driver-max-result-size-error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/imgs/spark-driver-max-result-size-error.png -------------------------------------------------------------------------------- /docs/imgs/spark-filter-ignored.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/imgs/spark-filter-ignored.png -------------------------------------------------------------------------------- /docs/imgs/spark-filter-pushdown-success.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/imgs/spark-filter-pushdown-success.png -------------------------------------------------------------------------------- /docs/imgs/spark-salted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/imgs/spark-salted.png -------------------------------------------------------------------------------- /docs/imgs/spark-skewed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/imgs/spark-skewed.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/docs/index.md -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/mkdocs.yml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/requirements.txt -------------------------------------------------------------------------------- /tools/export_external.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdenk/spark-flowchart/HEAD/tools/export_external.sh --------------------------------------------------------------------------------