├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── documentation-request.md │ ├── feature_request.md │ └── submit-question.md ├── PULL_REQUEST_TEMPLATE.md ├── labeler.yml └── workflows │ ├── add-to-project.yml │ ├── configuration.json │ ├── labeler.yml │ ├── license-header-check.yml │ ├── markdown-links-check.yml │ ├── markdown-links-check │ └── markdown-links-check-config.json │ ├── mvn-verify-check.yml │ ├── python-unit-test.yml │ ├── release.yml │ └── signoff-check.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── NOTICE-binary ├── README.md ├── SECURITY.md ├── core ├── .gitignore ├── README.md ├── pom.xml ├── scalastyle-config.xml ├── scripts │ ├── discoveryScript.sh │ └── getWorkerInfo.sh └── src │ ├── main │ ├── resources │ │ ├── bootstrap │ │ │ └── tuningTable.yaml │ │ ├── configs │ │ │ └── build.properties │ │ ├── operatorsScore-databricks-aws-a10G.csv │ │ ├── operatorsScore-databricks-aws-t4.csv │ │ ├── operatorsScore-databricks-azure-t4.csv │ │ ├── operatorsScore-dataproc-gke-l4.csv │ │ ├── operatorsScore-dataproc-gke-t4.csv │ │ ├── operatorsScore-dataproc-l4.csv │ │ ├── operatorsScore-dataproc-serverless-l4.csv │ │ ├── operatorsScore-dataproc-t4.csv │ │ ├── operatorsScore-emr-a10.csv │ │ ├── operatorsScore-emr-a10G.csv │ │ ├── operatorsScore-emr-t4.csv │ │ ├── operatorsScore-onprem-a100.csv │ │ ├── photonOperatorMappings │ │ │ └── databricks-13_3.json │ │ ├── supportedDataSource.csv │ │ ├── supportedExecs.csv │ │ └── supportedExprs.csv │ └── scala │ │ ├── com │ │ └── nvidia │ │ │ └── spark │ │ │ └── rapids │ │ │ ├── SparkRapidsBuildInfoEvent.scala │ │ │ ├── ThreadFactoryBuilder.scala │ │ │ └── tool │ │ │ ├── AppSummaryInfoBaseProvider.scala │ │ │ ├── ClusterConfigurationStrategy.scala │ │ │ ├── EventLogPathProcessor.scala │ │ │ ├── GpuDevice.scala │ │ │ ├── Identifiable.scala │ │ │ ├── Platform.scala │ │ │ ├── ToolBase.scala │ │ │ ├── ToolTextFileWriter.scala │ │ │ ├── analysis │ │ │ ├── AggRawMetricsResult.scala │ │ │ ├── AppAnalysisBase.scala │ │ │ ├── AppIndexMapperTrait.scala │ │ │ ├── AppSQLPlanAnalyzer.scala │ │ │ ├── AppSparkMetricsAggTrait.scala │ │ │ ├── AppSparkMetricsAnalyzer.scala │ │ │ ├── ProfSparkMetricsAnalyzer.scala │ │ │ ├── QualSparkMetricsAnalyzer.scala │ │ │ ├── SparkSQLPlanInfoVisitor.scala │ │ │ ├── StatisticsMetrics.scala │ │ │ ├── package.scala │ │ │ └── util │ │ │ │ ├── AggAccumHelper.scala │ │ │ │ ├── AggAccumPhotonHelper.scala │ │ │ │ ├── DiagnosticMetrics.scala │ │ │ │ ├── JobAggAccum.scala │ │ │ │ ├── SQLAggAccum.scala │ │ │ │ ├── StageAggAccum.scala │ │ │ │ ├── StageAggPhoton.scala │ │ │ │ └── TaskMetricsAccumRec.scala │ │ │ ├── planparser │ │ │ ├── BatchScanExecParser.scala │ │ │ ├── BroadcastExchangeExecParser.scala │ │ │ ├── BroadcastHashJoinExecParser.scala │ │ │ ├── BroadcastNestedLoopJoinExecParser.scala │ │ │ ├── DataWritingCommandExecParser.scala │ │ │ ├── DatabricksParseHelper.scala │ │ │ ├── DeltaLakeHelper.scala │ │ │ ├── ExecParser.scala │ │ │ ├── FileSourceScanExecParser.scala │ │ │ ├── GenericExecParser.scala │ │ │ ├── HashAggregateExecParser.scala │ │ │ ├── HiveParseHelper.scala │ │ │ ├── ObjectHashAggregateExecParser.scala │ │ │ ├── ReadParser.scala │ │ │ ├── SQLPlanParser.scala │ │ │ ├── ShuffleExchangeExecParser.scala │ │ │ ├── ShuffledHashJoinExecParser.scala │ │ │ ├── SortMergeJoinExecParser.scala │ │ │ ├── SubqueryBroadcastExecParser.scala │ │ │ ├── SubqueryExecParser.scala │ │ │ ├── WholeStageExecParser.scala │ │ │ ├── WindowGroupLimitParser.scala 
│ │ │ ├── WriteFilesExecParser.scala │ │ │ ├── WriteOpMetaExtractorTrait.scala │ │ │ ├── ops │ │ │ │ ├── ExprOpRef.scala │ │ │ │ ├── OpRef.scala │ │ │ │ ├── OpRefWrapperBase.scala │ │ │ │ ├── OperatorCounter.scala │ │ │ │ ├── OperatorRefBase.scala │ │ │ │ ├── OperatorRefTrait.scala │ │ │ │ └── UnsupportedExprOpRef.scala │ │ │ └── photon │ │ │ │ ├── PhotonBroadcastNestedLoopJoinExecParser.scala │ │ │ │ ├── PhotonPlanParser.scala │ │ │ │ └── PhotonStageExecParser.scala │ │ │ ├── profiling │ │ │ ├── ApplicationSummaryInfo.scala │ │ │ ├── CollectInformation.scala │ │ │ ├── DiagnosticSummaryInfo.scala │ │ │ ├── DriverLogProcessor.scala │ │ │ ├── GenerateDot.scala │ │ │ ├── GenerateTimeline.scala │ │ │ ├── HealthCheck.scala │ │ │ ├── ProfSQLPlanClassifier.scala │ │ │ ├── ProfileArgs.scala │ │ │ ├── ProfileClassWarehouse.scala │ │ │ ├── ProfileMain.scala │ │ │ ├── ProfileOutputWriter.scala │ │ │ ├── ProfileUtils.scala │ │ │ └── Profiler.scala │ │ │ ├── qualification │ │ │ ├── AppSubscriber.scala │ │ │ ├── PluginTypeChecker.scala │ │ │ ├── QualOutputWriter.scala │ │ │ ├── Qualification.scala │ │ │ ├── QualificationArgs.scala │ │ │ ├── QualificationMain.scala │ │ │ ├── RunningAppMetadata.scala │ │ │ ├── RunningQualOutputWriter.scala │ │ │ └── RunningQualificationApp.scala │ │ │ ├── tuning │ │ │ ├── AutoTuner.scala │ │ │ ├── BootstrapReport.scala │ │ │ ├── QualAppSummaryInfoProvider.scala │ │ │ ├── QualificationAutoTuner.scala │ │ │ ├── QualificationAutoTunerRunner.scala │ │ │ ├── TargetClusterProps.scala │ │ │ ├── TunerContext.scala │ │ │ ├── TuningEntry.scala │ │ │ ├── TuningEntryDefinition.scala │ │ │ ├── TuningEntryTrait.scala │ │ │ └── TuningOpTypes.scala │ │ │ └── views │ │ │ ├── AggMetricsResultSorter.scala │ │ │ ├── AppStageMetricsView.scala │ │ │ ├── DataSourceView.scala │ │ │ ├── ExecutorView.scala │ │ │ ├── InformationView.scala │ │ │ ├── JobView.scala │ │ │ ├── OutHeaderRegistry.scala │ │ │ ├── PropertiesView.scala │ │ │ ├── QualRawReportGenerator.scala │ │ │ ├── RawMetricProfView.scala │ │ │ ├── SQLView.scala │ │ │ ├── StageView.scala │ │ │ ├── TaskView.scala │ │ │ ├── ViewableTrait.scala │ │ │ ├── WriteOpsView.scala │ │ │ └── package.scala │ │ └── org │ │ └── apache │ │ └── spark │ │ ├── rapids │ │ └── tool │ │ │ └── benchmarks │ │ │ ├── Benchmark.scala │ │ │ ├── BenchmarkArgs.scala │ │ │ ├── BenchmarkBase.scala │ │ │ ├── DevRuntimeCheckpoint.scala │ │ │ ├── NoOpRuntimeCheckpoint.scala │ │ │ ├── README.md │ │ │ ├── RuntimeCheckpointTrait.scala │ │ │ ├── RuntimeInjector.scala │ │ │ ├── SingleThreadedProfileToolBenchmark.scala │ │ │ └── SingleThreadedQualToolBenchmark.scala │ │ ├── scheduler │ │ ├── SparkListenerEvent.scala │ │ └── ToolsListenerEventExtraAPIs.scala │ │ └── sql │ │ └── rapids │ │ └── tool │ │ ├── AccumToStageRetriever.scala │ │ ├── AppBase.scala │ │ ├── AppFilterImpl.scala │ │ ├── AppMetaData.scala │ │ ├── ClassWarehouse.scala │ │ ├── ClusterTagPropHandler.scala │ │ ├── EventProcessorBase.scala │ │ ├── FilterAppInfo.scala │ │ ├── ToolUtils.scala │ │ ├── annotation │ │ ├── Calculated.scala │ │ ├── Since.scala │ │ ├── ToolsReflection.scala │ │ ├── WallClock.scala │ │ └── package.scala │ │ ├── profiling │ │ ├── ApplicationInfo.scala │ │ └── EventsProcessor.scala │ │ ├── qualification │ │ ├── QualificationAppInfo.scala │ │ ├── QualificationEventProcessor.scala │ │ └── RunningQualificationEventProcessor.scala │ │ ├── store │ │ ├── AccumInfo.scala │ │ ├── AccumManager.scala │ │ ├── AccumMetaRef.scala │ │ ├── AccumNameRef.scala │ │ ├── DataSourceRecord.scala │ │ ├── 
SQLPlanModel.scala │ │ ├── SQLPlanModelManager.scala │ │ ├── SQLPlanModelPrimaryWithDSCaching.scala │ │ ├── SQLPlanModelWithDSCaching.scala │ │ ├── SQLPlanVersion.scala │ │ ├── StageModel.scala │ │ ├── StageModelManager.scala │ │ ├── TaskModel.scala │ │ ├── TaskModelManager.scala │ │ └── WriteOperationStore.scala │ │ ├── ui │ │ └── ConsoleProgressBar.scala │ │ └── util │ │ ├── CacheablePropsHandler.scala │ │ ├── EventUtils.scala │ │ ├── FSUtils.scala │ │ ├── InPlaceMedianArrView.scala │ │ ├── MemoryMetricsTracker.scala │ │ ├── OperationResult.scala │ │ ├── PropertiesLoader.scala │ │ ├── RapidsToolsConfUtil.scala │ │ ├── RuntimeReporter.scala │ │ ├── RuntimeUtil.scala │ │ ├── SortedJProperties.scala │ │ ├── StringUtils.scala │ │ ├── ToolsPlanGraph.scala │ │ ├── ToolsTimer.scala │ │ ├── WebCrawlerUtil.scala │ │ ├── package.scala │ │ ├── plangraph │ │ ├── PhotonPlan.scala │ │ └── PlanGraphTransformer.scala │ │ └── stubs │ │ ├── DefaultGraphReflectionAPI.scala │ │ ├── GraphReflectionAPI.scala │ │ ├── GraphReflectionAPIHelper.scala │ │ ├── GraphReflectionEntry.scala │ │ ├── StageInfoStub.scala │ │ ├── bd │ │ ├── BDGraphClusterStub.scala │ │ ├── BDGraphNodeStub.scala │ │ └── BDGraphReflectionAPI.scala │ │ ├── db │ │ ├── DBGraphClusterStub.scala │ │ ├── DBGraphEdgeStub.scala │ │ ├── DBGraphNodeStub.scala │ │ ├── DBGraphReflectionAPI.scala │ │ └── DBGraphSQLMetricStub.scala │ │ └── package.scala │ └── test │ ├── resources │ ├── ProfilingExpectations │ │ ├── executors_removed_eventlog_expectation.csv │ │ ├── jobs_failure_eventlog_expectation.csv │ │ ├── nds_q88_photon_db_13_3_job_metrics_agg_expectation.csv │ │ ├── nds_q88_photon_db_13_3_sql_metrics_agg_expectation.csv │ │ ├── nds_q88_photon_db_13_3_stage_metrics_agg_expectation.csv │ │ ├── rapids_duration_and_cpu_expectation.csv │ │ ├── rapids_join_eventlog_iodiagnosticmetrics_expectation.csv │ │ ├── rapids_join_eventlog_jobmetricsagg2_expectation.csv │ │ ├── rapids_join_eventlog_jobmetricsagg_expectation.csv │ │ ├── rapids_join_eventlog_sqlmetrics_expectation.csv │ │ ├── rapids_join_eventlog_sqlmetricsagg2_expectation.csv │ │ ├── rapids_join_eventlog_sqlmetricsagg_expectation.csv │ │ ├── rapids_join_eventlog_stagediagnosticmetrics_expectation.csv │ │ ├── rapids_join_eventlog_stagemetricsagg2_expectation.csv │ │ ├── rapids_join_eventlog_stagemetricsagg_expectation.csv │ │ ├── removed_blockManagers_eventlog_expectation.csv │ │ ├── stages_failure_eventlog_expectation.csv │ │ ├── tasks_failure_eventlog_expectation.csv │ │ └── unsupported_sql_eventlog_expectation.csv │ ├── QualificationExpectations │ │ ├── complex_dec_expectation.csv │ │ ├── db_sim_test_expectation.csv │ │ ├── directory_test_expectation.csv │ │ ├── jdbc_expectation.csv │ │ ├── multi_run_freq_test_expectation.csv │ │ ├── nds_q86_fail_test_expectation.csv │ │ ├── nds_q86_fail_test_expectation_persql.csv │ │ ├── nds_q86_test_expectation.csv │ │ ├── nds_q86_test_expectation_persql.csv │ │ ├── nested_dsv2_expectation.csv │ │ ├── nested_type_expectation.csv │ │ ├── qual_test_missing_sql_end_expectation.csv │ │ ├── qual_test_simple_expectation.csv │ │ ├── qual_test_simple_expectation_persql.csv │ │ ├── read_dsv1_expectation.csv │ │ ├── read_dsv2_expectation.csv │ │ ├── spark2_expectation.csv │ │ ├── truncated_1_end_expectation.csv │ │ └── write_format_expectation.csv │ ├── dev │ │ └── generate-qualification-test-results.sh │ ├── key-value-pairs.txt │ ├── log4j.properties │ ├── log4j2.properties │ ├── spark-events-profiling │ │ ├── dataset_eventlog │ │ ├── driverlog │ │ ├── 
eventlog-gpu-dsv1.zstd │ │ ├── eventlog-gpu-dsv2.zstd │ │ ├── eventlog_dsv1.zstd │ │ ├── eventlog_dsv2.zstd │ │ ├── eventlog_minimal_events │ │ ├── gds_ucx_eventlog.zstd │ │ ├── gpu_oom_eventlog.zstd │ │ ├── malformed_json_eventlog.zstd │ │ ├── nds_q66_gpu.zstd │ │ ├── rapids_join_eventlog.zstd │ │ ├── rapids_join_eventlog2.zstd │ │ ├── rp_nosql_eventlog │ │ ├── rp_sql_eventlog.zstd │ │ ├── spark2-eventlog.zstd │ │ └── tasks_executors_fail_compressed_eventlog.zstd │ ├── spark-events-qualification │ │ ├── aqeshuffle_eventlog.zstd │ │ ├── cluster_information │ │ │ ├── eventlog_2nodes_8cores │ │ │ ├── eventlog_3nodes_12cores_exec_removed │ │ │ ├── eventlog_3nodes_12cores_multiple_executors │ │ │ ├── eventlog_3nodes_12cores_variable_cores │ │ │ ├── eventlog_4nodes_8cores_dynamic_alloc.zstd │ │ │ ├── eventlog_driver_only │ │ │ └── platform │ │ │ │ ├── databricks-aws │ │ │ │ ├── databricks-azure │ │ │ │ ├── dataproc │ │ │ │ ├── emr │ │ │ │ └── onprem │ │ ├── complex_dec_eventlog.zstd │ │ ├── createdatasourcetable_eventlog.zstd │ │ ├── customshuffle_eventlog.zstd │ │ ├── dataset_eventlog │ │ ├── db_sim_eventlog │ │ │ ├── eventlog │ │ │ └── eventlog-2021-06-15--15-00.gz │ │ ├── db_subExecution_id.zstd │ │ ├── dsAndDf_eventlog.zstd │ │ ├── empty_eventlog │ │ ├── eventlog_complex_dec_nested │ │ ├── eventlog_nested_dsv2 │ │ ├── eventlog_same_app_id_1.zstd │ │ ├── eventlog_same_app_id_2.zstd │ │ ├── eventlog_v2_local-1623876083964 │ │ │ ├── appstatus_local-1623876083964 │ │ │ ├── events_1_local-1623876083964.zstd │ │ │ └── events_2_local-1623876083964.zstd │ │ ├── global_local_limit_eventlog.zstd │ │ ├── gpu_eventlog │ │ ├── jdbc_eventlog.zstd │ │ ├── join_missing_sql_end │ │ ├── multiple_attempts │ │ │ ├── attempt_1_eventlog.zstd │ │ │ ├── attempt_2_eventlog.zstd │ │ │ ├── attempt_3_eventlog.zstd │ │ │ └── attempt_4_eventlog.zstd │ │ ├── nds_q86_fail_test │ │ ├── nds_q86_test │ │ ├── nds_q88_photon_db_13_3.zstd │ │ ├── nested_type_eventlog │ │ ├── pandas_execs_eventlog.zstd │ │ ├── rdd_only_eventlog │ │ ├── truncated_eventlog │ │ ├── udf_dataset_eventlog │ │ ├── udf_func_eventlog │ │ ├── writeformat_eventlog │ │ └── xgboost_eventlog.zstd │ └── worker_info-autotuner-example-8cores-16gmemory.yaml │ └── scala │ ├── com │ └── nvidia │ │ └── spark │ │ └── rapids │ │ ├── BaseTestSuite.scala │ │ ├── ThreadFactoryBuilderTest.scala │ │ └── tool │ │ ├── ToolTestUtils.scala │ │ ├── planparser │ │ ├── BasePlanParserSuite.scala │ │ ├── PhotonPlanParserSuite.scala │ │ ├── ReadParserSuite.scala │ │ ├── SqlPlanParserSuite.scala │ │ └── WriteOperationParserSuite.scala │ │ ├── profiling │ │ ├── AnalysisSuite.scala │ │ ├── AppFilterSuite.scala │ │ ├── ApplicationInfoSuite.scala │ │ ├── ClusterRecommendationSuite.scala │ │ ├── GenerateDotSuite.scala │ │ ├── GenerateTimelineSuite.scala │ │ ├── HealthCheckSuite.scala │ │ └── QualificationInfoUtils.scala │ │ ├── qualification │ │ ├── AppFilterSuite.scala │ │ ├── PluginTypeCheckerSuite.scala │ │ └── QualificationSuite.scala │ │ ├── tuning │ │ ├── BaseAutoTunerSuite.scala │ │ ├── ProfilingAutoTunerSuite.scala │ │ ├── ProfilingAutoTunerSuiteV2.scala │ │ └── QualificationAutoTunerSuite.scala │ │ └── util │ │ └── ToolUtilsSuite.scala │ └── org │ └── apache │ └── spark │ └── sql │ └── TrampolineUtil.scala ├── data_validation ├── docs │ └── validation-tools.md ├── pyproject.toml ├── setup.cfg ├── src │ └── spark_rapids_validation_tool │ │ ├── __init__.py │ │ ├── build.py │ │ ├── csp │ │ ├── __init__.py │ │ ├── csp.py │ │ └── dataproc.py │ │ ├── data_validation.py │ │ ├── 
data_validation_dataproc.py │ │ ├── dataproc_wrapper.py │ │ ├── utilities.py │ │ └── validation_scripts │ │ ├── dataset_validation.py │ │ └── metadata_validation.py ├── templates │ └── datavalid_conf.yml └── tox.ini ├── scripts ├── auto-copyrighter.sh ├── header-check.sh ├── idea-code-style-settings.xml ├── qual_validation │ └── qual_validation.py └── sync_plugin_files │ ├── README.md │ ├── override_supported_configs.json │ ├── process_supported_files.py │ └── sync_operator_scores.py └── user_tools ├── LICENSE ├── README.md ├── build.sh ├── docs ├── index.md ├── qualx.md ├── resources │ ├── debug-behave-intellij.png │ ├── spark_rapids_user_tools_overview-01.drawio │ └── spark_rapids_user_tools_overview-01.png ├── tools_e2e_tests.md ├── user-tools-aws-emr.md ├── user-tools-databricks-aws.md ├── user-tools-databricks-azure.md ├── user-tools-dataproc-gke.md ├── user-tools-dataproc.md └── user-tools-onprem.md ├── pyproject.toml ├── src ├── spark_rapids_pytools │ ├── __init__.py │ ├── build.py │ ├── cloud_api │ │ ├── __init__.py │ │ ├── azurestorage.py │ │ ├── databricks_aws.py │ │ ├── databricks_aws_job.py │ │ ├── databricks_azure.py │ │ ├── databricks_azure_job.py │ │ ├── dataproc.py │ │ ├── dataproc_gke.py │ │ ├── dataproc_gke_job.py │ │ ├── dataproc_job.py │ │ ├── emr.py │ │ ├── emr_job.py │ │ ├── gstorage.py │ │ ├── onprem.py │ │ ├── s3storage.py │ │ └── sp_types.py │ ├── common │ │ ├── __init__.py │ │ ├── cluster_inference.py │ │ ├── exceptions.py │ │ ├── prop_manager.py │ │ ├── sys_storage.py │ │ └── utilities.py │ ├── pricing │ │ ├── __init__.py │ │ ├── databricks_aws_pricing.py │ │ ├── databricks_azure_pricing.py │ │ ├── dataproc_gke_pricing.py │ │ ├── dataproc_pricing.py │ │ ├── emr_pricing.py │ │ └── price_provider.py │ ├── rapids │ │ ├── __init__.py │ │ ├── bootstrap.py │ │ ├── dev │ │ │ └── instance_description.py │ │ ├── diagnostic.py │ │ ├── profiling.py │ │ ├── qualification.py │ │ ├── qualification_stats.py │ │ ├── qualx │ │ │ ├── prediction.py │ │ │ ├── qualx_tool.py │ │ │ ├── train.py │ │ │ └── train_and_evaluate.py │ │ ├── rapids_job.py │ │ ├── rapids_tool.py │ │ └── tool_ctxt.py │ ├── resources │ │ ├── bootstrap-conf.yaml │ │ ├── cluster-configs.yaml │ │ ├── collect.sh │ │ ├── databricks_aws-configs.json │ │ ├── databricks_azure-configs.json │ │ ├── databricks_azure-instance-catalog.json │ │ ├── dataproc-configs.json │ │ ├── dataproc-instance-catalog.json │ │ ├── dataproc_gke-configs.json │ │ ├── dev │ │ │ ├── databricks-azure-price-jobs-compute-premium-westus2-raw.txt │ │ │ ├── log4j.properties │ │ │ ├── prepackage_mgr.py │ │ │ └── process_databricks_azure_pricing.py │ │ ├── diagnostic-conf.yaml │ │ ├── distributed-tools-conf.yaml │ │ ├── emr-configs.json │ │ ├── emr-instance-catalog.json │ │ ├── onprem-configs.json │ │ ├── premium-databricks-azure-catalog.json │ │ ├── profiling-conf.yaml │ │ ├── qualification-conf.yaml │ │ ├── qualx-conf.yaml │ │ ├── qualx-hash-conf.yaml │ │ ├── qualx │ │ │ └── models │ │ │ │ └── xgboost │ │ │ │ ├── combined.cfg │ │ │ │ ├── combined.json │ │ │ │ ├── combined.metrics │ │ │ │ ├── databricks-aws.cfg │ │ │ │ ├── databricks-aws.json │ │ │ │ ├── databricks-aws.metrics │ │ │ │ ├── databricks-aws_photon.cfg │ │ │ │ ├── databricks-aws_photon.json │ │ │ │ ├── databricks-aws_photon.metrics │ │ │ │ ├── databricks-azure.cfg │ │ │ │ ├── databricks-azure.json │ │ │ │ ├── databricks-azure.metrics │ │ │ │ ├── databricks-azure_photon.cfg │ │ │ │ ├── databricks-azure_photon.json │ │ │ │ ├── databricks-azure_photon.metrics │ │ │ │ ├── dataproc.cfg │ │ │ │ 
├── dataproc.json │ │ │ │ ├── dataproc.metrics │ │ │ │ ├── emr.cfg │ │ │ │ ├── emr.json │ │ │ │ ├── emr.metrics │ │ │ │ ├── onprem.cfg │ │ │ │ ├── onprem.json │ │ │ │ └── onprem.metrics │ │ └── templates │ │ │ ├── cluster_template │ │ │ ├── databricks_aws.ms │ │ │ ├── databricks_azure.ms │ │ │ ├── dataproc.ms │ │ │ ├── emr.ms │ │ │ └── onprem.ms │ │ │ ├── dataproc-create_gpu_cluster_script.ms │ │ │ ├── dataproc-run_bootstrap.ms │ │ │ ├── emr-create_gpu_cluster_script.ms │ │ │ ├── emr-run_bootstrap.ms │ │ │ └── node_template │ │ │ ├── databricks_aws.ms │ │ │ ├── databricks_azure.ms │ │ │ └── emr.ms │ ├── wrapper.py │ └── wrappers │ │ ├── __init__.py │ │ ├── databricks_aws_wrapper.py │ │ ├── databricks_azure_wrapper.py │ │ ├── dataproc_wrapper.py │ │ └── emr_wrapper.py ├── spark_rapids_tools │ ├── __init__.py │ ├── cloud │ │ ├── __init__.py │ │ ├── cluster.py │ │ ├── databricks │ │ │ ├── __init__.py │ │ │ └── dbcluster.py │ │ ├── dataproc │ │ │ ├── __init__.py │ │ │ └── dataproccluster.py │ │ ├── emr │ │ │ ├── __init__.py │ │ │ └── emrcluster.py │ │ └── onprem │ │ │ ├── __init__.py │ │ │ └── onpremcluster.py │ ├── cmdli │ │ ├── __init__.py │ │ ├── argprocessor.py │ │ ├── dev_cli.py │ │ └── tools_cli.py │ ├── configuration │ │ ├── __init__.py │ │ ├── common.py │ │ ├── runtime_conf.py │ │ ├── submission │ │ │ ├── __init__.py │ │ │ ├── distributed_config.py │ │ │ └── local_config.py │ │ └── tools_config.py │ ├── enums.py │ ├── exceptions.py │ ├── storagelib │ │ ├── __init__.py │ │ ├── adls │ │ │ ├── __init__.py │ │ │ ├── adlsfs.py │ │ │ └── adlspath.py │ │ ├── cspfs.py │ │ ├── csppath.py │ │ ├── gcs │ │ │ ├── __init__.py │ │ │ ├── gcsfs.py │ │ │ └── gcspath.py │ │ ├── hdfs │ │ │ ├── __init__.py │ │ │ ├── hdfsfs.py │ │ │ └── hdfspath.py │ │ ├── local │ │ │ ├── __init__.py │ │ │ ├── localfs.py │ │ │ └── localpath.py │ │ ├── s3 │ │ │ ├── __init__.py │ │ │ ├── s3fs.py │ │ │ └── s3path.py │ │ └── tools │ │ │ └── fs_utils.py │ ├── tools │ │ ├── __init__.py │ │ ├── additional_heuristics.py │ │ ├── autotuner.py │ │ ├── cluster_config_recommender.py │ │ ├── qualification_stats_report.py │ │ ├── qualx │ │ │ ├── config.py │ │ │ ├── featurizers │ │ │ │ ├── default.py │ │ │ │ └── hash_plan.py │ │ │ ├── hash_config.py │ │ │ ├── hash_util.py │ │ │ ├── model.py │ │ │ ├── modifiers │ │ │ │ └── align_sql_id.py │ │ │ ├── plot.py │ │ │ ├── preprocess.py │ │ │ ├── qualx_config.py │ │ │ ├── qualx_main.py │ │ │ ├── qualx_pipeline.py │ │ │ ├── split_functions │ │ │ │ ├── split_all_test.py │ │ │ │ ├── split_random.py │ │ │ │ ├── split_stratified.py │ │ │ │ └── split_train_val.py │ │ │ └── util.py │ │ ├── speedup_category.py │ │ ├── top_candidates.py │ │ └── unsupported_ops_stage_duration.py │ └── utils │ │ ├── __init__.py │ │ ├── net_utils.py │ │ ├── propmanager.py │ │ └── util.py └── spark_rapids_tools_distributed │ ├── __init__.py │ ├── distributed_main.py │ ├── jar_cmd_args.py │ ├── output_processing │ ├── __init__.py │ ├── combiner.py │ └── processors.py │ ├── spark_management │ ├── __init__.py │ ├── spark_job_submitter.py │ └── spark_session_builder.py │ └── spark_map_task │ ├── __init__.py │ ├── jar_runner.py │ └── status_reporter.py ├── tests ├── __init__.py ├── mock_cluster.py ├── spark_rapids_tools_e2e │ ├── features │ │ ├── environment.py │ │ ├── event_log_processing.feature │ │ ├── hdfs_storage.feature │ │ ├── installation_checks.feature │ │ ├── preprocess.feature │ │ └── steps │ │ │ ├── e2e_utils.py │ │ │ ├── preprocess_steps.py │ │ │ └── test_steps.py │ └── resources │ │ ├── datasets │ │ └── onprem │ │ │ 
└── nds.json │ │ ├── event_logs │ │ ├── gpu_eventlog.zstd │ │ ├── incorrect_app_status_eventlog.zstd │ │ ├── join_agg_on_yarn_eventlog.zstd │ │ ├── onprem │ │ │ ├── README.md │ │ │ └── nds │ │ │ │ └── power │ │ │ │ └── eventlogs │ │ │ │ ├── cpu │ │ │ │ └── app-20231122005806-0064.zstd │ │ │ │ └── gpu │ │ │ │ └── app-20231114200842-0001.zstd │ │ ├── photon_eventlog.zstd │ │ └── streaming_eventlog.zstd │ │ └── scripts │ │ ├── common.sh │ │ ├── hdfs │ │ ├── cleanup_hdfs.sh │ │ ├── setup_hdfs.sh │ │ └── templates │ │ │ ├── core-site.xml │ │ │ └── hdfs-site.xml │ │ └── setup_env.sh ├── spark_rapids_tools_ut │ ├── __init__.py │ ├── conftest.py │ ├── qualx │ │ ├── __init__.py │ │ ├── test_config.py │ │ ├── test_hash_util.py │ │ ├── test_main.py │ │ ├── test_model.py │ │ ├── test_modifiers.py │ │ ├── test_preprocess.py │ │ ├── test_qualx_config.py │ │ ├── test_split_functions.py │ │ └── test_utils.py │ ├── resources │ │ ├── cluster │ │ │ ├── databricks │ │ │ │ ├── aws-cpu-00.json │ │ │ │ ├── azure-cpu-00.json │ │ │ │ └── test-azure-instances-catalog.json │ │ │ ├── dataproc │ │ │ │ └── cpu-00.yaml │ │ │ ├── dataproc_gke │ │ │ │ └── cpu-00.yaml │ │ │ ├── emr │ │ │ │ └── cpu-00.json │ │ │ └── onprem │ │ │ │ └── cpu-00.yaml │ │ ├── eventlogs │ │ │ └── .gitkeep │ │ ├── tools_config │ │ │ ├── invalid │ │ │ │ ├── tools_config_inv_00.yaml │ │ │ │ ├── tools_config_inv_01.yaml │ │ │ │ └── tools_config_inv_02.yaml │ │ │ ├── sample-distributed-config-specification.json │ │ │ ├── sample-local-config-specification.json │ │ │ └── valid │ │ │ │ ├── tools_config_00.yaml │ │ │ │ ├── tools_config_01.yaml │ │ │ │ ├── tools_config_02.yaml │ │ │ │ └── tools_config_03.yaml │ │ ├── tools_mock.jar │ │ └── worker_info.yaml │ ├── test_cluster.py │ └── test_tool_argprocessor.py ├── test_diagnostic.py └── utils │ └── test_net_utils.py └── tox.ini /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a bug report to help us improve Tools of RAPIDS Accelerator for Apache Spark 4 | title: "[BUG]" 5 | labels: "? - Needs Triage, bug" 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Steps/Code to reproduce bug** 14 | Please provide a list of steps or a code sample to reproduce the issue. 15 | Avoid posting private or sensitive data. 16 | 17 | **Expected behavior** 18 | A clear and concise description of what you expected to happen. 19 | 20 | **Environment details (please complete the following information)** 21 | - Environment location: [Standalone, YARN, Kubernetes, Cloud(specify cloud provider)] 22 | 23 | **Additional context** 24 | Add any other context about the problem here. 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Documentation request 3 | about: Report incorrect or needed documentation 4 | title: "[DOC]" 5 | labels: "? - Needs Triage, documentation" 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Report incorrect documentation 11 | 12 | **Location of incorrect documentation** 13 | Provide links and line numbers if applicable. 14 | 15 | **Describe the problems or issues found in the documentation** 16 | A clear and concise description of what you found to be incorrect. 
17 |
18 | **Steps taken to verify documentation is incorrect**
19 | List any steps you have taken:
20 |
21 | **Suggested fix for documentation**
22 | Detail proposed changes to fix the documentation if you have any.
23 |
24 | ---
25 |
26 | ## Report needed documentation
27 |
28 | **Report needed documentation**
29 | A clear and concise description of what documentation you believe is needed and why.
30 |
31 | **Describe the documentation you'd like**
32 | A clear and concise description of what you want to happen.
33 |
34 | **Steps taken to search for needed documentation**
35 | List any steps you have taken:
36 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for Tools of RAPIDS Accelerator for Apache Spark
4 | title: "[FEA]"
5 | labels: "? - Needs Triage, feature request"
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I wish the Tools of RAPIDS Accelerator for Apache Spark would [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context, code examples, or references to existing implementations about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/submit-question.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Submit question
3 | about: Ask a general question about Tools of RAPIDS Accelerator for Apache Spark here, or open a thread in the Discussions tab
4 | title: "[QST]"
5 | labels: "? - Needs Triage, question"
6 | assignees: ''
7 |
8 | ---
9 |
10 | **What is your question?**
11 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
34 |
--------------------------------------------------------------------------------
/.github/labeler.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2025, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # configs of labeler
16 | core_tools:
17 |   - changed-files:
18 |     - any-glob-to-any-file: 'core/**/*'
19 |
20 | user_tools:
21 |   - changed-files:
22 |     - any-glob-to-any-file: 'user_tools/**/*'
23 |
--------------------------------------------------------------------------------
/.github/workflows/add-to-project.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022-2025, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | name: Add new issues and pull requests to project
16 |
17 | on:
18 |   issues:
19 |     types:
20 |       - opened
21 |   pull_request_target:
22 |     types:
23 |       - opened
24 |
25 | jobs:
26 |   Add-to-project:
27 |     if: github.repository_owner == 'NVIDIA'  # avoid adding issues from forks
28 |     runs-on: ubuntu-latest
29 |     steps:
30 |       - name: add-to-project
31 |         uses: NVIDIA/spark-rapids-common/add-to-project@main
32 |         with:
33 |           token: ${{ secrets.PROJECT_TOKEN }}
34 |
--------------------------------------------------------------------------------
/.github/workflows/configuration.json:
--------------------------------------------------------------------------------
1 | {
2 |   "categories": [
3 |     {
4 |       "title": "### User Tools",
5 |       "labels": ["user_tools"]
6 |     },
7 |     {
8 |       "title": "### Core",
9 |       "labels": ["core_tools"]
10 |     },
11 |     {
12 |       "title": "### Miscellaneous",
13 |       "labels": []
14 |     }
15 |   ],
16 |   "sort": {
17 |     "order": "DESC",
18 |     "on_property": "mergedAt"
19 |   },
20 |   "pr_template": "- ${{TITLE}} ([#${{NUMBER}}](${{URL}}))"
21 | }
22 |
--------------------------------------------------------------------------------
/.github/workflows/labeler.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2025, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # A workflow to create labels for PRs
16 | name: Pull Request Labeler
17 |
18 | on:
19 |   pull_request_target:
20 |     types: [opened, synchronize, reopened]
21 |
22 | jobs:
23 |   labeler:
24 |     permissions:
25 |       contents: read
26 |       pull-requests: write
27 |     runs-on: ubuntu-latest
28 |     steps:
29 |       - name: Checkout code
30 |         uses: actions/checkout@v4
31 |       - name: Label PR
32 |         uses: actions/labeler@v5
33 |         with:
34 |           configuration-path: ".github/labeler.yml"
35 |
--------------------------------------------------------------------------------
/.github/workflows/license-header-check.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # A workflow to check copyright/license headers
16 | name: license header check
17 |
18 | on:
19 |   pull_request:
20 |     types: [opened, synchronize, reopened]
21 |
22 | jobs:
23 |   license-header-check:
24 |     runs-on: ubuntu-latest
25 |     if: "!contains(github.event.pull_request.title, '[bot]')"
26 |     steps:
27 |       - name: Get checkout depth
28 |         run: |
29 |           echo "PR_FETCH_DEPTH=$(( ${{ github.event.pull_request.commits }} + 10 ))" >> $GITHUB_ENV
30 |
31 |       - name: Checkout code
32 |         uses: actions/checkout@v4
33 |         with:
34 |           fetch-depth: ${{ env.PR_FETCH_DEPTH }}
35 |
36 |       - name: license-header-check
37 |         uses: NVIDIA/spark-rapids-common/license-header-check@main
38 |         with:
39 |           included_file_patterns: |
40 |             *.py,
41 |             *.toml,
42 |             *.ini,
43 |             *.yml,
44 |             *.yaml,
45 |             *.sh,
46 |             *.properties,
47 |             *.xml,
48 |             *.feature,
49 |             *.scala
50 |
--------------------------------------------------------------------------------
/.github/workflows/markdown-links-check.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # A workflow to check whether a PR has broken hyperlinks
16 | name: Check Markdown links
17 |
18 | on:
19 |   pull_request:
20 |     types: [opened, synchronize, reopened]
21 |
22 | jobs:
23 |   markdown-link-check:
24 |     runs-on: ubuntu-latest
25 |     steps:
26 |       - name: work around permission issue
27 |         run: git config --global --add safe.directory /github/workspace
28 |       - uses: actions/checkout@master
29 |       - uses: gaurav-nelson/github-action-markdown-link-check@v1
30 |         with:
31 |           max-depth: -1
32 |           use-verbose-mode: 'yes'
33 |           config-file: '.github/workflows/markdown-links-check/markdown-links-check-config.json'
34 |           base-branch: 'dev'
35 |
36 |
--------------------------------------------------------------------------------
/.github/workflows/markdown-links-check/markdown-links-check-config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "ignorePatterns": [
3 |     {
4 |       "pattern": "https://github.com/NVIDIA/spark-rapids-tools/issues/*"
5 |     },
6 |     {
7 |       "pattern": "http://localhost*"
8 |     },
9 |     {
10 |       "pattern": "https://www.nvidia.com/en-us/security/pgp-key"
11 |     }
12 |   ],
13 |   "timeout": "15s",
14 |   "retryOn429": true,
15 |   "retryCount": 30,
16 |   "aliveStatusCodes": [200, 403]
17 | }
--------------------------------------------------------------------------------
/.github/workflows/mvn-verify-check.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2025, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # A workflow to run maven verify for rapids tools
16 | name: Maven verify check
17 |
18 | on:
19 |   pull_request:
20 |     types: [opened, synchronize, reopened]
21 |
22 | jobs:
23 |   build:
24 |     runs-on: ubuntu-latest
25 |     strategy:
26 |       matrix:
27 |         java-version: [8, 11]
28 |         spark-version: ['324', '334', '350']
29 |     steps:
30 |       - uses: actions/checkout@v4
31 |
32 |       - name: Setup Java
33 |         uses: actions/setup-java@v4
34 |         with:
35 |           distribution: adopt
36 |           java-version: ${{ matrix.java-version }}
37 |
38 |       - name: Run mvn verify with Spark ${{ matrix.spark-version }}
39 |         run: cd core && mvn -Dbuildver=${{ matrix.spark-version }} verify
--------------------------------------------------------------------------------
/.github/workflows/python-unit-test.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2025, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # A workflow to run Python unit tests for user tools
16 | name: Python unit test
17 |
18 | on:
19 |   pull_request:
20 |     types: [opened, synchronize, reopened]
21 |
22 | jobs:
23 |   build:
24 |     runs-on: ubuntu-latest
25 |     strategy:
26 |       matrix:
27 |         python-version: ['3.9', '3.10', '3.11', '3.12']
28 |
29 |     steps:
30 |       - uses: actions/checkout@v4
31 |
32 |       - name: Set up Python ${{ matrix.python-version }}
33 |         uses: actions/setup-python@v5
34 |         with:
35 |           python-version: ${{ matrix.python-version }}
36 |
37 |       - name: Install tox
38 |         run: |
39 |           python -m pip install --upgrade pip
40 |           python -m pip install tox
41 |           python -m pip install --pre tox-gh-actions
42 |
43 |       - name: Run tox test
44 |         run: cd user_tools && tox
45 |
--------------------------------------------------------------------------------
/.github/workflows/signoff-check.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022-2024, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # A workflow to check whether a PR is signed off
16 | name: signoff check
17 |
18 | on:
19 |   pull_request_target:
20 |     types: [opened, synchronize, reopened]
21 |
22 | jobs:
23 |   signoff-check:
24 |     runs-on: ubuntu-latest
25 |     steps:
26 |       - name: signoff
27 |         uses: NVIDIA/spark-rapids-common/signoff-check@main
28 |         with:
29 |           owner: ${{ github.repository_owner }}
30 |           repo: spark-rapids-tools
31 |           pull_number: ${{ github.event.number }}
32 |           token: ${{ secrets.GITHUB_TOKEN }}
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### Python template
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # Environments
9 | .env
10 | .venv
11 | env/
12 | venv/
13 | ENV/
14 | env.bak/
15 | venv.bak/
16 |
17 | # intellij
18 | .idea/
19 | *.iml
20 |
21 | # vscode
22 | .vscode/
23 |
24 | ### macOS ###
25 | *.DS_Store
26 |
27 | # Distribution / packaging
28 | .Python
29 | build/
30 | develop-eggs/
31 | dist/
32 | eggs/
33 | .eggs/
34 | wheels/
35 | *.egg
36 | *.egg-info/
37 |
38 | # Unit test / coverage reports
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .pytest_cache/
43 | # qualx_cache folder generated in unit tests
44 | **/qualx_cache/
45 |
46 | # Maven
47 | dependency-reduced-pom.xml
48 | # Scala classes
49 | target/
50 |
51 | # prepackage locations
52 | csp-resources*
53 | tools-resources/
54 |
55 | # precommit files
56 | *-E
57 |
58 | # ignore file or report generated by the plugin sync scripts
59 | **/operators_plugin_sync_report.txt
60 | **/new_operators.txt
61 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | repos:
16 |   - repo: local
17 |     hooks:
18 |       - id: header-check
19 |         name: Header check
20 |         entry: scripts/header-check.sh
21 |         language: script
22 |         pass_filenames: true
23 |         verbose: true
24 |       - id: auto-copyrighter
25 |         name: Update copyright year
26 |         entry: scripts/auto-copyrighter.sh
27 |         language: script
28 |         pass_filenames: true
29 |         verbose: true
30 |   - repo: https://github.com/pre-commit/pre-commit-hooks.git
31 |     rev: v4.0.1
32 |     hooks:
33 |       - id: check-added-large-files
34 |         name: Check for file over 4.0MiB
35 |         args: ['--maxkb=4000', '--enforce-all']
36 |       - id: trailing-whitespace
37 |         name: trim trailing white spaces preserving md files
38 |         args: ['--markdown-linebreak-ext=md']
39 |       - id: end-of-file-fixer
40 |         name: Ensure files end with a single newline
41 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/CHANGELOG.md
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Spark Rapids Tools
2 | Copyright (c) 2023, NVIDIA CORPORATION
3 |
4 | --------------------------------------------------------------------------------
5 |
6 | // ------------------------------------------------------------------
7 | // NOTICE file corresponding to the section 4d of The Apache License,
8 | // Version 2.0, in this case for
9 | // ------------------------------------------------------------------
10 |
11 | Apache Spark
12 | Copyright 2014 and onwards The Apache Software Foundation
13 |
14 | This product includes software developed at
15 | The Apache Software Foundation (http://www.apache.org/).
16 |
--------------------------------------------------------------------------------
/NOTICE-binary:
--------------------------------------------------------------------------------
1 | Spark Rapids Tools
2 | Copyright (c) 2023-2024, NVIDIA CORPORATION
3 |
4 | // ------------------------------------------------------------------
5 | // NOTICE file corresponding to the section 4d of The Apache License,
6 | // Version 2.0, in this case for
7 | // ------------------------------------------------------------------
8 |
9 | Apache Spark
10 | Copyright 2014 and onwards The Apache Software Foundation
11 |
12 | This product includes software developed at
13 | The Apache Software Foundation (http://www.apache.org/).
14 |
15 | snakeyaml
16 |
17 | ---------------------------------------------------------------------
18 | This product bundles various third-party components under other open source licenses.
19 |
20 | jsoup - The MIT License (MIT)
21 | License Text ( https://jsoup.org/license )
22 | Copyright (c) 2009 - 2023 Jonathan Hedley (https://jsoup.org/)
23 |
24 | scallop - The MIT License (MIT)
25 | License Text ( https://github.com/scallop/scallop/blob/develop/license.txt )
26 | Copyright (c) 2012 Platon Pronko and Chris Hodapp
27 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RAPIDS Accelerator for Apache Spark Tools
2 |
3 | This repo provides the tools to use [RAPIDS Accelerator for Apache Spark](https://github.com/NVIDIA/spark-rapids).
4 |
5 | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/NVIDIA/spark-rapids-tools)
6 |
7 | ## Catalog
8 |
9 | - [RAPIDS core tools](./core): Tools that help developers get the most out of their Apache
10 |   Spark applications
11 |   without any code change:
12 |   - Report the acceleration potential of RAPIDS Accelerator for Apache Spark on a set of Spark applications.
13 |   - Generate a comprehensive profiling analysis for Apache Spark applications executing on accelerated GPU instances. This information
14 |     can be used to further tune and optimize the application.
15 | - [spark-rapids-user-tools](./user_tools): A simple wrapper process around cloud service
16 |   providers to run
17 |   [RAPIDS core tools](./core) across multiple cloud platforms. In addition, the output educates
18 |   the users on
19 |   the cost savings and acceleration potential of RAPIDS Accelerator for Apache Spark and makes recommendations to tune
20 |   the application performance based on the cluster shape.
21 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | ## Security
2 |
3 | NVIDIA is dedicated to the security and trust of our software products and services, including all
4 | source code repositories managed through our organization.
5 |
6 | If you need to report a security issue, please use the appropriate contact points outlined
7 | below. **Please do not report security vulnerabilities through GitHub/GitLab.**
8 |
9 | ## Reporting Potential Security Vulnerability in an NVIDIA Product
10 |
11 | To report a potential security vulnerability in any NVIDIA product:
12 | - Web: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html)
13 | - E-Mail: psirt@nvidia.com
14 | - We encourage you to use the following PGP key for secure email communication: [NVIDIA public PGP Key for communication](https://www.nvidia.com/en-us/security/pgp-key)
15 | - Please include the following information:
16 |   - Product/Driver name and version/branch that contains the vulnerability
17 |
--------------------------------------------------------------------------------
/core/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | *.iml
3 | target
4 | event_log_profiling.log
5 | ## ignore output folders of the test scripts
6 | **/dev/qualification-output/
--------------------------------------------------------------------------------
/core/src/main/resources/configs/build.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2024-2025, NVIDIA CORPORATION.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | ##################################
18 | # Build Properties
19 | ##################################
20 |
21 | build.version=${project.version}
22 | build.spark.version=${spark.version}
23 | build.hadoop.version=${hadoop.version}
24 | build.java.version=${java.version}
25 | build.scala.version=${scala.version}
26 | build.benchmarks.checkpoints=${benchmarks.checkpoints}
27 |
--------------------------------------------------------------------------------
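The `${...}` tokens above are Maven resource-filtering placeholders that are substituted with concrete values at build time. As a minimal sketch (assuming the filtered file ends up under `configs/` on the classpath, mirroring `core/src/main/resources`; this helper is illustrative, not repo code), such a properties file could be read at runtime like this:

```scala
// Hypothetical sketch, not part of the repo: reading the Maven-filtered
// build.properties from the classpath at runtime.
import java.util.Properties

object BuildPropsSketch {
  def load(): Properties = {
    val props = new Properties()
    // Resource path assumed to mirror core/src/main/resources/configs/build.properties
    val in = getClass.getResourceAsStream("/configs/build.properties")
    try props.load(in) finally in.close()
    props
  }

  def main(args: Array[String]): Unit = {
    // After filtering, build.version carries the actual project version
    println(load().getProperty("build.version"))
  }
}
```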
/core/src/main/resources/supportedDataSource.csv:
--------------------------------------------------------------------------------
1 | Format,Direction,BOOLEAN,BYTE,SHORT,INT,LONG,FLOAT,DOUBLE,DATE,TIMESTAMP,STRING,DECIMAL,NULL,BINARY,CALENDAR,ARRAY,MAP,STRUCT,UDT,DAYTIME,YEARMONTH
2 | Avro,read,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO
3 | CSV,read,S,S,S,S,S,S,S,S,PS,S,S,NA,NS,NA,NA,NA,NA,NA,NA,NA
4 | Delta,read,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S
5 | Delta,write,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S
6 | HiveText,read,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS
7 | HiveText,write,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS
8 | Iceberg,read,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S
9 | JSON,read,S,S,S,S,S,S,S,PS,PS,S,S,NA,NS,NA,PS,NS,PS,NS,NA,NA
10 | ORC,read,S,S,S,S,S,S,S,S,PS,S,S,NA,NS,NA,PS,PS,PS,NS,NA,NA
11 | ORC,write,S,S,S,S,S,S,S,S,PS,S,S,NA,NS,NA,PS,PS,PS,NS,NA,NA
12 | HiveORC,read,S,S,S,S,S,S,S,S,PS,S,S,NA,NS,NA,PS,PS,PS,NS,NA,NA
13 | HiveORC,write,S,S,S,S,S,S,S,S,PS,S,S,NA,NS,NA,PS,PS,PS,NS,NA,NA
14 | Parquet,read,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S
15 | Parquet,write,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S
16 | HiveParquet,read,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S
17 | HiveParquet,write,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S
18 |
--------------------------------------------------------------------------------
/core/src/main/scala/com/nvidia/spark/rapids/SparkRapidsBuildInfoEvent.scala:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  * http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 |
17 | package com.nvidia.spark.rapids
18 |
19 | import org.apache.spark.scheduler.SparkListenerEvent
20 | import org.apache.spark.sql.rapids.tool.annotation.ToolsReflection
21 |
22 |
23 | /**
24 |  * This is a copy from spark-rapids: https://github.com/NVIDIA/spark-rapids/blob/
25 |  * branch-24.10/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala#L416.
26 |  *
27 |  * TODO: set up an automated job to sync this with the spark-rapids plugin.
28 |  */
29 | case class SparkRapidsBuildInfoEvent(
30 |   sparkRapidsBuildInfo: Map[String, String],
31 |   sparkRapidsJniBuildInfo: Map[String, String],
32 |   cudfBuildInfo: Map[String, String],
33 |   sparkRapidsPrivateBuildInfo: Map[String, String]
34 | ) extends SparkListenerEvent {
35 |   @ToolsReflection("BD-3.2.1", "Ignore")
36 |   override val eventTime: Long = 0
37 |   @ToolsReflection("BD-3.2.1", "Ignore")
38 |   override val eventType: String = ""
39 | }
40 |
--------------------------------------------------------------------------------
/core/src/main/scala/com/nvidia/spark/rapids/tool/Identifiable.scala:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  * http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 |
17 | package com.nvidia.spark.rapids.tool
18 |
19 | /**
20 |  * A trait representing an entity that can be identified by a unique identifier of type `T`.
21 |  *
22 |  * @tparam T The type of the identifier.
23 |  */
24 | trait Identifiable[T] {
25 |   /**
26 |    * Retrieves the unique identifier of the entity.
27 |    *
28 |    * @return The identifier of type `T`.
29 |    */
30 |   def id: T
31 | }
32 |
--------------------------------------------------------------------------------
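As a minimal illustration of the trait above (hypothetical code, not part of the repo), a model class might expose its Spark application ID through `Identifiable`:

```scala
// Hypothetical sketch: an entity identified by its Spark application ID.
import com.nvidia.spark.rapids.tool.Identifiable

case class AppRecord(appId: String, name: String) extends Identifiable[String] {
  // The unique identifier for this entity is the application ID.
  override def id: String = appId
}

object IdentifiableSketch {
  def main(args: Array[String]): Unit = {
    val rec = AppRecord("app-20231114200842-0001", "example-run")
    println(rec.id) // prints: app-20231114200842-0001
  }
}
```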
/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppAnalysisBase.scala:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  * http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 |
17 | package com.nvidia.spark.rapids.tool.analysis
18 |
19 | import org.apache.spark.sql.rapids.tool.AppBase
20 |
21 | /**
22 |  * Base class for application analysis
23 |  * @param app the AppBase object to analyze
24 |  */
25 | abstract class AppAnalysisBase(app: AppBase) {
26 |   // Keep for future refactoring to use common methods for all Analysis classes.
27 |   // Ideally, we can have a common interface for:
28 |   // 1- caching layer
29 |   // 2- initializations
30 |   // 3- interface to pull information to generate views and reports
31 | }
32 |
--------------------------------------------------------------------------------
/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppIndexMapperTrait.scala:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2024-2025, NVIDIA CORPORATION.
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  * http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 |
17 | package com.nvidia.spark.rapids.tool.analysis
18 |
19 | import org.apache.spark.sql.rapids.tool.AppBase
20 |
21 | trait AppIndexMapperTrait {
22 |   def zipAppsWithIndex(apps: Seq[AppBase]): Seq[(AppBase, Int)]
23 | }
24 |
25 | // Implementation used by Qualification components because AppBase has no appIndex field. Instead,
26 | // this implementation generates an index based on the order of the apps.
27 | trait QualAppIndexMapperTrait extends AppIndexMapperTrait {
28 |   def zipAppsWithIndex(apps: Seq[AppBase]): Seq[(AppBase, Int)] = {
29 |     // we did not use zipWithIndex because we want to start from 1 instead of 0
30 |     apps.zip(Stream.from(1))
31 |   }
32 | }
33 |
34 | // Implementation used by Profiling components because ApplicationInfo has an appIndex field which is
35 | // used in generating reports with multiple AppIds
36 | trait ProfAppIndexMapperTrait extends QualAppIndexMapperTrait
37 |
--------------------------------------------------------------------------------
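The comment above notes that `zipAppsWithIndex` starts indices at 1 rather than 0. The pairing it produces can be shown in isolation with a self-contained sketch (illustrative only, independent of `AppBase`):

```scala
// Self-contained sketch of the 1-based pairing used by zipAppsWithIndex:
// zip against Stream.from(1) instead of zipWithIndex (which starts at 0).
object IndexMapperSketch {
  def main(args: Array[String]): Unit = {
    val apps = Seq("appA", "appB", "appC")
    val indexed = apps.zip(Stream.from(1))
    println(indexed) // List((appA,1), (appB,2), (appC,3))
  }
}
```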
However, in the future this object can be used 22 | // to provide customized logic for the Profiler (i.e., handle metrics specific to GPU eventlogs) 23 | object ProfSparkMetricsAnalyzer extends AppSparkMetricsAggTrait with ProfAppIndexMapperTrait { 24 | 25 | } 26 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/QualSparkMetricsAnalyzer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nvidia.spark.rapids.tool.analysis 17 | 18 | object QualSparkMetricsAnalyzer extends AppSparkMetricsAggTrait with QualAppIndexMapperTrait { 19 | // This object is kept to provide the aggregation of the application data for the Qualification. 20 | // In the future, we might need to provide customized logic for the Qualification 21 | // (i.e., handle metrics, filtering, etc.) 22 | } 23 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.tool 18 | 19 | /** 20 | * The RAPIDS tools analysis package holds the implementation of classes that process the 21 | * information gathered from the eventlogs to transform it into meaningful data which is consumed 22 | * by the reporting/heuristics engines. 23 | */ 24 | package object analysis { 25 | 26 | } 27 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/AggAccumPhotonHelper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.tool.analysis.util 18 | 19 | /** 20 | * Implementation of AggAccumHelper for Photon. 21 | * It takes the shuffleWriteValues and peakMemValues Accumulables as arguments because those 22 | * values are not available in the TaskModel. 23 | */ 24 | class AggAccumPhotonHelper( 25 | shuffleWriteValues: Iterable[Long], 26 | peakMemValues: Iterable[Long]) extends AggAccumHelper { 27 | 28 | override def createStageAccumRecord(): TaskMetricsAccumRec = { 29 | StageAggPhoton(shuffleWriteValues, peakMemValues) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/JobAggAccum.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.tool.analysis.util 18 | 19 | import org.apache.spark.sql.rapids.tool.store.TaskModel 20 | 21 | /** 22 | * Accumulator for Job Aggregates. 23 | * This is an optimization to avoid using the Scala collections API on each field for the entire 24 | * number of tasks/stages in a Job. 25 | */ 26 | case class JobAggAccum() extends TaskMetricsAccumRec { 27 | override def addRecord(rec: TaskModel): Unit = { 28 | throw new UnsupportedOperationException( 29 | "Not implemented: JobAggAccum accepts only cached records") 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/StageAggAccum.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package com.nvidia.spark.rapids.tool.analysis.util 18 | 19 | import com.nvidia.spark.rapids.tool.profiling.StageAggTaskMetricsProfileResult 20 | 21 | /** 22 | * Accumulator for Stage Aggregates. 23 | * This is an optimization to avoid using the Scala collections API on each field for the entire 24 | * number of tasks in a Stage. 25 | */ 26 | case class StageAggAccum() extends TaskMetricsAccumRec { 27 | override def addRecord(rec: StageAggTaskMetricsProfileResult): Unit = { 28 | throw new UnsupportedOperationException("Not implemented: Cannot use cached results to " + 29 | "calculate stage aggregates") 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/ExecParser.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.tool.planparser 18 | 19 | import com.nvidia.spark.rapids.tool.planparser.ops.UnsupportedExprOpRef 20 | 21 | trait ExecParser { 22 | def parse: ExecInfo 23 | val fullExecName: String 24 | 25 | /** 26 | * Returns a sequence of UnsupportedExpr for the given expressions if they are not supported 27 | * by the exec node. 28 | * This default implementation assumes all expressions are supported and returns an empty 29 | * sequence. Specific Exec parsers should override this method to provide the list of 30 | * unsupported expressions if required. 31 | * 32 | * @param expressions Array of expression strings to evaluate for support. 33 | * @return Empty Seq[UnsupportedExpr], indicating no unsupported expressions by default. 34 | */ 35 | def getUnsupportedExprReasonsForExec( 36 | expressions: Array[String]): Seq[UnsupportedExprOpRef] = Seq.empty 37 | } 38 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/HashAggregateExecParser.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package com.nvidia.spark.rapids.tool.planparser 18 | 19 | import com.nvidia.spark.rapids.tool.qualification.PluginTypeChecker 20 | 21 | import org.apache.spark.internal.Logging 22 | import org.apache.spark.sql.execution.ui.SparkPlanGraphNode 23 | import org.apache.spark.sql.rapids.tool.AppBase 24 | 25 | case class HashAggregateExecParser( 26 | override val node: SparkPlanGraphNode, 27 | override val checker: PluginTypeChecker, 28 | override val sqlID: Long, 29 | override val expressionFunction: Option[String => Array[String]], 30 | appBase: AppBase) extends 31 | GenericExecParser(node, checker, sqlID, 32 | expressionFunction = expressionFunction, app = Some(appBase)) with Logging { 33 | 34 | override def getDurationMetricIds: Seq[Long] = { 35 | node.metrics.find(_.name == "time in aggregation build").map(_.accumulatorId).toSeq 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/ObjectHashAggregateExecParser.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.tool.planparser 18 | 19 | import com.nvidia.spark.rapids.tool.qualification.PluginTypeChecker 20 | 21 | import org.apache.spark.internal.Logging 22 | import org.apache.spark.sql.execution.ui.SparkPlanGraphNode 23 | import org.apache.spark.sql.rapids.tool.AppBase 24 | 25 | case class ObjectHashAggregateExecParser( 26 | override val node: SparkPlanGraphNode, 27 | override val checker: PluginTypeChecker, 28 | override val sqlID: Long, 29 | override val expressionFunction: Option[String => Array[String]], 30 | appBase: AppBase) extends 31 | GenericExecParser(node, checker, sqlID, 32 | expressionFunction = expressionFunction, app = Some(appBase)) with Logging { 33 | 34 | override def getDurationMetricIds: Seq[Long] = { 35 | node.metrics.find(_.name == "time in aggregation build").map(_.accumulatorId).toSeq 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/ops/ExprOpRef.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.tool.planparser.ops 18 | 19 | /** 20 | * Represents a reference to an expression operator that is stored in the ExecInfo expressions 21 | * @param opRef the opRef to wrap 22 | * @param count the count of that expression within the exec. 23 | */ 24 | case class ExprOpRef(opRef: OpRef, count: Int = 1) extends OpRefWrapperBase(opRef) 25 | 26 | object ExprOpRef extends OpRefWrapperBaseTrait[ExprOpRef] { 27 | def fromRawExprSeq(exprArr: Seq[String]): Seq[ExprOpRef] = { 28 | exprArr.groupBy(identity) 29 | .mapValues(expr => ExprOpRef(OpRef.fromExpr(expr.head), expr.size)).values.toSeq 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/ops/OperatorRefTrait.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.tool.planparser.ops 18 | 19 | trait OperatorRefTrait { 20 | def getOpName: String 21 | def getOpNameCSV: String 22 | def getOpType: String 23 | def getOpTypeCSV: String 24 | } 25 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/DiagnosticSummaryInfo.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.tool.profiling 18 | 19 | /** 20 | * Stores Profiler diagnostic info. 21 | * TODO: We plan to add two more fields/views in upcoming PRs. 22 | */ 23 | case class DiagnosticSummaryInfo( 24 | stageDiagnostics: Seq[StageDiagnosticResult], 25 | IODiagnostics: Seq[IODiagnosticResult] 26 | ) 27 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/TuningOpTypes.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.tool.tuning 18 | 19 | /** 20 | * Enumerated type to define the different modifications that the AutoTuner performs on 21 | * a specific property. 22 | */ 23 | object TuningOpTypes extends Enumeration { 24 | type TuningOpType = Value 25 | val ADD, // the property is added 26 | REMOVE, // the property is removed 27 | UPDATE, // the property is updated 28 | CLONE, // the property is the same 29 | UNRESOLVED, // the property is processed by the AutoTuner but the value is not resolved 30 | UNKNOWN = Value 31 | 32 | def isTuned(tuningOpType: TuningOpType): Boolean = { 33 | tuningOpType == ADD || tuningOpType == UPDATE || 34 | tuningOpType == REMOVE || tuningOpType == UNRESOLVED 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /core/src/main/scala/com/nvidia/spark/rapids/tool/views/ViewableTrait.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.tool.views 18 | 19 | import com.nvidia.spark.rapids.tool.analysis.AppIndexMapperTrait 20 | import com.nvidia.spark.rapids.tool.profiling.ProfileResult 21 | 22 | import org.apache.spark.internal.Logging 23 | import org.apache.spark.sql.rapids.tool.AppBase 24 | 25 | trait ViewableTrait[R <: ProfileResult] extends AppIndexMapperTrait with Logging { 26 | def getLabel: String 27 | def getDescription: String = "" 28 | 29 | def getRawView(app: AppBase, index: Int): Seq[R] 30 | 31 | def getRawView(apps: Seq[AppBase]): Seq[R] = { 32 | val allRows = zipAppsWithIndex(apps).flatMap { case (app, index) => 33 | getRawView(app, index) 34 | } 35 | if (allRows.isEmpty) { 36 | allRows 37 | } else { 38 | sortView(allRows) 39 | } 40 | } 41 | 42 | def sortView(rows: Seq[R]): Seq[R] = rows 43 | } 44 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/DevRuntimeCheckpoint.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.rapids.tool.benchmarks 18 | 19 | import org.apache.spark.sql.rapids.tool.util.RuntimeUtil 20 | 21 | /** 22 | * A simple implementation to insert checkpoints during runtime to pull some performance metrics 23 | * related to Tools. This is disabled by default and can be enabled by setting the build 24 | * property `benchmarks.checkpoints`. 25 | */ 26 | class DevRuntimeCheckpoint extends RuntimeCheckpointTrait { 27 | /** 28 | * Insert a memory marker with the given label. This will print the memory information. 29 | * @param label the label for the memory marker 30 | */ 31 | override def insertMemoryMarker(label: String): Unit = { 32 | val memoryInfo = RuntimeUtil.getJVMHeapInfo(runGC = true) 33 | // scalastyle:off println 34 | println(s"Memory Marker: $label, ${memoryInfo.mkString("\n")}") 35 | // scalastyle:on println 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/NoOpRuntimeCheckpoint.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.rapids.tool.benchmarks 18 | 19 | import scala.annotation.nowarn 20 | 21 | /** 22 | * An empty implementation of the Checkpoint interface that inserts NoOps. 23 | * This is the default implementation that will be used in production and normal builds. 24 | */ 25 | class NoOpRuntimeCheckpoint extends RuntimeCheckpointTrait { 26 | override def insertMemoryMarker(@nowarn label: String): Unit = { 27 | // Do nothing. This is a noOp 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/RuntimeCheckpointTrait.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.rapids.tool.benchmarks 18 | 19 | /** 20 | * API for inserting checkpoints at runtime. 21 | * This is used for debugging and benchmarking purposes. 22 | */ 23 | trait RuntimeCheckpointTrait { 24 | /** 25 | * Insert a memory marker with the given label. 26 | * @param label the label for the memory marker 27 | */ 28 | def insertMemoryMarker(label: String): Unit 29 | } 30 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/SingleThreadedProfileToolBenchmark.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.rapids.tool.benchmarks 18 | 19 | import com.nvidia.spark.rapids.tool.profiling.{ProfileArgs, ProfileMain} 20 | 21 | object ProfToolBenchmark extends BenchmarkBase { 22 | override def runBenchmarkSuite(inputArgs: Array[String]): Unit = { 23 | // Currently the input arguments are assumed to be common across cases. 24 | // This will be improved in a follow-up PR to enable passing them as a config 25 | // file with argument support for different cases 26 | runBenchmark("Benchmark_Profiling_CSV") { 27 | val (prefix, suffix) = inputArgs.splitAt(inputArgs.length - 1) 28 | addCase("Profiling_CSV") { _ => 29 | ProfileMain.mainInternal(new ProfileArgs(prefix :+ "--num-threads" 30 | :+ "1" :+ "--csv" :+ suffix.head), 31 | enablePB = true) 32 | } 33 | run() 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/scheduler/ToolsListenerEventExtraAPIs.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.scheduler 18 | 19 | import org.apache.spark.sql.rapids.tool.annotation.ToolsReflection 20 | 21 | /** 22 | * Base trait for events related to SparkRapids build info. This is used to add extra APIs that are 23 | * not defined in the base Spark trait.
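 * For example, a hypothetical custom event (an illustrative sketch only; it is not defined in
 * this repository) would pick up both fields by mixing the trait in:
 * {{{
 *   case class MyBuildInfoEvent(info: Map[String, String])
 *     extends SparkListenerEvent with ToolsListenerEventExtraAPIs
 * }}}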
This is a workaround to be compatible at 24 | * runtime with custom Spark implementations that define abstract methods in the trait. 25 | * See https://github.com/NVIDIA/spark-rapids-tools/issues/1360 26 | */ 27 | trait ToolsListenerEventExtraAPIs { 28 | @ToolsReflection("BD-3.2.1", 29 | "Ignore the implementation: Definition for an abstract field in the SparkListenerEvent.") 30 | val eventTime: Long = 0 31 | @ToolsReflection("BD-3.2.1", 32 | "Ignore the implementation: Definition for an abstract field in the SparkListenerEvent.") 33 | val eventType: String = "" 34 | } 35 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/sql/rapids/tool/AccumToStageRetriever.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.rapids.tool 18 | 19 | /** 20 | * Trait that defines the interface for retrieving stage IDs from accumulables. 21 | * This is used to map accumulables to stages. We use it as an interface in order to separate 22 | * the logic and to allow using dummy implementations and mocks for testing when needed. 23 | */ 24 | trait AccumToStageRetriever { 25 | /** 26 | * Given a sequence of accumIds, return a set of stage IDs that are associated with the 27 | * accumIds. Note that this method can only be called after the accumulables have been fully 28 | * processed. 29 | */ 30 | def getStageIDsFromAccumIds(accumIds: Seq[Long]): Set[Int] 31 | } 32 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/sql/rapids/tool/annotation/Calculated.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.rapids.tool.annotation 18 | 19 | import scala.annotation.meta.{beanGetter, beanSetter, field, getter, param, setter} 20 | 21 | /** 22 | * A Scala annotation that specifies whether the annotated entity is calculated or loaded 23 | * directly from Spark information.
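 *
 * For example (a hypothetical field, shown for illustration only):
 * {{{
 *   @Calculated("Derived from start/end timestamps rather than read from the eventlog")
 *   val appDuration: Long = endTime - startTime
 * }}}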
24 | */ 25 | @param @field @getter @setter @beanGetter @beanSetter 26 | class Calculated(desc: String = "") extends scala.annotation.Annotation 27 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/sql/rapids/tool/annotation/Since.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.rapids.tool.annotation 18 | 19 | import scala.annotation.StaticAnnotation 20 | import scala.annotation.meta.{beanGetter, beanSetter, field, getter, param, setter} 21 | 22 | 23 | /** 24 | * This code is mostly copied from org.apache.spark.annotation.Since. 25 | * It is copied here because it is private to Spark packages, which makes it 26 | * inaccessible to non-Spark packages. 27 | * 28 | * A Scala annotation that specifies the Tools version when a definition or feature was added. 29 | */ 30 | @param @field @getter @setter @beanGetter @beanSetter 31 | class Since(version: String) extends StaticAnnotation 32 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/sql/rapids/tool/annotation/ToolsReflection.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.rapids.tool.annotation 18 | 19 | import scala.annotation.StaticAnnotation 20 | import scala.annotation.meta.{beanGetter, beanSetter, field, getter, param, setter} 21 | 22 | 23 | /** 24 | * This code is mostly copied from org.apache.spark.annotation.Since. 25 | * It is copied here because it is private to Spark packages, which makes it 26 | * inaccessible to non-Spark packages.
27 | * 28 | * A Scala annotation that indicates entities that are used for reflection in Tools to match 29 | * different Spark runtime APIs. 30 | */ 31 | @param @field @getter @setter @beanGetter @beanSetter 32 | class ToolsReflection(source: String, comment: String) extends StaticAnnotation 33 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/sql/rapids/tool/annotation/WallClock.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.rapids.tool.annotation 18 | 19 | import scala.annotation.StaticAnnotation 20 | import scala.annotation.meta.{beanGetter, beanSetter, field, getter, param, setter} 21 | 22 | 23 | /** 24 | * A Scala annotation that specifies the type of duration: wallClockTime vs. TaskDuration 25 | */ 26 | @param @field @getter @setter @beanGetter @beanSetter 27 | class WallClock extends StaticAnnotation 28 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/sql/rapids/tool/annotation/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.rapids.tool 18 | 19 | /** 20 | * This package consists of the Tools annotations used to mark certain characteristics and usages 21 | * for developers. The purpose is to provide some helpful tips, notes and metadata to classes, 22 | * fields, params, getters, and setters. 23 | */ 24 | package object annotation { 25 | 26 | } 27 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/sql/rapids/tool/util/ToolsTimer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.rapids.tool.util 18 | 19 | /** 20 | * This code is mostly copied from org.apache.spark.benchmark.Benchmark.Timer 21 | * 22 | * Utility class to measure timing. 23 | * @param iteration specifies this is the nth iteration of running the benchmark case 24 | */ 25 | class ToolsTimer(val iteration: Int) { 26 | private var accumulatedTime: Long = 0L 27 | private var timeStart: Long = 0L 28 | 29 | def startTiming(): Unit = { 30 | assert(timeStart == 0L, "Already started timing.") 31 | timeStart = System.nanoTime 32 | } 33 | 34 | def stopTiming(): Unit = { 35 | assert(timeStart != 0L, "Have not started timing.") 36 | accumulatedTime += System.nanoTime - timeStart 37 | timeStart = 0L 38 | } 39 | 40 | def totalTime(): Long = { 41 | assert(timeStart == 0L, "Have not stopped timing.") 42 | accumulatedTime 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/sql/rapids/tool/util/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.rapids.tool 18 | 19 | import scala.collection.JavaConverters._ 20 | 21 | /** 22 | * RAPIDS tools utilities. 23 | */ 24 | package object util { 25 | val RAPIDS_TOOLS_SYS_PROP_PREFIX = "rapids.tools." 26 | def getSystemProperties: Map[String, String] = { 27 | System.getProperties.stringPropertyNames().asScala 28 | .map(key => (key, System.getProperty(key))).toMap 29 | } 30 | 31 | def loadConfFromSystemProperties: Map[String, String] = { 32 | getSystemProperties.filter(_._1.startsWith(RAPIDS_TOOLS_SYS_PROP_PREFIX)) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/sql/rapids/tool/util/stubs/bd/BDGraphNodeStub.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.rapids.tool.util.stubs.bd 18 | 19 | import scala.reflect.runtime.universe.Mirror 20 | 21 | import org.apache.spark.sql.execution.ui.{SparkPlanGraphNode, SQLPlanMetric} 22 | import org.apache.spark.sql.rapids.tool.annotation.ToolsReflection 23 | import org.apache.spark.sql.rapids.tool.util.stubs.GraphReflectionEntry 24 | 25 | case class BDGraphNodeStub(m: Mirror) 26 | extends GraphReflectionEntry[org.apache.spark.sql.execution.ui.SparkPlanGraphNode]( 27 | m, "org.apache.spark.sql.execution.ui.SparkPlanGraphNode") { 28 | 29 | @ToolsReflection("BD-3.2.1", "Defines an extra argument planId: Int in the constructor") 30 | def createInstance(id: Long, name: String, desc: String, 31 | metrics: collection.Seq[SQLPlanMetric]): SparkPlanGraphNode = { 32 | // Define argument values 33 | createInstanceFromList(List(id, name, desc, metrics, 0)) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /core/src/main/scala/org/apache/spark/sql/rapids/tool/util/stubs/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.rapids.tool.util 18 | 19 | /** 20 | * Stubs used for reflection at runtime to work with different Spark implementations. 21 | * This will include stubs created for each Spark flavor when needed. 
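 *
 * For example (an editorial sketch assuming a runtime-universe `Mirror` named `mirror` is in
 * scope), a flavor-specific stub such as `BDGraphNodeStub` above builds graph nodes reflectively:
 * {{{
 *   val node = BDGraphNodeStub(mirror).createInstance(
 *     id = 1L, name = "Exchange", desc = "Exchange hashpartitioning", metrics = Seq.empty)
 * }}}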
22 | */ 23 | package object stubs { 24 | 25 | } 26 | -------------------------------------------------------------------------------- /core/src/test/resources/ProfilingExpectations/executors_removed_eventlog_expectation.csv: -------------------------------------------------------------------------------- 1 | executorId,time,reason 2 | "1",1623285426806,"Executor Process Lost" 3 | "2",1623285426654,"Executor Process Lost" 4 | -------------------------------------------------------------------------------- /core/src/test/resources/ProfilingExpectations/jobs_failure_eventlog_expectation.csv: -------------------------------------------------------------------------------- 1 | jobID,sqlID,jobResult,failureReason 2 | 79,27,"JobFailed","java.lang.Exception: Job 79 cancelled because SparkContext was shut down" 3 | -------------------------------------------------------------------------------- /core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_sql_metrics_agg_expectation.csv: -------------------------------------------------------------------------------- 1 | appID,sqlID,description,numTasks,Duration,executorCPURatio,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum 2 | "app-20240919162642-0000",26,"query88",3472,250542,75.58,0,3858136,6743,54,1111.2,2885555,13523,18186,3818106,52997115316,69120188398,16100,0,0,0,250840493,181,16203,1394,1759,201596,1750,201200,0,402796,218614,19946,154 3 | -------------------------------------------------------------------------------- /core/src/test/resources/ProfilingExpectations/rapids_duration_and_cpu_expectation.csv: -------------------------------------------------------------------------------- 1 | App ID,rootSqlId,sqlID,SQL Duration,Contains Dataset or RDD Op,App Duration,Potential Problems,Executor CPU Time Percent 2 | "local-1626104300434","",0,1260,false,131104,"NESTED COMPLEX TYPE",93.01 3 | "local-1626104300434","",1,259,false,131104,"NESTED COMPLEX TYPE",77.38 4 | "local-1626104300434","",2,130,false,131104,"NESTED COMPLEX TYPE",92.06 5 | "local-1626104300434","",3,76,false,131104,"NESTED COMPLEX TYPE",100.0 6 | "local-1626104300434","",4,65,false,131104,"NESTED COMPLEX TYPE",100.0 7 | "local-1626104300434","",5,479,false,131104,"NESTED COMPLEX TYPE",87.8 8 | "local-1626104300434","",6,95,false,131104,"",96.3 9 | "local-1626104300434","",7,65,false,131104,"",95.24 10 | -------------------------------------------------------------------------------- /core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_jobmetricsagg2_expectation.csv: -------------------------------------------------------------------------------- 1 | 
jobId,numTasks,Duration,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum 2 | 0,213,2515,0,25761,1624,9,120.9,7151,3134,11178,13522,0,0,424,0,0,0,0,10,8075,0,2600,80279920,0,0,0,80279920,80279920,2600,901 3 | -------------------------------------------------------------------------------- /core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_jobmetricsagg_expectation.csv: -------------------------------------------------------------------------------- 1 | jobId,numTasks,Duration,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum 2 | 0,213,2569,0,26735,1598,10,125.5,6608,3531,12095,13414,0,0,336,0,0,0,0,8,8075,0,2600,80279908,0,0,0,80279908,80279908,2600,1001 3 | -------------------------------------------------------------------------------- /core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_sqlmetricsagg2_expectation.csv: -------------------------------------------------------------------------------- 1 | appID,sqlID,description,numTasks,Duration,executorCPURatio,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum 2 | "local-1622821994212",0,"count at :28",213,3041,52.88,0,25761,1624,9,120.9,7151,3134,11178,13522,0,0,424,0,0,0,0,10,8075,0,2600,80279920,0,0,0,80279920,80279920,2600,901 3 | -------------------------------------------------------------------------------- /core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_sqlmetricsagg_expectation.csv: -------------------------------------------------------------------------------- 1 | 
appID,sqlID,description,numTasks,Duration,executorCPURatio,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum 2 | "local-1622814619968",0,"count at :28",213,3087,49.26,0,26735,1598,10,125.5,6608,3531,12095,13414,0,0,336,0,0,0,0,8,8075,0,2600,80279908,0,0,0,80279908,80279908,2600,1001 3 | -------------------------------------------------------------------------------- /core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_stagemetricsagg2_expectation.csv: -------------------------------------------------------------------------------- 1 | stageId,numTasks,Duration,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum 2 | 0,6,1761,0,9455,1624,1540,1575.8,2917,1287,5056,4248,0,0,228,0,0,0,0,3,2951,0,0,0,0,0,0,0,40132263,1200,376 3 | 1,6,1666,0,9274,1621,1528,1545.7,2570,1007,5016,4099,0,0,196,0,0,0,0,4,2951,0,0,0,0,0,0,0,40132257,1200,475 4 | 2,200,592,0,6937,221,9,34.7,1619,802,1065,5125,0,0,0,0,0,0,0,3,7402,0,2400,80264520,0,0,0,80264520,15400,200,50 5 | 3,1,101,0,95,95,95,95.0,45,38,41,50,0,0,0,0,0,0,0,0,8075,0,200,15400,0,0,0,15400,0,0,0 6 | -------------------------------------------------------------------------------- /core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_stagemetricsagg_expectation.csv: -------------------------------------------------------------------------------- 1 | stageId,numTasks,Duration,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum 2 | 0,6,1743,0,9518,1598,1580,1586.3,2512,1393,5309,4043,0,0,168,0,0,0,0,3,2951,0,0,0,0,0,0,0,40132250,1200,400 3 | 1,6,1631,0,9434,1582,1568,1572.3,2406,1067,5273,3998,0,0,168,0,0,0,0,5,2951,0,0,0,0,0,0,0,40132258,1200,508 4 | 2,200,688,0,7705,237,10,38.5,1660,1034,1474,5337,0,0,0,0,0,0,0,0,7359,0,2400,80264508,0,0,0,80264508,15400,200,93 5 | 3,1,83,0,78,78,78,78.0,30,37,39,36,0,0,0,0,0,0,0,0,8075,0,200,15400,0,0,0,15400,0,0,0 6 | 
-------------------------------------------------------------------------------- /core/src/test/resources/ProfilingExpectations/removed_blockManagers_eventlog_expectation.csv: -------------------------------------------------------------------------------- 1 | executorId,time 2 | "1",1623285426800 3 | "2",1623285426638 4 | -------------------------------------------------------------------------------- /core/src/test/resources/ProfilingExpectations/unsupported_sql_eventlog_expectation.csv: -------------------------------------------------------------------------------- 1 | sqlID,nodeID,nodeName,nodeDescription,reason 2 | 0,2,"SerializeFromObject","SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromSt","Contains Dataset or RDD" 3 | 0,3,"MapElements","MapElements com.nvidia.spark.rapids.tool.profiling.QualificationInfoSuite$$$Lambda$1571/993650587@7b","Contains Dataset or RDD" 4 | 0,4,"Filter","Filter com.nvidia.spark.rapids.tool.profiling.QualificationInfoSuite$$$Lambda$1569/1828787392@2eb6d3","Contains Dataset or RDD" 5 | 0,5,"DeserializeToObject","DeserializeToObject newInstance(class com.nvidia.spark.rapids.tool.profiling.RapidsFriends), obj#30:","Contains Dataset or RDD" 6 | 0,10,"SerializeFromObject","SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromSt","Contains Dataset or RDD" 7 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/complex_dec_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Spark shell","local-1626104300434",1,1500,1469,131104,996,89.7,"","","","struct,lastname:string>;struct,previous:struct,city:string>>;array>;map;map>;map>;array>;array","struct,lastname:string>;struct,previous:struct,city:string>>;array>;map>;map>;array>","NESTED COMPLEX TYPE",1260,1388,129598,493,976,false,"CollectLimit","",1564 3 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/db_sim_test_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Spark shell","local-1623876083964",1,119353,1417661,133857,92667,91.25,"","","","","","",119903,143821,14504,316964,1100697,false,"Scan unknown;SerializeFromObject","",1599 3 | -------------------------------------------------------------------------------- 
/core/src/test/resources/QualificationExpectations/directory_test_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Spark shell","local-1623876083964",1,119353,1417661,133857,92667,91.25,"","","","","","",119903,143821,14504,316964,1100697,false,"Scan unknown;SerializeFromObject","",1599 3 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/jdbc_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Spark shell","app-20211019113801-0001",1,2942,19894,571967,2814,29.76,"","JDBC[*]","","","","",1812,2883,569025,859,19035,false,"CollectLimit;Scan jdbc;Execute CreateViewCommand","",9110 3 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/multi_run_freq_test_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Spark shell","local-1629446106683",1,1910,6475,17698,1910,27.8,"","","","array>;map>","array>;map>","NESTED COMPLEX TYPE",1453,1203,16292,0,6475,false,"","",132 3 | "Spark shell","local-1623263471760",1,0,0,22937,0,0.0,"","","","","","",0,0,22937,0,0,false,"","",266 4 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/nds_q86_fail_test_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "TPC-DS 
Like Bench q86","app-20210319163812-1778",1,9910,4320658,26171,9910,0.0,"24","","","","","",9565,9265,3596053,0,4320658,false,"Execute CreateViewCommand","",24270 3 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/nds_q86_test_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "TPC-DS Like Bench q86","app-20210319163812-1778",1,9910,4320658,26171,9910,35.39,"","","","","","",9565,9265,3596053,0,4320658,false,"Execute CreateViewCommand","",24270 3 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/nested_dsv2_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Spark shell","local-1630045673160",1,1363,3757,21200,1363,34.65,"","","","array>;map>;map>;map>","array>;map>","NESTED COMPLEX TYPE",1453,1203,16292,0,6475,false,"","",132 3 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/qual_test_missing_sql_end_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Rapids Spark Profiling Tool Unit Tests","local-1622561780883",1,0,40448,7673,0,56.24,"","","","","","",0,5000,7673,8096,32352,false,"Scan unknown;SerializeFromObject","",82 3 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/qual_test_simple_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus 
Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Rapids Spark Profiling Tool Unit Tests","local-1622043423018",1,11600,132257,16319,9868,37.97,"","","JSON","","","",7143,13770,4719,19744,112513,false,"SerializeFromObject;Scan unknown;Execute InsertIntoHadoopFsRelationCommand json;DeserializeToObject;Filter;MapElements","",186 3 | "Spark shell","local-1651187225439",1,224,180,355637,142,87.88,"","","","","","",498,228,355101,66,114,false,"SerializeFromObject;CollectLimit;DeserializeToObject;Filter;MapElements","",2834 4 | "Spark shell","local-1651188809790",1,347,283,166215,128,81.18,"","","","","","UDF",715,318,165572,178,105,false,"CollectLimit;Project","UDF",1318 5 | "Rapids Spark Profiling Tool Unit Tests","local-1623281204390",1,1156,4666,6240,122,47.48,"","","JSON","","","UDF",1209,1130,5809,4170,496,false,"Execute InsertIntoHadoopFsRelationCommand json;LocalTableScan;Execute CreateViewCommand;Project","UDF",64 6 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/qual_test_simple_expectation_persql.csv: -------------------------------------------------------------------------------- 1 | App ID,Root SQL ID,SQL ID,SQL Description,SQL DF Duration,GPU Opportunity 2 | "local-1622043423018","",1,"count at QualificationInfoUtils.scala:94",7143,6719 3 | "local-1622043423018","",3,"count at QualificationInfoUtils.scala:94",2052,1660 4 | "local-1622043423018","",2,"count at QualificationInfoUtils.scala:94",1933,1551 5 | "local-1651187225439","",0,"show at :26",498,333 6 | "local-1651188809790","",0,"show at :26",715,242 7 | "local-1622043423018","",0,"json at QualificationInfoUtils.scala:76",1306,164 8 | "local-1651188809790","",1,"show at :26",196,135 9 | "local-1651187225439","",1,"show at :26",262,110 10 | "local-1623281204390","",2,"json at QualificationInfoUtils.scala:136",321,107 11 | "local-1623281204390","",5,"json at QualificationInfoUtils.scala:136",129,43 12 | "local-1623281204390","",8,"json at QualificationInfoUtils.scala:136",127,42 13 | "local-1623281204390","",4,"createOrReplaceTempView at QualificationInfoUtils.scala:133",22,22 14 | "local-1623281204390","",7,"createOrReplaceTempView at QualificationInfoUtils.scala:133",4,4 15 | "local-1623281204390","",1,"createOrReplaceTempView at QualificationInfoUtils.scala:133",2,2 16 | "local-1623281204390","",0,"json at QualificationInfoUtils.scala:130",1209,0 17 | "local-1623281204390","",6,"json at QualificationInfoUtils.scala:130",110,0 18 | "local-1623281204390","",3,"json at QualificationInfoUtils.scala:130",108,0 19 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/read_dsv1_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Spark 
shell","local-1624371544219",1,4575,20421,175293,4365,72.2,"","Text[*]","JSON","","","",1859,5372,176916,938,19483,false,"CollectLimit;Scan text;Execute InsertIntoHadoopFsRelationCommand json","",2096 3 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/read_dsv2_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Spark shell","local-1624371906627",1,4917,21802,83738,4762,71.34,"","Text[*]","JSON","","","",1984,5438,83336,689,21113,false,"CollectLimit;Scan text;Execute InsertIntoHadoopFsRelationCommand json","",997 3 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/spark2_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Spark shell","local-1634253215009",1,523,359,47063,281,68.73,"","Text[*]","","","","",1068,385,46540,166,193,false,"CollectLimit;Scan text","",369 3 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/truncated_1_end_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Rapids Spark Profiling Tool Unit Tests","local-1622043423018",1,395,14353,4872,164,62.79,"","","JSON","","","",1306,794,4477,8376,5977,true,"SerializeFromObject;Scan unknown;Execute InsertIntoHadoopFsRelationCommand json;DeserializeToObject;Filter;MapElements","",49 3 | -------------------------------------------------------------------------------- /core/src/test/resources/QualificationExpectations/write_format_expectation.csv: -------------------------------------------------------------------------------- 1 | App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested 
Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds 2 | "Spark shell","local-1629442299891",1,1151,920,19554,788,91.98,"","","CSV;JSON","","","",1235,1049,18251,290,630,false,"Execute InsertIntoHadoopFsRelationCommand csv;Execute InsertIntoHadoopFsRelationCommand json","",147 3 | -------------------------------------------------------------------------------- /core/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | log4j.rootCategory=INFO, file 18 | log4j.appender.file=org.apache.log4j.FileAppender 19 | log4j.appender.file.append=true 20 | log4j.appender.file.file=target/surefire-reports/scala-test-detailed-output.log 21 | log4j.appender.file.layout=org.apache.log4j.PatternLayout 22 | log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n 23 | 24 | #Just warnings for the console 25 | log4j.appender.console=org.apache.log4j.ConsoleAppender 26 | log4j.appender.console.target=System.err 27 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 28 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n 29 | -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-profiling/eventlog-gpu-dsv1.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/eventlog-gpu-dsv1.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-profiling/eventlog-gpu-dsv2.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/eventlog-gpu-dsv2.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-profiling/eventlog_dsv1.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/eventlog_dsv1.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-profiling/eventlog_dsv2.zstd: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/eventlog_dsv2.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-profiling/gds_ucx_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/gds_ucx_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-profiling/gpu_oom_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/gpu_oom_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-profiling/malformed_json_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/malformed_json_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-profiling/nds_q66_gpu.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/nds_q66_gpu.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-profiling/rapids_join_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/rapids_join_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-profiling/rapids_join_eventlog2.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/rapids_join_eventlog2.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-profiling/rp_sql_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/rp_sql_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-profiling/spark2-eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/spark2-eventlog.zstd -------------------------------------------------------------------------------- 
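The .zstd entries in spark-events-profiling (and in spark-events-qualification below) are zstandard-compressed Spark event logs, one JSON event per line. A minimal sketch for peeking inside one locally, assuming the third-party zstandard package is installed and one of the logs listed above has been downloaded next to the script: import io / import json / import zstandard / # Local copy of one of the compressed event logs referenced above. / with open('rp_sql_eventlog.zstd', 'rb') as fh: /     reader = zstandard.ZstdDecompressor().stream_reader(fh) /     text = io.TextIOWrapper(reader, encoding='utf-8') /     for i, line in enumerate(text): /         # Every Spark event log line carries an 'Event' field naming its type. /         print(json.loads(line)['Event']) /         if i >= 4:  # the first five events are enough for a quick look /             break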
/core/src/test/resources/spark-events-profiling/tasks_executors_fail_compressed_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-profiling/tasks_executors_fail_compressed_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/aqeshuffle_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/aqeshuffle_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/cluster_information/eventlog_4nodes_8cores_dynamic_alloc.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/cluster_information/eventlog_4nodes_8cores_dynamic_alloc.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/complex_dec_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/complex_dec_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/createdatasourcetable_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/createdatasourcetable_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/customshuffle_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/customshuffle_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/db_sim_eventlog/eventlog-2021-06-15--15-00.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/db_sim_eventlog/eventlog-2021-06-15--15-00.gz -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/db_subExecution_id.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/db_subExecution_id.zstd -------------------------------------------------------------------------------- 
/core/src/test/resources/spark-events-qualification/dsAndDf_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/dsAndDf_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/eventlog_same_app_id_1.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/eventlog_same_app_id_1.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/eventlog_same_app_id_2.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/eventlog_same_app_id_2.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/eventlog_v2_local-1623876083964/appstatus_local-1623876083964: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/eventlog_v2_local-1623876083964/appstatus_local-1623876083964 -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/eventlog_v2_local-1623876083964/events_1_local-1623876083964.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/eventlog_v2_local-1623876083964/events_1_local-1623876083964.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/eventlog_v2_local-1623876083964/events_2_local-1623876083964.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/eventlog_v2_local-1623876083964/events_2_local-1623876083964.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/global_local_limit_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/global_local_limit_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/jdbc_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/jdbc_eventlog.zstd 
-------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/multiple_attempts/attempt_1_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/multiple_attempts/attempt_1_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/multiple_attempts/attempt_2_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/multiple_attempts/attempt_2_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/multiple_attempts/attempt_3_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/multiple_attempts/attempt_3_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/multiple_attempts/attempt_4_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/multiple_attempts/attempt_4_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/nds_q88_photon_db_13_3.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/nds_q88_photon_db_13_3.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/pandas_execs_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/pandas_execs_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/spark-events-qualification/xgboost_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/core/src/test/resources/spark-events-qualification/xgboost_eventlog.zstd -------------------------------------------------------------------------------- /core/src/test/resources/worker_info-autotuner-example-8cores-16gmemory.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | system: 16 | numCores: 8 17 | memory: 16384MiB 18 | numWorkers: 4 19 | softwareProperties: 20 | spark.scheduler.mode: FAIR 21 | spark.sql.cbo.enabled: 'true' 22 | spark.ui.port: '0' 23 | spark.yarn.am.memory: 640m 24 | -------------------------------------------------------------------------------- /core/src/test/scala/org/apache/spark/sql/TrampolineUtil.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.apache.spark.sql 17 | 18 | import java.io.File 19 | 20 | import org.apache.spark.SparkConf 21 | import org.apache.spark.io.CompressionCodec 22 | import org.apache.spark.util.Utils 23 | 24 | object TrampolineUtil { 25 | /** Shuts down and cleans up any existing Spark session */ 26 | def cleanupAnyExistingSession(): Unit = SparkSession.cleanupAnyExistingSession() 27 | 28 | def withTempPath(f: File => Unit): Unit = { 29 | val path = Utils.createTempDir(namePrefix = "toolTestDir") 30 | path.delete() 31 | try f(path) finally Utils.deleteRecursively(path) 32 | } 33 | 34 | def withTempDir(f: File => Unit): Unit = { 35 | val path = Utils.createTempDir(namePrefix = "toolTestDir") 36 | try f(path) finally Utils.deleteRecursively(path) 37 | } 38 | 39 | def createCodec(conf: SparkConf, codecName: String) = 40 | CompressionCodec.createCodec(conf, codecName) 41 | } 42 | -------------------------------------------------------------------------------- /data_validation/setup.cfg: -------------------------------------------------------------------------------- 1 | [options.entry_points] 2 | console_scripts = 3 | spark_rapids_validation_tool = spark_rapids_validation_tool.dataproc_wrapper:main 4 | [options.package_data] 5 | * = *.json, *.yaml 6 | -------------------------------------------------------------------------------- /data_validation/src/spark_rapids_validation_tool/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the spark_rapids_validation_tool package.""" 16 | from spark_rapids_validation_tool.build import get_version 17 | 18 | VERSION = '23.02.0' 19 | __version__ = get_version(VERSION) -------------------------------------------------------------------------------- /data_validation/src/spark_rapids_validation_tool/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Build helpers.""" 16 | 17 | import datetime 18 | import os 19 | 20 | 21 | def get_version(main=None): 22 | if main is None: 23 | # pylint: disable=import-outside-toplevel 24 | from spark_rapids_validation_tool import VERSION as main 25 | suffix = '' 26 | nightly = os.environ.get('USERTOOLS_NIGHTLY') 27 | if nightly == '1': 28 | suffix = '.dev' + datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S') 29 | return main + suffix 30 | -------------------------------------------------------------------------------- /data_validation/src/spark_rapids_validation_tool/csp/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """CSP init file.""" 15 | 16 | from .csp import CspBase 17 | from .dataproc import Dataproc as _  # noqa: F401 18 | 19 | 20 | def new_csp(csp_type, args): 21 | """Create new CSP instance by CSP type.""" 22 | 23 | for cls in CspBase.__subclasses__(): 24 | if cls.is_csp(csp_type): 25 | return cls(args) 26 | 27 | raise Exception(f'unknown CSP type: {csp_type}') 28 | -------------------------------------------------------------------------------- /data_validation/templates/datavalid_conf.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | sparkConf: 16 | spark.executor.memory: 4g 17 | spark.executor.cores: 3 18 | spark.executor.instances: 5 19 | spark.dynamicAllocation.enabled: false 20 | toolConf: 21 | cluster: data-validation-test2 22 | region: us-central1 23 | check: valid_metadata 24 | format: hive 25 | table1: your-table1-name 26 | table1_partition: 27 | table2: your-table2-name 28 | table2_partition: 29 | pk: 30 | exclude_column: 31 | include_column: all 32 | filter: 33 | output_path: 34 | output_format: 35 | precision: 4 36 | debug: false -------------------------------------------------------------------------------- /scripts/auto-copyrighter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2024, NVIDIA CORPORATION. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | SPARK_RAPIDS_AUTO_COPYRIGHTER=${SPARK_RAPIDS_AUTO_COPYRIGHTER:-OFF} 19 | 20 | case "$SPARK_RAPIDS_AUTO_COPYRIGHTER" in 21 | 22 | OFF) 23 | echo "Copyright updater is DISABLED. The Automatic Copyright Updater can be enabled/disabled by setting \ 24 | SPARK_RAPIDS_AUTO_COPYRIGHTER=ON or SPARK_RAPIDS_AUTO_COPYRIGHTER=OFF, \ 25 | respectively" 26 | exit 0 27 | ;; 28 | 29 | ON) 30 | ;; 31 | 32 | *) 33 | echo "Invalid value of SPARK_RAPIDS_AUTO_COPYRIGHTER=$SPARK_RAPIDS_AUTO_COPYRIGHTER. 34 | Only ON or OFF are allowed" 35 | exit 1 36 | ;; 37 | esac 38 | 39 | set -x 40 | echo "$@" | xargs -L1 sed -i -E "s/Copyright *\(c\) *([0-9,-]+)*-([0-9]{4}), *NVIDIA *CORPORATION/Copyright (c) \\1-`date +%Y`, NVIDIA CORPORATION/; /`date +%Y`/!
s/Copyright *\(c\) ([0-9]{4}), *NVIDIA *CORPORATION/Copyright (c) \\1-`date +%Y`, NVIDIA CORPORATION/" 42 | -------------------------------------------------------------------------------- /user_tools/docs/resources/debug-behave-intellij.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/user_tools/docs/resources/debug-behave-intellij.png -------------------------------------------------------------------------------- /user_tools/docs/resources/spark_rapids_user_tools_overview-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/user_tools/docs/resources/spark_rapids_user_tools_overview-01.png -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the spark_rapids_pytools package.""" 16 | 17 | from spark_rapids_pytools.build import get_version, get_spark_dep_version 18 | 19 | VERSION = '25.04.3' 20 | # defines the default runtime build version for the user tools environment 21 | SPARK_DEP_VERSION = '350' 22 | __version__ = get_version(VERSION) 23 | __spark_dep_version__ = get_spark_dep_version() 24 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/cloud_api/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """provides interface to the cloud service providers.""" 16 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/cloud_api/databricks_aws_job.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Implementation of Job submissions on Databricks AWS""" 16 | 17 | from dataclasses import dataclass 18 | 19 | from spark_rapids_pytools.rapids.rapids_job import RapidsLocalJob 20 | 21 | 22 | @dataclass 23 | class DBAWSLocalRapidsJob(RapidsLocalJob): 24 | """ 25 | Implementation of a RAPIDS job that runs on a local machine. 26 | """ 27 | job_label = 'DBAWSLocal' 28 | 29 | def _build_submission_cmd(self) -> list: 30 | # env vars are added later as a separate dictionary 31 | cmd_arg = super()._build_submission_cmd() 32 | # any s3:// link has to be converted to s3a:// 33 | for index, arr_entry in enumerate(cmd_arg): 34 | cmd_arg[index] = arr_entry.replace('s3://', 's3a://') 35 | return cmd_arg 36 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/cloud_api/dataproc_gke_job.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Implementation of Job submissions on GCloud Dataproc GKE""" 16 | 17 | from dataclasses import dataclass 18 | 19 | from spark_rapids_pytools.rapids.rapids_job import RapidsLocalJob 20 | 21 | 22 | @dataclass 23 | class DataprocGkeLocalRapidsJob(RapidsLocalJob): 24 | """ 25 | Implementation of a RAPIDS job that runs on a local machine. 26 | """ 27 | job_label = 'dataprocLocal' 28 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/cloud_api/dataproc_job.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | """Implementation of Job submissions on GCloud Dataproc""" 16 | 17 | from dataclasses import dataclass 18 | 19 | from spark_rapids_pytools.rapids.rapids_job import RapidsLocalJob 20 | 21 | 22 | @dataclass 23 | class DataprocLocalRapidsJob(RapidsLocalJob): 24 | """ 25 | Implementation of a RAPIDS job that runs on a local machine. 26 | """ 27 | job_label = 'dataprocLocal' 28 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/cloud_api/emr_job.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Implementation of Job submissions on EMR""" 16 | 17 | from dataclasses import dataclass 18 | from spark_rapids_pytools.rapids.rapids_job import RapidsLocalJob 19 | 20 | 21 | @dataclass 22 | class EmrLocalRapidsJob(RapidsLocalJob): 23 | """ 24 | Implementation of a RAPIDS job that runs local on a local machine. 25 | """ 26 | job_label = 'emrLocal' 27 | 28 | def _build_submission_cmd(self) -> list: 29 | # env vars are added later as a separate dictionary 30 | cmd_arg = super()._build_submission_cmd() 31 | # any s3 link has to be converted to S3a: 32 | for index, arr_entry in enumerate(cmd_arg): 33 | cmd_arg[index] = arr_entry.replace('s3://', 's3a://') 34 | return cmd_arg 35 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Declaration and implementation of common helpers and utilities""" 16 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/common/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Definition of custom Exceptions""" 16 | 17 | 18 | class StorageException(Exception): 19 | """Exception raised for errors in the storage layer""" 20 | 21 | def __init__(self, 22 | message: str = 'Exception in storage operation', 23 | chained_err: OSError = None): 24 | self.msg = message if chained_err is None else f'{message}: {chained_err}' 25 | super().__init__(self.msg) 26 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/pricing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Provides abstractions and implementations of savings estimator.""" 16 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/pricing/dataproc_gke_pricing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """providing absolute costs of resources in GCloud DataprocGke""" 16 | 17 | from dataclasses import dataclass 18 | 19 | from spark_rapids_pytools.pricing.dataproc_pricing import DataprocPriceProvider 20 | 21 | 22 | @dataclass 23 | class DataprocGkePriceProvider(DataprocPriceProvider): 24 | """ 25 | Provide costs of DataprocGke instances 26 | """ 27 | name = 'DataprocGke' 28 | 29 | def get_container_cost(self) -> float: 30 | gke_container_cost = self.__get_gke_container_cost() 31 | return gke_container_cost 32 | 33 | def __get_gke_container_cost(self) -> float: 34 | lookup_key = 'CP-GKE-CONTAINER-MANAGMENT-COST' 35 | return self.catalogs['gcloud'].get_value(lookup_key, 'us') 36 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/rapids/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Declaration and implementation of the RAPIDS plugin accelerator plugin""" 16 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/bootstrap-conf.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | platform: 16 | shortName: 'boot' 17 | # workDir: rapids_user_tools_bootstrap 18 | outputDir: bootstrap_tool_output 19 | cleanUp: true 20 | local: 21 | output: 22 | cleanUp: true 23 | # Name of the file where the final result is going to show 24 | fileName: rapids_4_dataproc_bootstrap_output.log 25 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/dev/log4j.properties: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Set root logger level to INFO with the FILE and stderr appenders 16 | log4j.rootLogger=INFO, FILE, stderr 17 | 18 | # File appender configuration 19 | log4j.appender.FILE=org.apache.log4j.FileAppender 20 | log4j.appender.FILE.append=false 21 | log4j.appender.FILE.File=./log4j.log 22 | log4j.appender.FILE.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.FILE.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %t %p %c{1}:%L - %m%n 24 | 25 | # Console appender configuration for stderr 26 | log4j.appender.stderr=org.apache.log4j.ConsoleAppender 27 | log4j.appender.stderr.target=System.err 28 | log4j.appender.stderr.layout=org.apache.log4j.PatternLayout 29 | log4j.appender.stderr.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n 30 | 31 | # Logging levels per package 32 | log4j.logger.org.apache.spark=INFO 33 | log4j.logger.org.apache.hadoop=WARN 34 | log4j.logger.com.nvidia=INFO 35 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/diagnostic-conf.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | platform: 16 | shortName: 'diag' 17 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/qualx-conf.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | cache_dir: qualx_cache 16 | datasets: datasets 17 | featurizers: 18 | - default.py 19 | - hash_plan.py 20 | modifiers: 21 | # - align_sql_id.py 22 | label: Duration 23 | split_functions: 24 | train: split_train_val.py 25 | test: split_all_test.py 26 | model_type: xgboost 27 | xgboost: 28 | model_name: xgb_model.json 29 | n_trials: 200 30 | qual_tool_filter: stage 31 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/qualx/models/xgboost/combined.cfg: -------------------------------------------------------------------------------- 1 | {"learner":{"generic_param":{"device":"cpu","fail_on_invalid_gpu_id":"0","n_jobs":"0","nthread":"0","random_state":"0","seed":"0","seed_per_iteration":"0","validate_parameters":"1"},"gradient_booster":{"gbtree_model_param":{"num_parallel_tree":"1","num_trees":"98"},"gbtree_train_param":{"process_type":"default","tree_method":"auto","updater":"grow_quantile_histmaker","updater_seq":"grow_quantile_histmaker"},"name":"gbtree","specified_updater":false,"tree_train_param":{"alpha":"0","cache_opt":"1","colsample_bylevel":"1","colsample_bynode":"1","colsample_bytree":"1","eta":"0.0491240546","gamma":"0.00810215808","grow_policy":"depthwise","interaction_constraints":"","lambda":"1","learning_rate":"0.0491240546","max_bin":"256","max_cat_threshold":"64","max_cat_to_onehot":"4","max_delta_step":"0","max_depth":"8","max_leaves":"0","min_child_weight":"2","min_split_loss":"0.00810215808","monotone_constraints":"()","refresh_leaf":"1","reg_alpha":"0","reg_lambda":"1","sampling_method":"uniform","sketch_ratio":"2","sparse_threshold":"0.20000000000000001","subsample":"0.613816738"},"updater":[{"hist_train_param":{"debug_synchronize":"0","max_cached_hist_node":"65536"},"name":"grow_quantile_histmaker"}]},"learner_model_param":{"base_score":"6.1548704E-1","boost_from_average":"1","num_class":"0","num_feature":"118","num_target":"1"},"learner_train_param":{"booster":"gbtree","disable_default_eval_metric":"0","multi_strategy":"one_output_per_tree","objective":"reg:squarederror"},"metrics":[{"name":"mae"},{"name":"mape"}],"objective":{"name":"reg:squarederror","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[2,1,3]} -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/qualx/models/xgboost/databricks-aws.cfg: -------------------------------------------------------------------------------- 1 | 
{"learner":{"generic_param":{"device":"cpu","fail_on_invalid_gpu_id":"0","n_jobs":"0","nthread":"0","random_state":"0","seed":"0","seed_per_iteration":"0","validate_parameters":"1"},"gradient_booster":{"gbtree_model_param":{"num_parallel_tree":"1","num_trees":"98"},"gbtree_train_param":{"process_type":"default","tree_method":"auto","updater":"grow_quantile_histmaker","updater_seq":"grow_quantile_histmaker"},"name":"gbtree","specified_updater":false,"tree_train_param":{"alpha":"0","cache_opt":"1","colsample_bylevel":"1","colsample_bynode":"1","colsample_bytree":"1","eta":"0.0405488536","gamma":"0.0371223316","grow_policy":"depthwise","interaction_constraints":"","lambda":"1","learning_rate":"0.0405488536","max_bin":"256","max_cat_threshold":"64","max_cat_to_onehot":"4","max_delta_step":"0","max_depth":"10","max_leaves":"0","min_child_weight":"3","min_split_loss":"0.0371223316","monotone_constraints":"()","refresh_leaf":"1","reg_alpha":"0","reg_lambda":"1","sampling_method":"uniform","sketch_ratio":"2","sparse_threshold":"0.20000000000000001","subsample":"0.722168922"},"updater":[{"hist_train_param":{"debug_synchronize":"0","max_cached_hist_node":"65536"},"name":"grow_quantile_histmaker"}]},"learner_model_param":{"base_score":"6.2358004E-1","boost_from_average":"1","num_class":"0","num_feature":"118","num_target":"1"},"learner_train_param":{"booster":"gbtree","disable_default_eval_metric":"0","multi_strategy":"one_output_per_tree","objective":"reg:squarederror"},"metrics":[{"name":"mae"},{"name":"mape"}],"objective":{"name":"reg:squarederror","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[2,1,4]} -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/qualx/models/xgboost/databricks-aws_photon.cfg: -------------------------------------------------------------------------------- 1 | {"learner":{"generic_param":{"device":"cpu","fail_on_invalid_gpu_id":"0","n_jobs":"0","nthread":"0","random_state":"0","seed":"0","seed_per_iteration":"0","validate_parameters":"1"},"gradient_booster":{"gbtree_model_param":{"num_parallel_tree":"1","num_trees":"65"},"gbtree_train_param":{"process_type":"default","tree_method":"auto","updater":"grow_quantile_histmaker","updater_seq":"grow_quantile_histmaker"},"name":"gbtree","specified_updater":false,"tree_train_param":{"alpha":"0","cache_opt":"1","colsample_bylevel":"1","colsample_bynode":"1","colsample_bytree":"1","eta":"0.0476951823","gamma":"0.00684582209","grow_policy":"depthwise","interaction_constraints":"","lambda":"1","learning_rate":"0.0476951823","max_bin":"256","max_cat_threshold":"64","max_cat_to_onehot":"4","max_delta_step":"0","max_depth":"10","max_leaves":"0","min_child_weight":"4","min_split_loss":"0.00684582209","monotone_constraints":"()","refresh_leaf":"1","reg_alpha":"0","reg_lambda":"1","sampling_method":"uniform","sketch_ratio":"2","sparse_threshold":"0.20000000000000001","subsample":"0.666839182"},"updater":[{"hist_train_param":{"debug_synchronize":"0","max_cached_hist_node":"65536"},"name":"grow_quantile_histmaker"}]},"learner_model_param":{"base_score":"-4.1710395E-1","boost_from_average":"1","num_class":"0","num_feature":"118","num_target":"1"},"learner_train_param":{"booster":"gbtree","disable_default_eval_metric":"0","multi_strategy":"one_output_per_tree","objective":"reg:squarederror"},"metrics":[{"name":"mae"},{"name":"mape"}],"objective":{"name":"reg:squarederror","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[2,1,4]} 
-------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/qualx/models/xgboost/databricks-azure.cfg: -------------------------------------------------------------------------------- 1 | {"learner":{"generic_param":{"device":"cpu","fail_on_invalid_gpu_id":"0","n_jobs":"0","nthread":"0","random_state":"0","seed":"0","seed_per_iteration":"0","validate_parameters":"1"},"gradient_booster":{"gbtree_model_param":{"num_parallel_tree":"1","num_trees":"96"},"gbtree_train_param":{"process_type":"default","tree_method":"auto","updater":"grow_quantile_histmaker","updater_seq":"grow_quantile_histmaker"},"name":"gbtree","specified_updater":false,"tree_train_param":{"alpha":"0","cache_opt":"1","colsample_bylevel":"1","colsample_bynode":"1","colsample_bytree":"1","eta":"0.042435348","gamma":"0.046802558","grow_policy":"depthwise","interaction_constraints":"","lambda":"1","learning_rate":"0.042435348","max_bin":"256","max_cat_threshold":"64","max_cat_to_onehot":"4","max_delta_step":"0","max_depth":"10","max_leaves":"0","min_child_weight":"4","min_split_loss":"0.046802558","monotone_constraints":"()","refresh_leaf":"1","reg_alpha":"0","reg_lambda":"1","sampling_method":"uniform","sketch_ratio":"2","sparse_threshold":"0.20000000000000001","subsample":"0.650764108"},"updater":[{"hist_train_param":{"debug_synchronize":"0","max_cached_hist_node":"65536"},"name":"grow_quantile_histmaker"}]},"learner_model_param":{"base_score":"6.2244874E-1","boost_from_average":"1","num_class":"0","num_feature":"118","num_target":"1"},"learner_train_param":{"booster":"gbtree","disable_default_eval_metric":"0","multi_strategy":"one_output_per_tree","objective":"reg:squarederror"},"metrics":[{"name":"mae"},{"name":"mape"}],"objective":{"name":"reg:squarederror","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[2,1,4]} -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/qualx/models/xgboost/databricks-azure_photon.cfg: -------------------------------------------------------------------------------- 1 | 
{"learner":{"generic_param":{"device":"cpu","fail_on_invalid_gpu_id":"0","n_jobs":"0","nthread":"0","random_state":"0","seed":"0","seed_per_iteration":"0","validate_parameters":"1"},"gradient_booster":{"gbtree_model_param":{"num_parallel_tree":"1","num_trees":"98"},"gbtree_train_param":{"process_type":"default","tree_method":"auto","updater":"grow_quantile_histmaker","updater_seq":"grow_quantile_histmaker"},"name":"gbtree","specified_updater":false,"tree_train_param":{"alpha":"0","cache_opt":"1","colsample_bylevel":"1","colsample_bynode":"1","colsample_bytree":"1","eta":"0.0466651507","gamma":"0.0462056585","grow_policy":"depthwise","interaction_constraints":"","lambda":"1","learning_rate":"0.0466651507","max_bin":"256","max_cat_threshold":"64","max_cat_to_onehot":"4","max_delta_step":"0","max_depth":"9","max_leaves":"0","min_child_weight":"2","min_split_loss":"0.0462056585","monotone_constraints":"()","refresh_leaf":"1","reg_alpha":"0","reg_lambda":"1","sampling_method":"uniform","sketch_ratio":"2","sparse_threshold":"0.20000000000000001","subsample":"0.716720402"},"updater":[{"hist_train_param":{"debug_synchronize":"0","max_cached_hist_node":"65536"},"name":"grow_quantile_histmaker"}]},"learner_model_param":{"base_score":"-3.1569883E-1","boost_from_average":"1","num_class":"0","num_feature":"118","num_target":"1"},"learner_train_param":{"booster":"gbtree","disable_default_eval_metric":"0","multi_strategy":"one_output_per_tree","objective":"reg:squarederror"},"metrics":[{"name":"mae"},{"name":"mape"}],"objective":{"name":"reg:squarederror","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[2,1,4]} -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/qualx/models/xgboost/dataproc.cfg: -------------------------------------------------------------------------------- 1 | {"learner":{"generic_param":{"device":"cpu","fail_on_invalid_gpu_id":"0","n_jobs":"0","nthread":"0","random_state":"0","seed":"0","seed_per_iteration":"0","validate_parameters":"1"},"gradient_booster":{"gbtree_model_param":{"num_parallel_tree":"1","num_trees":"76"},"gbtree_train_param":{"process_type":"default","tree_method":"auto","updater":"grow_quantile_histmaker","updater_seq":"grow_quantile_histmaker"},"name":"gbtree","specified_updater":false,"tree_train_param":{"alpha":"0","cache_opt":"1","colsample_bylevel":"1","colsample_bynode":"1","colsample_bytree":"1","eta":"0.0431159772","gamma":"0.021764569","grow_policy":"depthwise","interaction_constraints":"","lambda":"1","learning_rate":"0.0431159772","max_bin":"256","max_cat_threshold":"64","max_cat_to_onehot":"4","max_delta_step":"0","max_depth":"8","max_leaves":"0","min_child_weight":"2","min_split_loss":"0.021764569","monotone_constraints":"()","refresh_leaf":"1","reg_alpha":"0","reg_lambda":"1","sampling_method":"uniform","sketch_ratio":"2","sparse_threshold":"0.20000000000000001","subsample":"0.713761687"},"updater":[{"hist_train_param":{"debug_synchronize":"0","max_cached_hist_node":"65536"},"name":"grow_quantile_histmaker"}]},"learner_model_param":{"base_score":"8.6499995E-1","boost_from_average":"1","num_class":"0","num_feature":"118","num_target":"1"},"learner_train_param":{"booster":"gbtree","disable_default_eval_metric":"0","multi_strategy":"one_output_per_tree","objective":"reg:squarederror"},"metrics":[{"name":"mae"},{"name":"mape"}],"objective":{"name":"reg:squarederror","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[2,1,4]} 
-------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/qualx/models/xgboost/emr.cfg: -------------------------------------------------------------------------------- 1 | {"learner":{"generic_param":{"device":"cpu","fail_on_invalid_gpu_id":"0","n_jobs":"0","nthread":"0","random_state":"0","seed":"0","seed_per_iteration":"0","validate_parameters":"1"},"gradient_booster":{"gbtree_model_param":{"num_parallel_tree":"1","num_trees":"99"},"gbtree_train_param":{"process_type":"default","tree_method":"auto","updater":"grow_quantile_histmaker","updater_seq":"grow_quantile_histmaker"},"name":"gbtree","specified_updater":false,"tree_train_param":{"alpha":"0","cache_opt":"1","colsample_bylevel":"1","colsample_bynode":"1","colsample_bytree":"1","eta":"0.0383473933","gamma":"0.00790323503","grow_policy":"depthwise","interaction_constraints":"","lambda":"1","learning_rate":"0.0383473933","max_bin":"256","max_cat_threshold":"64","max_cat_to_onehot":"4","max_delta_step":"0","max_depth":"5","max_leaves":"0","min_child_weight":"5","min_split_loss":"0.00790323503","monotone_constraints":"()","refresh_leaf":"1","reg_alpha":"0","reg_lambda":"1","sampling_method":"uniform","sketch_ratio":"2","sparse_threshold":"0.20000000000000001","subsample":"0.632138073"},"updater":[{"hist_train_param":{"debug_synchronize":"0","max_cached_hist_node":"65536"},"name":"grow_quantile_histmaker"}]},"learner_model_param":{"base_score":"2.5273216E-1","boost_from_average":"1","num_class":"0","num_feature":"118","num_target":"1"},"learner_train_param":{"booster":"gbtree","disable_default_eval_metric":"0","multi_strategy":"one_output_per_tree","objective":"reg:squarederror"},"metrics":[{"name":"mae"},{"name":"mape"}],"objective":{"name":"reg:squarederror","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[2,1,4]} -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/qualx/models/xgboost/onprem.cfg: -------------------------------------------------------------------------------- 1 | 
{"learner":{"generic_param":{"device":"cpu","fail_on_invalid_gpu_id":"0","n_jobs":"0","nthread":"0","random_state":"0","seed":"0","seed_per_iteration":"0","validate_parameters":"1"},"gradient_booster":{"gbtree_model_param":{"num_parallel_tree":"1","num_trees":"91"},"gbtree_train_param":{"process_type":"default","tree_method":"auto","updater":"grow_quantile_histmaker","updater_seq":"grow_quantile_histmaker"},"name":"gbtree","specified_updater":false,"tree_train_param":{"alpha":"0","cache_opt":"1","colsample_bylevel":"1","colsample_bynode":"1","colsample_bytree":"1","eta":"0.0462935194","gamma":"0.0235585812","grow_policy":"depthwise","interaction_constraints":"","lambda":"1","learning_rate":"0.0462935194","max_bin":"256","max_cat_threshold":"64","max_cat_to_onehot":"4","max_delta_step":"0","max_depth":"9","max_leaves":"0","min_child_weight":"3","min_split_loss":"0.0235585812","monotone_constraints":"()","refresh_leaf":"1","reg_alpha":"0","reg_lambda":"1","sampling_method":"uniform","sketch_ratio":"2","sparse_threshold":"0.20000000000000001","subsample":"0.798991024"},"updater":[{"hist_train_param":{"debug_synchronize":"0","max_cached_hist_node":"65536"},"name":"grow_quantile_histmaker"}]},"learner_model_param":{"base_score":"8.761862E-1","boost_from_average":"1","num_class":"0","num_feature":"118","num_target":"1"},"learner_train_param":{"booster":"gbtree","disable_default_eval_metric":"0","multi_strategy":"one_output_per_tree","objective":"reg:squarederror"},"metrics":[{"name":"mae"},{"name":"mape"}],"objective":{"name":"reg:squarederror","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[2,1,4]} -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/templates/cluster_template/databricks_aws.ms: -------------------------------------------------------------------------------- 1 | { 2 | "cluster_id": "1234-5678-test", 3 | "cluster_name": "default-cluster-name", 4 | "driver_node_type_id": {{{ DRIVER_NODE_TYPE }}}, 5 | "node_type_id": {{{ WORKER_NODE_TYPE }}}, 6 | "num_workers": {{ NUM_WORKER_NODES }}, 7 | "state": "TERMINATED" 8 | } 9 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/templates/cluster_template/databricks_azure.ms: -------------------------------------------------------------------------------- 1 | { 2 | "cluster_id": "1234-5678-1234567", 3 | "driver": { 4 | "node_id": "1234567890" 5 | }, 6 | "cluster_name": "default-cluster-name", 7 | "driver_node_type_id": {{{ DRIVER_NODE_TYPE }}}, 8 | "node_type_id": {{{ WORKER_NODE_TYPE }}}, 9 | "num_workers": {{ NUM_WORKER_NODES }}, 10 | "state": "TERMINATED" 11 | } 12 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/templates/cluster_template/dataproc.ms: -------------------------------------------------------------------------------- 1 | { 2 | "clusterName": "default-cluster-name", 3 | "clusterUuid": "1234-5678-1234567", 4 | "config": { 5 | "gceClusterConfig": { 6 | "zoneUri": {{{ ZONE }}} 7 | }, 8 | "masterConfig": { 9 | "instanceNames": [ 10 | "test-node-d" 11 | ], 12 | "machineTypeUri": {{{ DRIVER_NODE_TYPE }}}, 13 | "numInstances": {{ NUM_DRIVER_NODES }} 14 | }, 15 | "workerConfig": { 16 | {{# GPU_NAME }} 17 | "accelerators": [ 18 | { 19 | "acceleratorCount": {{ NUM_GPUS }}, 20 | "acceleratorTypeUri": {{{ GPU_NAME }}} 21 | } 22 | ], 23 | {{/ GPU_NAME }} 24 | "machineTypeUri": {{{ WORKER_NODE_TYPE 
}}}, 25 | "numInstances": {{ NUM_WORKER_NODES }} 26 | }, 27 | "softwareConfig": { 28 | "imageVersion": {{{ IMAGE }}} 29 | } 30 | }, 31 | "status": { 32 | "state": "STOPPED" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/templates/cluster_template/emr.ms: -------------------------------------------------------------------------------- 1 | { 2 | "Cluster": { 3 | "Id": "j-123456789", 4 | "Name": "default-cluster-name", 5 | "Status": { 6 | "State": "TERMINATED" 7 | }, 8 | "Ec2InstanceAttributes": { 9 | "Ec2AvailabilityZone": {{{ ZONE }}} 10 | }, 11 | "InstanceGroups": [ 12 | { 13 | "Id": "ig-123456789012e", 14 | "Name": "CORE", 15 | "Market": "ON_DEMAND", 16 | "InstanceGroupType": "CORE", 17 | "InstanceType": {{{ WORKER_NODE_TYPE }}}, 18 | "RequestedInstanceCount": {{ NUM_WORKER_NODES }} 19 | }, 20 | { 21 | "Id": "ig-123456789012d", 22 | "Name": "MASTER", 23 | "Market": "ON_DEMAND", 24 | "InstanceGroupType": "MASTER", 25 | "InstanceType": {{{ DRIVER_NODE_TYPE }}}, 26 | "RequestedInstanceCount": {{ NUM_DRIVER_NODES }} 27 | } 28 | ], 29 | "ReleaseLabel": {{{ IMAGE }}} 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/templates/cluster_template/onprem.ms: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "workerConfig": { 4 | {{# GPU_NAME }} 5 | "gpuInfo": { 6 | "count": {{ NUM_GPUS }}, 7 | "name": {{{ GPU_NAME }}} 8 | }, 9 | {{/ GPU_NAME }} 10 | "numCores": {{ NUM_WORKER_CORES }}, 11 | "numWorkers": {{ NUM_WORKER_NODES }} 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/templates/dataproc-create_gpu_cluster_script.ms: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export CLUSTER_NAME="{{ CLUSTER_NAME }}" 4 | 5 | gcloud dataproc clusters create $CLUSTER_NAME \ 6 | --image-version={{ IMAGE }} \ 7 | --region {{ REGION }} \ 8 | --zone {{ ZONE }} \ 9 | --master-machine-type {{ MASTER_MACHINE }} \ 10 | --num-workers {{ WORKERS_COUNT }} \ 11 | --worker-machine-type {{ WORKERS_MACHINE }} \ 12 | --num-worker-local-ssds {{ LOCAL_SSD }} \ 13 | --enable-component-gateway \ 14 | --subnet=default \ 15 | --initialization-actions=gs://goog-dataproc-initialization-actions-{{ REGION }}/spark-rapids/spark-rapids.sh \ 16 | --worker-accelerator type={{ GPU_DEVICE }},count={{ GPU_PER_WORKER }} \ 17 | --properties 'spark:spark.driver.memory=50g' -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/templates/dataproc-run_bootstrap.ms: -------------------------------------------------------------------------------- 1 | # To see all options, run `spark_rapids_user_tools dataproc bootstrap -- --help` 2 | 3 | # The following cmd overrides the default Apache Spark configurations 4 | # on the cluster (requires SSH) 5 | spark_rapids_user_tools dataproc bootstrap \ 6 | --cluster {{ CLUSTER_NAME }} \ 7 | --verbose \ 8 | --nodry_run 9 | 10 | # The following cmd dumps the recommended configurations to the output 11 | # without overriding the existing cluster configurations 12 | spark_rapids_user_tools dataproc bootstrap \ 13 | --cluster {{ CLUSTER_NAME }} \ 14 | --verbose 
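The `.ms` files in these template directories are mustache templates: `{{ VAR }}` interpolates a value, `{{{ VAR }}}` interpolates without escaping, and `{{# GPU_NAME }}...{{/ GPU_NAME }}` emits its body only when `GPU_NAME` is provided. A hedged rendering sketch for the `onprem.ms` cluster template shown earlier, assuming the third-party `chevron` mustache package and made-up values (note that the triple-mustache slots expect the value to carry its own JSON quoting):

```python
# Illustrative rendering of the onprem.ms cluster template.
import json

import chevron  # assumed mustache implementation (pip install chevron)

with open('onprem.ms', 'r', encoding='utf-8') as fh:
    template = fh.read()

rendered = chevron.render(template, {
    'GPU_NAME': '"a100"',    # value supplies its own quotes for {{{ GPU_NAME }}}
    'NUM_GPUS': 2,
    'NUM_WORKER_CORES': 32,
    'NUM_WORKER_NODES': 4,
})
print(json.dumps(json.loads(rendered), indent=2))  # valid JSON once filled in
```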
-------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/templates/emr-create_gpu_cluster_script.ms: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export CLUSTER_NAME="{{ CLUSTER_NAME }}" 4 | 5 | aws emr create-cluster \ 6 | --name "$CLUSTER_NAME" \ 7 | --release-label {{ IMAGE }} \ 8 | --log-uri s3://$LOG_BUCKET/logs \ 9 | --applications Name=Hadoop Name=Spark Name=Livy Name=JupyterEnterpriseGateway \ 10 | --bootstrap-actions '[{"Path":"s3://BUCKET_NAME/aws-emr-bootstrap.sh","Name":"My Spark Rapids Bootstrap action"}]' \ 11 | --ec2-attributes '{"KeyName":"MY_KEY_NAME","InstanceProfile":"EMR_EC2_DefaultRole","AvailabilityZone":"{{ ZONE }}"}' \ 12 | --service-role EMR_DefaultRole \ 13 | --instance-groups InstanceGroupType=MASTER,InstanceCount=1,InstanceType={{ MASTER_MACHINE }} \ 14 | InstanceGroupType=CORE,InstanceCount={{ WORKERS_COUNT }},InstanceType={{ WORKERS_MACHINE }} \ 15 | --configurations file://aws-emr-configuration.json \ 16 | --ebs-root-volume-size 100 17 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/templates/emr-run_bootstrap.ms: -------------------------------------------------------------------------------- 1 | # To see all options, run `spark_rapids_user_tools emr bootstrap -- --help` 2 | 3 | # The following cmd overrides the default Apache Spark configurations 4 | # on the cluster (requires SSH) 5 | spark_rapids_user_tools emr bootstrap \ 6 | --cluster {{ CLUSTER_NAME }} \ 7 | --verbose \ 8 | --key_pair_path ${KEY_PATH} \ 9 | --nodry_run 10 | 11 | # The following cmd dumps the recommended configurations to the output 12 | # without overriding the existing cluster configurations 13 | spark_rapids_user_tools emr bootstrap \ 14 | --cluster {{ CLUSTER_NAME }} \ 15 | --verbose -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/templates/node_template/databricks_aws.ms: -------------------------------------------------------------------------------- 1 | { 2 | "node_id" : "123456789qwertyuiop" 3 | } 4 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/templates/node_template/databricks_azure.ms: -------------------------------------------------------------------------------- 1 | { 2 | "node_id" : "123456789qwertyuiop" 3 | } 4 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/resources/templates/node_template/emr.ms: -------------------------------------------------------------------------------- 1 | { 2 | "Id": "ci-123456789", 3 | "Ec2InstanceId": "i-123456789", 4 | "PublicDnsName": "ec2-12-34-567-890.us-west-2.compute.amazonaws.com", 5 | "Status": { 6 | "State": "TERMINATED" 7 | }, 8 | "InstanceGroupId": {{{ INSTANCE_GROUP_ID }}}, 9 | "Market": "ON_DEMAND", 10 | "InstanceType": {{{ INSTANCE_TYPE }}} 11 | } -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/wrapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Wrapper class to run tools associated with RAPIDS Accelerator for Apache Spark plugin.""" 16 | 17 | import fire 18 | 19 | from spark_rapids_pytools.wrappers.databricks_aws_wrapper import DBAWSWrapper 20 | from spark_rapids_pytools.wrappers.databricks_azure_wrapper import DBAzureWrapper 21 | from spark_rapids_pytools.wrappers.dataproc_wrapper import DataprocWrapper 22 | from spark_rapids_pytools.wrappers.emr_wrapper import EMRWrapper 23 | 24 | 25 | def main(): 26 | fire.Fire({ 27 | 'emr': EMRWrapper, 28 | 'dataproc': DataprocWrapper, 29 | 'databricks-aws': DBAWSWrapper, 30 | 'databricks-azure': DBAzureWrapper, 31 | }) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_pytools/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the wrappers package.""" 16 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """init file of the Accelerated Spark python implementations""" 16 | 17 | 18 | from .enums import ( 19 | EnumeratedType, CspEnv 20 | ) 21 | 22 | from .utils import ( 23 | get_elem_from_dict, get_elem_non_safe 24 | ) 25 | 26 | from .storagelib.csppath import ( 27 | CspPath, path_impl_registry, CspPathT 28 | ) 29 | 30 | __all__ = [ 31 | 'EnumeratedType', 32 | 'CspEnv', 33 | 'get_elem_from_dict', 34 | 'get_elem_non_safe', 35 | 'CspPathT', 36 | 'path_impl_registry', 37 | 'CspPath' 38 | ] 39 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/cloud/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the library that represents CSP interface and functionalities""" 16 | 17 | from .cluster import ClientCluster 18 | from .onprem.onpremcluster import OnPremClientCluster 19 | from .emr.emrcluster import EmrClientCluster 20 | from .dataproc.dataproccluster import DataprocClientCluster, DataprocGkeClientCluster 21 | from .databricks.dbcluster import DBAwsClientCluster, DBAzureClientCluster 22 | 23 | __all__ = [ 24 | 'ClientCluster', 25 | 'DBAwsClientCluster', 26 | 'DBAzureClientCluster', 27 | 'DataprocClientCluster', 28 | 'DataprocGkeClientCluster', 29 | 'EmrClientCluster', 30 | 'OnPremClientCluster' 31 | ] 32 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/cloud/databricks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the cloud implementation for databricks.""" 16 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/cloud/dataproc/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the cloud implementation for dataproc.""" 16 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/cloud/emr/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the cloud implementation for EMR.""" 16 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/cloud/emr/emrcluster.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | Define implementation for the EMR cluster 17 | """ 18 | 19 | from typing import ClassVar, Type 20 | 21 | from spark_rapids_tools.cloud.cluster import register_cluster_prop_mgr, register_client_cluster, ClusterPropMgr, ClientCluster 22 | from spark_rapids_tools.utils.propmanager import PropValidatorSchemaUpper, PropValidatorSchema 23 | 24 | 25 | class EmrClusterSchema(PropValidatorSchemaUpper): 26 | cluster: dict 27 | 28 | 29 | @register_cluster_prop_mgr('emr') 30 | class EmrClusterPropMgr(ClusterPropMgr): 31 | schema_clzz: ClassVar[Type['PropValidatorSchema']] = EmrClusterSchema 32 | 33 | 34 | @register_client_cluster('emr') 35 | class EmrClientCluster(ClientCluster): # pylint: disable=too-few-public-methods 36 | pass 37 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/cloud/onprem/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the cloud implementation for onPrem.""" 16 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/cmdli/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the user CLI used to run the tools""" 16 | 17 | from .dev_cli import DevCLI 18 | from .tools_cli import ToolsCLI 19 | 20 | __all__ = [ 21 | 'ToolsCLI', 22 | 'DevCLI' 23 | ] 24 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/configuration/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the tools configurations module""" 16 | 17 | from .common import RuntimeDependencyType, DependencyVerification, RuntimeDependency 18 | 19 | __all__ = [ 20 | 'RuntimeDependency', 21 | 'RuntimeDependencyType', 22 | 'DependencyVerification' 23 | ] 24 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/configuration/runtime_conf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024-2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """The runtime configurations of the tools as defined by the user.""" 16 | 17 | from typing import List 18 | 19 | from pydantic import Field 20 | 21 | from spark_rapids_tools.configuration.common import RuntimeDependency, BaseConfig 22 | 23 | 24 | class ToolsRuntimeConfig(BaseConfig): 25 | """The runtime configurations of the tools as defined by the user.""" 26 | dependencies: List[RuntimeDependency] = Field( 27 | description='The list of runtime dependencies required by the tools java cmd. ' 28 | 'Set this list to specify Spark binaries along with any other required jar ' 29 | 'files (e.g., Hadoop jars, GCP connectors, etc.). ' 30 | 'When specified, the default predefined dependencies will be ignored.') 31 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/configuration/submission/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/configuration/submission/local_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ Configuration file for local submission mode """ 16 | from typing import Optional 17 | 18 | from pydantic import Field 19 | 20 | from spark_rapids_tools.configuration.common import SubmissionConfig 21 | from spark_rapids_tools.configuration.tools_config import ToolsConfig 22 | 23 | 24 | class LocalSubmissionConfig(SubmissionConfig): 25 | """Configuration class for local submission mode""" 26 | 27 | 28 | class LocalToolsConfig(ToolsConfig): 29 | """Container for the local submission mode configurations. 
This is the part of the configuration that 30 | can be passed as an input to the CLI""" 31 | submission: Optional[LocalSubmissionConfig] = Field( 32 | default=None, 33 | description='Configuration related to local submission mode.') 34 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/storagelib/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the storagelib package which offers a common interface to access any FS protocol.""" 16 | 17 | from .s3.s3fs import S3Fs 18 | from .s3.s3path import S3Path 19 | from .gcs.gcsfs import GcsFs 20 | from .gcs.gcspath import GcsPath 21 | from .hdfs.hdfsfs import HdfsFs 22 | from .hdfs.hdfspath import HdfsPath 23 | from .adls.adlsfs import AdlsFs 24 | from .adls.adlspath import AdlsPath 25 | from .local.localfs import LocalFs 26 | from .local.localpath import LocalPath 27 | from .csppath import CspPathT, path_impl_registry, CspPath 28 | from .cspfs import CspFs, BoundedArrowFsT, register_fs_class 29 | 30 | __all__ = [ 31 | 'AdlsFs', 32 | 'AdlsPath', 33 | 'CspFs', 34 | 'CspPath', 35 | 'BoundedArrowFsT', 36 | 'GcsFs', 37 | 'GcsPath', 38 | 'HdfsFs', 39 | 'HdfsPath', 40 | 'LocalFs', 41 | 'LocalPath', 42 | 'CspPathT', 43 | 'path_impl_registry', 44 | 'register_fs_class', 45 | 'S3Fs', 46 | 'S3Path', 47 | ] 48 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/storagelib/adls/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the storage library of ADLS systems (Azure data lake storage)""" 16 | 17 | from .adlsfs import AdlsFs 18 | from .adlspath import AdlsPath 19 | 20 | __all__ = [ 21 | 'AdlsFs', 22 | 'AdlsPath', 23 | ] 24 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/storagelib/gcs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the storage library of googleStorage systems""" 16 | 17 | from .gcsfs import GcsFs 18 | from .gcspath import GcsPath 19 | 20 | __all__ = [ 21 | 'GcsFs', 22 | 'GcsPath', 23 | ] 24 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/storagelib/gcs/gcsfs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Wrapper for the Google storage File system""" 16 | 17 | from ..cspfs import CspFs, register_fs_class 18 | 19 | 20 | @register_fs_class('gcs', 'GcsFileSystem') 21 | class GcsFs(CspFs): 22 | """Implementation of FileSystem for Google storage on top of pyArrow 23 | (Docstring copied from pyArrow.GcsFileSystem). 24 | 25 | The GcsFileSystem is initialized with the following list of arguments: 26 | 27 | >>> GcsFileSystem(bool anonymous=False, *, 28 | ... access_token=None, target_service_account=None, 29 | ... credential_token_expiration=None, default_bucket_location='US', 30 | ... scheme=None, endpoint_override=None, default_metadata=None, retry_time_limit=None) 31 | 32 | the constructor uses the process described in https://google.aip.dev/auth/4110 33 | to resolve credentials. If not running on Google Cloud Platform (GCP), this generally requires 34 | the environment variable GOOGLE_APPLICATION_CREDENTIALS to point to a JSON file containing 35 | credentials. 36 | """ 37 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/storagelib/gcs/gcspath.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Wrapper implementation for Gstorage remote path""" 16 | 17 | from ..csppath import register_path_class, CspPath 18 | 19 | 20 | @register_path_class("gcs") 21 | class GcsPath(CspPath): 22 | protocol_prefix: str = "gs://" 23 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/storagelib/hdfs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the storage library of Hadoop file systems""" 16 | 17 | from .hdfsfs import HdfsFs 18 | from .hdfspath import HdfsPath 19 | 20 | __all__ = [ 21 | 'HdfsFs', 22 | 'HdfsPath', 23 | ] 24 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/storagelib/hdfs/hdfspath.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Wrapper implementation for HDFS remote path""" 16 | 17 | from ..csppath import register_path_class, CspPath 18 | 19 | 20 | @register_path_class("hdfs") 21 | class HdfsPath(CspPath): 22 | protocol_prefix: str = "hdfs://" 23 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/storagelib/local/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """init file of the storage library of local disk storage""" 16 | 17 | from .localfs import LocalFs 18 | from .localpath import LocalPath 19 | 20 | __all__ = [ 21 | 'LocalFs', 22 | 'LocalPath', 23 | ] 24 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/storagelib/local/localfs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Wrapper for the local File system""" 16 | 17 | 18 | from ..cspfs import CspFs, register_fs_class 19 | 20 | 21 | @register_fs_class('local', 'LocalFileSystem') 22 | class LocalFs(CspFs): 23 | """ 24 | A FileSystem implementation accessing files on the local machine. Implemented on top of pyArrow. 25 | """ 26 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/storagelib/local/localpath.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Wrapper implementation for local path""" 16 | 17 | from ..csppath import register_path_class, CspPath 18 | 19 | 20 | @register_path_class('local') 21 | class LocalPath(CspPath): 22 | protocol_prefix: str = 'file://' 23 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/storagelib/s3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """init file of the storage library of S3""" 16 | 17 | from .s3fs import S3Fs 18 | from .s3path import S3Path 19 | 20 | __all__ = [ 21 | "S3Fs", 22 | "S3Path", 23 | ] 24 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/storagelib/s3/s3path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Wrapper implementation for S3 remote path""" 16 | 17 | from ..csppath import CspPath, register_path_class 18 | 19 | 20 | @register_path_class("s3") 21 | class S3Path(CspPath): 22 | protocol_prefix: str = "s3://" 23 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the library that interface with RAPIDS tools""" 16 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/tools/autotuner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Includes classes and wrappers related to autotuner feature""" 16 | 17 | from typing import Optional, ClassVar, Type 18 | 19 | from spark_rapids_tools.utils.propmanager import PropValidatorSchemaCamel, PropValidatorSchema, AbstractPropContainer 20 | 21 | 22 | class AutoTunerInputSchema(PropValidatorSchemaCamel): 23 | system: dict 24 | gpu: Optional[dict] = None 25 | software_properties: Optional[dict] = None 26 | 27 | 28 | class AutoTunerPropMgr(AbstractPropContainer): 29 | schema_clzz: ClassVar[Type['PropValidatorSchema']] = AutoTunerInputSchema 30 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/tools/qualx/split_functions/split_all_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Default split functions.""" 16 | 17 | import pandas as pd 18 | 19 | 20 | def split_function(features: pd.DataFrame, **kwargs) -> pd.DataFrame: 21 | """Sets all rows to 'test' split.""" 22 | # pylint: disable=unused-argument 23 | features['split'] = 'test' 24 | return features 25 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the utils package for the Accelerated Spark tools""" 16 | 17 | from .util import ( 18 | get_elem_from_dict, get_elem_non_safe, is_http_file, Utilities 19 | ) 20 | 21 | from .propmanager import ( 22 | AbstractPropContainer, 23 | PropValidatorSchema 24 | ) 25 | 26 | __all__ = [ 27 | 'get_elem_from_dict', 28 | 'get_elem_non_safe', 29 | 'AbstractPropContainer', 30 | 'PropValidatorSchema', 31 | 'is_http_file', 32 | 'Utilities' 33 | ] 34 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools_distributed/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools_distributed/output_processing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools_distributed/spark_management/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /user_tools/src/spark_rapids_tools_distributed/spark_map_task/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | """ 17 | This module contains the functions that are executed on the worker nodes as map tasks. 18 | It must not depend on any third-party packages, because the module is serialized and 19 | shipped to the worker nodes.
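A minimal sketch of a map task that honors this constraint (the function name and its
argument are hypothetical, for illustration only):

    import subprocess

    def run_jar_cmd(jar_cmd: list) -> str:
        # Only standard-library imports are used, so the function can be
        # pickled and executed on a worker node without extra packages.
        res = subprocess.run(jar_cmd, capture_output=True, text=True, check=False)
        return res.stdout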
20 | """ 21 | -------------------------------------------------------------------------------- /user_tools/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the testing package""" 16 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_e2e/resources/datasets/onprem/nds.json: -------------------------------------------------------------------------------- 1 | { 2 | "nds_local": { 3 | "eventlogs": [ 4 | "${QUALX_DATA_DIR}/onprem/nds/power/eventlogs" 5 | ], 6 | "app_meta": { 7 | "app-20231122005806-0064": {"runType": "CPU", "scaleFactor": 10}, 8 | "app-20231114200842-0001": {"runType": "GPU", "scaleFactor": 10} 9 | }, 10 | "split_function": "${QUALX_DIR}/plugins/nds.py" 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/gpu_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/gpu_eventlog.zstd -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/incorrect_app_status_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/incorrect_app_status_eventlog.zstd -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/join_agg_on_yarn_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/join_agg_on_yarn_eventlog.zstd -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/onprem/README.md: -------------------------------------------------------------------------------- 1 | # Sample eventlogs 2 | 3 | These eventlogs were produced by running the [NDS benchmarks](https://github.com/NVIDIA/spark-rapids-benchmarks) on both CPU and GPU versions of a Spark local cluster set up in an onprem environment. For simplicity, these are just copies of eventlogs used for training the qualx `onprem` model. 
4 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/onprem/nds/power/eventlogs/cpu/app-20231122005806-0064.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/onprem/nds/power/eventlogs/cpu/app-20231122005806-0064.zstd -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/onprem/nds/power/eventlogs/gpu/app-20231114200842-0001.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/onprem/nds/power/eventlogs/gpu/app-20231114200842-0001.zstd -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/photon_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/photon_eventlog.zstd -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/streaming_eventlog.zstd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/streaming_eventlog.zstd -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_e2e/resources/scripts/common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2024, NVIDIA CORPORATION. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | readonly E2E_TEST_HDFS_DIR="${E2E_TEST_TMP_DIR}/hadoop" 18 | export E2E_TEST_HDFS_DIR E2E_TEST_TMP_DIR 19 | export LC_ALL=C 20 | 21 | err() { 22 | echo "ERROR: $1" >&2 23 | exit 1 24 | } 25 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/core-site.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <!-- 3 | Copyright (c) 2024, NVIDIA CORPORATION. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. --> 16 | <configuration> 17 | 18 | <property> 19 | <name>fs.defaultFS</name> 20 | <value>hdfs://localhost:9000</value> 21 | </property> 22 | <property> 23 | <name>hadoop.tmp.dir</name> 24 | <value>${E2E_TEST_DATA_NODE_DIR}</value> 25 | </property> 26 | </configuration> 27 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <!-- 3 | Copyright (c) 2024, NVIDIA CORPORATION. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. --> 16 | <configuration> 17 | 18 | <property> 19 | <name>dfs.replication</name> 20 | <value>1</value> 21 | </property> 22 | <property> 23 | <name>dfs.namenode.name.dir</name> 24 | <value>file:${E2E_TEST_NAME_NODE_DIR}</value> 25 | </property> 26 | <property> 27 | <name>dfs.datanode.data.dir</name> 28 | <value>file:${E2E_TEST_DATA_NODE_DIR}</value> 29 | </property> 30 | </configuration> 31 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """init file of the unit-tests package""" 16 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/qualx/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | """init file of the unit-tests package""" 16 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/resources/cluster/databricks/test-azure-instances-catalog.json: -------------------------------------------------------------------------------- 1 | { 2 | "Standard_NC4as_T4_v3": { 3 | "VCpuInfo": { 4 | "DefaultVCpus": 4 5 | }, 6 | "MemoryInfo": { 7 | "SizeInMiB": 0 8 | }, 9 | "GpuInfo": { 10 | "GPUs": [ 11 | { 12 | "Name": "", 13 | "Manufacturer": "", 14 | "Count": 1, 15 | "MemoryInfo": { 16 | "SizeInMiB": 0 17 | } 18 | } 19 | ] 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/resources/cluster/onprem/cpu-00.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | config: 16 | masterConfig: 17 | numCores: 2 18 | memory: 7680MiB 19 | workerConfig: 20 | numCores: 8 21 | memory: 7680MiB 22 | numWorkers: 2 23 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/resources/eventlogs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/user_tools/tests/spark_rapids_tools_ut/resources/eventlogs/.gitkeep -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/resources/tools_config/invalid/tools_config_inv_00.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | # This yaml file is a configuration file for a tool that uses Spark 3.5.0 as a dependency 16 | # invalid: the API version is lower than the minimum accepted API version 17 | api_version: '0.9' 18 | runtime: 19 | dependencies: 20 | - name: my-spark350 21 | uri: https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz 22 | dependency_type: 23 | dep_type: archive 24 | # for tgz files, it is required to give the subfolder where the jars are located 25 | relative_path: jars/* 26 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/resources/tools_config/invalid/tools_config_inv_01.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # This yaml file is a configuration file 16 | # invalid: the dependencies list is empty 17 | # Error: 1 validation error for ToolsConfig 18 | # runtime.dependencies 19 | # Input should be a valid list [type=list_type, input_value=None, input_type=NoneType] 20 | api_version: '1.0' 21 | runtime: 22 | dependencies: 23 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/resources/tools_config/valid/tools_config_00.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # This yaml file is a configuration file for a tool that uses Spark 3.5.0 as a dependency 16 | # Minimal file content. 17 | api_version: '1.0' 18 | runtime: 19 | dependencies: 20 | - name: my-spark350 21 | uri: https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz 22 | dependency_type: 23 | dep_type: archive 24 | # for tgz files, it is required to give the subfolder where the jars are located 25 | relative_path: jars/* 26 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/resources/tools_config/valid/tools_config_01.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # This yaml file is a sample configuration file for the distributed tools. It is valid 16 | # only if `--submission_mode distributed` is passed to the CLI. It provides 17 | # submission-related configurations. 18 | api_version: '1.1' 19 | submission: 20 | remote_cache_dir: 'hdfs:///tmp/spark_rapids_distributed_tools_cache' 21 | spark_properties: 22 | - name: 'spark.executor.memory' 23 | value: '20g' 24 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/resources/tools_config/valid/tools_config_02.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # This yaml file is a sample configuration file for the distributed tools. It is valid 16 | # only if `--submission_mode distributed` is passed to the CLI. It provides runtime 17 | # dependencies and submission-related configurations. 18 | api_version: '1.1' 19 | runtime: 20 | dependencies: 21 | - name: my-spark350 22 | uri: https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz 23 | dependency_type: 24 | dep_type: archive 25 | # for tgz files, it is required to give the subfolder where the jars are located 26 | relative_path: jars/* 27 | submission: 28 | remote_cache_dir: 'hdfs:///tmp/spark_rapids_distributed_tools_cache' 29 | spark_properties: 30 | - name: 'spark.executor.memory' 31 | value: '20g' 32 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/resources/tools_config/valid/tools_config_03.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | # This yaml file is a configuration file for a tool that uses Spark as a dependency 16 | # and uses the environment variable SPARK_HOME to locate the jars. 17 | # Minimal file content. 18 | api_version: '1.2' 19 | runtime: 20 | dependencies: 21 | - name: my-spark-from-cp 22 | uri: !ENV ${SPARK_HOME}/jars/* 23 | dependency_type: 24 | dep_type: classpath 25 | -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/resources/tools_mock.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-tools/f7a0e81f343b80350ab993ebe71525b7bc81deee/user_tools/tests/spark_rapids_tools_ut/resources/tools_mock.jar -------------------------------------------------------------------------------- /user_tools/tests/spark_rapids_tools_ut/resources/worker_info.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | system: 16 | numCores: 32 17 | memory: 212992MiB 18 | numWorkers: 5 19 | gpu: 20 | memory: 15109MiB 21 | count: 4 22 | name: T4 23 | softwareProperties: 24 | spark.driver.maxResultSize: 7680m 25 | spark.driver.memory: 15360m 26 | spark.executor.cores: '8' 27 | spark.executor.instances: '2' 28 | spark.executor.memory: 47222m 29 | spark.executorEnv.OPENBLAS_NUM_THREADS: '1' 30 | spark.scheduler.mode: FAIR 31 | spark.sql.cbo.enabled: 'true' 32 | spark.ui.port: '0' 33 | spark.yarn.am.memory: 640m 34 | --------------------------------------------------------------------------------
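As a closing illustration, `worker_info.yaml` above has exactly the shape that `AutoTunerInputSchema` (defined in `autotuner.py` earlier in this listing) validates. A minimal sketch of loading it, assuming PyYAML is available and that `PropValidatorSchemaCamel` is a pydantic base whose camelCase aliasing accepts the keys as written in the YAML (an assumption about its alias behavior):

```python
import yaml

from spark_rapids_tools.tools.autotuner import AutoTunerInputSchema

with open('worker_info.yaml') as f:
    props = yaml.safe_load(f)

# 'system' is mandatory; 'gpu' and 'softwareProperties' are optional fields,
# so validation fails loudly only if the system block is missing or malformed.
worker = AutoTunerInputSchema(**props)
print(worker.system['numCores'], worker.gpu['name'])
```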