├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md ├── dev-requirements.txt ├── requirements.txt └── workflows │ ├── dag-check.yml │ ├── pyink-check.yml │ ├── pylint-check.yml │ ├── require-checklist.yml │ └── unit-test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── .vscode ├── extensions.json └── settings.json ├── LICENSE ├── README.md ├── __init__.py ├── dags ├── __init__.py ├── common │ ├── __init__.py │ ├── model_configs.py │ ├── quarantined_tests.py │ ├── test_owner.py │ └── vm_resource.py ├── composer_env.py ├── dashboard │ ├── airflow_to_bq_export.py │ ├── configs │ │ ├── __init__.py │ │ └── export_config.py │ └── xlml_to_buganizer.py ├── examples │ ├── always_fail.py │ ├── configs │ │ ├── __init__.py │ │ ├── xpk.Dockerfile │ │ └── xpk_example_config.py │ ├── maxtext_aqtp_version_sweep_gke_example_dag.py │ ├── maxtext_profile_namegen_example_dag.py │ ├── maxtext_profile_sweep_example_dag.py │ ├── maxtext_sweep_gke_example_dag.py │ └── xpk_example_dag.py ├── framework3p │ ├── configs │ │ └── microbenchmarks_config.py │ └── microbenchmarks_dag.py ├── gcs_bucket.py ├── inference │ ├── configs │ │ ├── jetstream_benchmark_serving_gce_config.py │ │ ├── jetstream_pytorch_gce_config.py │ │ ├── maxtext_gpu_inference_config.py │ │ ├── maxtext_inference_microbenchmark_gce_config.py │ │ ├── trt_llm_inference_config.py │ │ ├── trt_llm_mlperf_v40_config.py │ │ ├── trt_llm_mlperf_v41_config.py │ │ └── trtllm_bench_inference_config.py │ ├── jetstream_inference_e2e.py │ ├── jetstream_pytorch_inference.py │ ├── maxtext_gpu_inference.py │ ├── maxtext_inference.py │ ├── maxtext_inference_microbenchmark.py │ ├── maxtext_inference_offline_benchmark.py │ ├── maxtext_model_config_generator.py │ ├── trt_llm_inference.py │ ├── trt_llm_mlperf_v40_inference.py │ ├── trt_llm_mlperf_v41_inference.py │ ├── trtllm_bench_inference.py │ └── utils │ │ ├── maxtext_gpu_microbenchmark_jsonl_converter.py │ │ └── trtllm_bench_jsonl_converter.py ├── infra │ └── clean_up.py ├── legacy_test │ ├── templates │ │ ├── base.libsonnet │ │ ├── gpus.libsonnet │ │ ├── metrics.libsonnet │ │ ├── mixins.libsonnet │ │ ├── timeouts.libsonnet │ │ ├── tpus.libsonnet │ │ ├── utils.libsonnet │ │ └── volumes.libsonnet │ └── tests │ │ ├── all_tests.jsonnet │ │ ├── common.libsonnet │ │ ├── experimental.libsonnet │ │ ├── list_tests.jsonnet │ │ └── pytorch │ │ ├── common.libsonnet │ │ ├── experimental.libsonnet │ │ ├── nightly │ │ ├── accelerate-smoke.libsonnet │ │ ├── ci.libsonnet │ │ ├── common.libsonnet │ │ ├── hf-bert.libsonnet │ │ ├── llama2-model.libsonnet │ │ ├── mnist.libsonnet │ │ ├── resnet50-mp.libsonnet │ │ ├── stable-diffusion-2.libsonnet │ │ └── targets.jsonnet │ │ ├── r2.8 │ │ ├── accelerate-smoke.libsonnet │ │ ├── ci.libsonnet │ │ ├── common.libsonnet │ │ ├── hf-bert.libsonnet │ │ ├── llama2-model.libsonnet │ │ ├── mnist.libsonnet │ │ ├── resnet50-mp.libsonnet │ │ ├── stable-diffusion-2.libsonnet │ │ └── targets.jsonnet │ │ └── targets.jsonnet ├── mantaray │ ├── build_mantaray_docker.py │ └── run_mantaray_jobs.py ├── map_reproducibility │ ├── a3mega │ │ ├── gpt3_175b_nemo.py │ │ ├── llama_3_1_70b_nemo.py │ │ ├── llama_3_1_70b_nemo_gcsfuse.py │ │ ├── llama_3_70b_nemo.py │ │ ├── mixtral_8_7b_nemo.py │ │ └── nemo_two_node.py │ ├── a3ultra │ │ ├── llama_3_1_405b_maxtext.py │ │ ├── llama_3_1_405b_nemo.py │ │ ├── llama_3_1_70b_maxtext.py │ │ ├── llama_3_1_70b_nemo.py │ │ ├── mixtral_8_7b_maxtext.py │ │ ├── mixtral_8_7b_nemo.py │ │ └── nemo_two_node.py │ ├── a4 │ │ ├── llama3_1_70b │ │ │ ├── maxtext.py │ │ │ └── nemo.py │ │ ├── llama_3_1_405b │ │ │ ├── maxtext.py │ │ │ └── nemo.py │ │ ├── mixtral_8_7b │ │ │ └── nemo.py │ │ └── two_node_nemo.py │ ├── internal_runs │ │ ├── a3mega_maxtext_benchmarking_dags.py │ │ ├── a3mega_maxtext_inference_benchmarking_dags.py │ │ ├── a3ultra_maxtext_benchmarking_dags.py │ │ ├── a3ultra_nemo_benchmarking_dags.py │ │ ├── a4_maxtext_benchmarking_dags.py │ │ ├── backfill_dags.py │ │ ├── cleanup_dags.py │ │ ├── dag_configs.py │ │ ├── dag_configs_inference.py │ │ ├── sample_a3mega_maxtext_single_run.py │ │ ├── sample_a3ultra_maxtext_single_run.py │ │ ├── sample_a3ultra_nemo_single_run.py │ │ ├── sample_a4_maxtext_single_run.py │ │ └── sample_a4_nemo_single_run.py │ ├── tests │ │ ├── test_common_utils.py │ │ └── test_sample_workload_utils.py │ └── utils │ │ ├── benchmarkdb_utils.py │ │ ├── common_utils.py │ │ ├── common_utils_inference.py │ │ ├── constants.py │ │ ├── file_comparison.py │ │ ├── gcs_automation_utils.py │ │ ├── internal_aotc_inference_workload.py │ │ ├── internal_aotc_workload.py │ │ └── sample_workload_utils.py ├── maxtext_pathways │ ├── configs │ │ ├── __init__.py │ │ ├── commands.py │ │ ├── model_configs.py │ │ ├── parameters.py │ │ └── recipe_config.py │ └── pw_mcjax_dags.py ├── multipod │ ├── configs │ │ ├── __init__.py │ │ ├── common.py │ │ ├── gke_config.py │ │ ├── jax_tests_gce_config.py │ │ ├── jax_tests_gke_config.py │ │ ├── maxtext_gce_config.py │ │ ├── maxtext_sweep_gke_config.py │ │ ├── mxla_collective_config.py │ │ └── pytorch_config.py │ ├── jax_functional_tests.py │ ├── maxtext_checkpointing.py │ ├── maxtext_configs_aot.py │ ├── maxtext_configs_aot_gpu.py │ ├── maxtext_configs_aot_hybridsim.py │ ├── maxtext_convergence.py │ ├── maxtext_end_to_end.py │ ├── maxtext_gpu_end_to_end.py │ ├── maxtext_multi_tier_checkpointing.py │ ├── maxtext_profiling.py │ ├── maxtext_profiling_vertex_ai_tensorboard.py │ ├── maxtext_sft_trainer.py │ ├── maxtext_trillium_configs_perf.py │ ├── maxtext_v5e_configs_perf.py │ ├── maxtext_v5p_configs_perf.py │ ├── mxla_collective_nightly.py │ ├── mxla_gpt3_6b_nightly_gke.py │ ├── mxla_maxtext_nightly_gke.py │ └── pytorch.py ├── orbax │ ├── maxtext_emc_restore_gcs.py │ ├── maxtext_emc_restore_local.py │ ├── maxtext_emc_resume_gcs.py │ ├── maxtext_emc_save_gcs.py │ ├── maxtext_mtc_emergency_save_local.py │ ├── maxtext_mtc_restore_local.py │ ├── maxtext_mtc_resume_gcs.py │ ├── maxtext_mtc_save_gcs.py │ ├── maxtext_reg_restore_gcs_with_node_disruption.py │ ├── maxtext_reg_restore_gcs_with_resumed_workload.py │ ├── maxtext_reg_save_gcs.py │ └── util │ │ ├── __init__.py │ │ ├── checkpoint_util.py │ │ ├── test_config_util.py │ │ └── validation_util.py ├── pytorch_xla │ ├── configs │ │ ├── __init__.py │ │ └── pytorchxla_torchbench_config.py │ ├── nightly-gpu.py │ ├── nightly-tpu.py │ ├── pytorchxla-torchbench-release-gpu.py │ ├── pytorchxla-torchbench-release-tpu.py │ ├── pytorchxla2_torchbench_gpu.py │ ├── pytorchxla2_torchbench_tpu.py │ ├── pytorchxla_torchbench_gpu.py │ ├── pytorchxla_torchbench_tpu.py │ ├── r2_8_gpu.py │ └── r2_8_tpu.py ├── solutions_team │ ├── configs │ │ ├── tensorflow │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── solutionsteam_tf_nightly_supported_config.py │ │ │ └── solutionsteam_tf_release_supported_config.py │ │ └── vllm │ │ │ └── vllm_benchmark_config.py │ ├── solutionsteam_tf_dlrm_benchmarks.py │ ├── solutionsteam_tf_nightly_supported.py │ ├── solutionsteam_tf_release_se_supported.py │ ├── solutionsteam_tf_release_supported.py │ ├── solutionsteam_vllm_benchmarks_gpu.py │ └── solutionsteam_vllm_benchmarks_tpu.py ├── sparsity_diffusion_devx │ ├── configs │ │ ├── __init__.py │ │ ├── common.py │ │ ├── gke_config.py │ │ └── project_bite_config.py │ ├── jax_ai_image_candidate_tpu_e2e.py │ ├── jax_ai_image_gpu_e2e.py │ ├── jax_ai_image_tpu_e2e.py │ ├── maxdiffusion_gpu_e2e.py │ ├── maxdiffusion_tpu_e2e.py │ ├── maxtext_moe_gpu_e2e.py │ ├── maxtext_moe_tpu_e2e.py │ ├── project_bite_gpu_e2e.py │ └── project_bite_tpu_e2e.py └── tpu_observability │ ├── configs │ └── common.py │ ├── interruption_validation_dag.py │ ├── multi_host_nodepool_rollback_dag.py │ ├── node_pool_status.py │ ├── tpu_info_format_validation_dags.py │ ├── update_node_pool_label.py │ └── utils │ ├── __init__.py │ ├── gcp_util.py │ ├── jobset_util.py │ ├── node_pool_util.py │ ├── subprocess_util.py │ ├── time_util.py │ └── tpu_info_util.py ├── deployment ├── .terraform.lock.hcl ├── README.md ├── artifact_registry.tf ├── bigquery.auto.tfvars ├── bigquery_template.tf ├── cloud_composer.auto.tfvars ├── cloud_composer_template.tf ├── clusters.tf ├── deployment_composer_env │ ├── README.md │ ├── composer_env.tf │ ├── provider.tf │ ├── setup_terraform.sh │ └── variables.tf ├── modules │ └── composer_env │ │ ├── main.tf │ │ └── variables.tf ├── provider.tf └── schema │ ├── job_history.json │ ├── metadata_history.json │ └── metric_history.json ├── docs ├── code-of-conduct.md └── contributing.md ├── pipeline └── auto-push.cloudbuild.yml ├── plugins ├── README.md ├── allow_list.txt ├── block_list.txt ├── config.json └── on_failure_actions.py ├── pyproject.toml ├── scripts ├── code-style.sh ├── dag-check.sh ├── gen-configs.sh ├── local-airflow.sh └── upload-tests.sh └── xlml ├── __init__.py ├── apis ├── __init__.py ├── gcp_config.py ├── gcs.py ├── metric_config.py ├── mlcompass.py ├── task.py ├── test_config.py └── xpk_cluster_config.py └── utils ├── __init__.py ├── bigquery.py ├── bigquery_test.py ├── composer.py ├── gke.py ├── gpu.py ├── mantaray.py ├── metric.py ├── metric_test.py ├── name_format.py ├── ssh.py ├── startup_script.py ├── tpu.py └── xpk.py /.github/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.github/CODEOWNERS -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.github/ISSUE_TEMPLATE.md -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/dev-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.github/dev-requirements.txt -------------------------------------------------------------------------------- /.github/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.github/requirements.txt -------------------------------------------------------------------------------- /.github/workflows/dag-check.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.github/workflows/dag-check.yml -------------------------------------------------------------------------------- /.github/workflows/pyink-check.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.github/workflows/pyink-check.yml -------------------------------------------------------------------------------- /.github/workflows/pylint-check.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.github/workflows/pylint-check.yml -------------------------------------------------------------------------------- /.github/workflows/require-checklist.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.github/workflows/require-checklist.yml -------------------------------------------------------------------------------- /.github/workflows/unit-test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.github/workflows/unit-test.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.pylintrc -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.vscode/extensions.json -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/README.md -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/common/model_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/common/model_configs.py -------------------------------------------------------------------------------- /dags/common/quarantined_tests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/common/quarantined_tests.py -------------------------------------------------------------------------------- /dags/common/test_owner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/common/test_owner.py -------------------------------------------------------------------------------- /dags/common/vm_resource.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/common/vm_resource.py -------------------------------------------------------------------------------- /dags/composer_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/composer_env.py -------------------------------------------------------------------------------- /dags/dashboard/airflow_to_bq_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/dashboard/airflow_to_bq_export.py -------------------------------------------------------------------------------- /dags/dashboard/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/dashboard/configs/export_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/dashboard/configs/export_config.py -------------------------------------------------------------------------------- /dags/dashboard/xlml_to_buganizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/dashboard/xlml_to_buganizer.py -------------------------------------------------------------------------------- /dags/examples/always_fail.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/examples/always_fail.py -------------------------------------------------------------------------------- /dags/examples/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/examples/configs/xpk.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/examples/configs/xpk.Dockerfile -------------------------------------------------------------------------------- /dags/examples/configs/xpk_example_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/examples/configs/xpk_example_config.py -------------------------------------------------------------------------------- /dags/examples/maxtext_aqtp_version_sweep_gke_example_dag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/examples/maxtext_aqtp_version_sweep_gke_example_dag.py -------------------------------------------------------------------------------- /dags/examples/maxtext_profile_namegen_example_dag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/examples/maxtext_profile_namegen_example_dag.py -------------------------------------------------------------------------------- /dags/examples/maxtext_profile_sweep_example_dag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/examples/maxtext_profile_sweep_example_dag.py -------------------------------------------------------------------------------- /dags/examples/maxtext_sweep_gke_example_dag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/examples/maxtext_sweep_gke_example_dag.py -------------------------------------------------------------------------------- /dags/examples/xpk_example_dag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/examples/xpk_example_dag.py -------------------------------------------------------------------------------- /dags/framework3p/configs/microbenchmarks_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/framework3p/configs/microbenchmarks_config.py -------------------------------------------------------------------------------- /dags/framework3p/microbenchmarks_dag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/framework3p/microbenchmarks_dag.py -------------------------------------------------------------------------------- /dags/gcs_bucket.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/gcs_bucket.py -------------------------------------------------------------------------------- /dags/inference/configs/jetstream_benchmark_serving_gce_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/configs/jetstream_benchmark_serving_gce_config.py -------------------------------------------------------------------------------- /dags/inference/configs/jetstream_pytorch_gce_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/configs/jetstream_pytorch_gce_config.py -------------------------------------------------------------------------------- /dags/inference/configs/maxtext_gpu_inference_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/configs/maxtext_gpu_inference_config.py -------------------------------------------------------------------------------- /dags/inference/configs/maxtext_inference_microbenchmark_gce_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/configs/maxtext_inference_microbenchmark_gce_config.py -------------------------------------------------------------------------------- /dags/inference/configs/trt_llm_inference_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/configs/trt_llm_inference_config.py -------------------------------------------------------------------------------- /dags/inference/configs/trt_llm_mlperf_v40_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/configs/trt_llm_mlperf_v40_config.py -------------------------------------------------------------------------------- /dags/inference/configs/trt_llm_mlperf_v41_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/configs/trt_llm_mlperf_v41_config.py -------------------------------------------------------------------------------- /dags/inference/configs/trtllm_bench_inference_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/configs/trtllm_bench_inference_config.py -------------------------------------------------------------------------------- /dags/inference/jetstream_inference_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/jetstream_inference_e2e.py -------------------------------------------------------------------------------- /dags/inference/jetstream_pytorch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/jetstream_pytorch_inference.py -------------------------------------------------------------------------------- /dags/inference/maxtext_gpu_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/maxtext_gpu_inference.py -------------------------------------------------------------------------------- /dags/inference/maxtext_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/maxtext_inference.py -------------------------------------------------------------------------------- /dags/inference/maxtext_inference_microbenchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/maxtext_inference_microbenchmark.py -------------------------------------------------------------------------------- /dags/inference/maxtext_inference_offline_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/maxtext_inference_offline_benchmark.py -------------------------------------------------------------------------------- /dags/inference/maxtext_model_config_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/maxtext_model_config_generator.py -------------------------------------------------------------------------------- /dags/inference/trt_llm_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/trt_llm_inference.py -------------------------------------------------------------------------------- /dags/inference/trt_llm_mlperf_v40_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/trt_llm_mlperf_v40_inference.py -------------------------------------------------------------------------------- /dags/inference/trt_llm_mlperf_v41_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/trt_llm_mlperf_v41_inference.py -------------------------------------------------------------------------------- /dags/inference/trtllm_bench_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/trtllm_bench_inference.py -------------------------------------------------------------------------------- /dags/inference/utils/maxtext_gpu_microbenchmark_jsonl_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/utils/maxtext_gpu_microbenchmark_jsonl_converter.py -------------------------------------------------------------------------------- /dags/inference/utils/trtllm_bench_jsonl_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/inference/utils/trtllm_bench_jsonl_converter.py -------------------------------------------------------------------------------- /dags/infra/clean_up.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/infra/clean_up.py -------------------------------------------------------------------------------- /dags/legacy_test/templates/base.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/templates/base.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/templates/gpus.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/templates/gpus.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/templates/metrics.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/templates/metrics.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/templates/mixins.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/templates/mixins.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/templates/timeouts.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/templates/timeouts.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/templates/tpus.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/templates/tpus.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/templates/utils.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/templates/utils.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/templates/volumes.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/templates/volumes.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/all_tests.jsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/all_tests.jsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/common.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/common.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/experimental.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/experimental.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/list_tests.jsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/list_tests.jsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/common.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/common.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/experimental.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/experimental.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/nightly/accelerate-smoke.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/nightly/accelerate-smoke.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/nightly/ci.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/nightly/ci.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/nightly/common.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/nightly/common.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/nightly/hf-bert.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/nightly/hf-bert.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/nightly/llama2-model.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/nightly/llama2-model.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/nightly/mnist.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/nightly/mnist.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/nightly/resnet50-mp.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/nightly/resnet50-mp.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/nightly/stable-diffusion-2.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/nightly/stable-diffusion-2.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/nightly/targets.jsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/nightly/targets.jsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/r2.8/accelerate-smoke.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/r2.8/accelerate-smoke.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/r2.8/ci.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/r2.8/ci.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/r2.8/common.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/r2.8/common.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/r2.8/hf-bert.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/r2.8/hf-bert.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/r2.8/llama2-model.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/r2.8/llama2-model.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/r2.8/mnist.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/r2.8/mnist.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/r2.8/resnet50-mp.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/r2.8/resnet50-mp.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/r2.8/stable-diffusion-2.libsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/r2.8/stable-diffusion-2.libsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/r2.8/targets.jsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/r2.8/targets.jsonnet -------------------------------------------------------------------------------- /dags/legacy_test/tests/pytorch/targets.jsonnet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/legacy_test/tests/pytorch/targets.jsonnet -------------------------------------------------------------------------------- /dags/mantaray/build_mantaray_docker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/mantaray/build_mantaray_docker.py -------------------------------------------------------------------------------- /dags/mantaray/run_mantaray_jobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/mantaray/run_mantaray_jobs.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3mega/gpt3_175b_nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3mega/gpt3_175b_nemo.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3mega/llama_3_1_70b_nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3mega/llama_3_1_70b_nemo.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3mega/llama_3_1_70b_nemo_gcsfuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3mega/llama_3_1_70b_nemo_gcsfuse.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3mega/llama_3_70b_nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3mega/llama_3_70b_nemo.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3mega/mixtral_8_7b_nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3mega/mixtral_8_7b_nemo.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3mega/nemo_two_node.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3mega/nemo_two_node.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3ultra/llama_3_1_405b_maxtext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3ultra/llama_3_1_405b_maxtext.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3ultra/llama_3_1_405b_nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3ultra/llama_3_1_405b_nemo.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3ultra/llama_3_1_70b_maxtext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3ultra/llama_3_1_70b_maxtext.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3ultra/llama_3_1_70b_nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3ultra/llama_3_1_70b_nemo.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3ultra/mixtral_8_7b_maxtext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3ultra/mixtral_8_7b_maxtext.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3ultra/mixtral_8_7b_nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3ultra/mixtral_8_7b_nemo.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a3ultra/nemo_two_node.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a3ultra/nemo_two_node.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a4/llama3_1_70b/maxtext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a4/llama3_1_70b/maxtext.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a4/llama3_1_70b/nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a4/llama3_1_70b/nemo.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a4/llama_3_1_405b/maxtext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a4/llama_3_1_405b/maxtext.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a4/llama_3_1_405b/nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a4/llama_3_1_405b/nemo.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a4/mixtral_8_7b/nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a4/mixtral_8_7b/nemo.py -------------------------------------------------------------------------------- /dags/map_reproducibility/a4/two_node_nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/a4/two_node_nemo.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/a3mega_maxtext_benchmarking_dags.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/a3mega_maxtext_benchmarking_dags.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/a3mega_maxtext_inference_benchmarking_dags.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/a3mega_maxtext_inference_benchmarking_dags.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/a3ultra_maxtext_benchmarking_dags.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/a3ultra_maxtext_benchmarking_dags.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/a3ultra_nemo_benchmarking_dags.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/a3ultra_nemo_benchmarking_dags.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/a4_maxtext_benchmarking_dags.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/a4_maxtext_benchmarking_dags.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/backfill_dags.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/backfill_dags.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/cleanup_dags.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/cleanup_dags.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/dag_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/dag_configs.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/dag_configs_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/dag_configs_inference.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/sample_a3mega_maxtext_single_run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/sample_a3mega_maxtext_single_run.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/sample_a3ultra_maxtext_single_run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/sample_a3ultra_maxtext_single_run.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/sample_a3ultra_nemo_single_run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/sample_a3ultra_nemo_single_run.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/sample_a4_maxtext_single_run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/sample_a4_maxtext_single_run.py -------------------------------------------------------------------------------- /dags/map_reproducibility/internal_runs/sample_a4_nemo_single_run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/internal_runs/sample_a4_nemo_single_run.py -------------------------------------------------------------------------------- /dags/map_reproducibility/tests/test_common_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/tests/test_common_utils.py -------------------------------------------------------------------------------- /dags/map_reproducibility/tests/test_sample_workload_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/tests/test_sample_workload_utils.py -------------------------------------------------------------------------------- /dags/map_reproducibility/utils/benchmarkdb_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/utils/benchmarkdb_utils.py -------------------------------------------------------------------------------- /dags/map_reproducibility/utils/common_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/utils/common_utils.py -------------------------------------------------------------------------------- /dags/map_reproducibility/utils/common_utils_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/utils/common_utils_inference.py -------------------------------------------------------------------------------- /dags/map_reproducibility/utils/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/utils/constants.py -------------------------------------------------------------------------------- /dags/map_reproducibility/utils/file_comparison.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/utils/file_comparison.py -------------------------------------------------------------------------------- /dags/map_reproducibility/utils/gcs_automation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/utils/gcs_automation_utils.py -------------------------------------------------------------------------------- /dags/map_reproducibility/utils/internal_aotc_inference_workload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/utils/internal_aotc_inference_workload.py -------------------------------------------------------------------------------- /dags/map_reproducibility/utils/internal_aotc_workload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/utils/internal_aotc_workload.py -------------------------------------------------------------------------------- /dags/map_reproducibility/utils/sample_workload_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/map_reproducibility/utils/sample_workload_utils.py -------------------------------------------------------------------------------- /dags/maxtext_pathways/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/maxtext_pathways/configs/commands.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/maxtext_pathways/configs/commands.py -------------------------------------------------------------------------------- /dags/maxtext_pathways/configs/model_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/maxtext_pathways/configs/model_configs.py -------------------------------------------------------------------------------- /dags/maxtext_pathways/configs/parameters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/maxtext_pathways/configs/parameters.py -------------------------------------------------------------------------------- /dags/maxtext_pathways/configs/recipe_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/maxtext_pathways/configs/recipe_config.py -------------------------------------------------------------------------------- /dags/maxtext_pathways/pw_mcjax_dags.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/maxtext_pathways/pw_mcjax_dags.py -------------------------------------------------------------------------------- /dags/multipod/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/multipod/configs/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/configs/common.py -------------------------------------------------------------------------------- /dags/multipod/configs/gke_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/configs/gke_config.py -------------------------------------------------------------------------------- /dags/multipod/configs/jax_tests_gce_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/configs/jax_tests_gce_config.py -------------------------------------------------------------------------------- /dags/multipod/configs/jax_tests_gke_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/configs/jax_tests_gke_config.py -------------------------------------------------------------------------------- /dags/multipod/configs/maxtext_gce_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/configs/maxtext_gce_config.py -------------------------------------------------------------------------------- /dags/multipod/configs/maxtext_sweep_gke_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/configs/maxtext_sweep_gke_config.py -------------------------------------------------------------------------------- /dags/multipod/configs/mxla_collective_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/configs/mxla_collective_config.py -------------------------------------------------------------------------------- /dags/multipod/configs/pytorch_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/configs/pytorch_config.py -------------------------------------------------------------------------------- /dags/multipod/jax_functional_tests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/jax_functional_tests.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_checkpointing.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_configs_aot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_configs_aot.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_configs_aot_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_configs_aot_gpu.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_configs_aot_hybridsim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_configs_aot_hybridsim.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_convergence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_convergence.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_end_to_end.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_end_to_end.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_gpu_end_to_end.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_gpu_end_to_end.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_multi_tier_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_multi_tier_checkpointing.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_profiling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_profiling.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_profiling_vertex_ai_tensorboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_profiling_vertex_ai_tensorboard.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_sft_trainer.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_trillium_configs_perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_trillium_configs_perf.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_v5e_configs_perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_v5e_configs_perf.py -------------------------------------------------------------------------------- /dags/multipod/maxtext_v5p_configs_perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/maxtext_v5p_configs_perf.py -------------------------------------------------------------------------------- /dags/multipod/mxla_collective_nightly.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/mxla_collective_nightly.py -------------------------------------------------------------------------------- /dags/multipod/mxla_gpt3_6b_nightly_gke.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/mxla_gpt3_6b_nightly_gke.py -------------------------------------------------------------------------------- /dags/multipod/mxla_maxtext_nightly_gke.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/mxla_maxtext_nightly_gke.py -------------------------------------------------------------------------------- /dags/multipod/pytorch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/multipod/pytorch.py -------------------------------------------------------------------------------- /dags/orbax/maxtext_emc_restore_gcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/maxtext_emc_restore_gcs.py -------------------------------------------------------------------------------- /dags/orbax/maxtext_emc_restore_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/maxtext_emc_restore_local.py -------------------------------------------------------------------------------- /dags/orbax/maxtext_emc_resume_gcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/maxtext_emc_resume_gcs.py -------------------------------------------------------------------------------- /dags/orbax/maxtext_emc_save_gcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/maxtext_emc_save_gcs.py -------------------------------------------------------------------------------- /dags/orbax/maxtext_mtc_emergency_save_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/maxtext_mtc_emergency_save_local.py -------------------------------------------------------------------------------- /dags/orbax/maxtext_mtc_restore_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/maxtext_mtc_restore_local.py -------------------------------------------------------------------------------- /dags/orbax/maxtext_mtc_resume_gcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/maxtext_mtc_resume_gcs.py -------------------------------------------------------------------------------- /dags/orbax/maxtext_mtc_save_gcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/maxtext_mtc_save_gcs.py -------------------------------------------------------------------------------- /dags/orbax/maxtext_reg_restore_gcs_with_node_disruption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/maxtext_reg_restore_gcs_with_node_disruption.py -------------------------------------------------------------------------------- /dags/orbax/maxtext_reg_restore_gcs_with_resumed_workload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/maxtext_reg_restore_gcs_with_resumed_workload.py -------------------------------------------------------------------------------- /dags/orbax/maxtext_reg_save_gcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/maxtext_reg_save_gcs.py -------------------------------------------------------------------------------- /dags/orbax/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/orbax/util/checkpoint_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/util/checkpoint_util.py -------------------------------------------------------------------------------- /dags/orbax/util/test_config_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/util/test_config_util.py -------------------------------------------------------------------------------- /dags/orbax/util/validation_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/orbax/util/validation_util.py -------------------------------------------------------------------------------- /dags/pytorch_xla/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/pytorch_xla/configs/pytorchxla_torchbench_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/pytorch_xla/configs/pytorchxla_torchbench_config.py -------------------------------------------------------------------------------- /dags/pytorch_xla/nightly-gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/pytorch_xla/nightly-gpu.py -------------------------------------------------------------------------------- /dags/pytorch_xla/nightly-tpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/pytorch_xla/nightly-tpu.py -------------------------------------------------------------------------------- /dags/pytorch_xla/pytorchxla-torchbench-release-gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/pytorch_xla/pytorchxla-torchbench-release-gpu.py -------------------------------------------------------------------------------- /dags/pytorch_xla/pytorchxla-torchbench-release-tpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/pytorch_xla/pytorchxla-torchbench-release-tpu.py -------------------------------------------------------------------------------- /dags/pytorch_xla/pytorchxla2_torchbench_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/pytorch_xla/pytorchxla2_torchbench_gpu.py -------------------------------------------------------------------------------- /dags/pytorch_xla/pytorchxla2_torchbench_tpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/pytorch_xla/pytorchxla2_torchbench_tpu.py -------------------------------------------------------------------------------- /dags/pytorch_xla/pytorchxla_torchbench_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/pytorch_xla/pytorchxla_torchbench_gpu.py -------------------------------------------------------------------------------- /dags/pytorch_xla/pytorchxla_torchbench_tpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/pytorch_xla/pytorchxla_torchbench_tpu.py -------------------------------------------------------------------------------- /dags/pytorch_xla/r2_8_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/pytorch_xla/r2_8_gpu.py -------------------------------------------------------------------------------- /dags/pytorch_xla/r2_8_tpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/pytorch_xla/r2_8_tpu.py -------------------------------------------------------------------------------- /dags/solutions_team/configs/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/solutions_team/configs/tensorflow/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/solutions_team/configs/tensorflow/common.py -------------------------------------------------------------------------------- /dags/solutions_team/configs/tensorflow/solutionsteam_tf_nightly_supported_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/solutions_team/configs/tensorflow/solutionsteam_tf_nightly_supported_config.py -------------------------------------------------------------------------------- /dags/solutions_team/configs/tensorflow/solutionsteam_tf_release_supported_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/solutions_team/configs/tensorflow/solutionsteam_tf_release_supported_config.py -------------------------------------------------------------------------------- /dags/solutions_team/configs/vllm/vllm_benchmark_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/solutions_team/configs/vllm/vllm_benchmark_config.py -------------------------------------------------------------------------------- /dags/solutions_team/solutionsteam_tf_dlrm_benchmarks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/solutions_team/solutionsteam_tf_dlrm_benchmarks.py -------------------------------------------------------------------------------- /dags/solutions_team/solutionsteam_tf_nightly_supported.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/solutions_team/solutionsteam_tf_nightly_supported.py -------------------------------------------------------------------------------- /dags/solutions_team/solutionsteam_tf_release_se_supported.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/solutions_team/solutionsteam_tf_release_se_supported.py -------------------------------------------------------------------------------- /dags/solutions_team/solutionsteam_tf_release_supported.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/solutions_team/solutionsteam_tf_release_supported.py -------------------------------------------------------------------------------- /dags/solutions_team/solutionsteam_vllm_benchmarks_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/solutions_team/solutionsteam_vllm_benchmarks_gpu.py -------------------------------------------------------------------------------- /dags/solutions_team/solutionsteam_vllm_benchmarks_tpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/solutions_team/solutionsteam_vllm_benchmarks_tpu.py -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/configs/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/sparsity_diffusion_devx/configs/common.py -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/configs/gke_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/sparsity_diffusion_devx/configs/gke_config.py -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/configs/project_bite_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/sparsity_diffusion_devx/configs/project_bite_config.py -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/jax_ai_image_candidate_tpu_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/sparsity_diffusion_devx/jax_ai_image_candidate_tpu_e2e.py -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/jax_ai_image_gpu_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/sparsity_diffusion_devx/jax_ai_image_gpu_e2e.py -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/jax_ai_image_tpu_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/sparsity_diffusion_devx/jax_ai_image_tpu_e2e.py -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/maxdiffusion_gpu_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/sparsity_diffusion_devx/maxdiffusion_gpu_e2e.py -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/maxdiffusion_tpu_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/sparsity_diffusion_devx/maxdiffusion_tpu_e2e.py -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/maxtext_moe_gpu_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/sparsity_diffusion_devx/maxtext_moe_gpu_e2e.py -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/maxtext_moe_tpu_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/sparsity_diffusion_devx/maxtext_moe_tpu_e2e.py -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/project_bite_gpu_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/sparsity_diffusion_devx/project_bite_gpu_e2e.py -------------------------------------------------------------------------------- /dags/sparsity_diffusion_devx/project_bite_tpu_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/sparsity_diffusion_devx/project_bite_tpu_e2e.py -------------------------------------------------------------------------------- /dags/tpu_observability/configs/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/tpu_observability/configs/common.py -------------------------------------------------------------------------------- /dags/tpu_observability/interruption_validation_dag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/tpu_observability/interruption_validation_dag.py -------------------------------------------------------------------------------- /dags/tpu_observability/multi_host_nodepool_rollback_dag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/tpu_observability/multi_host_nodepool_rollback_dag.py -------------------------------------------------------------------------------- /dags/tpu_observability/node_pool_status.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/tpu_observability/node_pool_status.py -------------------------------------------------------------------------------- /dags/tpu_observability/tpu_info_format_validation_dags.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/tpu_observability/tpu_info_format_validation_dags.py -------------------------------------------------------------------------------- /dags/tpu_observability/update_node_pool_label.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/tpu_observability/update_node_pool_label.py -------------------------------------------------------------------------------- /dags/tpu_observability/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/tpu_observability/utils/gcp_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/tpu_observability/utils/gcp_util.py -------------------------------------------------------------------------------- /dags/tpu_observability/utils/jobset_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/tpu_observability/utils/jobset_util.py -------------------------------------------------------------------------------- /dags/tpu_observability/utils/node_pool_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/tpu_observability/utils/node_pool_util.py -------------------------------------------------------------------------------- /dags/tpu_observability/utils/subprocess_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/tpu_observability/utils/subprocess_util.py -------------------------------------------------------------------------------- /dags/tpu_observability/utils/time_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/tpu_observability/utils/time_util.py -------------------------------------------------------------------------------- /dags/tpu_observability/utils/tpu_info_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/dags/tpu_observability/utils/tpu_info_util.py -------------------------------------------------------------------------------- /deployment/.terraform.lock.hcl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/.terraform.lock.hcl -------------------------------------------------------------------------------- /deployment/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/README.md -------------------------------------------------------------------------------- /deployment/artifact_registry.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/artifact_registry.tf -------------------------------------------------------------------------------- /deployment/bigquery.auto.tfvars: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/bigquery.auto.tfvars -------------------------------------------------------------------------------- /deployment/bigquery_template.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/bigquery_template.tf -------------------------------------------------------------------------------- /deployment/cloud_composer.auto.tfvars: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/cloud_composer.auto.tfvars -------------------------------------------------------------------------------- /deployment/cloud_composer_template.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/cloud_composer_template.tf -------------------------------------------------------------------------------- /deployment/clusters.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/clusters.tf -------------------------------------------------------------------------------- /deployment/deployment_composer_env/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/deployment_composer_env/README.md -------------------------------------------------------------------------------- /deployment/deployment_composer_env/composer_env.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/deployment_composer_env/composer_env.tf -------------------------------------------------------------------------------- /deployment/deployment_composer_env/provider.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/deployment_composer_env/provider.tf -------------------------------------------------------------------------------- /deployment/deployment_composer_env/setup_terraform.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/deployment_composer_env/setup_terraform.sh -------------------------------------------------------------------------------- /deployment/deployment_composer_env/variables.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/deployment_composer_env/variables.tf -------------------------------------------------------------------------------- /deployment/modules/composer_env/main.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/modules/composer_env/main.tf -------------------------------------------------------------------------------- /deployment/modules/composer_env/variables.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/modules/composer_env/variables.tf -------------------------------------------------------------------------------- /deployment/provider.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/provider.tf -------------------------------------------------------------------------------- /deployment/schema/job_history.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/schema/job_history.json -------------------------------------------------------------------------------- /deployment/schema/metadata_history.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/schema/metadata_history.json -------------------------------------------------------------------------------- /deployment/schema/metric_history.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/deployment/schema/metric_history.json -------------------------------------------------------------------------------- /docs/code-of-conduct.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/docs/code-of-conduct.md -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/docs/contributing.md -------------------------------------------------------------------------------- /pipeline/auto-push.cloudbuild.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/pipeline/auto-push.cloudbuild.yml -------------------------------------------------------------------------------- /plugins/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/plugins/README.md -------------------------------------------------------------------------------- /plugins/allow_list.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/plugins/allow_list.txt -------------------------------------------------------------------------------- /plugins/block_list.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugins/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "enable_plugin_by_default": false 3 | } -------------------------------------------------------------------------------- /plugins/on_failure_actions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/plugins/on_failure_actions.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/code-style.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/scripts/code-style.sh -------------------------------------------------------------------------------- /scripts/dag-check.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/scripts/dag-check.sh -------------------------------------------------------------------------------- /scripts/gen-configs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/scripts/gen-configs.sh -------------------------------------------------------------------------------- /scripts/local-airflow.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/scripts/local-airflow.sh -------------------------------------------------------------------------------- /scripts/upload-tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/scripts/upload-tests.sh -------------------------------------------------------------------------------- /xlml/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xlml/apis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xlml/apis/gcp_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/apis/gcp_config.py -------------------------------------------------------------------------------- /xlml/apis/gcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/apis/gcs.py -------------------------------------------------------------------------------- /xlml/apis/metric_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/apis/metric_config.py -------------------------------------------------------------------------------- /xlml/apis/mlcompass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/apis/mlcompass.py -------------------------------------------------------------------------------- /xlml/apis/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/apis/task.py -------------------------------------------------------------------------------- /xlml/apis/test_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/apis/test_config.py -------------------------------------------------------------------------------- /xlml/apis/xpk_cluster_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/apis/xpk_cluster_config.py -------------------------------------------------------------------------------- /xlml/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xlml/utils/bigquery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/bigquery.py -------------------------------------------------------------------------------- /xlml/utils/bigquery_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/bigquery_test.py -------------------------------------------------------------------------------- /xlml/utils/composer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/composer.py -------------------------------------------------------------------------------- /xlml/utils/gke.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/gke.py -------------------------------------------------------------------------------- /xlml/utils/gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/gpu.py -------------------------------------------------------------------------------- /xlml/utils/mantaray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/mantaray.py -------------------------------------------------------------------------------- /xlml/utils/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/metric.py -------------------------------------------------------------------------------- /xlml/utils/metric_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/metric_test.py -------------------------------------------------------------------------------- /xlml/utils/name_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/name_format.py -------------------------------------------------------------------------------- /xlml/utils/ssh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/ssh.py -------------------------------------------------------------------------------- /xlml/utils/startup_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/startup_script.py -------------------------------------------------------------------------------- /xlml/utils/tpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/tpu.py -------------------------------------------------------------------------------- /xlml/utils/xpk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/ml-auto-solutions/HEAD/xlml/utils/xpk.py --------------------------------------------------------------------------------