├── .dockerignore ├── .github └── workflows │ ├── release_helm.yaml │ ├── release_pypi.yaml │ ├── test.yaml │ └── test_docs.yaml ├── .gitignore ├── .readthedocs.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── adaptdl ├── adaptdl │ ├── __init__.py │ ├── _signal.py │ ├── checkpoint.py │ ├── checkpoint_test.py │ ├── collective.py │ ├── collective_test.py │ ├── conftest.py │ ├── env.py │ ├── fit_test.py │ ├── goodput.py │ ├── goodput_test.py │ ├── reducer.py │ ├── reducer_test.py │ ├── sched_hints.py │ ├── torch │ │ ├── __init__.py │ │ ├── _metrics.py │ │ ├── _metrics_test.py │ │ ├── accumulator.py │ │ ├── accumulator_test.py │ │ ├── data.py │ │ ├── data_test.py │ │ ├── epoch.py │ │ ├── epoch_test.py │ │ ├── gradient_noise_scale.py │ │ ├── gradient_noise_scale_test.py │ │ ├── iterator.py │ │ ├── parallel.py │ │ ├── parallel_test.py │ │ ├── scaling_rules.py │ │ └── scaling_rules_test.py │ └── utils.py ├── requirements.txt └── setup.py ├── cli ├── adaptdl_cli │ ├── __init__.py │ ├── proxy.py │ ├── pvc.py │ └── tensorboard.py ├── bin │ └── adaptdl ├── check_requirements.py ├── requirements.txt └── setup.py ├── deploy ├── eks │ └── adaptdl-eks-cluster-on-demand.yaml └── microk8s │ └── setup_ubuntu.sh ├── docs ├── .gitignore ├── Makefile ├── README.rst ├── _static │ ├── css │ │ └── custom.css │ └── img │ │ ├── AdaptDLFavicon.png │ │ ├── AdaptDLHorizLogo.png │ │ ├── Petuum.png │ │ ├── autobsz-performance.png │ │ └── scheduling-performance.png ├── adaptdl-pytorch.rst ├── artifacts │ └── ca-v1.14.yaml ├── commandline │ ├── index.rst │ ├── simple-job.rst │ └── tensorboard.rst ├── conf.py ├── index.rst ├── installation │ ├── deploy-eks.rst │ ├── deploy-microk8s.rst │ ├── index.rst │ └── install-adaptdl.rst ├── make.bat ├── ray │ ├── aws_ray_adaptdl.rst │ └── tune_tutorial.rst ├── requirements.txt ├── standalone-training.rst └── tutorial │ ├── Dockerfile │ ├── mnist_original.py │ ├── mnist_step_1.py │ ├── mnist_step_2.py │ ├── mnist_step_3.py │ ├── mnist_step_4.py │ ├── mnist_step_5.py │ └── mnist_tensorboard.py ├── examples ├── BERT │ ├── Dockerfile │ ├── adaptdljob.yaml │ ├── data.py │ ├── metrics.py │ ├── mlm_task.py │ ├── mlm_task_adaptdl.py │ ├── model.py │ ├── ns_task.py │ ├── ns_task_adaptdl.py │ ├── requirements.txt │ └── utils.py ├── Dockerfile ├── NCF │ ├── data_utils.py │ ├── evaluate.py │ ├── main.py │ └── model.py ├── dcgan │ └── dcgan.py ├── linear_regression │ └── main.py ├── pytorch-cifar │ ├── LICENSE │ ├── README.md │ ├── image.txt │ ├── main.py │ ├── models │ │ ├── __init__.py │ │ ├── densenet.py │ │ ├── dpn.py │ │ ├── googlenet.py │ │ ├── lenet.py │ │ ├── mobilenet.py │ │ ├── mobilenetv2.py │ │ ├── pnasnet.py │ │ ├── preact_resnet.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ ├── senet.py │ │ ├── shufflenet.py │ │ ├── shufflenetv2.py │ │ └── vgg.py │ ├── requirements.txt │ └── utils.py ├── ray │ └── aws │ │ └── cluster.yaml ├── requirements.txt └── transformer │ ├── transformer.py │ └── transformer_multireplica_local.py ├── grafana └── dashboard.json ├── helm └── adaptdl-sched │ ├── .gitignore │ ├── .helmignore │ ├── Chart.yaml │ ├── requirements.yaml │ ├── templates │ ├── adaptdl-cluster-role-binding.yaml │ ├── adaptdl-cluster-role.yaml │ ├── adaptdl-crd.yaml │ ├── adaptdl-sa.yaml │ ├── adaptdl-sched.yaml │ ├── config.yaml │ ├── metrics-service.yaml │ ├── metrics-servicemonitor.yaml │ ├── supervisor-service.yaml │ ├── validator-deployment.yaml │ ├── validator-service.yaml │ └── validator-webhook.yaml │ └── values.yaml ├── ray ├── adaptdl_ray │ ├── __init__.py │ ├── adaptdl │ │ ├── __init__.py │ │ ├── adaptdl_allocator.py │ │ ├── adaptdl_job_mixin.py │ │ ├── config.py │ │ ├── config_test.py │ │ └── utils.py │ ├── aws │ │ ├── README.rst │ │ ├── __init__.py │ │ ├── _example_worker.py │ │ ├── _test_setup.py │ │ ├── controller.py │ │ ├── launch_job.py │ │ ├── optimizer.py │ │ ├── test_controller.py │ │ ├── test_controller_mocked_ray.py │ │ ├── test_worker.py │ │ ├── utils.py │ │ └── worker.py │ ├── examples │ │ ├── hyperopt_example.py │ │ ├── hyperopt_example_baseline.py │ │ └── tune_proposal.py │ ├── tests │ │ └── test_trial_sched.py │ └── tune │ │ ├── __init__.py │ │ ├── adaptdl_patch.py │ │ ├── adaptdl_trainable.py │ │ ├── adaptdl_trainable_test.py │ │ ├── adaptdl_trial.py │ │ ├── adaptdl_trial_sched.py │ │ └── adaptdl_trial_test.py ├── requirements.txt └── setup.py ├── requirements-test.txt ├── sched ├── Dockerfile ├── adaptdl_sched │ ├── __init__.py │ ├── __main__.py │ ├── allocator.py │ ├── cluster_expander.py │ ├── config.py │ ├── controller.py │ ├── k8s_templates.py │ ├── policy │ │ ├── __init__.py │ │ ├── non_preemptible_test.py │ │ ├── pollux.py │ │ ├── pollux_test.py │ │ ├── speedup.py │ │ ├── speedup_test.py │ │ └── utils.py │ ├── resources.py │ ├── resources_test.py │ ├── supervisor.py │ ├── utils.py │ ├── validator.py │ └── validator_test.py ├── requirements.txt └── setup.py ├── tests ├── README.md ├── long-workload │ ├── bert.sh │ ├── dcgan.sh │ ├── densenet121-cifar10.sh │ ├── lr-v2-elastic-cpu.sh │ ├── ncf.sh │ ├── ncf_accumulation.sh │ ├── resnet18-cifar10-elastic-min-replicas.sh │ ├── resnet18-cifar10-elastic.sh │ ├── resnet18-cifar10-inelastic.sh │ ├── resnet18-cifar10-mixed-precision.sh │ ├── transformer-wikitext2-elastic.sh │ └── transformer-wikitext2.sh ├── short-workload │ ├── densenet121-cifar10.sh │ └── resnet18-cifar10.sh ├── test-localmode2.sh └── testworkload.sh └── tutorial ├── Dockerfile ├── adaptdljob.yaml ├── mnist_original.py ├── mnist_step_1.py ├── mnist_step_2.py ├── mnist_step_3.py ├── mnist_step_4.py ├── mnist_step_5.py ├── mnist_tensorboard.py ├── run.sh └── run_tensorboard.sh /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/.dockerignore -------------------------------------------------------------------------------- /.github/workflows/release_helm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/.github/workflows/release_helm.yaml -------------------------------------------------------------------------------- /.github/workflows/release_pypi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/.github/workflows/release_pypi.yaml -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/.github/workflows/test.yaml -------------------------------------------------------------------------------- /.github/workflows/test_docs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/.github/workflows/test_docs.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/.gitignore -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/Makefile -------------------------------------------------------------------------------- /adaptdl/adaptdl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/__init__.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/_signal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/_signal.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/checkpoint.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/checkpoint_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/checkpoint_test.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/collective.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/collective.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/collective_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/collective_test.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/conftest.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/env.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/fit_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/fit_test.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/goodput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/goodput.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/goodput_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/goodput_test.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/reducer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/reducer.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/reducer_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/reducer_test.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/sched_hints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/sched_hints.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/__init__.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/_metrics.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/_metrics_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/_metrics_test.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/accumulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/accumulator.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/accumulator_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/accumulator_test.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/data.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/data_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/data_test.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/epoch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/epoch.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/epoch_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/epoch_test.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/gradient_noise_scale.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/gradient_noise_scale.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/gradient_noise_scale_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/gradient_noise_scale_test.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/iterator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/iterator.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/parallel.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/parallel_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/parallel_test.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/scaling_rules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/scaling_rules.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/torch/scaling_rules_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/torch/scaling_rules_test.py -------------------------------------------------------------------------------- /adaptdl/adaptdl/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/adaptdl/utils.py -------------------------------------------------------------------------------- /adaptdl/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/requirements.txt -------------------------------------------------------------------------------- /adaptdl/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/adaptdl/setup.py -------------------------------------------------------------------------------- /cli/adaptdl_cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/cli/adaptdl_cli/__init__.py -------------------------------------------------------------------------------- /cli/adaptdl_cli/proxy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/cli/adaptdl_cli/proxy.py -------------------------------------------------------------------------------- /cli/adaptdl_cli/pvc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/cli/adaptdl_cli/pvc.py -------------------------------------------------------------------------------- /cli/adaptdl_cli/tensorboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/cli/adaptdl_cli/tensorboard.py -------------------------------------------------------------------------------- /cli/bin/adaptdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/cli/bin/adaptdl -------------------------------------------------------------------------------- /cli/check_requirements.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/cli/check_requirements.py -------------------------------------------------------------------------------- /cli/requirements.txt: -------------------------------------------------------------------------------- 1 | kubernetes>=9.0.0 2 | python-dateutil>=2.7.3 3 | mitmproxy>=5.2 4 | portpicker>=1.3 5 | -------------------------------------------------------------------------------- /cli/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/cli/setup.py -------------------------------------------------------------------------------- /deploy/eks/adaptdl-eks-cluster-on-demand.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/deploy/eks/adaptdl-eks-cluster-on-demand.yaml -------------------------------------------------------------------------------- /deploy/microk8s/setup_ubuntu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/deploy/microk8s/setup_ubuntu.sh -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | /api 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/README.rst -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/_static/css/custom.css -------------------------------------------------------------------------------- /docs/_static/img/AdaptDLFavicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/_static/img/AdaptDLFavicon.png -------------------------------------------------------------------------------- /docs/_static/img/AdaptDLHorizLogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/_static/img/AdaptDLHorizLogo.png -------------------------------------------------------------------------------- /docs/_static/img/Petuum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/_static/img/Petuum.png -------------------------------------------------------------------------------- /docs/_static/img/autobsz-performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/_static/img/autobsz-performance.png -------------------------------------------------------------------------------- /docs/_static/img/scheduling-performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/_static/img/scheduling-performance.png -------------------------------------------------------------------------------- /docs/adaptdl-pytorch.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/adaptdl-pytorch.rst -------------------------------------------------------------------------------- /docs/artifacts/ca-v1.14.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/artifacts/ca-v1.14.yaml -------------------------------------------------------------------------------- /docs/commandline/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/commandline/index.rst -------------------------------------------------------------------------------- /docs/commandline/simple-job.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/commandline/simple-job.rst -------------------------------------------------------------------------------- /docs/commandline/tensorboard.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/commandline/tensorboard.rst -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/installation/deploy-eks.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/installation/deploy-eks.rst -------------------------------------------------------------------------------- /docs/installation/deploy-microk8s.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/installation/deploy-microk8s.rst -------------------------------------------------------------------------------- /docs/installation/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/installation/index.rst -------------------------------------------------------------------------------- /docs/installation/install-adaptdl.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/installation/install-adaptdl.rst -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/ray/aws_ray_adaptdl.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/ray/aws_ray_adaptdl.rst -------------------------------------------------------------------------------- /docs/ray/tune_tutorial.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/ray/tune_tutorial.rst -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/requirements.txt -------------------------------------------------------------------------------- /docs/standalone-training.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/standalone-training.rst -------------------------------------------------------------------------------- /docs/tutorial/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/tutorial/Dockerfile -------------------------------------------------------------------------------- /docs/tutorial/mnist_original.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/tutorial/mnist_original.py -------------------------------------------------------------------------------- /docs/tutorial/mnist_step_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/tutorial/mnist_step_1.py -------------------------------------------------------------------------------- /docs/tutorial/mnist_step_2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/tutorial/mnist_step_2.py -------------------------------------------------------------------------------- /docs/tutorial/mnist_step_3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/tutorial/mnist_step_3.py -------------------------------------------------------------------------------- /docs/tutorial/mnist_step_4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/tutorial/mnist_step_4.py -------------------------------------------------------------------------------- /docs/tutorial/mnist_step_5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/tutorial/mnist_step_5.py -------------------------------------------------------------------------------- /docs/tutorial/mnist_tensorboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/docs/tutorial/mnist_tensorboard.py -------------------------------------------------------------------------------- /examples/BERT/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/BERT/Dockerfile -------------------------------------------------------------------------------- /examples/BERT/adaptdljob.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/BERT/adaptdljob.yaml -------------------------------------------------------------------------------- /examples/BERT/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/BERT/data.py -------------------------------------------------------------------------------- /examples/BERT/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/BERT/metrics.py -------------------------------------------------------------------------------- /examples/BERT/mlm_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/BERT/mlm_task.py -------------------------------------------------------------------------------- /examples/BERT/mlm_task_adaptdl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/BERT/mlm_task_adaptdl.py -------------------------------------------------------------------------------- /examples/BERT/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/BERT/model.py -------------------------------------------------------------------------------- /examples/BERT/ns_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/BERT/ns_task.py -------------------------------------------------------------------------------- /examples/BERT/ns_task_adaptdl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/BERT/ns_task_adaptdl.py -------------------------------------------------------------------------------- /examples/BERT/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/BERT/requirements.txt -------------------------------------------------------------------------------- /examples/BERT/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/BERT/utils.py -------------------------------------------------------------------------------- /examples/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/Dockerfile -------------------------------------------------------------------------------- /examples/NCF/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/NCF/data_utils.py -------------------------------------------------------------------------------- /examples/NCF/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/NCF/evaluate.py -------------------------------------------------------------------------------- /examples/NCF/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/NCF/main.py -------------------------------------------------------------------------------- /examples/NCF/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/NCF/model.py -------------------------------------------------------------------------------- /examples/dcgan/dcgan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/dcgan/dcgan.py -------------------------------------------------------------------------------- /examples/linear_regression/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/linear_regression/main.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/LICENSE -------------------------------------------------------------------------------- /examples/pytorch-cifar/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/README.md -------------------------------------------------------------------------------- /examples/pytorch-cifar/image.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/image.txt -------------------------------------------------------------------------------- /examples/pytorch-cifar/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/main.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/__init__.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/densenet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/densenet.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/dpn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/dpn.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/googlenet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/googlenet.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/lenet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/lenet.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/mobilenet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/mobilenet.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/mobilenetv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/mobilenetv2.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/pnasnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/pnasnet.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/preact_resnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/preact_resnet.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/resnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/resnet.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/resnext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/resnext.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/senet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/senet.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/shufflenet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/shufflenet.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/shufflenetv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/shufflenetv2.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/models/vgg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/models/vgg.py -------------------------------------------------------------------------------- /examples/pytorch-cifar/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/requirements.txt -------------------------------------------------------------------------------- /examples/pytorch-cifar/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/pytorch-cifar/utils.py -------------------------------------------------------------------------------- /examples/ray/aws/cluster.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/ray/aws/cluster.yaml -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/requirements.txt -------------------------------------------------------------------------------- /examples/transformer/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/transformer/transformer.py -------------------------------------------------------------------------------- /examples/transformer/transformer_multireplica_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/examples/transformer/transformer_multireplica_local.py -------------------------------------------------------------------------------- /grafana/dashboard.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/grafana/dashboard.json -------------------------------------------------------------------------------- /helm/adaptdl-sched/.gitignore: -------------------------------------------------------------------------------- 1 | charts/*.tgz 2 | requirements.lock 3 | -------------------------------------------------------------------------------- /helm/adaptdl-sched/.helmignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/.helmignore -------------------------------------------------------------------------------- /helm/adaptdl-sched/Chart.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/Chart.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/requirements.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/requirements.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/templates/adaptdl-cluster-role-binding.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/templates/adaptdl-cluster-role-binding.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/templates/adaptdl-cluster-role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/templates/adaptdl-cluster-role.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/templates/adaptdl-crd.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/templates/adaptdl-crd.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/templates/adaptdl-sa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/templates/adaptdl-sa.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/templates/adaptdl-sched.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/templates/adaptdl-sched.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/templates/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/templates/config.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/templates/metrics-service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/templates/metrics-service.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/templates/metrics-servicemonitor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/templates/metrics-servicemonitor.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/templates/supervisor-service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/templates/supervisor-service.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/templates/validator-deployment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/templates/validator-deployment.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/templates/validator-service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/templates/validator-service.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/templates/validator-webhook.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/templates/validator-webhook.yaml -------------------------------------------------------------------------------- /helm/adaptdl-sched/values.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/helm/adaptdl-sched/values.yaml -------------------------------------------------------------------------------- /ray/adaptdl_ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/__init__.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/adaptdl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/adaptdl/__init__.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/adaptdl/adaptdl_allocator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/adaptdl/adaptdl_allocator.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/adaptdl/adaptdl_job_mixin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/adaptdl/adaptdl_job_mixin.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/adaptdl/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/adaptdl/config.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/adaptdl/config_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/adaptdl/config_test.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/adaptdl/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/adaptdl/utils.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/aws/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/aws/README.rst -------------------------------------------------------------------------------- /ray/adaptdl_ray/aws/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/aws/__init__.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/aws/_example_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/aws/_example_worker.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/aws/_test_setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/aws/_test_setup.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/aws/controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/aws/controller.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/aws/launch_job.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/aws/launch_job.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/aws/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/aws/optimizer.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/aws/test_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/aws/test_controller.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/aws/test_controller_mocked_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/aws/test_controller_mocked_ray.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/aws/test_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/aws/test_worker.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/aws/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/aws/utils.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/aws/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/aws/worker.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/examples/hyperopt_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/examples/hyperopt_example.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/examples/hyperopt_example_baseline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/examples/hyperopt_example_baseline.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/examples/tune_proposal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/examples/tune_proposal.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/tests/test_trial_sched.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/tests/test_trial_sched.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/tune/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/tune/__init__.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/tune/adaptdl_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/tune/adaptdl_patch.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/tune/adaptdl_trainable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/tune/adaptdl_trainable.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/tune/adaptdl_trainable_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/tune/adaptdl_trainable_test.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/tune/adaptdl_trial.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/tune/adaptdl_trial.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/tune/adaptdl_trial_sched.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/tune/adaptdl_trial_sched.py -------------------------------------------------------------------------------- /ray/adaptdl_ray/tune/adaptdl_trial_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/adaptdl_ray/tune/adaptdl_trial_test.py -------------------------------------------------------------------------------- /ray/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/requirements.txt -------------------------------------------------------------------------------- /ray/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/ray/setup.py -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/requirements-test.txt -------------------------------------------------------------------------------- /sched/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/Dockerfile -------------------------------------------------------------------------------- /sched/adaptdl_sched/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sched/adaptdl_sched/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/__main__.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/allocator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/allocator.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/cluster_expander.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/cluster_expander.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/config.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/controller.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/k8s_templates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/k8s_templates.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/policy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sched/adaptdl_sched/policy/non_preemptible_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/policy/non_preemptible_test.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/policy/pollux.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/policy/pollux.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/policy/pollux_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/policy/pollux_test.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/policy/speedup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/policy/speedup.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/policy/speedup_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/policy/speedup_test.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/policy/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/policy/utils.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/resources.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/resources.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/resources_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/resources_test.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/supervisor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/supervisor.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/utils.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/validator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/validator.py -------------------------------------------------------------------------------- /sched/adaptdl_sched/validator_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/adaptdl_sched/validator_test.py -------------------------------------------------------------------------------- /sched/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/requirements.txt -------------------------------------------------------------------------------- /sched/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/sched/setup.py -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | #WARNINGS 2 | test-localMode1.sh does not work on a mac 3 | -------------------------------------------------------------------------------- /tests/long-workload/bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/long-workload/bert.sh -------------------------------------------------------------------------------- /tests/long-workload/dcgan.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/long-workload/dcgan.sh -------------------------------------------------------------------------------- /tests/long-workload/densenet121-cifar10.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/long-workload/densenet121-cifar10.sh -------------------------------------------------------------------------------- /tests/long-workload/lr-v2-elastic-cpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/long-workload/lr-v2-elastic-cpu.sh -------------------------------------------------------------------------------- /tests/long-workload/ncf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/long-workload/ncf.sh -------------------------------------------------------------------------------- /tests/long-workload/ncf_accumulation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/long-workload/ncf_accumulation.sh -------------------------------------------------------------------------------- /tests/long-workload/resnet18-cifar10-elastic-min-replicas.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/long-workload/resnet18-cifar10-elastic-min-replicas.sh -------------------------------------------------------------------------------- /tests/long-workload/resnet18-cifar10-elastic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/long-workload/resnet18-cifar10-elastic.sh -------------------------------------------------------------------------------- /tests/long-workload/resnet18-cifar10-inelastic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/long-workload/resnet18-cifar10-inelastic.sh -------------------------------------------------------------------------------- /tests/long-workload/resnet18-cifar10-mixed-precision.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/long-workload/resnet18-cifar10-mixed-precision.sh -------------------------------------------------------------------------------- /tests/long-workload/transformer-wikitext2-elastic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/long-workload/transformer-wikitext2-elastic.sh -------------------------------------------------------------------------------- /tests/long-workload/transformer-wikitext2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/long-workload/transformer-wikitext2.sh -------------------------------------------------------------------------------- /tests/short-workload/densenet121-cifar10.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/short-workload/densenet121-cifar10.sh -------------------------------------------------------------------------------- /tests/short-workload/resnet18-cifar10.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/short-workload/resnet18-cifar10.sh -------------------------------------------------------------------------------- /tests/test-localmode2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/test-localmode2.sh -------------------------------------------------------------------------------- /tests/testworkload.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tests/testworkload.sh -------------------------------------------------------------------------------- /tutorial/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tutorial/Dockerfile -------------------------------------------------------------------------------- /tutorial/adaptdljob.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tutorial/adaptdljob.yaml -------------------------------------------------------------------------------- /tutorial/mnist_original.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tutorial/mnist_original.py -------------------------------------------------------------------------------- /tutorial/mnist_step_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tutorial/mnist_step_1.py -------------------------------------------------------------------------------- /tutorial/mnist_step_2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tutorial/mnist_step_2.py -------------------------------------------------------------------------------- /tutorial/mnist_step_3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tutorial/mnist_step_3.py -------------------------------------------------------------------------------- /tutorial/mnist_step_4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tutorial/mnist_step_4.py -------------------------------------------------------------------------------- /tutorial/mnist_step_5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tutorial/mnist_step_5.py -------------------------------------------------------------------------------- /tutorial/mnist_tensorboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tutorial/mnist_tensorboard.py -------------------------------------------------------------------------------- /tutorial/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tutorial/run.sh -------------------------------------------------------------------------------- /tutorial/run_tensorboard.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petuum/adaptdl/HEAD/tutorial/run_tensorboard.sh --------------------------------------------------------------------------------