├── layouts ├── shortcodes │ ├── year.html │ ├── github.html │ ├── mermaid.html │ ├── cf-download.html │ ├── cf-launch.html │ ├── surveymonkey.html │ ├── tab.html │ ├── ghcontributors.html │ └── tabs.html ├── partials │ ├── custom-footer.html │ ├── favicon.html │ ├── google.html │ ├── menu-footer.html │ ├── logo.html │ ├── footer.html │ └── header.html └── 404.html ├── archetypes └── default.md ├── .gitmodules ├── static ├── images │ ├── intro │ │ ├── home.png │ │ ├── mlinfra.png │ │ ├── approaches.png │ │ ├── challenges.png │ │ ├── containers.png │ │ ├── how_it_runs.png │ │ ├── containers_ecr.png │ │ ├── forward_backward.png │ │ └── parallel_distributed.png │ ├── eks │ │ ├── workflow.png │ │ ├── create_repo.png │ │ ├── verify_eks.png │ │ ├── eksctl_launch.png │ │ ├── get_container.png │ │ ├── push_commands.png │ │ ├── subnet_image.png │ │ └── job_yaml_container.png │ ├── setup │ │ ├── go_to_IAM.png │ │ ├── admin_attach.png │ │ ├── attach_policy.png │ │ ├── launch_jupyter.png │ │ ├── notebook_iam.png │ │ ├── setup_notebook.png │ │ ├── launch_terminal.png │ │ ├── setup_aws_console.png │ │ ├── setup_fill_notebook.png │ │ └── setup_create_notebook.png │ ├── cleanup │ │ └── sm_cleanup.png │ ├── sagemaker │ │ ├── workflow.png │ │ ├── aws_console.png │ │ ├── sm_notebook.png │ │ └── tensorboard.png │ └── convert_script │ │ ├── single_instance.png │ │ └── distributed_script.png ├── 640px-Amazon_Web_Services_Logo.svg.png ├── tf-world-distributed-training-workshop.pdf ├── AWS-Logo.svg ├── Amazon_Web_Services_Logo.svg └── css │ ├── theme-mine.css │ └── jquery-ui.min.css ├── notebooks ├── part-3-kubernetes │ ├── Dockerfile.cpu │ ├── Dockerfile.gpu │ ├── specs │ │ ├── claim-fsx-s3.yaml │ │ ├── storage-class-fsx-s3-template.yaml │ │ ├── fsx_lustre_policy.json │ │ ├── eks_tf_training_job-cpu.yaml │ │ └── eks_tf_training_job-gpu.yaml │ ├── cpu_eks_cluster.sh │ ├── gpu_eks_cluster.sh │ └── code │ │ ├── model_def.py │ │ └── cifar10-multi-gpu-horovod-k8s.py ├── part-1-horovod │ ├── model_def.py │ ├── cifar10-single-instance.ipynb │ └── cifar10-distributed.ipynb ├── part-2-sagemaker │ ├── code │ │ ├── model_def.py │ │ └── cifar10-multi-gpu-horovod-sagemaker.py │ └── cifar10-sagemaker-distributed.ipynb └── generate_cifar10_tfrecords.py ├── content ├── cleanup │ ├── _index.md │ └── clean_resources.md ├── intro │ ├── addressing_challenges-1.md │ ├── challenges_solution.md │ ├── addressing_challenges.md │ ├── _index.md │ └── horovod.md ├── setup │ ├── _index.md │ ├── download_workshop.md │ ├── add_admin_policy.md │ └── sm_jupyter_instance.md ├── update_code_dist_training │ ├── _index.md │ ├── problem_setup.md │ ├── prepare_dataset.md │ ├── single_instance_script.md │ └── distributed_training_script.md ├── sagemaker_dist_training │ ├── _index.md │ ├── monitoring_results.md │ ├── workflow.md │ ├── training_scrip_updates.md │ └── sagemaker_training.md ├── _index.md └── kubernetes_dist_training │ ├── _index.md │ ├── verify_cluster.md │ ├── install_cli.md │ ├── setup_eks.md │ ├── workflow.md │ ├── submit_job.md │ ├── install_kubeflow.md │ ├── build_container.md │ └── fsx_lustre.md ├── README.md ├── config.toml └── .gitignore /layouts/shortcodes/year.html: -------------------------------------------------------------------------------- 1 | {{ .Page.Now.Year }} 2 | 3 | -------------------------------------------------------------------------------- /layouts/partials/custom-footer.html: -------------------------------------------------------------------------------- 1 | {{ template "_internal/google_analytics.html" . }} 2 | -------------------------------------------------------------------------------- /layouts/shortcodes/github.html: -------------------------------------------------------------------------------- 1 | {{ .Get 0 }} 2 | -------------------------------------------------------------------------------- /archetypes/default.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "{{ replace .Name "-" " " | title }}" 3 | date: {{ .Date }} 4 | --- 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "themes/learn"] 2 | path = themes/learn 3 | url = https://github.com/matcornic/hugo-theme-learn.git 4 | -------------------------------------------------------------------------------- /static/images/intro/home.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/intro/home.png -------------------------------------------------------------------------------- /static/images/eks/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/eks/workflow.png -------------------------------------------------------------------------------- /static/images/eks/create_repo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/eks/create_repo.png -------------------------------------------------------------------------------- /static/images/eks/verify_eks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/eks/verify_eks.png -------------------------------------------------------------------------------- /static/images/intro/mlinfra.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/intro/mlinfra.png -------------------------------------------------------------------------------- /static/images/setup/go_to_IAM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/setup/go_to_IAM.png -------------------------------------------------------------------------------- /static/images/eks/eksctl_launch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/eks/eksctl_launch.png -------------------------------------------------------------------------------- /static/images/eks/get_container.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/eks/get_container.png -------------------------------------------------------------------------------- /static/images/eks/push_commands.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/eks/push_commands.png -------------------------------------------------------------------------------- /static/images/eks/subnet_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/eks/subnet_image.png -------------------------------------------------------------------------------- /static/images/intro/approaches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/intro/approaches.png -------------------------------------------------------------------------------- /static/images/intro/challenges.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/intro/challenges.png -------------------------------------------------------------------------------- /static/images/intro/containers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/intro/containers.png -------------------------------------------------------------------------------- /static/images/intro/how_it_runs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/distributed-training-workshop/HEAD/static/images/intro/how_it_runs.png -------------------------------------------------------------------------------- /layouts/shortcodes/mermaid.html: -------------------------------------------------------------------------------- 1 |
47 |
48 |{{T "message-404"}}
49 | 50 | 51 |