├── .gitignore ├── LICENSE ├── README.md ├── collective-in-ray ├── RFC-202011-collective-in-ray.md ├── arch-alternative.png └── arch.png ├── collective ├── __init__.py ├── collective.py ├── collective_group │ ├── __init__.py │ ├── base_collective_group.py │ ├── mpi_collective_group.py │ ├── nccl_collective_group.py │ └── nccl_util.py ├── requirements.txt └── scratch │ ├── interface.py │ └── test.py ├── cupy ├── test_cupy_distributed.py ├── test_cupy_pytorch_interoperatability.py └── test_cupy_single_process.py ├── pytorch └── microbenchmark │ ├── allreduce │ ├── pytorch_dpp_main.py │ ├── pytorch_dpp_train_multiple_node.sh │ └── pytorch_dpp_train_single_node.sh │ ├── primitives │ ├── pytorch │ │ ├── pytorch_benchmarks.py │ │ └── run_pytorch_benchmarks.py │ ├── ray │ │ ├── README.md │ │ ├── auto_ray_benchmarks.py │ │ ├── ray_benchmarks.py │ │ └── utils.py │ ├── results │ │ ├── README.md │ │ ├── distributed │ │ │ ├── pytorch-microbenchmark-gloo.csv │ │ │ ├── pytorch-microbenchmark-nccl.csv │ │ │ ├── ray-microbenchmark-cpu.csv │ │ │ └── ray-microbenchmark-gpu.csv │ │ ├── multigpu │ │ │ ├── pytorch-microbenchmark-gloo.csv │ │ │ ├── pytorch-microbenchmark-nccl.csv │ │ │ ├── ray-microbenchmark-cpu.csv │ │ │ └── ray-microbenchmark-gpu.csv │ │ ├── plots │ │ │ ├── distributed-cpu-allgather-16.png │ │ │ ├── distributed-cpu-allgather-2.png │ │ │ ├── distributed-cpu-allgather-4.png │ │ │ ├── distributed-cpu-allgather-8.png │ │ │ ├── distributed-cpu-allreduce-16.png │ │ │ ├── distributed-cpu-allreduce-2.png │ │ │ ├── distributed-cpu-allreduce-4.png │ │ │ ├── distributed-cpu-allreduce-8.png │ │ │ ├── distributed-cpu-broadcast-16.png │ │ │ ├── distributed-cpu-broadcast-2.png │ │ │ ├── distributed-cpu-broadcast-4.png │ │ │ ├── distributed-cpu-broadcast-8.png │ │ │ ├── distributed-cpu-gather-16.png │ │ │ ├── distributed-cpu-gather-2.png │ │ │ ├── distributed-cpu-gather-4.png │ │ │ ├── distributed-cpu-gather-8.png │ │ │ ├── distributed-cpu-reduce-16.png │ │ │ ├── distributed-cpu-reduce-2.png │ │ │ ├── distributed-cpu-reduce-4.png │ │ │ ├── distributed-cpu-reduce-8.png │ │ │ ├── distributed-cpu-sendrecv-16.png │ │ │ ├── distributed-cpu-sendrecv-2.png │ │ │ ├── distributed-cpu-sendrecv-4.png │ │ │ ├── distributed-cpu-sendrecv-8.png │ │ │ ├── distributed-gpu-allgather-16.png │ │ │ ├── distributed-gpu-allgather-2.png │ │ │ ├── distributed-gpu-allgather-4.png │ │ │ ├── distributed-gpu-allgather-8.png │ │ │ ├── distributed-gpu-allreduce-16.png │ │ │ ├── distributed-gpu-allreduce-2.png │ │ │ ├── distributed-gpu-allreduce-4.png │ │ │ ├── distributed-gpu-allreduce-8.png │ │ │ ├── distributed-gpu-broadcast-16.png │ │ │ ├── distributed-gpu-broadcast-2.png │ │ │ ├── distributed-gpu-broadcast-4.png │ │ │ ├── distributed-gpu-broadcast-8.png │ │ │ ├── distributed-gpu-reduce-16.png │ │ │ ├── distributed-gpu-reduce-2.png │ │ │ ├── distributed-gpu-reduce-4.png │ │ │ ├── distributed-gpu-reduce-8.png │ │ │ ├── multigpu-gpu-allgather-2.png │ │ │ ├── multigpu-gpu-allreduce-2.png │ │ │ ├── multigpu-gpu-broadcast-2.png │ │ │ └── multigpu-gpu-reduce-2.png │ │ └── visualize.py │ └── ucx │ │ └── README.md │ ├── pytorch-ps │ └── rpc_parameter_server.py │ └── rayps │ ├── __init__.py │ ├── ps.py │ ├── test.py │ └── train.py └── scripts ├── restart_ray_lambda.sh └── restart_ray_orca.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/README.md -------------------------------------------------------------------------------- /collective-in-ray/RFC-202011-collective-in-ray.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/collective-in-ray/RFC-202011-collective-in-ray.md -------------------------------------------------------------------------------- /collective-in-ray/arch-alternative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/collective-in-ray/arch-alternative.png -------------------------------------------------------------------------------- /collective-in-ray/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/collective-in-ray/arch.png -------------------------------------------------------------------------------- /collective/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/collective/__init__.py -------------------------------------------------------------------------------- /collective/collective.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/collective/collective.py -------------------------------------------------------------------------------- /collective/collective_group/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/collective/collective_group/__init__.py -------------------------------------------------------------------------------- /collective/collective_group/base_collective_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/collective/collective_group/base_collective_group.py -------------------------------------------------------------------------------- /collective/collective_group/mpi_collective_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/collective/collective_group/mpi_collective_group.py -------------------------------------------------------------------------------- /collective/collective_group/nccl_collective_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/collective/collective_group/nccl_collective_group.py -------------------------------------------------------------------------------- /collective/collective_group/nccl_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/collective/collective_group/nccl_util.py -------------------------------------------------------------------------------- /collective/requirements.txt: -------------------------------------------------------------------------------- 1 | cupy-cuda100 -------------------------------------------------------------------------------- /collective/scratch/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/collective/scratch/interface.py -------------------------------------------------------------------------------- /collective/scratch/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/collective/scratch/test.py -------------------------------------------------------------------------------- /cupy/test_cupy_distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/cupy/test_cupy_distributed.py -------------------------------------------------------------------------------- /cupy/test_cupy_pytorch_interoperatability.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/cupy/test_cupy_pytorch_interoperatability.py -------------------------------------------------------------------------------- /cupy/test_cupy_single_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/cupy/test_cupy_single_process.py -------------------------------------------------------------------------------- /pytorch/microbenchmark/allreduce/pytorch_dpp_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/allreduce/pytorch_dpp_main.py -------------------------------------------------------------------------------- /pytorch/microbenchmark/allreduce/pytorch_dpp_train_multiple_node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/allreduce/pytorch_dpp_train_multiple_node.sh -------------------------------------------------------------------------------- /pytorch/microbenchmark/allreduce/pytorch_dpp_train_single_node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/allreduce/pytorch_dpp_train_single_node.sh -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/pytorch/pytorch_benchmarks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/pytorch/pytorch_benchmarks.py -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/pytorch/run_pytorch_benchmarks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/pytorch/run_pytorch_benchmarks.py -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/ray/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/ray/README.md -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/ray/auto_ray_benchmarks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/ray/auto_ray_benchmarks.py -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/ray/ray_benchmarks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/ray/ray_benchmarks.py -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/ray/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/ray/utils.py -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/README.md -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/distributed/pytorch-microbenchmark-gloo.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/distributed/pytorch-microbenchmark-gloo.csv -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/distributed/pytorch-microbenchmark-nccl.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/distributed/pytorch-microbenchmark-nccl.csv -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/distributed/ray-microbenchmark-cpu.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/distributed/ray-microbenchmark-cpu.csv -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/distributed/ray-microbenchmark-gpu.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/distributed/ray-microbenchmark-gpu.csv -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/multigpu/pytorch-microbenchmark-gloo.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/multigpu/pytorch-microbenchmark-gloo.csv -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/multigpu/pytorch-microbenchmark-nccl.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/multigpu/pytorch-microbenchmark-nccl.csv -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/multigpu/ray-microbenchmark-cpu.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/multigpu/ray-microbenchmark-cpu.csv -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/multigpu/ray-microbenchmark-gpu.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/multigpu/ray-microbenchmark-gpu.csv -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allgather-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allgather-16.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allgather-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allgather-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allgather-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allgather-4.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allgather-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allgather-8.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allreduce-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allreduce-16.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allreduce-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allreduce-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allreduce-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allreduce-4.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allreduce-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-allreduce-8.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-broadcast-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-broadcast-16.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-broadcast-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-broadcast-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-broadcast-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-broadcast-4.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-broadcast-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-broadcast-8.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-gather-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-gather-16.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-gather-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-gather-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-gather-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-gather-4.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-gather-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-gather-8.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-reduce-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-reduce-16.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-reduce-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-reduce-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-reduce-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-reduce-4.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-reduce-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-reduce-8.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-sendrecv-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-sendrecv-16.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-sendrecv-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-sendrecv-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-sendrecv-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-sendrecv-4.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-cpu-sendrecv-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-cpu-sendrecv-8.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allgather-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allgather-16.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allgather-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allgather-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allgather-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allgather-4.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allgather-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allgather-8.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allreduce-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allreduce-16.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allreduce-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allreduce-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allreduce-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allreduce-4.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allreduce-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-allreduce-8.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-broadcast-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-broadcast-16.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-broadcast-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-broadcast-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-broadcast-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-broadcast-4.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-broadcast-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-broadcast-8.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-reduce-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-reduce-16.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-reduce-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-reduce-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-reduce-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-reduce-4.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/distributed-gpu-reduce-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/distributed-gpu-reduce-8.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/multigpu-gpu-allgather-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/multigpu-gpu-allgather-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/multigpu-gpu-allreduce-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/multigpu-gpu-allreduce-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/multigpu-gpu-broadcast-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/multigpu-gpu-broadcast-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/plots/multigpu-gpu-reduce-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/plots/multigpu-gpu-reduce-2.png -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/results/visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/results/visualize.py -------------------------------------------------------------------------------- /pytorch/microbenchmark/primitives/ucx/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/primitives/ucx/README.md -------------------------------------------------------------------------------- /pytorch/microbenchmark/pytorch-ps/rpc_parameter_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/pytorch-ps/rpc_parameter_server.py -------------------------------------------------------------------------------- /pytorch/microbenchmark/rayps/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytorch/microbenchmark/rayps/ps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/rayps/ps.py -------------------------------------------------------------------------------- /pytorch/microbenchmark/rayps/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/rayps/test.py -------------------------------------------------------------------------------- /pytorch/microbenchmark/rayps/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/pytorch/microbenchmark/rayps/train.py -------------------------------------------------------------------------------- /scripts/restart_ray_lambda.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/scripts/restart_ray_lambda.sh -------------------------------------------------------------------------------- /scripts/restart_ray_orca.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhisbug/ray-scalable-ml-design/HEAD/scripts/restart_ray_orca.sh --------------------------------------------------------------------------------