├── .github ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── OWNERS ├── README.md ├── ROADMAP.md ├── deploy ├── Dockerfile ├── FindNCCL.cmake ├── ftlib-rbac.yaml └── ftlib-test.yaml ├── docs ├── design │ ├── communication_library.md │ ├── consensus.md │ └── ftlib.md └── imgs │ ├── ftlib.png │ ├── ftlib_flow.png │ └── role_in_distributed.png ├── ftlib ├── __init__.py ├── commlib │ ├── basic_commlib.py │ ├── commlib_status.py │ ├── gloo │ │ ├── CMakeLists.txt │ │ ├── Dockerfile │ │ ├── __init__.py │ │ ├── impl.py │ │ └── src │ │ │ └── gloo_lib.cxx │ ├── nccl │ │ ├── CMakeLists.txt │ │ ├── Dockerfile │ │ ├── Dockerfile.cn │ │ ├── __init__.py │ │ ├── impl.py │ │ └── src │ │ │ └── fault_tolerant_lib.cxx │ └── pytorch │ │ ├── __init__.py │ │ └── impl.py ├── consensus │ ├── basic_consensus.py │ ├── consensus_status.py │ ├── gossip │ │ ├── __init__.py │ │ ├── gen_shared_lib.sh │ │ ├── impl.py │ │ ├── memberlist │ │ │ ├── go.mod │ │ │ ├── go.sum │ │ │ └── main.go │ │ └── test.py │ └── shared_storage │ │ ├── __init__.py │ │ ├── impl.py │ │ ├── master_server.py │ │ ├── proto │ │ ├── __init__.py │ │ ├── communicate.proto │ │ └── gen_grpc.sh │ │ └── utils.py ├── ftlib_status.py ├── impl.py ├── rank_assign_scheme.py └── utils │ └── kubernetes │ ├── __init__.py │ └── svc.py ├── requirements-dev.txt ├── requirements.txt ├── scripts └── setup_k8s_env.sh ├── setup.py └── test ├── deprecated-tests ├── generic_test.py ├── pytorch_test.py └── tricky-data │ ├── data.py │ └── pytorch-gossip-tricky-data.py ├── kubernetes ├── Dockerfile ├── ftlib-deployment.yaml └── script │ ├── main.py │ └── test.py └── pytorch-gossip └── pytorch-gossip_test.py /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/.github/ISSUE_TEMPLATE.md -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/.gitignore -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/.isort.cfg -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/.travis.yml -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/LICENSE -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/OWNERS -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/README.md -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ROADMAP.md -------------------------------------------------------------------------------- /deploy/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/deploy/Dockerfile -------------------------------------------------------------------------------- /deploy/FindNCCL.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/deploy/FindNCCL.cmake -------------------------------------------------------------------------------- /deploy/ftlib-rbac.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/deploy/ftlib-rbac.yaml -------------------------------------------------------------------------------- /deploy/ftlib-test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/deploy/ftlib-test.yaml -------------------------------------------------------------------------------- /docs/design/communication_library.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/docs/design/communication_library.md -------------------------------------------------------------------------------- /docs/design/consensus.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/docs/design/consensus.md -------------------------------------------------------------------------------- /docs/design/ftlib.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/docs/design/ftlib.md -------------------------------------------------------------------------------- /docs/imgs/ftlib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/docs/imgs/ftlib.png -------------------------------------------------------------------------------- /docs/imgs/ftlib_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/docs/imgs/ftlib_flow.png -------------------------------------------------------------------------------- /docs/imgs/role_in_distributed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/docs/imgs/role_in_distributed.png -------------------------------------------------------------------------------- /ftlib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/__init__.py -------------------------------------------------------------------------------- /ftlib/commlib/basic_commlib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/basic_commlib.py -------------------------------------------------------------------------------- /ftlib/commlib/commlib_status.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/commlib_status.py -------------------------------------------------------------------------------- /ftlib/commlib/gloo/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/gloo/CMakeLists.txt -------------------------------------------------------------------------------- /ftlib/commlib/gloo/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/gloo/Dockerfile -------------------------------------------------------------------------------- /ftlib/commlib/gloo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/gloo/__init__.py -------------------------------------------------------------------------------- /ftlib/commlib/gloo/impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/gloo/impl.py -------------------------------------------------------------------------------- /ftlib/commlib/gloo/src/gloo_lib.cxx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/gloo/src/gloo_lib.cxx -------------------------------------------------------------------------------- /ftlib/commlib/nccl/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/nccl/CMakeLists.txt -------------------------------------------------------------------------------- /ftlib/commlib/nccl/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/nccl/Dockerfile -------------------------------------------------------------------------------- /ftlib/commlib/nccl/Dockerfile.cn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/nccl/Dockerfile.cn -------------------------------------------------------------------------------- /ftlib/commlib/nccl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/nccl/__init__.py -------------------------------------------------------------------------------- /ftlib/commlib/nccl/impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/nccl/impl.py -------------------------------------------------------------------------------- /ftlib/commlib/nccl/src/fault_tolerant_lib.cxx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/nccl/src/fault_tolerant_lib.cxx -------------------------------------------------------------------------------- /ftlib/commlib/pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/pytorch/__init__.py -------------------------------------------------------------------------------- /ftlib/commlib/pytorch/impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/commlib/pytorch/impl.py -------------------------------------------------------------------------------- /ftlib/consensus/basic_consensus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/basic_consensus.py -------------------------------------------------------------------------------- /ftlib/consensus/consensus_status.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/consensus_status.py -------------------------------------------------------------------------------- /ftlib/consensus/gossip/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/gossip/__init__.py -------------------------------------------------------------------------------- /ftlib/consensus/gossip/gen_shared_lib.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/gossip/gen_shared_lib.sh -------------------------------------------------------------------------------- /ftlib/consensus/gossip/impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/gossip/impl.py -------------------------------------------------------------------------------- /ftlib/consensus/gossip/memberlist/go.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/gossip/memberlist/go.mod -------------------------------------------------------------------------------- /ftlib/consensus/gossip/memberlist/go.sum: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/gossip/memberlist/go.sum -------------------------------------------------------------------------------- /ftlib/consensus/gossip/memberlist/main.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/gossip/memberlist/main.go -------------------------------------------------------------------------------- /ftlib/consensus/gossip/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/gossip/test.py -------------------------------------------------------------------------------- /ftlib/consensus/shared_storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/shared_storage/__init__.py -------------------------------------------------------------------------------- /ftlib/consensus/shared_storage/impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/shared_storage/impl.py -------------------------------------------------------------------------------- /ftlib/consensus/shared_storage/master_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/shared_storage/master_server.py -------------------------------------------------------------------------------- /ftlib/consensus/shared_storage/proto/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ftlib/consensus/shared_storage/proto/communicate.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/shared_storage/proto/communicate.proto -------------------------------------------------------------------------------- /ftlib/consensus/shared_storage/proto/gen_grpc.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/shared_storage/proto/gen_grpc.sh -------------------------------------------------------------------------------- /ftlib/consensus/shared_storage/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/consensus/shared_storage/utils.py -------------------------------------------------------------------------------- /ftlib/ftlib_status.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/ftlib_status.py -------------------------------------------------------------------------------- /ftlib/impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/impl.py -------------------------------------------------------------------------------- /ftlib/rank_assign_scheme.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/rank_assign_scheme.py -------------------------------------------------------------------------------- /ftlib/utils/kubernetes/__init__.py: -------------------------------------------------------------------------------- 1 | from ftlib.utils.kubernetes.svc import get_peer_set # noqa: F401 2 | -------------------------------------------------------------------------------- /ftlib/utils/kubernetes/svc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/ftlib/utils/kubernetes/svc.py -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pre-commit 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/setup_k8s_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/scripts/setup_k8s_env.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/setup.py -------------------------------------------------------------------------------- /test/deprecated-tests/generic_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/test/deprecated-tests/generic_test.py -------------------------------------------------------------------------------- /test/deprecated-tests/pytorch_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/test/deprecated-tests/pytorch_test.py -------------------------------------------------------------------------------- /test/deprecated-tests/tricky-data/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/test/deprecated-tests/tricky-data/data.py -------------------------------------------------------------------------------- /test/deprecated-tests/tricky-data/pytorch-gossip-tricky-data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/test/deprecated-tests/tricky-data/pytorch-gossip-tricky-data.py -------------------------------------------------------------------------------- /test/kubernetes/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/test/kubernetes/Dockerfile -------------------------------------------------------------------------------- /test/kubernetes/ftlib-deployment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/test/kubernetes/ftlib-deployment.yaml -------------------------------------------------------------------------------- /test/kubernetes/script/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/test/kubernetes/script/main.py -------------------------------------------------------------------------------- /test/kubernetes/script/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/test/kubernetes/script/test.py -------------------------------------------------------------------------------- /test/pytorch-gossip/pytorch-gossip_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kleveross/ftlib/HEAD/test/pytorch-gossip/pytorch-gossip_test.py --------------------------------------------------------------------------------