├── .bumpversion.cfg ├── .circleci └── config.yml ├── .github ├── CODEOWNERS ├── commit_template.txt ├── pull_request_template.md └── semantic.yml ├── .gitignore ├── Dockerfile-base-cpu ├── Dockerfile-base-cuda ├── Dockerfile-default-cpu ├── Dockerfile-default-cuda ├── Dockerfile-default-rocm ├── Dockerfile-infinityhub-hpc ├── Dockerfile-infinityhub-pytorch ├── Dockerfile-ngc-hpc ├── Dockerfile-pytorch-ngc ├── Dockerfile-tensorflow-ngc ├── LICENSE ├── Makefile ├── README.md ├── VERSION ├── cloud ├── ansible.cfg ├── environments-packer.json ├── environments-playbook.yml ├── post-process.sh └── roles │ ├── aws-fs │ └── tasks │ │ └── main.yml │ ├── docker │ └── tasks │ │ └── main.yml │ ├── environments │ └── tasks │ │ └── main.yml │ ├── gpumon │ ├── defaults │ │ └── main.yml │ ├── files │ │ └── gpumon │ ├── tasks │ │ └── main.yml │ └── templates │ │ ├── 10-namespace.conf │ │ ├── gpumon.service │ │ └── gpumon.timer │ ├── journald-cloudwatch │ ├── defaults │ │ └── main.yml │ ├── tasks │ │ └── main.yml │ └── templates │ │ ├── journald-cloudwatch.conf │ │ └── journald-cloudwatch.service │ ├── nvidia-container-toolkit │ └── tasks │ │ └── main.yml │ ├── nvidia-drivers │ ├── defaults │ │ └── main.yml │ └── tasks │ │ └── main.yml │ ├── prevent-kernel-upgrades │ └── tasks │ │ └── main.yml │ ├── sysctl │ └── tasks │ │ └── main.yaml │ └── utilities │ └── tasks │ └── main.yml ├── dockerfile_scripts ├── add_det_nobody_user.sh ├── additional-requirements-rocm.txt ├── additional-requirements-tf.txt ├── additional-requirements-torch.txt ├── additional-requirements.txt ├── apex.patch ├── build_aws.sh ├── build_aws_rocm.sh ├── build_gdrcopy.sh ├── build_nccl.sh ├── install_apex.sh ├── install_deb_packages.sh ├── install_deepspeed.sh ├── install_deepspeed_rocm.sh ├── install_google_cloud_sdk.sh ├── install_libnss_determined.sh ├── install_package_fixes.sh ├── install_python.sh ├── libnss_determined │ ├── .clang-format │ ├── .gitignore │ ├── Dockerfile │ ├── Makefile │ ├── README.md │ ├── src │ │ ├── group.c │ │ ├── libnss_determined.h │ │ ├── parse.c │ │ ├── passwd.c │ │ ├── shadow.c │ │ └── util.c │ └── test │ │ ├── integration_test.sh │ │ └── unit_test.c ├── notebook-requirements.txt ├── ompi.sh ├── ompi_rocm.sh └── scrape_libs.sh ├── scripts ├── publish-docker.sh └── publish-versionless-docker.sh ├── tests ├── pytest.ini └── test_docker.py └── version-matrix.yaml /.bumpversion.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/.bumpversion.cfg -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/.circleci/config.yml -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/.github/CODEOWNERS -------------------------------------------------------------------------------- /.github/commit_template.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/.github/commit_template.txt -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/.github/pull_request_template.md -------------------------------------------------------------------------------- /.github/semantic.yml: -------------------------------------------------------------------------------- 1 | titleOnly: true 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # JetBrains IDEs (e.g., PyCharm and GoLand) 2 | .idea/ 3 | .vscode/ 4 | -------------------------------------------------------------------------------- /Dockerfile-base-cpu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/Dockerfile-base-cpu -------------------------------------------------------------------------------- /Dockerfile-base-cuda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/Dockerfile-base-cuda -------------------------------------------------------------------------------- /Dockerfile-default-cpu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/Dockerfile-default-cpu -------------------------------------------------------------------------------- /Dockerfile-default-cuda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/Dockerfile-default-cuda -------------------------------------------------------------------------------- /Dockerfile-default-rocm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/Dockerfile-default-rocm -------------------------------------------------------------------------------- /Dockerfile-infinityhub-hpc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/Dockerfile-infinityhub-hpc -------------------------------------------------------------------------------- /Dockerfile-infinityhub-pytorch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/Dockerfile-infinityhub-pytorch -------------------------------------------------------------------------------- /Dockerfile-ngc-hpc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/Dockerfile-ngc-hpc -------------------------------------------------------------------------------- /Dockerfile-pytorch-ngc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/Dockerfile-pytorch-ngc -------------------------------------------------------------------------------- /Dockerfile-tensorflow-ngc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/Dockerfile-tensorflow-ngc -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/README.md -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.35.1 2 | -------------------------------------------------------------------------------- /cloud/ansible.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/ansible.cfg -------------------------------------------------------------------------------- /cloud/environments-packer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/environments-packer.json -------------------------------------------------------------------------------- /cloud/environments-playbook.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/environments-playbook.yml -------------------------------------------------------------------------------- /cloud/post-process.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/post-process.sh -------------------------------------------------------------------------------- /cloud/roles/aws-fs/tasks/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/aws-fs/tasks/main.yml -------------------------------------------------------------------------------- /cloud/roles/docker/tasks/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/docker/tasks/main.yml -------------------------------------------------------------------------------- /cloud/roles/environments/tasks/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/environments/tasks/main.yml -------------------------------------------------------------------------------- /cloud/roles/gpumon/defaults/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | gpumon_cloudwatch_metric_namespace: determined 3 | -------------------------------------------------------------------------------- /cloud/roles/gpumon/files/gpumon: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/gpumon/files/gpumon -------------------------------------------------------------------------------- /cloud/roles/gpumon/tasks/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/gpumon/tasks/main.yml -------------------------------------------------------------------------------- /cloud/roles/gpumon/templates/10-namespace.conf: -------------------------------------------------------------------------------- 1 | [Service] 2 | Environment=GPUMON_NAMESPACE={{ gpumon_cloudwatch_metric_namespace }} 3 | -------------------------------------------------------------------------------- /cloud/roles/gpumon/templates/gpumon.service: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/gpumon/templates/gpumon.service -------------------------------------------------------------------------------- /cloud/roles/gpumon/templates/gpumon.timer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/gpumon/templates/gpumon.timer -------------------------------------------------------------------------------- /cloud/roles/journald-cloudwatch/defaults/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/journald-cloudwatch/defaults/main.yml -------------------------------------------------------------------------------- /cloud/roles/journald-cloudwatch/tasks/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/journald-cloudwatch/tasks/main.yml -------------------------------------------------------------------------------- /cloud/roles/journald-cloudwatch/templates/journald-cloudwatch.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/journald-cloudwatch/templates/journald-cloudwatch.conf -------------------------------------------------------------------------------- /cloud/roles/journald-cloudwatch/templates/journald-cloudwatch.service: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/journald-cloudwatch/templates/journald-cloudwatch.service -------------------------------------------------------------------------------- /cloud/roles/nvidia-container-toolkit/tasks/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/nvidia-container-toolkit/tasks/main.yml -------------------------------------------------------------------------------- /cloud/roles/nvidia-drivers/defaults/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/nvidia-drivers/defaults/main.yml -------------------------------------------------------------------------------- /cloud/roles/nvidia-drivers/tasks/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/nvidia-drivers/tasks/main.yml -------------------------------------------------------------------------------- /cloud/roles/prevent-kernel-upgrades/tasks/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/prevent-kernel-upgrades/tasks/main.yml -------------------------------------------------------------------------------- /cloud/roles/sysctl/tasks/main.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/sysctl/tasks/main.yaml -------------------------------------------------------------------------------- /cloud/roles/utilities/tasks/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/cloud/roles/utilities/tasks/main.yml -------------------------------------------------------------------------------- /dockerfile_scripts/add_det_nobody_user.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/add_det_nobody_user.sh -------------------------------------------------------------------------------- /dockerfile_scripts/additional-requirements-rocm.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/additional-requirements-rocm.txt -------------------------------------------------------------------------------- /dockerfile_scripts/additional-requirements-tf.txt: -------------------------------------------------------------------------------- 1 | tensorboard-plugin-profile 2 | -------------------------------------------------------------------------------- /dockerfile_scripts/additional-requirements-torch.txt: -------------------------------------------------------------------------------- 1 | torch-tb-profiler 2 | -------------------------------------------------------------------------------- /dockerfile_scripts/additional-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/additional-requirements.txt -------------------------------------------------------------------------------- /dockerfile_scripts/apex.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/apex.patch -------------------------------------------------------------------------------- /dockerfile_scripts/build_aws.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/build_aws.sh -------------------------------------------------------------------------------- /dockerfile_scripts/build_aws_rocm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/build_aws_rocm.sh -------------------------------------------------------------------------------- /dockerfile_scripts/build_gdrcopy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/build_gdrcopy.sh -------------------------------------------------------------------------------- /dockerfile_scripts/build_nccl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/build_nccl.sh -------------------------------------------------------------------------------- /dockerfile_scripts/install_apex.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/install_apex.sh -------------------------------------------------------------------------------- /dockerfile_scripts/install_deb_packages.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/install_deb_packages.sh -------------------------------------------------------------------------------- /dockerfile_scripts/install_deepspeed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/install_deepspeed.sh -------------------------------------------------------------------------------- /dockerfile_scripts/install_deepspeed_rocm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/install_deepspeed_rocm.sh -------------------------------------------------------------------------------- /dockerfile_scripts/install_google_cloud_sdk.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/install_google_cloud_sdk.sh -------------------------------------------------------------------------------- /dockerfile_scripts/install_libnss_determined.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/install_libnss_determined.sh -------------------------------------------------------------------------------- /dockerfile_scripts/install_package_fixes.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/install_package_fixes.sh -------------------------------------------------------------------------------- /dockerfile_scripts/install_python.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/install_python.sh -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/libnss_determined/.clang-format -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/.gitignore: -------------------------------------------------------------------------------- 1 | test/unit_test 2 | libnss_determined.so.2 3 | -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/libnss_determined/Dockerfile -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/libnss_determined/Makefile -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/libnss_determined/README.md -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/src/group.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/libnss_determined/src/group.c -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/src/libnss_determined.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/libnss_determined/src/libnss_determined.h -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/src/parse.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/libnss_determined/src/parse.c -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/src/passwd.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/libnss_determined/src/passwd.c -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/src/shadow.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/libnss_determined/src/shadow.c -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/src/util.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/libnss_determined/src/util.c -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/test/integration_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/libnss_determined/test/integration_test.sh -------------------------------------------------------------------------------- /dockerfile_scripts/libnss_determined/test/unit_test.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/libnss_determined/test/unit_test.c -------------------------------------------------------------------------------- /dockerfile_scripts/notebook-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/notebook-requirements.txt -------------------------------------------------------------------------------- /dockerfile_scripts/ompi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/ompi.sh -------------------------------------------------------------------------------- /dockerfile_scripts/ompi_rocm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/ompi_rocm.sh -------------------------------------------------------------------------------- /dockerfile_scripts/scrape_libs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/dockerfile_scripts/scrape_libs.sh -------------------------------------------------------------------------------- /scripts/publish-docker.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/scripts/publish-docker.sh -------------------------------------------------------------------------------- /scripts/publish-versionless-docker.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/scripts/publish-versionless-docker.sh -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/tests/pytest.ini -------------------------------------------------------------------------------- /tests/test_docker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/tests/test_docker.py -------------------------------------------------------------------------------- /version-matrix.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/determined-ai/environments/HEAD/version-matrix.yaml --------------------------------------------------------------------------------