├── examples ├── inference │ ├── pippy │ │ ├── requirements.txt │ │ ├── README.md │ │ ├── llama.py │ │ ├── bert.py │ │ ├── gpt2.py │ │ └── t5.py │ └── distributed │ │ ├── README.md │ │ ├── stable_diffusion.py │ │ └── phi2.py ├── requirements.txt ├── slurm │ ├── submit_multigpu.sh │ ├── submit_multinode.sh │ └── submit_multicpu.sh ├── deepspeed_config_templates │ ├── zero_stage1_config.json │ ├── zero_stage2_config.json │ ├── zero_stage3_config.json │ ├── zero_stage2_offload_config.json │ └── zero_stage3_offload_config.json └── multigpu_remote_launcher.py ├── tests ├── test_samples │ ├── test_command_file.sh │ └── MRPC │ │ ├── dev.csv │ │ └── train.csv ├── test_configs │ ├── README.md │ ├── 0_30_0_sagemaker.yaml │ ├── 0_11_0.yaml │ ├── 0_12_0.yaml │ ├── invalid_keys.yaml │ ├── latest.yaml │ └── 0_28_0_mpi.yaml ├── test_cpu.py ├── test_tpu.py ├── deepspeed │ ├── ds_config_zero2.json │ └── ds_config_zero3.json ├── test_grad_sync.py ├── test_metrics.py ├── test_sagemaker.py ├── xla_spawn.py ├── test_optimizer.py ├── test_logging.py └── test_imports.py ├── docs ├── source │ ├── imgs │ │ ├── course_banner.png │ │ ├── profile_export.png │ │ └── accelerate_logo.png │ ├── package_reference │ │ ├── fsdp.md │ │ ├── inference.md │ │ ├── launchers.md │ │ ├── logging.md │ │ ├── deepspeed.md │ │ ├── accelerator.md │ │ ├── megatron_lm.md │ │ ├── state.md │ │ ├── tracking.md │ │ ├── torch_wrappers.md │ │ ├── kwargs.md │ │ └── big_modeling.md │ ├── basic_tutorials │ │ ├── overview.md │ │ ├── tpu.md │ │ └── install.md │ ├── usage_guides │ │ ├── explore.md │ │ ├── mps.md │ │ └── checkpoint.md │ ├── concept_guides │ │ └── internal_mechanism.md │ └── index.md └── Makefile ├── .github ├── workflows │ ├── trufflehog.yml │ ├── upload_pr_documentation.yml │ ├── build_documentation.yml │ ├── build_pr_documentation.yml │ ├── stale.yml │ ├── quality.yml │ ├── test_imports.yml │ ├── build_and_run_tests.yml │ ├── integration_tests.yml │ ├── test.yml │ ├── build-docker-images-release.yml │ └── build_docker_images.yml ├── PULL_REQUEST_TEMPLATE.md └── ISSUE_TEMPLATE │ └── bug-report.yml ├── .pre-commit-config.yaml ├── src └── accelerate │ ├── commands │ ├── __init__.py │ ├── menu │ │ ├── __init__.py │ │ ├── helpers.py │ │ ├── cursor.py │ │ └── input.py │ ├── config │ │ ├── __init__.py │ │ ├── update.py │ │ ├── config.py │ │ └── config_utils.py │ ├── accelerate_cli.py │ ├── test.py │ └── merge.py │ ├── test_utils │ ├── scripts │ │ ├── __init__.py │ │ ├── external_deps │ │ │ ├── __init__.py │ │ │ └── test_zero3_integration.py │ │ ├── test_cli.py │ │ ├── test_ddp_comm_hook.py │ │ └── test_notebook.py │ └── __init__.py │ ├── utils │ ├── rich.py │ ├── tqdm.py │ ├── torch_xla.py │ ├── versions.py │ ├── constants.py │ └── transformer_engine.py │ ├── memory_utils.py │ └── __init__.py ├── docker ├── accelerate-cpu │ └── Dockerfile ├── accelerate-gpu │ └── Dockerfile ├── accelerate-gpu-deepspeed │ └── Dockerfile └── README.md ├── pyproject.toml ├── manim_animations └── dataloaders │ ├── stage_1.py │ ├── stage_0.py │ ├── stage_3.py │ └── stage_4.py ├── .devcontainer └── devcontainer.json ├── benchmarks ├── README.md └── measures_util.py ├── .gitignore ├── utils └── stale.py └── Makefile /examples/inference/pippy/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | pippy>=0.2.0 -------------------------------------------------------------------------------- /tests/test_samples/test_command_file.sh: 
-------------------------------------------------------------------------------- 1 | echo "hello world" 2 | echo "this is a second command" -------------------------------------------------------------------------------- /docs/source/imgs/course_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wetdog/accelerate/main/docs/source/imgs/course_banner.png -------------------------------------------------------------------------------- /docs/source/imgs/profile_export.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wetdog/accelerate/main/docs/source/imgs/profile_export.png -------------------------------------------------------------------------------- /docs/source/imgs/accelerate_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wetdog/accelerate/main/docs/source/imgs/accelerate_logo.png -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate # used to be installed in Amazon SageMaker environment 2 | evaluate 3 | datasets==2.3.2 4 | schedulefree 5 | huggingface_hub>=0.20.0 6 | -------------------------------------------------------------------------------- /tests/test_configs/README.md: -------------------------------------------------------------------------------- 1 | This folder contains test configs for `accelerate config`. These should be generated for each major version 2 | and are written based on `accelerate config` and selecting the "No distributed training" option. -------------------------------------------------------------------------------- /tests/test_configs/0_30_0_sagemaker.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: AMAZON_SAGEMAKER 2 | debug: false 3 | distributed_type: NO 4 | mixed_precision: fp16 5 | debug: false 6 | use_cpu: false 7 | ec2_instance_type: MY_TYPE 8 | iam_role_name: MY_ROLE 9 | -------------------------------------------------------------------------------- /tests/test_configs/0_11_0.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: {} 3 | distributed_type: 'NO' 4 | fsdp_config: {} 5 | machine_rank: 0 6 | main_process_ip: null 7 | main_process_port: null 8 | main_training_function: main 9 | mixed_precision: 'no' 10 | num_machines: 1 11 | num_processes: 1 12 | use_cpu: false -------------------------------------------------------------------------------- /.github/workflows/trufflehog.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | 4 | name: Secret Leaks 5 | 6 | jobs: 7 | trufflehog: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout code 11 | uses: actions/checkout@v4 12 | with: 13 | fetch-depth: 0 14 | - name: Secret Scanning 15 | uses: trufflesecurity/trufflehog@main 16 | -------------------------------------------------------------------------------- /tests/test_configs/0_12_0.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: {} 3 | distributed_type: 'NO' 4 | downcast_bf16: 'no' 5 | fsdp_config: {} 6 | machine_rank: 0 7 | main_process_ip: null 8 | 
main_process_port: null 9 | main_training_function: main 10 | mixed_precision: 'no' 11 | num_machines: 1 12 | num_processes: 1 13 | use_cpu: false -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.2.1 4 | hooks: 5 | - id: ruff 6 | args: 7 | - --fix 8 | - id: ruff-format 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v4.5.0 11 | hooks: 12 | - id: check-merge-conflict 13 | - id: check-yaml 14 | -------------------------------------------------------------------------------- /tests/test_configs/invalid_keys.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: {} 3 | distributed_type: 'NO' 4 | downcast_bf16: 'no' 5 | fsdp_config: {} 6 | machine_rank: 0 7 | main_process_ip: null 8 | main_process_port: null 9 | main_training_function: main 10 | mixed_precision: 'no' 11 | num_machines: 1 12 | num_processes: 1 13 | use_cpu: false 14 | invalid_key: "invalid_value" 15 | another_invalid_key: "another_invalid_value" -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Upload PR Documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Build PR Documentation"] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | build: 11 | uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main 12 | with: 13 | package_name: accelerate 14 | secrets: 15 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 16 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - doc-builder* 8 | - v*-release 9 | 10 | jobs: 11 | build: 12 | uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main 13 | with: 14 | commit_sha: ${{ github.sha }} 15 | package: accelerate 16 | custom_container: huggingface/transformers-doc-builder 17 | secrets: 18 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 19 | -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR Documentation 2 | 3 | on: 4 | pull_request: 5 | 6 | concurrency: 7 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 8 | cancel-in-progress: true 9 | 10 | jobs: 11 | build: 12 | uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main 13 | with: 14 | commit_sha: ${{ github.event.pull_request.head.sha }} 15 | pr_number: ${{ github.event.number }} 16 | package: accelerate 17 | custom_container: huggingface/transformers-doc-builder 18 | -------------------------------------------------------------------------------- /tests/test_configs/latest.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: {} 3 | distributed_type: 'NO' 4 | downcast_bf16: 'no' 5 | fsdp_config: {} 6 | gpu_ids: 
all 7 | machine_rank: 0 8 | main_process_ip: null 9 | main_process_port: null 10 | main_training_function: main 11 | megatron_lm_config: {} 12 | mixed_precision: 'no' 13 | num_machines: 1 14 | num_processes: 1 15 | rdzv_backend: static 16 | same_network: true 17 | use_cpu: false 18 | tpu_name: 'test-tpu' 19 | tpu_zone: 'us-central1-a' 20 | commands: null 21 | command_file: tests/test_samples/test_command_file.sh -------------------------------------------------------------------------------- /tests/test_configs/0_28_0_mpi.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | debug: false 3 | distributed_type: MULTI_CPU 4 | downcast_bf16: 'no' 5 | ipex_config: 6 | ipex: true 7 | machine_rank: 0 8 | main_process_ip: 127.0.0.1 9 | main_process_port: 29500 10 | main_training_function: main 11 | mixed_precision: 'no' 12 | mpirun_config: 13 | mpirun_ccl: '1' 14 | mpirun_hostfile: /home/user/hostfile 15 | num_machines: 4 16 | num_processes: 16 17 | rdzv_backend: static 18 | same_network: true 19 | tpu_env: [] 20 | tpu_use_cluster: false 21 | tpu_use_sudo: false 22 | use_cpu: true 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /src/accelerate/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /src/accelerate/test_utils/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /src/accelerate/test_utils/scripts/external_deps/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /examples/inference/distributed/README.md: -------------------------------------------------------------------------------- 1 | # Distributed inference examples 2 | 3 | This folder contains a variety of tutorials for running distributed inference with the following strategy: 4 | 5 | Load an entire model onto each GPU and send chunks of a batch through each GPU’s model copy at a time 6 | 7 | ## Installation 8 | 9 | ```bash 10 | pip install accelerate torch 11 | ``` 12 | 13 | ## Running code 14 | 15 | You can use either `torchrun` or the recommended `accelerate launch` (without needing to run `accelerate config`) on each script: 16 | 17 | ```bash 18 | accelerate launch --num_processes {NUM_GPUS} phi2.py 19 | ``` 20 | 21 | Or: 22 | 23 | ```bash 24 | torchrun --nproc-per-node {NUM_GPUS} phi2.py 25 | ``` -------------------------------------------------------------------------------- /src/accelerate/commands/menu/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from .selection_menu import BulletMenu 15 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Stale Bot 2 | 3 | on: 4 | schedule: 5 | - cron: "0 15 * * *" 6 | workflow_dispatch: 7 | 8 | jobs: 9 | close_stale_issues: 10 | name: Close Stale Issues 11 | if: github.repository == 'huggingface/accelerate' 12 | runs-on: ubuntu-latest 13 | env: 14 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 15 | steps: 16 | - uses: actions/checkout@v3.1.0 17 | 18 | - name: Setup Python 19 | uses: actions/setup-python@v3 20 | with: 21 | python-version: 3.8 22 | cache: 'pip' 23 | cache-dependency-path: 'setup.py' 24 | 25 | - name: Install requirements 26 | run: | 27 | pip install PyGithub 28 | - name: Close stale issues 29 | run: | 30 | python utils/stale.py 31 | -------------------------------------------------------------------------------- /.github/workflows/quality.yml: -------------------------------------------------------------------------------- 1 | name: Quality Check 2 | 3 | on: [pull_request] 4 | 5 | jobs: 6 | quality: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3.1.0 10 | - name: Set up Python 3.8 11 | uses: actions/setup-python@v3 12 | with: 13 | python-version: 3.8 14 | cache: 'pip' 15 | cache-dependency-path: 'setup.py' 16 | - name: Install Python dependencies 17 | run: pip install -e .[quality] 18 | - name: Run Quality check 19 | run: make quality 20 | - name: Check if failure 21 | if: ${{ failure() }} 22 | run: | 23 | echo "Quality check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and rerun 'make style; make quality;'" >> $GITHUB_STEP_SUMMARY 24 | 25 | -------------------------------------------------------------------------------- /src/accelerate/utils/rich.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .imports import is_rich_available 16 | 17 | 18 | if is_rich_available(): 19 | from rich.traceback import install 20 | 21 | install(show_locals=False) 22 | 23 | else: 24 | raise ModuleNotFoundError("To use the rich extension, install rich with `pip install rich`") 25 | -------------------------------------------------------------------------------- /src/accelerate/memory_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import warnings 16 | 17 | 18 | warnings.warn( 19 | "memory_utils has been reorganized to utils.memory. Import `find_executable_batch_size` from the main `__init__`: " 20 | "`from accelerate import find_executable_batch_size` to avoid this warning.", 21 | FutureWarning, 22 | ) 23 | -------------------------------------------------------------------------------- /src/accelerate/test_utils/scripts/test_cli.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import torch 15 | 16 | 17 | def main(): 18 | if torch.cuda.is_available(): 19 | num_gpus = torch.cuda.device_count() 20 | else: 21 | num_gpus = 0 22 | print(f"Successfully ran on {num_gpus} GPUs") 23 | 24 | 25 | if __name__ == "__main__": 26 | main() 27 | -------------------------------------------------------------------------------- /docs/source/package_reference/fsdp.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Utilities for Fully Sharded Data Parallelism 17 | 18 | [[autodoc]] utils.merge_fsdp_weights 19 | 20 | [[autodoc]] utils.FullyShardedDataParallelPlugin -------------------------------------------------------------------------------- /examples/slurm/submit_multigpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --job-name=multigpu 4 | #SBATCH -D . 
5 | #SBATCH --output=O-%x.%j 6 | #SBATCH --error=E-%x.%j 7 | #SBATCH --nodes=1 8 | #SBATCH --ntasks-per-node=1 # number of MP tasks 9 | #SBATCH --gres=gpu:4 # number of GPUs per node 10 | #SBATCH --cpus-per-task=160 # number of cores per task 11 | #SBATCH --time=01:59:00 # maximum execution time (HH:MM:SS) 12 | 13 | ###################### 14 | ### Set environment ## 15 | ###################### 16 | source activateEnvironment.sh 17 | export GPUS_PER_NODE=4 18 | ###################### 19 | 20 | export ACCELERATE_DIR="${ACCELERATE_DIR:-/accelerate}" 21 | export SCRIPT="${ACCELERATE_DIR}/examples/complete_nlp_example.py" 22 | export SCRIPT_ARGS=" \ 23 | --mixed_precision fp16 \ 24 | --output_dir ${ACCELERATE_DIR}/examples/output \ 25 | --with_tracking \ 26 | " 27 | 28 | accelerate launch --num_processes $GPUS_PER_NODE $SCRIPT $SCRIPT_ARGS -------------------------------------------------------------------------------- /docs/source/package_reference/inference.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # The inference API 17 | 18 | These docs refer to the [PiPPy](https://github.com/PyTorch/PiPPy) integration. 19 | 20 | [[autodoc]] inference.prepare_pippy 21 | -------------------------------------------------------------------------------- /docs/source/package_reference/launchers.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Launchers 17 | 18 | Functions for launching training on distributed processes. 19 | 20 | 21 | [[autodoc]] accelerate.notebook_launcher 22 | [[autodoc]] accelerate.debug_launcher -------------------------------------------------------------------------------- /tests/test_cpu.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | 17 | from accelerate import debug_launcher 18 | from accelerate.test_utils import require_cpu, test_ops, test_script 19 | 20 | 21 | @require_cpu 22 | class MultiCPUTester(unittest.TestCase): 23 | def test_cpu(self): 24 | debug_launcher(test_script.main) 25 | 26 | def test_ops(self): 27 | debug_launcher(test_ops.main) 28 | -------------------------------------------------------------------------------- /docs/source/package_reference/logging.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Logging with Accelerate 17 | 18 | Refer to the [Troubleshooting guide](../usage_guides/troubleshooting#logging) or to the example below to learn 19 | how to use 🤗 Accelerate's logger. 
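A minimal sketch of that pattern (the logged message and the flag value here are illustrative):

```python
from accelerate.logging import get_logger

logger = get_logger(__name__)
# By default messages are emitted on the main process only;
# pass main_process_only=False to log on every process.
logger.info("My log", main_process_only=True)
```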
20 | 21 | [[autodoc]] logging.get_logger -------------------------------------------------------------------------------- /docker/accelerate-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | # Builds CPU-only Docker image of PyTorch 2 | # Uses multi-staged approach to reduce size 3 | # Stage 1 4 | FROM python:3.8-slim as compile-image 5 | 6 | ARG DEBIAN_FRONTEND=noninteractive 7 | 8 | RUN apt update 9 | RUN apt-get install -y --no-install-recommends \ 10 | build-essential \ 11 | git \ 12 | gcc 13 | 14 | # Setup virtual environment for Docker 15 | ENV VIRTUAL_ENV=/opt/venv 16 | RUN python3 -m venv ${VIRTUAL_ENV} 17 | # Make sure we use the virtualenv 18 | ENV PATH="${VIRTUAL_ENV}/bin:$PATH" 19 | WORKDIR /workspace 20 | # Install specific CPU torch wheel to save on space 21 | RUN python3 -m pip install --upgrade --no-cache-dir pip 22 | RUN python3 -m pip install --no-cache-dir \ 23 | jupyter \ 24 | git+https://github.com/huggingface/accelerate#egg=accelerate[testing,test_trackers] \ 25 | --extra-index-url https://download.pytorch.org/whl/cpu 26 | 27 | # Stage 2 28 | FROM python:3.8-slim AS build-image 29 | COPY --from=compile-image /opt/venv /opt/venv 30 | RUN useradd -ms /bin/bash user 31 | USER user 32 | 33 | # Make sure we use the virtualenv 34 | ENV PATH="/opt/venv/bin:$PATH" 35 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docs/source/package_reference/deepspeed.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Utilities for DeepSpeed 17 | 18 | [[autodoc]] utils.DeepSpeedPlugin 19 | 20 | [[autodoc]] utils.deepspeed.DummyOptim 21 | 22 | [[autodoc]] utils.deepspeed.DummyScheduler 23 | 24 | [[autodoc]] utils.deepspeed.DeepSpeedEngineWrapper 25 | 26 | [[autodoc]] utils.deepspeed.DeepSpeedOptimizerWrapper 27 | 28 | [[autodoc]] utils.deepspeed.DeepSpeedSchedulerWrapper 29 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | line-length = 119 3 | target-version = "py38" 4 | 5 | [tool.ruff.lint] 6 | preview = true 7 | ignore-init-module-imports = true 8 | extend-select = [ 9 | "B009", # static getattr 10 | "B010", # static setattr 11 | "CPY", # Copyright 12 | "E", # PEP8 errors 13 | "F", # PEP8 formatting 14 | "I", # Import sorting 15 | "TID251", # Banned API 16 | "UP", # Pyupgrade 17 | "W", # PEP8 warnings 18 | ] 19 | ignore = [ 20 | "E501", # Line length (handled by ruff-format) 21 | "E741", # Ambiguous variable name 22 | "W605", # Invalid escape sequence 23 | "UP007", # X | Y type annotations 24 | ] 25 | 26 | [tool.ruff.lint.per-file-ignores] 27 | "__init__.py" = [ 28 | "F401", # Ignore seemingly unused imports (they're meant for re-export) 29 | ] 30 | "manim_animations/*" = ["ALL"] 31 | 32 | [tool.ruff.lint.isort] 33 | lines-after-imports = 2 34 | known-first-party = ["accelerate"] 35 | 36 | [tool.ruff.format] 37 | exclude = [ 38 | "manim_animations/*" 39 | ] 40 | 41 | [tool.ruff.lint.flake8-tidy-imports.banned-api] 42 | "os.getenv".msg = "Use os.environ instead" 43 | "os.putenv".msg = "Use os.environ instead" 44 | "os.unsetenv".msg = "Use os.environ instead" 45 | -------------------------------------------------------------------------------- /docs/source/package_reference/accelerator.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # 
Accelerator 17 | 18 | The [`Accelerator`] is the main class for enabling distributed training on any type of training setup. Read the [Add Accelerator to your code](../basic_tutorials/migration) tutorial to learn more about how to add the [`Accelerator`] to your script. 19 | 20 | ## Accelerator[[api]] 21 | 22 | [[autodoc]] Accelerator 23 | 24 | ## Utilities 25 | 26 | [[autodoc]] accelerate.utils.gather_object 27 | -------------------------------------------------------------------------------- /docs/source/package_reference/megatron_lm.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Utilities for Megatron-LM 17 | 18 | [[autodoc]] utils.MegatronLMPlugin 19 | 20 | [[autodoc]] utils.MegatronLMDummyScheduler 21 | 22 | [[autodoc]] utils.MegatronLMDummyDataLoader 23 | 24 | [[autodoc]] utils.AbstractTrainStep 25 | 26 | [[autodoc]] utils.GPTTrainStep 27 | 28 | [[autodoc]] utils.BertTrainStep 29 | 30 | [[autodoc]] utils.T5TrainStep 31 | 32 | [[autodoc]] utils.avg_losses_across_data_parallel_group 33 | -------------------------------------------------------------------------------- /manim_animations/dataloaders/stage_1.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from manim import * 16 | 17 | class Stage01(Scene): 18 | def construct(self): 19 | mascot = ImageMobject("mascot_bookie.png") 20 | mascot.scale(.35) 21 | mascot.move_to([-3.75,-1,0]) 22 | text = Paragraph( 23 | "Distributed Training,\nHugging Face Accelerate,\nand PyTorch DataLoaders\n\nHow do they all interact?", 24 | font_size=36, 25 | line_spacing=1, 26 | alignment="center", 27 | weight=BOLD, 28 | ) 29 | text.move_to([1.75,.5,0]) 30 | self.add(mascot) 31 | self.add(text) -------------------------------------------------------------------------------- /docs/source/package_reference/state.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Stateful Classes 17 | 18 | Below are variations of a [singleton class](https://en.wikipedia.org/wiki/Singleton_pattern) in the sense that all 19 | instances share the same state, which is initialized on the first instantiation. 20 | 21 | These classes are immutable and store information about certain configurations or 22 | states. 23 | 24 | [[autodoc]] state.PartialState 25 | 26 | [[autodoc]] state.AcceleratorState 27 | 28 | [[autodoc]] state.GradientState -------------------------------------------------------------------------------- /manim_animations/dataloaders/stage_0.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from manim import * 16 | 17 | 18 | class Stage0(Scene): 19 | def construct(self): 20 | mascot = ImageMobject("mascot_bookie.png") 21 | mascot.scale(.35) 22 | mascot.move_to([-3.75,-1,0]) 23 | text = Paragraph( 24 | "Distributed Training,\nHugging Face Accelerate,\nand PyTorch DataLoaders\n\nHow do they all interact?", 25 | font_size=36, 26 | line_spacing=1, 27 | alignment="center", 28 | weight=BOLD, 29 | ) 30 | text.move_to([1.75,.5,0]) 31 | self.add(mascot) 32 | self.add(text) -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // File only needed for VSCode users to have proper Docker based interpreters 2 | { 3 | "name": "accelerate_dev_environment", 4 | "build": { 5 | // ACTION NEEDED: comment/uncomment the relevant line depending on whether you are in a CPU/GPU environment 6 | "dockerfile": "../docker/accelerate-cpu/Dockerfile" 7 | // "dockerfile": "../docker/accelerate-gpu/Dockerfile" 8 | }, 9 | "runArgs": [ 10 | // ACTION NEEDED: uncomment the next line if your local machine has GPUs available 11 | // "--gpus", "all", 12 | // Enable the docker container to access system resources 13 | "--ipc", "host" 14 | ], 15 | "remoteEnv": { 16 | "PYTHONPATH": "${containerEnv:PATH}:${containerWorkspaceFolder}" 17 | }, 18 | "customizations": { 19 | "vscode": { 20 | "extensions": [ 21 | // Ensure we have IntelliSense in VSCode when running inside container 22 | "ms-python.python" 23 | ] 24 | } 25 | }, 26 | "workspaceFolder": "/workspaces/accelerate", 27 | // Need git for VSCode to color code modifications. Only runs when building environment. 28 | "onCreateCommand": "apt-get update && apt-get install -y git && pip install -e '.[dev]'" 29 | } -------------------------------------------------------------------------------- /docs/source/package_reference/tracking.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Experiment Tracking 17 | 18 | ## The Base Tracker Class 19 | 20 | [[autodoc]] tracking.GeneralTracker 21 | 22 | ## Integrated Trackers 23 | 24 | [[autodoc]] tracking.TensorBoardTracker 25 | - __init__ 26 | [[autodoc]] tracking.WandBTracker 27 | - __init__ 28 | [[autodoc]] tracking.CometMLTracker 29 | - __init__ 30 | [[autodoc]] tracking.AimTracker 31 | - __init__ 32 | [[autodoc]] tracking.MLflowTracker 33 | - __init__ 34 | [[autodoc]] tracking.ClearMLTracker 35 | - __init__ 36 | -------------------------------------------------------------------------------- /tests/test_tpu.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | import sys 17 | import unittest 18 | 19 | from accelerate.test_utils import execute_subprocess_async, path_in_accelerate_package, require_tpu 20 | 21 | 22 | class MultiTPUTester(unittest.TestCase): 23 | test_file_path = path_in_accelerate_package("test_utils", "scripts", "test_script.py") 24 | test_dir = os.path.dirname(__file__) 25 | 26 | @require_tpu 27 | def test_tpu(self): 28 | distributed_args = f""" 29 | {self.test_dir}/xla_spawn.py 30 | --num_cores 8 31 | {self.test_file_path} 32 | """.split() 33 | cmd = [sys.executable] + distributed_args 34 | execute_subprocess_async(cmd) 35 | -------------------------------------------------------------------------------- /examples/deepspeed_config_templates/zero_stage1_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "optimizer": { 11 | "type": "AdamW", 12 | "params": { 13 | "lr": "auto", 14 | "weight_decay": "auto", 15 | "torch_adam": true, 16 | "adam_w_mode": true 17 | } 18 | }, 19 | "scheduler": { 20 | "type": "WarmupDecayLR", 21 | "params": { 22 | "warmup_min_lr": "auto", 23 | "warmup_max_lr": "auto", 24 | "warmup_num_steps": "auto", 25 | "total_num_steps": "auto" 26 | } 27 | }, 28 | "zero_optimization": { 29 | "stage": 1, 30 | "allgather_partitions": true, 31 | "allgather_bucket_size": 2e8, 32 | "overlap_comm": true, 33 | "reduce_scatter": true, 34 | "reduce_bucket_size": "auto", 35 | "contiguous_gradients": true 36 | }, 37 | "gradient_accumulation_steps": 1, 38 | "gradient_clipping": "auto", 39 | "steps_per_print": 2000, 40 | "train_batch_size": "auto", 41 | "train_micro_batch_size_per_gpu": "auto", 42 | "wall_clock_breakdown": false 43 | } -------------------------------------------------------------------------------- /examples/deepspeed_config_templates/zero_stage2_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "optimizer": { 11 | "type": "AdamW", 12 | "params": { 13 | "lr": "auto", 14 | "weight_decay": "auto", 15 | "torch_adam": true, 16 | "adam_w_mode": true 17 | } 18 | }, 19 | "scheduler": { 20 | "type": "WarmupDecayLR", 21 | "params": { 22 | "warmup_min_lr": "auto", 23 | "warmup_max_lr": "auto", 24 | "warmup_num_steps": "auto", 25 | "total_num_steps": "auto" 26 | } 27 | }, 28 | "zero_optimization": { 29 | "stage": 2, 30 | "allgather_partitions": true, 31 | "allgather_bucket_size": 2e8, 32 | "overlap_comm": true, 33 | "reduce_scatter": true, 34 | "reduce_bucket_size": "auto", 35 | "contiguous_gradients": true 36 | }, 37 | "gradient_accumulation_steps": 1, 38 | "gradient_clipping": "auto", 39 | "steps_per_print": 2000, 40 | "train_batch_size": "auto", 41 | "train_micro_batch_size_per_gpu": "auto", 42 | "wall_clock_breakdown": 
false 43 | } -------------------------------------------------------------------------------- /tests/test_samples/MRPC/dev.csv: -------------------------------------------------------------------------------- 1 | label,sentence1,sentence2 2 | equivalent,He said the foodservice pie business doesn 't fit the company 's long-term growth strategy .,""" The foodservice pie business does not fit our long-term growth strategy ." 3 | not_equivalent,Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war .,"His wife said he was "" 100 percent behind George Bush "" and looked forward to using his years of training in the war ." 4 | not_equivalent,"The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat .","The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent ." 5 | equivalent,The AFL-CIO is waiting until October to decide if it will endorse a candidate .,The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | not_equivalent,No dates have been set for the civil or the criminal trial .,"No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty ." 7 | equivalent,Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed .,It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /tests/test_samples/MRPC/train.csv: -------------------------------------------------------------------------------- 1 | label,sentence1,sentence2 2 | equivalent,He said the foodservice pie business doesn 't fit the company 's long-term growth strategy .,""" The foodservice pie business does not fit our long-term growth strategy ." 3 | not_equivalent,Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war .,"His wife said he was "" 100 percent behind George Bush "" and looked forward to using his years of training in the war ." 4 | not_equivalent,"The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat .","The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent ." 5 | equivalent,The AFL-CIO is waiting until October to decide if it will endorse a candidate .,The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | not_equivalent,No dates have been set for the civil or the criminal trial .,"No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty ." 7 | equivalent,Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed .,It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /docs/source/basic_tutorials/overview.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Overview 17 | 18 | Welcome to the 🤗 Accelerate tutorials! These introductory guides will help catch you up to speed on working with 🤗 Accelerate. 
19 | You'll learn how to modify your code to have it work with the API seamlessly, how to launch your script properly, 20 | and more! 21 | 22 | These tutorials assume some basic knowledge of Python and familiarity with the PyTorch framework. 23 | 24 | If you have any questions about 🤗 Accelerate, feel free to join and ask the community on our [forum](https://discuss.huggingface.co/c/accelerate/18). -------------------------------------------------------------------------------- /.github/workflows/test_imports.yml: -------------------------------------------------------------------------------- 1 | name: Run Import Tests 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - "src/**" 7 | - "tests/**" 8 | - ".github/**" 9 | - "examples/**" 10 | - "setup.py" 11 | types: [opened, synchronize, reopened] 12 | 13 | env: 14 | HF_HOME: ~/hf_cache 15 | TESTING_MOCKED_DATALOADERS: "1" 16 | IS_GITHUB_CI: "1" 17 | 18 | jobs: 19 | run-tests: 20 | runs-on: ubuntu-latest 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | pytorch-version: [ 25 | latest, 26 | minimum, 27 | ] 28 | steps: 29 | - uses: actions/checkout@v3.1.0 30 | - name: Set up python 3.8 31 | uses: actions/setup-python@v3 32 | with: 33 | python-version: 3.8 34 | cache: 'pip' 35 | cache-dependency-path: 'setup.py' 36 | 37 | - name: Install the library 38 | run: | 39 | pip install -e . 40 | pip install pytest-reportlog tabulate setuptools git+https://github.com/muellerzr/import-timer 41 | 42 | - name: Show installed libraries 43 | run: | 44 | pip freeze 45 | 46 | - name: Run Import Tests 47 | env: 48 | PYTORCH_VERSION: ${{ matrix.pytorch-version }} 49 | run: | 50 | pytest -sv tests/test_imports.py 51 | 52 | - name: Generate Report 53 | if: always() 54 | run: | 55 | python utils/log_reports.py >> $GITHUB_STEP_SUMMARY 56 | -------------------------------------------------------------------------------- /examples/inference/distributed/stable_diffusion.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | from diffusers import DiffusionPipeline 17 | 18 | from accelerate import PartialState # Can also be Accelerator or AcceleratorState 19 | 20 | 21 | pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16) 22 | distributed_state = PartialState() 23 | pipe.to(distributed_state.device) 24 | 25 | # Assume two processes 26 | # On the first GPU, the prompts will be ["a dog", "a cat"], 27 | # and on the second GPU it will be ["a chicken", "a chicken"]. 28 | # Make sure to drop the final sample, as it will be a duplicate of the previous one. 
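# Note: `apply_padding=True` pads the shorter split by repeating its last element so that every
# process receives the same number of prompts (hence the duplicated "a chicken" on the second GPU).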
29 | with distributed_state.split_between_processes(["a dog", "a cat", "a chicken"], apply_padding=True) as prompt: 30 | result = pipe(prompt).images 31 | -------------------------------------------------------------------------------- /examples/deepspeed_config_templates/zero_stage3_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "optimizer": { 11 | "type": "AdamW", 12 | "params": { 13 | "lr": "auto", 14 | "weight_decay": "auto" 15 | } 16 | }, 17 | "scheduler": { 18 | "type": "WarmupDecayLR", 19 | "params": { 20 | "warmup_min_lr": "auto", 21 | "warmup_max_lr": "auto", 22 | "warmup_num_steps": "auto", 23 | "total_num_steps": "auto" 24 | } 25 | }, 26 | "zero_optimization": { 27 | "stage": 3, 28 | "overlap_comm": true, 29 | "contiguous_gradients": true, 30 | "reduce_bucket_size": "auto", 31 | "stage3_prefetch_bucket_size": "auto", 32 | "stage3_param_persistence_threshold": "auto", 33 | "sub_group_size": 1e9, 34 | "stage3_max_live_parameters": 1e9, 35 | "stage3_max_reuse_distance": 1e9, 36 | "stage3_gather_16bit_weights_on_model_save": "auto" 37 | }, 38 | "gradient_accumulation_steps": 1, 39 | "gradient_clipping": "auto", 40 | "steps_per_print": 2000, 41 | "train_batch_size": "auto", 42 | "train_micro_batch_size_per_gpu": "auto", 43 | "wall_clock_breakdown": false 44 | } -------------------------------------------------------------------------------- /tests/deepspeed/ds_config_zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "optimizer": { 14 | "type": "AdamW", 15 | "params": { 16 | "lr": "auto", 17 | "weight_decay": "auto", 18 | "torch_adam": true, 19 | "adam_w_mode": true 20 | } 21 | }, 22 | "scheduler": { 23 | "type": "WarmupLR", 24 | "params": { 25 | "warmup_min_lr": "auto", 26 | "warmup_max_lr": "auto", 27 | "warmup_num_steps": "auto" 28 | } 29 | }, 30 | "zero_optimization": { 31 | "stage": 2, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "allgather_partitions": true, 37 | "allgather_bucket_size": 2e8, 38 | "overlap_comm": true, 39 | "reduce_scatter": true, 40 | "reduce_bucket_size": "auto", 41 | "contiguous_gradients": true 42 | }, 43 | "gradient_accumulation_steps": 1, 44 | "gradient_clipping": "auto", 45 | "steps_per_print": 2000, 46 | "train_batch_size": "auto", 47 | "train_micro_batch_size_per_gpu": "auto", 48 | "wall_clock_breakdown": false 49 | } -------------------------------------------------------------------------------- /examples/deepspeed_config_templates/zero_stage2_offload_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "optimizer": { 11 | "type": "AdamW", 12 | "params": { 13 | "lr": "auto", 14 | "weight_decay": "auto", 15 | "torch_adam": true, 16 | "adam_w_mode": true 17 | } 18 | }, 19 | "scheduler": { 20 | "type": "WarmupDecayLR", 21 | "params": { 22 | "warmup_min_lr": "auto", 23 | "warmup_max_lr": "auto", 24 | 
"warmup_num_steps": "auto", 25 | "total_num_steps": "auto" 26 | } 27 | }, 28 | "zero_optimization": { 29 | "stage": 2, 30 | "offload_optimizer": { 31 | "device": "cpu", 32 | "pin_memory": true 33 | }, 34 | "allgather_partitions": true, 35 | "allgather_bucket_size": 2e8, 36 | "overlap_comm": true, 37 | "reduce_scatter": true, 38 | "reduce_bucket_size": "auto", 39 | "contiguous_gradients": true 40 | }, 41 | "gradient_accumulation_steps": 1, 42 | "gradient_clipping": "auto", 43 | "steps_per_print": 2000, 44 | "train_batch_size": "auto", 45 | "train_micro_batch_size_per_gpu": "auto", 46 | "wall_clock_breakdown": false 47 | } -------------------------------------------------------------------------------- /docs/source/package_reference/torch_wrappers.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Wrapper classes for torch Dataloaders, Optimizers, and Schedulers 17 | 18 | The internal classes Accelerate uses to prepare objects for distributed training 19 | when calling [`~Accelerator.prepare`]. 20 | 21 | ## Datasets and DataLoaders 22 | 23 | [[autodoc]] data_loader.prepare_data_loader 24 | [[autodoc]] data_loader.skip_first_batches 25 | 26 | [[autodoc]] data_loader.BatchSamplerShard 27 | [[autodoc]] data_loader.IterableDatasetShard 28 | [[autodoc]] data_loader.DataLoaderShard 29 | [[autodoc]] data_loader.DataLoaderDispatcher 30 | 31 | ## Optimizers 32 | 33 | [[autodoc]] optimizer.AcceleratedOptimizer 34 | 35 | ## Schedulers 36 | 37 | [[autodoc]] scheduler.AcceleratedScheduler -------------------------------------------------------------------------------- /examples/slurm/submit_multinode.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --job-name=multinode 4 | #SBATCH -D . 
5 | #SBATCH --output=O-%x.%j 6 | #SBATCH --error=E-%x.%j 7 | #SBATCH --nodes=4 # number of nodes 8 | #SBATCH --ntasks-per-node=1 # number of MP tasks 9 | #SBATCH --gres=gpu:4 # number of GPUs per node 10 | #SBATCH --cpus-per-task=160 # number of cores per task 11 | #SBATCH --time=01:59:00 # maximum execution time (HH:MM:SS) 12 | 13 | ###################### 14 | ### Set environment ## 15 | ###################### 16 | source activateEnvironment.sh 17 | export GPUS_PER_NODE=4 18 | ###################### 19 | 20 | ###################### 21 | #### Set network ##### 22 | ###################### 23 | head_node_ip=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) 24 | ###################### 25 | 26 | export LAUNCHER="accelerate launch \ 27 | --num_processes $((SLURM_NNODES * GPUS_PER_NODE)) \ 28 | --num_machines $SLURM_NNODES \ 29 | --rdzv_backend c10d \ 30 | --main_process_ip $head_node_ip \ 31 | --main_process_port 29500 \ 32 | " 33 | export ACCELERATE_DIR="${ACCELERATE_DIR:-/accelerate}" 34 | export SCRIPT="${ACCELERATE_DIR}/examples/complete_nlp_example.py" 35 | export SCRIPT_ARGS=" \ 36 | --mixed_precision fp16 \ 37 | --output_dir ${ACCELERATE_DIR}/examples/output \ 38 | " 39 | 40 | # This step is necessary because accelerate launch does not handle multiline arguments properly 41 | export CMD="$LAUNCHER $SCRIPT $SCRIPT_ARGS" 42 | srun $CMD -------------------------------------------------------------------------------- /.github/workflows/build_and_run_tests.yml: -------------------------------------------------------------------------------- 1 | name: Trigger docker images and run tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | workflow_dispatch: 8 | 9 | env: 10 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 11 | 12 | jobs: 13 | check-for-source: 14 | runs-on: ubuntu-latest 15 | name: Check if setup was changed 16 | outputs: 17 | changed: ${{ steps.was_changed.outputs.changed }} 18 | steps: 19 | - uses: actions/checkout@v3.1.0 20 | with: 21 | fetch-depth: "2" 22 | 23 | - name: Get changed files 24 | id: changed-files 25 | uses: tj-actions/changed-files@v41 26 | 27 | - name: Was setup changed 28 | id: was_changed 29 | run: | 30 | for file in ${{ steps.changed-files.outputs.all_changed_files }}; do 31 | if [ `basename "${file}"` == "setup.py" ]; then 32 | echo "changed=1" >> $GITHUB_OUTPUT 33 | fi 34 | done 35 | 36 | build-docker-containers: 37 | needs: check-for-source 38 | if: (github.event_name == 'push') && (needs.check-for-source.outputs.changed == '1') 39 | uses: ./.github/workflows/build_docker_images.yml 40 | secrets: inherit 41 | 42 | run-merge-tests: 43 | needs: build-docker-containers 44 | if: always() 45 | uses: ./.github/workflows/run_merge_tests.yml 46 | 47 | run-integration-tests: 48 | needs: build-docker-containers 49 | if: always() 50 | uses: ./.github/workflows/self_hosted_integration_tests.yml -------------------------------------------------------------------------------- /docs/source/package_reference/kwargs.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Kwargs Handlers 17 | 18 | The following objects can be passed to the main [`Accelerator`] to customize how some PyTorch objects 19 | related to distributed training or mixed precision are created. 
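For example, a minimal sketch of passing one of these handlers to the [`Accelerator`] (the `find_unused_parameters` value is purely illustrative, not a recommended default):

```python
from accelerate import Accelerator, DistributedDataParallelKwargs

# Handlers are passed through the `kwargs_handlers` argument of the Accelerator.
ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
accelerator = Accelerator(kwargs_handlers=[ddp_kwargs])
```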
20 | 21 | ## AutocastKwargs 22 | 23 | [[autodoc]] AutocastKwargs 24 | 25 | ## DistributedDataParallelKwargs 26 | 27 | [[autodoc]] DistributedDataParallelKwargs 28 | 29 | ## FP8RecipeKwargs 30 | 31 | [[autodoc]] utils.FP8RecipeKwargs 32 | 33 | ## ProfileKwargs 34 | 35 | [[autodoc]] utils.ProfileKwargs 36 | 37 | ## GradScalerKwargs 38 | 39 | [[autodoc]] GradScalerKwargs 40 | 41 | ## InitProcessGroupKwargs 42 | 43 | [[autodoc]] InitProcessGroupKwargs 44 | 45 | ## KwargsHandler 46 | 47 | [[autodoc]] utils.KwargsHandler 48 | -------------------------------------------------------------------------------- /manim_animations/dataloaders/stage_3.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from manim import * 16 | 17 | class Stage3(Scene): 18 | def construct(self): 19 | step_1 = MarkupText( 20 | f"To combat this, Accelerate employs one of two different\nSampler wrapper methods depending on the scenario:", 21 | font_size=24 22 | ) 23 | step_1.move_to([0, 1.5, 0]) 24 | self.add(step_1) 25 | step_2 = MarkupText( 26 | f"1. Sharding the dataset before drawing:\n\t● IterableDatasetShard\n\t● BatchSamplerShard", 27 | font_size=24, 28 | ).next_to(step_1, direction=DOWN, aligned_edge=LEFT) 29 | self.add(step_2) 30 | step_3 = MarkupText( 31 | f"\n\n2. 
Splitting the batch after drawing:\n\t● DataLoaderDispatcher", 32 | font_size=24, 33 | ).next_to(step_2, direction=DOWN, aligned_edge=LEFT) 34 | self.add(step_3) -------------------------------------------------------------------------------- /examples/deepspeed_config_templates/zero_stage3_offload_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "optimizer": { 11 | "type": "AdamW", 12 | "params": { 13 | "lr": "auto", 14 | "weight_decay": "auto" 15 | } 16 | }, 17 | "scheduler": { 18 | "type": "WarmupDecayLR", 19 | "params": { 20 | "warmup_min_lr": "auto", 21 | "warmup_max_lr": "auto", 22 | "warmup_num_steps": "auto", 23 | "total_num_steps": "auto" 24 | } 25 | }, 26 | "zero_optimization": { 27 | "stage": 3, 28 | "offload_optimizer": { 29 | "device": "cpu", 30 | "pin_memory": true 31 | }, 32 | "offload_param": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "overlap_comm": true, 37 | "contiguous_gradients": true, 38 | "reduce_bucket_size": "auto", 39 | "stage3_prefetch_bucket_size": "auto", 40 | "stage3_param_persistence_threshold": "auto", 41 | "sub_group_size": 1e9, 42 | "stage3_max_live_parameters": 1e9, 43 | "stage3_max_reuse_distance": 1e9, 44 | "stage3_gather_16bit_weights_on_model_save": "auto" 45 | }, 46 | "gradient_accumulation_steps": 1, 47 | "gradient_clipping": "auto", 48 | "steps_per_print": 2000, 49 | "train_batch_size": "auto", 50 | "train_micro_batch_size_per_gpu": "auto", 51 | "wall_clock_breakdown": false 52 | } -------------------------------------------------------------------------------- /docker/accelerate-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | # Builds GPU docker image of PyTorch specifically 2 | # Uses multi-staged approach to reduce size 3 | # Stage 1 4 | # Use base conda image to reduce time 5 | FROM continuumio/miniconda3:latest AS compile-image 6 | # Specify py version 7 | ENV PYTHON_VERSION=3.9 8 | # Install apt libs 9 | RUN apt-get update && \ 10 | apt-get install -y curl git wget && \ 11 | apt-get clean && \ 12 | rm -rf /var/lib/apt/lists* 13 | 14 | # Create our conda env 15 | RUN conda create --name accelerate python=${PYTHON_VERSION} ipython jupyter pip 16 | # We don't install pytorch here yet since CUDA isn't available 17 | # instead we use the direct torch wheel 18 | ENV PATH /opt/conda/envs/accelerate/bin:$PATH 19 | # Activate our bash shell 20 | RUN chsh -s /bin/bash 21 | SHELL ["/bin/bash", "-c"] 22 | # Activate the conda env, install mpy4pi, and install torch + accelerate 23 | RUN source activate accelerate && conda install -c conda-forge mpi4py 24 | RUN source activate accelerate && \ 25 | python3 -m pip install --no-cache-dir \ 26 | git+https://github.com/huggingface/accelerate#egg=accelerate[testing,test_trackers] \ 27 | --extra-index-url https://download.pytorch.org/whl/cu117 28 | 29 | RUN python3 -m pip install --no-cache-dir bitsandbytes 30 | 31 | # Stage 2 32 | FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 AS build-image 33 | COPY --from=compile-image /opt/conda /opt/conda 34 | ENV PATH /opt/conda/bin:$PATH 35 | 36 | # Install apt libs 37 | RUN apt-get update && \ 38 | apt-get install -y curl git wget && \ 39 | apt-get clean && \ 40 | rm -rf /var/lib/apt/lists* 41 | 42 | RUN echo "source activate accelerate" >> ~/.profile 43 | 44 | # Activate the 
virtualenv 45 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /src/accelerate/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | __version__ = "0.33.0.dev0" 15 | 16 | from .accelerator import Accelerator 17 | from .big_modeling import ( 18 | cpu_offload, 19 | cpu_offload_with_hook, 20 | disk_offload, 21 | dispatch_model, 22 | init_empty_weights, 23 | init_on_device, 24 | load_checkpoint_and_dispatch, 25 | ) 26 | from .data_loader import skip_first_batches 27 | from .inference import prepare_pippy 28 | from .launchers import debug_launcher, notebook_launcher 29 | from .state import PartialState 30 | from .utils import ( 31 | AutocastKwargs, 32 | DataLoaderConfiguration, 33 | DDPCommunicationHookType, 34 | DeepSpeedPlugin, 35 | DistributedDataParallelKwargs, 36 | DistributedType, 37 | FullyShardedDataParallelPlugin, 38 | GradScalerKwargs, 39 | InitProcessGroupKwargs, 40 | ProfileKwargs, 41 | find_executable_batch_size, 42 | infer_auto_device_map, 43 | is_rich_available, 44 | load_checkpoint_in_model, 45 | synchronize_rng_states, 46 | ) 47 | 48 | 49 | if is_rich_available(): 50 | from .utils import rich 51 | -------------------------------------------------------------------------------- /.github/workflows/integration_tests.yml: -------------------------------------------------------------------------------- 1 | # CI for specifically ensuring integrations work fine (`transformers` mainly) 2 | # Useful tips: 3 | # - New integrations to test should have its own job, and follow a strategy method where we check both 4 | # the pypi and github versions. 5 | # - When checking the latest release of the integration, use 6 | # git checkout $(git describe --tags `git rev-list --tags --max-count=1`) to get the latest release. 7 | 8 | name: Integration Tests 9 | 10 | on: 11 | pull_request: 12 | paths: 13 | - "src/**" 14 | - "tests/**" 15 | - ".github/**" 16 | - "examples/**" 17 | - "setup.py" 18 | types: [opened, synchronize, reopened] 19 | 20 | env: 21 | HF_HOME: ~/hf_cache 22 | 23 | jobs: 24 | run-trainer-tests: 25 | runs-on: ubuntu-latest 26 | strategy: 27 | fail-fast: false 28 | steps: 29 | - uses: actions/checkout@v3.1.0 30 | - name: Set up python 3.8 31 | uses: actions/setup-python@v3 32 | with: 33 | python-version: 3.8 34 | cache: 'pip' 35 | cache-dependency-path: 'setup.py' 36 | 37 | - name: Install Accelerate from source 38 | run: | 39 | pip install --upgrade pip 40 | pip install -e . 41 | 42 | - name: Clone and install transformers 43 | run: | 44 | cd .. 
45 | git clone https://github.com/huggingface/transformers 46 | cd transformers 47 | pip install .[torch,testing] 48 | 49 | - name: Show installed libraries 50 | run: | 51 | pip freeze 52 | 53 | - name: Run Trainer tests 54 | env: 55 | WANDB_DISABLED: true 56 | run: | 57 | cd ../transformers 58 | pytest -sv tests/trainer 59 | -------------------------------------------------------------------------------- /src/accelerate/test_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .testing import ( 15 | DEFAULT_LAUNCH_COMMAND, 16 | are_the_same_tensors, 17 | assert_exception, 18 | device_count, 19 | execute_subprocess_async, 20 | get_launch_command, 21 | memory_allocated_func, 22 | path_in_accelerate_package, 23 | require_bnb, 24 | require_cpu, 25 | require_cuda, 26 | require_huggingface_suite, 27 | require_mlu, 28 | require_mps, 29 | require_multi_device, 30 | require_multi_gpu, 31 | require_multi_xpu, 32 | require_musa, 33 | require_non_cpu, 34 | require_non_torch_xla, 35 | require_non_xpu, 36 | require_npu, 37 | require_pippy, 38 | require_single_device, 39 | require_single_gpu, 40 | require_single_xpu, 41 | require_torch_min_version, 42 | require_torchvision, 43 | require_tpu, 44 | require_xpu, 45 | skip, 46 | slow, 47 | torch_device, 48 | ) 49 | from .training import RegressionDataset, RegressionModel, RegressionModel4XPU 50 | 51 | 52 | from .scripts import test_script, test_sync, test_ops # isort: skip 53 | -------------------------------------------------------------------------------- /tests/deepspeed/ds_config_zero3.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "optimizer": { 14 | "type": "AdamW", 15 | "params": { 16 | "lr": "auto", 17 | "weight_decay": "auto", 18 | "torch_adam": true, 19 | "adam_w_mode": true 20 | } 21 | }, 22 | "scheduler": { 23 | "type": "WarmupLR", 24 | "params": { 25 | "warmup_min_lr": "auto", 26 | "warmup_max_lr": "auto", 27 | "warmup_num_steps": "auto" 28 | } 29 | }, 30 | "zero_optimization": { 31 | "stage": 3, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "offload_param": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "overlap_comm": true, 41 | "contiguous_gradients": true, 42 | "sub_group_size": 1e9, 43 | "reduce_bucket_size": "auto", 44 | "stage3_prefetch_bucket_size": "auto", 45 | "stage3_param_persistence_threshold": "auto", 46 | "stage3_max_live_parameters": 1e9, 47 | "stage3_max_reuse_distance": 1e9, 48 | "stage3_gather_16bit_weights_on_model_save": "auto" 49 | }, 50 | "gradient_accumulation_steps": 1, 51 | "gradient_clipping": 
"auto", 52 | "steps_per_print": 2000, 53 | "train_batch_size": "auto", 54 | "train_micro_batch_size_per_gpu": "auto", 55 | "wall_clock_breakdown": false 56 | } -------------------------------------------------------------------------------- /src/accelerate/commands/menu/helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team and Brian Chao. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | A variety of helper functions and constants when dealing with terminal menu choices, based on 17 | https://github.com/bchao1/bullet 18 | """ 19 | 20 | import enum 21 | import shutil 22 | import sys 23 | 24 | 25 | TERMINAL_WIDTH, _ = shutil.get_terminal_size() 26 | 27 | CURSOR_TO_CHAR = {"UP": "A", "DOWN": "B", "RIGHT": "C", "LEFT": "D"} 28 | 29 | 30 | class Direction(enum.Enum): 31 | UP = 0 32 | DOWN = 1 33 | 34 | 35 | def forceWrite(content, end=""): 36 | sys.stdout.write(str(content) + end) 37 | sys.stdout.flush() 38 | 39 | 40 | def writeColor(content, color, end=""): 41 | forceWrite(f"\u001b[{color}m{content}\u001b[0m", end) 42 | 43 | 44 | def reset_cursor(): 45 | forceWrite("\r") 46 | 47 | 48 | def move_cursor(num_lines: int, direction: str): 49 | forceWrite(f"\033[{num_lines}{CURSOR_TO_CHAR[direction.upper()]}") 50 | 51 | 52 | def clear_line(): 53 | forceWrite(" " * TERMINAL_WIDTH) 54 | reset_cursor() 55 | 56 | 57 | def linebreak(): 58 | reset_cursor() 59 | forceWrite("-" * TERMINAL_WIDTH) 60 | -------------------------------------------------------------------------------- /docker/accelerate-gpu-deepspeed/Dockerfile: -------------------------------------------------------------------------------- 1 | # Builds GPU docker image of PyTorch specifically 2 | # Uses multi-staged approach to reduce size 3 | # Stage 1 4 | # Use base conda image to reduce time 5 | FROM continuumio/miniconda3:latest AS compile-image 6 | # Specify py version 7 | # Note: DeepSpeed beyond v0.12.6 requires py 3.10 8 | ENV PYTHON_VERSION=3.10 9 | # Install apt libs 10 | RUN apt-get update && \ 11 | apt-get install -y curl git wget && \ 12 | apt-get clean && \ 13 | rm -rf /var/lib/apt/lists* 14 | 15 | # Create our conda env 16 | RUN conda create --name accelerate python=${PYTHON_VERSION} ipython jupyter pip 17 | # We don't install pytorch here yet since CUDA isn't available 18 | # instead we use the direct torch wheel 19 | ENV PATH /opt/conda/envs/accelerate/bin:$PATH 20 | # Activate our bash shell 21 | RUN chsh -s /bin/bash 22 | SHELL ["/bin/bash", "-c"] 23 | # Activate the conda env, install mpy4pi, and install torch + accelerate 24 | RUN source activate accelerate && conda install -c conda-forge mpi4py 25 | RUN source activate accelerate && \ 26 | python3 -m pip install --no-cache-dir \ 27 | git+https://github.com/huggingface/accelerate#egg=accelerate[testing,test_trackers,deepspeed] \ 28 | --extra-index-url https://download.pytorch.org/whl/cu117 29 | 30 | RUN python3 -m pip 
install --no-cache-dir bitsandbytes 31 | 32 | # Stage 2 33 | FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 AS build-image 34 | COPY --from=compile-image /opt/conda /opt/conda 35 | ENV PATH /opt/conda/bin:$PATH 36 | 37 | # Install apt libs 38 | RUN apt-get update && \ 39 | apt-get install -y curl git wget && \ 40 | apt-get clean && \ 41 | rm -rf /var/lib/apt/lists* 42 | 43 | RUN echo "source activate accelerate" >> ~/.profile 44 | 45 | # Activate the virtualenv 46 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docs/source/package_reference/big_modeling.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Working with large models 17 | 18 | ## Dispatching and Offloading Models 19 | 20 | [[autodoc]] big_modeling.init_empty_weights 21 | [[autodoc]] big_modeling.cpu_offload 22 | [[autodoc]] big_modeling.cpu_offload_with_hook 23 | [[autodoc]] big_modeling.disk_offload 24 | [[autodoc]] big_modeling.dispatch_model 25 | [[autodoc]] big_modeling.load_checkpoint_and_dispatch 26 | [[autodoc]] big_modeling.load_checkpoint_in_model 27 | [[autodoc]] utils.infer_auto_device_map 28 | 29 | ## Model Hooks 30 | 31 | ### Hook Classes 32 | 33 | [[autodoc]] hooks.ModelHook 34 | [[autodoc]] hooks.AlignDevicesHook 35 | [[autodoc]] hooks.SequentialHook 36 | 37 | ### Adding Hooks 38 | 39 | [[autodoc]] hooks.add_hook_to_module 40 | [[autodoc]] hooks.attach_execution_device_hook 41 | [[autodoc]] hooks.attach_align_device_hook 42 | [[autodoc]] hooks.attach_align_device_hook_on_blocks 43 | 44 | ### Removing Hooks 45 | 46 | [[autodoc]] hooks.remove_hook_from_module 47 | [[autodoc]] hooks.remove_hook_from_submodules -------------------------------------------------------------------------------- /src/accelerate/test_utils/scripts/external_deps/test_zero3_integration.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import torch.distributed 16 | 17 | from accelerate.test_utils import require_huggingface_suite, torch_device 18 | from accelerate.utils import is_transformers_available 19 | 20 | 21 | if is_transformers_available(): 22 | from transformers import AutoModel, TrainingArguments 23 | 24 | 25 | GPT2_TINY = "sshleifer/tiny-gpt2" 26 | 27 | 28 | @require_huggingface_suite 29 | def init_torch_dist_then_launch_deepspeed(): 30 | backend = "ccl" if torch_device == "xpu" else "nccl" 31 | torch.distributed.init_process_group(backend=backend) 32 | deepspeed_config = { 33 | "zero_optimization": { 34 | "stage": 3, 35 | }, 36 | "train_batch_size": "auto", 37 | "train_micro_batch_size_per_gpu": "auto", 38 | } 39 | train_args = TrainingArguments( 40 | output_dir="./", 41 | deepspeed=deepspeed_config, 42 | ) 43 | model = AutoModel.from_pretrained(GPT2_TINY) 44 | assert train_args is not None 45 | assert model is not None 46 | 47 | 48 | def main(): 49 | init_torch_dist_then_launch_deepspeed() 50 | 51 | 52 | if __name__ == "__main__": 53 | main() 54 | -------------------------------------------------------------------------------- /tests/test_grad_sync.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | 17 | from accelerate import debug_launcher 18 | from accelerate.test_utils import ( 19 | DEFAULT_LAUNCH_COMMAND, 20 | device_count, 21 | execute_subprocess_async, 22 | path_in_accelerate_package, 23 | require_cpu, 24 | require_multi_device, 25 | require_non_cpu, 26 | test_sync, 27 | ) 28 | from accelerate.utils import patch_environment 29 | 30 | 31 | class SyncScheduler(unittest.TestCase): 32 | test_file_path = path_in_accelerate_package("test_utils", "scripts", "test_sync.py") 33 | 34 | @require_cpu 35 | def test_gradient_sync_cpu_noop(self): 36 | debug_launcher(test_sync.main, num_processes=1) 37 | 38 | @require_cpu 39 | def test_gradient_sync_cpu_multi(self): 40 | debug_launcher(test_sync.main) 41 | 42 | @require_non_cpu 43 | def test_gradient_sync_gpu(self): 44 | test_sync.main() 45 | 46 | @require_multi_device 47 | def test_gradient_sync_gpu_multi(self): 48 | print(f"Found {device_count} devices.") 49 | cmd = DEFAULT_LAUNCH_COMMAND + [self.test_file_path] 50 | with patch_environment(omp_num_threads=1): 51 | execute_subprocess_async(cmd) 52 | -------------------------------------------------------------------------------- /src/accelerate/commands/config/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2021 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import argparse 18 | 19 | from .config import config_command_parser 20 | from .config_args import default_config_file, load_config_from_file # noqa: F401 21 | from .default import default_command_parser 22 | from .update import update_command_parser 23 | 24 | 25 | def get_config_parser(subparsers=None): 26 | parent_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False) 27 | # The main config parser 28 | config_parser = config_command_parser(subparsers) 29 | # The subparser to add commands to 30 | subcommands = config_parser.add_subparsers(title="subcommands", dest="subcommand") 31 | 32 | # Then add other parsers with the parent parser 33 | default_command_parser(subcommands, parents=[parent_parser]) 34 | update_command_parser(subcommands, parents=[parent_parser]) 35 | 36 | return config_parser 37 | 38 | 39 | def main(): 40 | config_parser = get_config_parser() 41 | args = config_parser.parse_args() 42 | 43 | if not hasattr(args, "func"): 44 | config_parser.print_help() 45 | exit(1) 46 | 47 | # Run 48 | args.func(args) 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /src/accelerate/utils/tqdm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import warnings 16 | 17 | from .imports import is_tqdm_available 18 | 19 | 20 | if is_tqdm_available(): 21 | from tqdm.auto import tqdm as _tqdm 22 | 23 | from ..state import PartialState 24 | 25 | 26 | def tqdm(*args, main_process_only: bool = True, **kwargs): 27 | """ 28 | Wrapper around `tqdm.tqdm` that optionally displays only on the main process. 29 | 30 | Args: 31 | main_process_only (`bool`, *optional*): 32 | Whether to display the progress bar only on the main process 33 | """ 34 | if not is_tqdm_available(): 35 | raise ImportError("Accelerate's `tqdm` module requires `tqdm` to be installed. Please run `pip install tqdm`.") 36 | if len(args) > 0 and isinstance(args[0], bool): 37 | warnings.warn( 38 | f"Passing `{args[0]}` as the first argument to Accelerate's `tqdm` wrapper is deprecated " 39 | "and will be removed in v0.33.0. 
Please use the `main_process_only` keyword argument instead.", 40 | FutureWarning, 41 | ) 42 | main_process_only = args[0] 43 | args = args[1:] 44 | disable = kwargs.pop("disable", False) 45 | if main_process_only and not disable: 46 | disable = PartialState().local_process_index != 0 47 | return _tqdm(*args, **kwargs, disable=disable) 48 | -------------------------------------------------------------------------------- /src/accelerate/commands/accelerate_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2021 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from accelerate.commands.config import get_config_parser 18 | from accelerate.commands.env import env_command_parser 19 | from accelerate.commands.estimate import estimate_command_parser 20 | from accelerate.commands.launch import launch_command_parser 21 | from accelerate.commands.merge import merge_command_parser 22 | from accelerate.commands.test import test_command_parser 23 | from accelerate.commands.tpu import tpu_command_parser 24 | from accelerate.commands.utils import CustomArgumentParser 25 | 26 | 27 | def main(): 28 | parser = CustomArgumentParser("Accelerate CLI tool", usage="accelerate []", allow_abbrev=False) 29 | subparsers = parser.add_subparsers(help="accelerate command helpers") 30 | 31 | # Register commands 32 | get_config_parser(subparsers=subparsers) 33 | estimate_command_parser(subparsers=subparsers) 34 | env_command_parser(subparsers=subparsers) 35 | launch_command_parser(subparsers=subparsers) 36 | merge_command_parser(subparsers=subparsers) 37 | tpu_command_parser(subparsers=subparsers) 38 | test_command_parser(subparsers=subparsers) 39 | 40 | # Let's go 41 | args = parser.parse_args() 42 | 43 | if not hasattr(args, "func"): 44 | parser.print_help() 45 | exit(1) 46 | 47 | # Run 48 | args.func(args) 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Run Tests 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - "src/**" 7 | - "tests/**" 8 | - ".github/**" 9 | - "examples/**" 10 | - "setup.py" 11 | types: [opened, synchronize, reopened] 12 | 13 | env: 14 | HF_HOME: ~/hf_cache 15 | TESTING_MOCKED_DATALOADERS: "1" 16 | IS_GITHUB_CI: "1" 17 | 18 | jobs: 19 | run-tests: 20 | runs-on: ubuntu-latest 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | pytorch-version: [ 25 | latest, 26 | minimum, 27 | ] 28 | test-kind: [ 29 | test_prod, 30 | test_core, 31 | test_cli, 32 | test_big_modeling, 33 | test_deepspeed, 34 | test_fsdp, 35 | test_example_differences, 36 | test_checkpoint_step, 37 | test_checkpoint_epoch, 38 | test_rest 39 | ] 40 | steps: 41 | - uses: actions/checkout@v3.1.0 42 | - name: Set up python 3.8 43 | uses: actions/setup-python@v3 
44 | with: 45 | python-version: 3.8 46 | cache: 'pip' 47 | cache-dependency-path: 'setup.py' 48 | 49 | - name: Install the library 50 | run: | 51 | if [[ ${{ matrix.test-kind }} = test_prod ]]; then pip install -e .[test_prod]; fi 52 | if [[ ${{ matrix.test-kind }} != test_prod ]]; then pip install -e .[testing,test_trackers]; fi 53 | if [[ ${{ matrix.test-kind }} = test_rest ]]; then pip uninstall comet_ml -y; fi 54 | if [[ ${{ matrix.pytorch-version }} = minimum ]]; then pip install torchvision==0.18.1 torch==2.3.1; fi 55 | pip install pytest-reportlog tabulate setuptools 56 | 57 | - name: Show installed libraries 58 | run: | 59 | pip freeze 60 | 61 | - name: Run Tests 62 | env: 63 | PYTORCH_VERSION: ${{ matrix.pytorch-version }} 64 | run: | 65 | make ${{ matrix.test-kind }} 66 | 67 | - name: Generate Report 68 | if: always() 69 | run: | 70 | python utils/log_reports.py >> $GITHUB_STEP_SUMMARY 71 | -------------------------------------------------------------------------------- /docs/source/usage_guides/explore.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Learning how to incorporate 🤗 Accelerate features quickly! 17 | 18 | Please use the interactive tool below to help you get started with learning about a particular 19 | feature of 🤗 Accelerate and how to utilize it! It will provide you with a code diff, an explanation 20 | towards what is going on, as well as provide you with some useful links to explore more within 21 | the documentation! 22 | 23 | Most code examples start from the following python code before integrating 🤗 Accelerate in some way: 24 | 25 | ```python 26 | for batch in dataloader: 27 | optimizer.zero_grad() 28 | inputs, targets = batch 29 | inputs = inputs.to(device) 30 | targets = targets.to(device) 31 | outputs = model(inputs) 32 | loss = loss_function(outputs, targets) 33 | loss.backward() 34 | optimizer.step() 35 | scheduler.step() 36 | ``` 37 | 38 |
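For comparison, here is a rough sketch (independent of the interactive tool above) of the same loop after a minimal 🤗 Accelerate integration, assuming the same `model`, `optimizer`, `dataloader`, `scheduler` and `loss_function` objects as in the snippet above:

```python
from accelerate import Accelerator

accelerator = Accelerator()
# prepare() wraps the objects for the current device / distributed setup
model, optimizer, dataloader, scheduler = accelerator.prepare(model, optimizer, dataloader, scheduler)

for batch in dataloader:
    optimizer.zero_grad()
    inputs, targets = batch
    # no manual .to(device): prepared dataloaders place batches on the right device
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
    accelerator.backward(loss)  # replaces loss.backward()
    optimizer.step()
    scheduler.step()
```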
39 | <!-- interactive feature explorer (embedded iframe) --> 44 |
45 | 52 | -------------------------------------------------------------------------------- /src/accelerate/utils/torch_xla.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import importlib.metadata 16 | import subprocess 17 | import sys 18 | 19 | 20 | def install_xla(upgrade: bool = False): 21 | """ 22 | Helper function to install appropriate xla wheels based on the `torch` version in Google Colaboratory. 23 | 24 | Args: 25 | upgrade (`bool`, *optional*, defaults to `False`): 26 | Whether to upgrade `torch` and install the latest `torch_xla` wheels. 27 | 28 | Example: 29 | 30 | ```python 31 | >>> from accelerate.utils import install_xla 32 | 33 | >>> install_xla(upgrade=True) 34 | ``` 35 | """ 36 | in_colab = False 37 | if "IPython" in sys.modules: 38 | in_colab = "google.colab" in str(sys.modules["IPython"].get_ipython()) 39 | 40 | if in_colab: 41 | if upgrade: 42 | torch_install_cmd = ["pip", "install", "-U", "torch"] 43 | subprocess.run(torch_install_cmd, check=True) 44 | # get the current version of torch 45 | torch_version = importlib.metadata.version("torch") 46 | torch_version_trunc = torch_version[: torch_version.rindex(".")] 47 | xla_wheel = f"https://storage.googleapis.com/tpu-pytorch/wheels/colab/torch_xla-{torch_version_trunc}-cp37-cp37m-linux_x86_64.whl" 48 | xla_install_cmd = ["pip", "install", xla_wheel] 49 | subprocess.run(xla_install_cmd, check=True) 50 | else: 51 | raise RuntimeError("`install_xla` utility works only on google colab.") 52 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import unittest 16 | 17 | from accelerate import debug_launcher 18 | from accelerate.test_utils import ( 19 | DEFAULT_LAUNCH_COMMAND, 20 | device_count, 21 | execute_subprocess_async, 22 | path_in_accelerate_package, 23 | require_cpu, 24 | require_huggingface_suite, 25 | require_multi_device, 26 | require_single_device, 27 | ) 28 | from accelerate.utils import patch_environment 29 | 30 | 31 | @require_huggingface_suite 32 | class MetricTester(unittest.TestCase): 33 | def setUp(self): 34 | self.test_file_path = path_in_accelerate_package("test_utils", "scripts", "external_deps", "test_metrics.py") 35 | 36 | from accelerate.test_utils.scripts.external_deps import test_metrics # noqa: F401 37 | 38 | self.test_metrics = test_metrics 39 | 40 | @require_cpu 41 | def test_metric_cpu_noop(self): 42 | debug_launcher(self.test_metrics.main, num_processes=1) 43 | 44 | @require_cpu 45 | def test_metric_cpu_multi(self): 46 | debug_launcher(self.test_metrics.main) 47 | 48 | @require_single_device 49 | def test_metric_accelerator(self): 50 | self.test_metrics.main() 51 | 52 | @require_multi_device 53 | def test_metric_accelerator_multi(self): 54 | print(f"Found {device_count} devices.") 55 | cmd = DEFAULT_LAUNCH_COMMAND + [self.test_file_path] 56 | with patch_environment(omp_num_threads=1, ACCELERATE_LOG_LEVEL="INFO"): 57 | execute_subprocess_async(cmd) 58 | -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Big model inference benchmarks 2 | 3 | Running inference with Accelerate on big models. 4 | 5 | ## Setup 6 | 7 | These benchmarks use the `transformers` library: 8 | 9 | ```bash 10 | pip install transformers 11 | ``` 12 | 13 | To reproduce or test a new setup, run 14 | 15 | ```py 16 | python inference_acc.py model_name 17 | ``` 18 | 19 | This script supports `gpt-j-6b`, `gpt-neox`, `opt` (30B version) and `T0pp` out of the box, but you can specify any valid checkpoint for `model_name`. 20 | 21 | To force a different `torch_dtype` than the one in the config: `--torch_dtype xxx`. 22 | 23 | If you get an error linked to disk offload, you need to add the option `--disk-offload` 24 | 25 | ## Results 26 | 27 | On a setup with two Titan RTXs (24GB of RAM) and 32GB of RAM, we get the following benchmarks (T0pp does not run in float16, which is why it's not included). 
28 | 29 | | Model | Model load time | Generation time | dtype | GPU 0 use | GPU 1 use | CPU use | Disk offload | 30 | |:-----:|:---------------:|:---------------:|:-----:|:---------:|:---------:|:-------:|:------------:| 31 | | GPT-J-6B | 8.7s | 0.05s per token | float16 | 11.7GB | 0GB | 0GB | no | 32 | | GPT-J-6B | 12.4s | 0.06s per token | float32 | 21.9GB | 1.5GB | 0GB | no | 33 | | GPT-Neo-X-20B | 30.9s | 0.08s per token | float16 | 21.5GB | 18GB | 0GB | no | 34 | | GPT-Neo-X-20B | 78.2s | 10.72s per token | float32 | 20.3GB | 22.7 GB | 24.4GB | yes | 35 | | T0pp (11B) | 29.4s | 0.05s per token | float32 | 21.1GB | 21.3GB | 0GB | no | 36 | | OPT-30B | 34.5s | 2.37s per token | float16 | 20.7GB | 22.3GB | 14.1GB | no | 37 | | OPT-30B | 112.3s | 33.9s per token | float32 | 20.2GB | 21.2GB | 23.5GB | yes | 38 | 39 | Note on the results: 40 | - using two GPUs instead of one does not slow down generation 41 | - using CPU offload slows down a bit (see OPT-30B) 42 | - using disk offload slows down a lot (need to implement prefetching) 43 | 44 | You will also note that Accelerate does not use any more GPU and CPU RAM than necessary: 45 | - peak GPU memory is exactly the size of the model put on a given GPU 46 | - peak CPU memory is either the size of the biggest checkpoint shard or the part of the model offloaded on CPU, whichever is bigger. -------------------------------------------------------------------------------- /examples/inference/pippy/README.md: -------------------------------------------------------------------------------- 1 | # Distributed inference examples with PiPPy 2 | 3 | This repo contains a variety of tutorials for using the [PiPPy](https://github.com/PyTorch/PiPPy) pipeline parallelism library with accelerate. You will find examples covering: 4 | 5 | 1. How to trace the model using `accelerate.prepare_pippy` 6 | 2. How to specify inputs based on what the model expects (when to use `kwargs`, `args`, and such) 7 | 3. How to gather the results at the end. 8 | 9 | ## Installation 10 | 11 | This requires the `main` branch of accelerate (or at least version 0.27.0), `pippy` version 0.2.0 or greater, and at least Python 3.9. Please install using `pip install .` to pull from the `setup.py` in this repo, or run manually: 12 | 13 | ```bash 14 | pip install 'accelerate>=0.27.0' 'torchpippy>=0.2.0' 15 | ``` 16 | 17 | ## Running code 18 | 19 | You can use either `torchrun` or the recommended `accelerate launch` (without needing to run `accelerate config`) on each script: 20 | 21 | ```bash 22 | accelerate launch bert.py 23 | ``` 24 | 25 | Or: 26 | 27 | ```bash 28 | accelerate launch --num_processes {NUM_GPUS} bert.py 29 | ``` 30 | 31 | Or: 32 | 33 | ```bash 34 | torchrun --nproc-per-node {NUM_GPUS} bert.py 35 | ``` 36 | 37 | ## General speedups 38 | 39 | One can expect that PiPPy will outperform native model parallelism by a multiplicative factor since all GPUs are running at all times with inputs, rather than one input being passed through a GPU at a time waiting for the prior to finish.
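As a rough, idealized illustration of where that factor comes from (ignoring communication overhead and assuming perfectly balanced stages): with `S` pipeline stages and `M` micro-batches, a strictly sequential model-parallel pass keeps only one GPU busy at a time and takes about `M * S` stage-times, whereas a pipelined pass takes about `S + M - 1`; for `S = 2` and `M = 8` that is 16 versus 9 stage-times, approaching a 2x throughput gain as `M` grows.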
40 | 41 | Below are some benchmarks we have found when using the accelerate-pippy integration for a few models when running on 2x4090's: 42 | 43 | ### Bert 44 | 45 | | | Accelerate/Sequential | PiPPy + Accelerate | 46 | |---|---|---| 47 | | First batch | 0.2137s | 0.3119s | 48 | | Average of 5 batches | 0.0099s | **0.0062s** | 49 | 50 | ### GPT2 51 | 52 | | | Accelerate/Sequential | PiPPy + Accelerate | 53 | |---|---|---| 54 | | First batch | 0.1959s | 0.4189s | 55 | | Average of 5 batches | 0.0205s | **0.0126s** | 56 | 57 | ### T5 58 | 59 | | | Accelerate/Sequential | PiPPy + Accelerate | 60 | |---|---|---| 61 | | First batch | 0.2789s | 0.3809s | 62 | | Average of 5 batches | 0.0198s | **0.0166s** | -------------------------------------------------------------------------------- /examples/slurm/submit_multicpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --job-name=multicpu 4 | #SBATCH --nodes=2 # number of nodes 5 | #SBATCH --ntasks-per-node=1 # number of MP tasks 6 | #SBATCH --exclusive 7 | #SBATCH --output=O-%x.%j 8 | #SBATCH --error=E-%x.%j 9 | 10 | ###################### 11 | ### Set environment ## 12 | ###################### 13 | source activateEnvironment.sh 14 | 15 | ###################### 16 | #### Set network ##### 17 | ###################### 18 | head_node_ip=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) 19 | ###################### 20 | 21 | # Setup env variables for distributed jobs 22 | export MASTER_PORT="${MASTER_PORT:-29555 }" 23 | echo "head_node_ip=${head_node_ip}" 24 | echo "MASTER_PORT=${MASTER_PORT}" 25 | 26 | INSTANCES_PER_NODE="${INSTANCES_PER_NODE:-1}" 27 | 28 | if [[ $SLURM_NNODES == 1 ]] && [[ $INSTANCES_PER_NODE == 1 ]]; then 29 | export CCL_WORKER_COUNT=0 30 | LAUNCHER="" 31 | else 32 | # Setup env variables for distributed jobs 33 | export CCL_WORKER_COUNT="${CCL_WORKER_COUNT:-2}" 34 | echo "CCL_WORKER_COUNT=${CCL_WORKER_COUNT}" 35 | 36 | # Write hostfile (one hostname per line) 37 | HOSTFILE_PATH=hostfile 38 | scontrol show hostname $SLURM_JOB_NODELIST | perl -ne 'print "$_" x 1' > ${HOSTFILE_PATH} 39 | 40 | export LAUNCHER="accelerate launch \ 41 | --num_processes $((SLURM_NNODES * ${INSTANCES_PER_NODE})) \ 42 | --num_machines $SLURM_NNODES \ 43 | --rdzv_backend c10d \ 44 | --main_process_ip $head_node_ip \ 45 | --main_process_port $MASTER_PORT \ 46 | --mpirun_hostfile $HOSTFILE_PATH \ 47 | --mpirun_ccl $CCL_WORKER_COUNT" 48 | fi 49 | 50 | # Set the script and its arguments 51 | export ACCELERATE_DIR="${ACCELERATE_DIR:-/accelerate}" 52 | export SCRIPT="${ACCELERATE_DIR}/examples/complete_nlp_example.py" 53 | export SCRIPT_ARGS=" \ 54 | --cpu \ 55 | --output_dir ${ACCELERATE_DIR}/examples/output \ 56 | " 57 | 58 | # This step is necessary because accelerate launch does not handle multiline arguments properly 59 | export CMD="$LAUNCHER $SCRIPT $SCRIPT_ARGS" 60 | # Print the command 61 | echo $CMD 62 | echo "" 63 | 64 | # Run the command 65 | eval $CMD -------------------------------------------------------------------------------- /src/accelerate/commands/menu/cursor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team and Brian Chao. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | A utility for showing and hiding the terminal cursor on Windows and Linux, based on https://github.com/bchao1/bullet 17 | """ 18 | 19 | import os 20 | import sys 21 | from contextlib import contextmanager 22 | 23 | 24 | # Windows only 25 | if os.name == "nt": 26 | import ctypes 27 | import msvcrt # noqa 28 | 29 | class CursorInfo(ctypes.Structure): 30 | # _fields is a specific attr expected by ctypes 31 | _fields_ = [("size", ctypes.c_int), ("visible", ctypes.c_byte)] 32 | 33 | 34 | def hide_cursor(): 35 | if os.name == "nt": 36 | ci = CursorInfo() 37 | handle = ctypes.windll.kernel32.GetStdHandle(-11) 38 | ctypes.windll.kernel32.GetConsoleCursorInfo(handle, ctypes.byref(ci)) 39 | ci.visible = False 40 | ctypes.windll.kernel32.SetConsoleCursorInfo(handle, ctypes.byref(ci)) 41 | elif os.name == "posix": 42 | sys.stdout.write("\033[?25l") 43 | sys.stdout.flush() 44 | 45 | 46 | def show_cursor(): 47 | if os.name == "nt": 48 | ci = CursorInfo() 49 | handle = ctypes.windll.kernel32.GetStdHandle(-11) 50 | ctypes.windll.kernel32.GetConsoleCursorInfo(handle, ctypes.byref(ci)) 51 | ci.visible = True 52 | ctypes.windll.kernel32.SetConsoleCursorInfo(handle, ctypes.byref(ci)) 53 | elif os.name == "posix": 54 | sys.stdout.write("\033[?25h") 55 | sys.stdout.flush() 56 | 57 | 58 | @contextmanager 59 | def hide(): 60 | "Context manager to hide the terminal cursor" 61 | try: 62 | hide_cursor() 63 | yield 64 | finally: 65 | show_cursor() 66 | -------------------------------------------------------------------------------- /manim_animations/dataloaders/stage_4.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from manim import * 16 | 17 | class Stage4(Scene): 18 | def construct(self): 19 | 20 | step_1 = MarkupText( 21 | f"To understand the next part fully, let's define two terms,\n`batch_size` and `global_batch_size`:", 22 | font_size=18 23 | ) 24 | step_1.move_to([0, 1.5, 0]) 25 | # 26 | step_2 = MarkupText( 27 | f"\n\n● `batch_size`: \n\tThis will be defined as the batch size seen on a given\n\t*individual* GPU", 28 | font_size=18, 29 | ).next_to(step_1, direction=DOWN, aligned_edge=LEFT) 30 | 31 | step_3 = MarkupText( 32 | f"\n\n● `global_batch_size`:\n\tThis will be defined as the *total* number of\n\tdifferent items seen in the dataset, across all GPUs", 33 | font_size=18, 34 | ).next_to(step_2, direction=DOWN, aligned_edge=LEFT) 35 | 36 | step_4 = MarkupText( 37 | f"\n\nSo if we have a dataset of 64 items, 8 GPUs, \nand a `batch_size` of 8, each *step* will go through\nthe entire dataset one time as 8*8=64", 38 | font_size=18, 39 | ).next_to(step_3, direction=DOWN, aligned_edge=LEFT) 40 | self.play( 41 | Write(step_1, run_time=4), 42 | ) 43 | self.play( 44 | Write(step_2, run_time=4) 45 | ) 46 | self.play( 47 | Write(step_3, run_time=4) 48 | ) 49 | self.play( 50 | Write(step_4, run_time=6) 51 | ) 52 | self.wait() -------------------------------------------------------------------------------- /src/accelerate/utils/versions.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import importlib.metadata 16 | from typing import Union 17 | 18 | from packaging.version import Version, parse 19 | 20 | from .constants import STR_OPERATION_TO_FUNC 21 | 22 | 23 | torch_version = parse(importlib.metadata.version("torch")) 24 | 25 | 26 | def compare_versions(library_or_version: Union[str, Version], operation: str, requirement_version: str): 27 | """ 28 | Compares a library version to some requirement using a given operation. 29 | 30 | Args: 31 | library_or_version (`str` or `packaging.version.Version`): 32 | A library name or a version to check. 33 | operation (`str`): 34 | A string representation of an operator, such as `">"` or `"<="`. 35 | requirement_version (`str`): 36 | The version to compare the library version against 37 | """ 38 | if operation not in STR_OPERATION_TO_FUNC.keys(): 39 | raise ValueError(f"`operation` must be one of {list(STR_OPERATION_TO_FUNC.keys())}, received {operation}") 40 | operation = STR_OPERATION_TO_FUNC[operation] 41 | if isinstance(library_or_version, str): 42 | library_or_version = parse(importlib.metadata.version(library_or_version)) 43 | return operation(library_or_version, parse(requirement_version)) 44 | 45 | 46 | def is_torch_version(operation: str, version: str): 47 | """ 48 | Compares the current PyTorch version to a given reference with an operation. 
49 | 50 | Args: 51 | operation (`str`): 52 | A string representation of an operator, such as `">"` or `"<="` 53 | version (`str`): 54 | A string version of PyTorch 55 | """ 56 | return compare_versions(torch_version, operation, version) 57 | -------------------------------------------------------------------------------- /src/accelerate/commands/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2021 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import argparse 18 | 19 | from accelerate.test_utils import execute_subprocess_async, path_in_accelerate_package 20 | 21 | 22 | def test_command_parser(subparsers=None): 23 | if subparsers is not None: 24 | parser = subparsers.add_parser("test") 25 | else: 26 | parser = argparse.ArgumentParser("Accelerate test command") 27 | 28 | parser.add_argument( 29 | "--config_file", 30 | default=None, 31 | help=( 32 | "The path to use to store the config file. Will default to a file named default_config.yaml in the cache " 33 | "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have " 34 | "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed " 35 | "with 'huggingface'." 36 | ), 37 | ) 38 | 39 | if subparsers is not None: 40 | parser.set_defaults(func=test_command) 41 | return parser 42 | 43 | 44 | def test_command(args): 45 | script_name = path_in_accelerate_package("test_utils", "scripts", "test_script.py") 46 | 47 | if args.config_file is None: 48 | test_args = [script_name] 49 | else: 50 | test_args = f"--config_file={args.config_file} {script_name}".split() 51 | 52 | cmd = ["accelerate-launch"] + test_args 53 | result = execute_subprocess_async(cmd) 54 | if result.returncode == 0: 55 | print("Test is a success! You are ready for your distributed training!") 56 | 57 | 58 | def main(): 59 | parser = test_command_parser() 60 | args = parser.parse_args() 61 | test_command(args) 62 | 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /examples/inference/pippy/llama.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import torch 15 | from transformers import AutoModelForCausalLM, AutoTokenizer 16 | 17 | from accelerate import PartialState, prepare_pippy 18 | 19 | 20 | # sdpa implementation which is the default torch>2.1.2 fails with the tracing + attention mask kwarg 21 | # with attn_implementation="eager" mode, the forward is very slow for some reason 22 | model = AutoModelForCausalLM.from_pretrained( 23 | "meta-llama/Llama-2-7b-chat-hf", low_cpu_mem_usage=True, attn_implementation="sdpa" 24 | ) 25 | model.eval() 26 | 27 | # Input configs 28 | # Create example inputs for the model 29 | tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf") 30 | prompts = ("I would like to", "I really like to", "The weather is pretty") # bs = 3 31 | tokenizer.pad_token = tokenizer.eos_token 32 | inputs = tokenizer(prompts, return_tensors="pt", padding=True) 33 | 34 | # Create a pipeline stage from the model 35 | # Using `auto` is equivalent to letting `device_map="auto"` figure 36 | # out device mapping and will also split the model according to the 37 | # number of total GPUs available if it fits on one GPU 38 | model = prepare_pippy(model, split_points="auto", example_kwargs=inputs) 39 | 40 | # You can pass `gather_output=True` to have the output from the model 41 | # available on all GPUs 42 | # model = prepare_pippy(model, split_points="auto", example_args=(input,), gather_output=True) 43 | 44 | # currently we don't support `model.generate` 45 | # output = model.generate(**inputs, max_new_tokens=1) 46 | inputs = inputs.to(0) 47 | with torch.no_grad(): 48 | output = model(**inputs) 49 | 50 | # The outputs are only on the final process by default 51 | if PartialState().is_last_process: 52 | next_token_logits = output[0][:, -1, :] 53 | next_token = torch.argmax(next_token_logits, dim=-1) 54 | print(tokenizer.batch_decode(next_token)) 55 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # What does this PR do? 2 | 3 | 12 | 13 | 14 | 15 | Fixes # (issue) 16 | 17 | 18 | ## Before submitting 19 | - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). 20 | - [ ] Did you read the [contributor guideline](https://github.com/huggingface/accelerate/blob/main/CONTRIBUTING.md#submitting-a-pull-request-pr), 21 | Pull Request section? 22 | - [ ] Was this discussed/approved via a Github issue or the [forum](https://discuss.huggingface.co/)? Please add a link 23 | to it if that's the case. 24 | - [ ] Did you make sure to update the documentation with your changes? Here are the 25 | [documentation guidelines](https://github.com/huggingface/accelerate/tree/main/docs), and 26 | [here are tips on formatting docstrings](https://github.com/huggingface/accelerate/tree/main/docs#writing-documentation---specification). 27 | - [ ] Did you write any new necessary tests? 28 | 29 | 30 | ## Who can review? 31 | 32 | Anyone in the community is free to review the PR once the tests have passed. Feel free to tag 33 | members/contributors who may be interested in your PR. 
34 | 35 | -------------------------------------------------------------------------------- /docs/source/basic_tutorials/tpu.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # TPU training 17 | 18 | A [TPU (Tensor Processing Unit)](https://cloud.google.com/tpu/docs/intro-to-tpu) is a type of hardware specifically designed for training models efficiently. Accelerate supports TPU training, but there are a few things you should be aware of, namely graph compilation. This tutorial briefly discusses compilation, and for more details, take a look at the [Training on TPUs with Accelerate](../concept_guides/training_tpu) guide. 19 | 20 | ## Compilation 21 | 22 | A TPU creates a graph of all the operations in the training step such as the forward pass, backward pass and optimizer step. This is why the first training step always takes a while because building and compiling this graph takes time. But once compilation is complete, it is cached and all subsequent steps are much faster. 23 | 24 | The key is to avoid compiling your code again or else training is super slow. This means all your operations must be exactly the same: 25 | 26 | * all tensors in your batches must have the same length (for example, no dynamic padding for NLP tasks) 27 | * your code must be static (for example, no layers with for loops that have different lengths depending on the input such as a LSTM) 28 | 29 | ## Weight tying 30 | 31 | A common language model design is to tie the weights of the embedding and softmax layers. However, moving the model to a TPU (either yourself or passing it to the [`~Accelerator.prepare`] method) breaks the weight tying and you'll need to retie the weights. 32 | 33 | To add special behavior (like weight tying) in your script for TPUs, set [`~Accelerator.distributed_type`] to `DistributedType.TPU` first. Then you can use the [`~transformers.PreTrainedModel.tie_weights`] method to tie the weights. 34 | 35 | ```py 36 | if accelerator.distributed_type == DistributedType.TPU: 37 | model.tie_weights() 38 | ``` 39 | -------------------------------------------------------------------------------- /src/accelerate/commands/config/update.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2022 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from pathlib import Path 18 | 19 | from .config_args import default_config_file, load_config_from_file 20 | from .config_utils import SubcommandHelpFormatter 21 | 22 | 23 | description = "Update an existing config file with the latest defaults while maintaining the old configuration." 24 | 25 | 26 | def update_config(args): 27 | """ 28 | Update an existing config file with the latest defaults while maintaining the old configuration. 
29 | """ 30 | config_file = args.config_file 31 | if config_file is None and Path(default_config_file).exists(): 32 | config_file = default_config_file 33 | elif not Path(config_file).exists(): 34 | raise ValueError(f"The passed config file located at {config_file} doesn't exist.") 35 | config = load_config_from_file(config_file) 36 | 37 | if config_file.endswith(".json"): 38 | config.to_json_file(config_file) 39 | else: 40 | config.to_yaml_file(config_file) 41 | return config_file 42 | 43 | 44 | def update_command_parser(parser, parents): 45 | parser = parser.add_parser("update", parents=parents, help=description, formatter_class=SubcommandHelpFormatter) 46 | parser.add_argument( 47 | "--config_file", 48 | default=None, 49 | help=( 50 | "The path to the config file to update. Will default to a file named default_config.yaml in the cache " 51 | "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have " 52 | "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed " 53 | "with 'huggingface'." 54 | ), 55 | ) 56 | 57 | parser.set_defaults(func=update_config_command) 58 | return parser 59 | 60 | 61 | def update_config_command(args): 62 | config_file = update_config(args) 63 | print(f"Sucessfully updated the configuration file at {config_file}.") 64 | -------------------------------------------------------------------------------- /src/accelerate/commands/merge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2024 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | from accelerate.commands.utils import CustomArgumentParser 17 | from accelerate.utils import merge_fsdp_weights 18 | 19 | 20 | description = """Utility to merge the weights from multiple FSDP checkpoints into a single combined checkpoint. Should be used if 21 | `SHARDED_STATE_DICT` was used for the model. Weights will be saved to `{output_path}`. 22 | 23 | This is a CPU-bound process and requires enough RAM to load the entire model state dict.""" 24 | 25 | 26 | def merge_command(args): 27 | merge_fsdp_weights( 28 | args.checkpoint_directory, args.output_path, not args.unsafe_serialization, args.remove_checkpoint_dir 29 | ) 30 | 31 | 32 | def merge_command_parser(subparsers=None): 33 | if subparsers is not None: 34 | parser = subparsers.add_parser("merge-weights", description=description) 35 | else: 36 | parser = CustomArgumentParser(description=description) 37 | 38 | parser.add_argument("checkpoint_directory", type=str, help="A directory containing sharded weights saved by FSDP.") 39 | parser.add_argument( 40 | "output_path", 41 | type=str, 42 | help="The path to save the merged weights. Defaults to the current directory. 
", 43 | ) 44 | parser.add_argument( 45 | "--unsafe_serialization", 46 | action="store_false", 47 | default=False, 48 | help="Whether to save the merged weights as `.bin` rather than `.safetensors` (not recommended).", 49 | ) 50 | parser.add_argument( 51 | "--remove_checkpoint_dir", 52 | action="store_true", 53 | help="Whether to remove the checkpoint directory after merging.", 54 | default=False, 55 | ) 56 | 57 | if subparsers is not None: 58 | parser.set_defaults(func=merge_command) 59 | return parser 60 | 61 | 62 | def main(): 63 | parser = merge_command_parser() 64 | args = parser.parse_args() 65 | merge_command(args) 66 | 67 | 68 | if __name__ == "__main__": 69 | main() 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # VSCode 132 | .vscode 133 | 134 | # IntelliJ 135 | .idea 136 | 137 | # Mac .DS_Store 138 | .DS_Store 139 | 140 | # More test things 141 | wandb 142 | 143 | # ruff 144 | .ruff_cache 145 | -------------------------------------------------------------------------------- /examples/inference/pippy/bert.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import time 15 | 16 | import torch 17 | from transformers import AutoModelForMaskedLM 18 | 19 | from accelerate import PartialState, prepare_pippy 20 | from accelerate.utils import set_seed 21 | 22 | 23 | # Set the random seed to have reproducable outputs 24 | set_seed(42) 25 | 26 | # Create an example model 27 | model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased") 28 | model.eval() 29 | 30 | # Input configs 31 | # Create example inputs for the model 32 | input = torch.randint( 33 | low=0, 34 | high=model.config.vocab_size, 35 | size=(2, 512), # bs x seq_len 36 | device="cpu", 37 | dtype=torch.int64, 38 | requires_grad=False, 39 | ) 40 | 41 | 42 | # Create a pipeline stage from the model 43 | # Using `auto` is equivalent to letting `device_map="auto"` figure 44 | # out device mapping and will also split the model according to the 45 | # number of total GPUs available if it fits on one GPU 46 | model = prepare_pippy(model, split_points="auto", example_args=(input,)) 47 | 48 | # You can pass `gather_output=True` to have the output from the model 49 | # available on all GPUs 50 | # model = prepare_pippy(model, split_points="auto", example_args=(input,), gather_output=True) 51 | 52 | # Move the inputs to the first device 53 | input = input.to("cuda:0") 54 | 55 | # Take an average of 5 times 56 | # Measure first batch 57 | torch.cuda.synchronize() 58 | start_time = time.time() 59 | with torch.no_grad(): 60 | output = model(input) 61 | torch.cuda.synchronize() 62 | end_time = time.time() 63 | first_batch = end_time - start_time 64 | 65 | # Now that CUDA is init, measure after 66 | torch.cuda.synchronize() 67 | start_time = time.time() 68 | for i in range(5): 69 | with torch.no_grad(): 70 | output = model(input) 71 | torch.cuda.synchronize() 72 | end_time = time.time() 73 | 74 | # The outputs are only on the final process by default 75 | if 
PartialState().is_last_process: 76 | output = torch.stack(tuple(output[0])) 77 | print(f"Time of first pass: {first_batch}") 78 | print(f"Average time per batch: {(end_time - start_time) / 5}") 79 | -------------------------------------------------------------------------------- /examples/inference/pippy/gpt2.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import time 15 | 16 | import torch 17 | from transformers import AutoModelForSequenceClassification 18 | 19 | from accelerate import PartialState, prepare_pippy 20 | from accelerate.utils import set_seed 21 | 22 | 23 | # Set the random seed to have reproducable outputs 24 | set_seed(42) 25 | 26 | # Create an example model 27 | model = AutoModelForSequenceClassification.from_pretrained("gpt2") 28 | model.eval() 29 | 30 | # Input configs 31 | # Create example inputs for the model 32 | input = torch.randint( 33 | low=0, 34 | high=model.config.vocab_size, 35 | size=(2, 1024), # bs x seq_len 36 | device="cpu", 37 | dtype=torch.int64, 38 | requires_grad=False, 39 | ) 40 | 41 | # Create a pipeline stage from the model 42 | # Using `auto` is equivalent to letting `device_map="auto"` figure 43 | # out device mapping and will also split the model according to the 44 | # number of total GPUs available if it fits on one GPU 45 | model = prepare_pippy(model, split_points="auto", example_args=(input,)) 46 | 47 | # You can pass `gather_output=True` to have the output from the model 48 | # available on all GPUs 49 | # model = prepare_pippy(model, split_points="auto", example_args=(input,), gather_output=True) 50 | 51 | # Move the inputs to the first device 52 | input = input.to("cuda:0") 53 | 54 | # Take an average of 5 times 55 | # Measure first batch 56 | torch.cuda.synchronize() 57 | start_time = time.time() 58 | with torch.no_grad(): 59 | output = model(input) 60 | torch.cuda.synchronize() 61 | end_time = time.time() 62 | first_batch = end_time - start_time 63 | 64 | # Now that CUDA is init, measure after 65 | torch.cuda.synchronize() 66 | start_time = time.time() 67 | for i in range(5): 68 | with torch.no_grad(): 69 | output = model(input) 70 | torch.cuda.synchronize() 71 | end_time = time.time() 72 | 73 | # The outputs are only on the final process by default 74 | if PartialState().is_last_process: 75 | output = torch.stack(tuple(output[0])) 76 | print(f"Time of first pass: {first_batch}") 77 | print(f"Average time per batch: {(end_time - start_time) / 5}") 78 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F41B Bug Report" 2 | description: Submit a bug report to help us improve Accelerate 3 | body: 4 | - type: markdown 5 | attributes: 6 | value: | 7 | Thanks for taking the time to 
submit a bug report! 🐛 8 | If this is not a bug related to the Accelerate library directly, but instead a general question about your code or the library specifically please use the [forums](https://discuss.huggingface.co/c/accelerate/18). 9 | 10 | - type: textarea 11 | id: system-info 12 | attributes: 13 | label: System Info 14 | description: Please share your accelerate configuration with us. You can run the command `accelerate env` and copy-paste its outputs below 15 | render: Shell 16 | placeholder: accelerate version, OS, python version, numpy version, torch version, and accelerate's configuration 17 | validations: 18 | required: true 19 | 20 | - type: checkboxes 21 | id: information-scripts-examples 22 | attributes: 23 | label: Information 24 | description: 'The problem arises when using:' 25 | options: 26 | - label: "The official example scripts" 27 | - label: "My own modified scripts" 28 | 29 | - type: checkboxes 30 | id: information-tasks 31 | attributes: 32 | label: Tasks 33 | description: "The tasks I am working on are:" 34 | options: 35 | - label: "One of the scripts in the examples/ folder of Accelerate or an officially supported `no_trainer` script in the `examples` folder of the `transformers` repo (such as `run_no_trainer_glue.py`)" 36 | - label: "My own task or dataset (give details below)" 37 | 38 | - type: textarea 39 | id: reproduction 40 | validations: 41 | required: true 42 | attributes: 43 | label: Reproduction 44 | description: | 45 | Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet. 46 | If you have code snippets, error messages, stack traces please provide them here as well. 47 | Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting 48 | Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code. 49 | 50 | placeholder: | 51 | Steps to reproduce the behavior: 52 | 53 | 1. 54 | 2. 55 | 3. 56 | 57 | - type: textarea 58 | id: expected-behavior 59 | validations: 60 | required: true 61 | attributes: 62 | label: Expected behavior 63 | description: "A clear and concise description of what you would expect to happen." 64 | -------------------------------------------------------------------------------- /tests/test_sagemaker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import unittest 15 | from dataclasses import dataclass 16 | 17 | import pytest 18 | 19 | from accelerate.commands.config.config_args import SageMakerConfig 20 | from accelerate.utils import ComputeEnvironment 21 | from accelerate.utils.launch import _convert_nargs_to_dict 22 | 23 | 24 | @dataclass 25 | class MockLaunchConfig(SageMakerConfig): 26 | compute_environment = ComputeEnvironment.AMAZON_SAGEMAKER 27 | fp16 = True 28 | ec2_instance_type = "ml.p3.2xlarge" 29 | iam_role_name = "accelerate_sagemaker_execution_role" 30 | profile = "hf-sm" 31 | region = "us-east-1" 32 | num_machines = 1 33 | base_job_name = "accelerate-sagemaker-1" 34 | pytorch_version = "1.6" 35 | transformers_version = "4.4" 36 | training_script = "train.py" 37 | success_training_script_args = [ 38 | "--model_name_or_path", 39 | "bert", 40 | "--do_train", 41 | "False", 42 | "--epochs", 43 | "3", 44 | "--learning_rate", 45 | "5e-5", 46 | "--max_steps", 47 | "50.5", 48 | ] 49 | fail_training_script_args = [ 50 | "--model_name_or_path", 51 | "bert", 52 | "--do_train", 53 | "--do_test", 54 | "False", 55 | "--do_predict", 56 | "--epochs", 57 | "3", 58 | "--learning_rate", 59 | "5e-5", 60 | "--max_steps", 61 | "50.5", 62 | ] 63 | 64 | 65 | class SageMakerLaunch(unittest.TestCase): 66 | def test_args_convert(self): 67 | # If no defaults are changed, `to_kwargs` returns an empty dict. 68 | converted_args = _convert_nargs_to_dict(MockLaunchConfig.success_training_script_args) 69 | assert isinstance(converted_args["model_name_or_path"], str) 70 | assert isinstance(converted_args["do_train"], bool) 71 | assert isinstance(converted_args["epochs"], int) 72 | assert isinstance(converted_args["learning_rate"], float) 73 | assert isinstance(converted_args["max_steps"], float) 74 | 75 | with pytest.raises(ValueError): 76 | _convert_nargs_to_dict(MockLaunchConfig.fail_training_script_args) 77 | -------------------------------------------------------------------------------- /.github/workflows/build-docker-images-release.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker images (releases) 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: [published] 7 | 8 | concurrency: 9 | group: docker-image-builds 10 | cancel-in-progress: false 11 | 12 | jobs: 13 | get-version: 14 | runs-on: ubuntu-latest 15 | outputs: 16 | version: ${{ steps.step1.outputs.version }} 17 | steps: 18 | - uses: actions/checkout@v3.1.0 19 | - id: step1 20 | run: echo "version=$(python setup.py --version)" >> $GITHUB_OUTPUT 21 | 22 | version-cpu: 23 | name: "Latest Accelerate CPU [version]" 24 | runs-on: 25 | group: aws-general-8-plus 26 | needs: get-version 27 | steps: 28 | - name: Set up Docker Buildx 29 | uses: docker/setup-buildx-action@v2 30 | - name: Login to DockerHub 31 | uses: docker/login-action@v2 32 | with: 33 | username: ${{ secrets.DOCKERHUB_USERNAME }} 34 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 35 | 36 | - name: Build and Push CPU 37 | uses: docker/build-push-action@v4 38 | with: 39 | file: docker/accelerate-cpu/Dockerfile 40 | push: true 41 | tags: huggingface/accelerate:cpu-release-${{ needs.get-version.outputs.version }} 42 | 43 | version-cuda: 44 | name: "Latest Accelerate GPU [version]" 45 | runs-on: 46 | group: aws-g6-4xlarge-plus 47 | needs: get-version 48 | steps: 49 | - name: Set up Docker Buildx 50 | uses: docker/setup-buildx-action@v2 51 | - name: Login to DockerHub 52 | uses: docker/login-action@v2 53 | with: 54 | username: ${{ secrets.DOCKERHUB_USERNAME }} 55 | 
password: ${{ secrets.DOCKERHUB_PASSWORD }} 56 | 57 | - name: Build and Push GPU 58 | uses: docker/build-push-action@v4 59 | with: 60 | file: docker/accelerate-gpu/Dockerfile 61 | push: true 62 | tags: huggingface/accelerate:gpu-release-${{needs.get-version.outputs.version}} 63 | 64 | version-cuda-deepspeed: 65 | name: "Latest Accelerate GPU DeepSpeed [version]" 66 | runs-on: 67 | group: aws-g6-4xlarge-plus 68 | needs: get-version 69 | steps: 70 | - name: Set up Docker Buildx 71 | uses: docker/setup-buildx-action@v2 72 | - name: Login to DockerHub 73 | uses: docker/login-action@v2 74 | with: 75 | username: ${{ secrets.DOCKERHUB_USERNAME }} 76 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 77 | 78 | - name: Build and Push GPU 79 | uses: docker/build-push-action@v4 80 | with: 81 | file: docker/accelerate-gpu-deepspeed/Dockerfile 82 | push: true 83 | tags: huggingface/accelerate:gpu-deepspeed-release-${{needs.get-version.outputs.version}} 84 | 85 | -------------------------------------------------------------------------------- /tests/xla_spawn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | A simple launcher script for TPU training 17 | 18 | Inspired by https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py 19 | 20 | :: 21 | >>> python xla_spawn.py --num_cores=NUM_CORES_YOU_HAVE 22 | YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3 and all other 23 | arguments of your training script) 24 | 25 | """ 26 | 27 | import importlib 28 | import sys 29 | from argparse import REMAINDER, ArgumentParser 30 | from pathlib import Path 31 | 32 | import torch_xla.distributed.xla_multiprocessing as xmp 33 | 34 | 35 | def parse_args(): 36 | """ 37 | Helper function parsing the command line options 38 | @retval ArgumentParser 39 | """ 40 | parser = ArgumentParser( 41 | description=( 42 | "PyTorch TPU distributed training launch " 43 | "helper utility that will spawn up " 44 | "multiple distributed processes" 45 | ) 46 | ) 47 | 48 | # Optional arguments for the launch helper 49 | parser.add_argument("--num_cores", type=int, default=1, help="Number of TPU cores to use (1 or 8).") 50 | 51 | # positional 52 | parser.add_argument( 53 | "training_script", 54 | type=str, 55 | help=( 56 | "The full path to the single TPU training " 57 | "program/script to be launched in parallel, " 58 | "followed by all the arguments for the " 59 | "training script" 60 | ), 61 | ) 62 | 63 | # rest from the training program 64 | parser.add_argument("training_script_args", nargs=REMAINDER) 65 | 66 | return parser.parse_args() 67 | 68 | 69 | def main(): 70 | args = parse_args() 71 | 72 | # Import training_script as a module. 
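# The script's parent directory is appended to sys.path so that importlib can resolve it by its stem name;
# the imported module's `_mp_fn` entry point is then spawned across the requested number of TPU cores.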
73 | script_fpath = Path(args.training_script) 74 | sys.path.append(str(script_fpath.parent.resolve())) 75 | mod_name = script_fpath.stem 76 | mod = importlib.import_module(mod_name) 77 | 78 | # Patch sys.argv 79 | sys.argv = [args.training_script] + args.training_script_args + ["--tpu_num_cores", str(args.num_cores)] 80 | xmp.spawn(mod._mp_fn, args=(), nprocs=args.num_cores) 81 | 82 | 83 | if __name__ == "__main__": 84 | main() 85 | -------------------------------------------------------------------------------- /src/accelerate/commands/menu/input.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team and Brian Chao. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | This file contains utilities for handling input from the user and registering specific keys to specific functions, 17 | based on https://github.com/bchao1/bullet 18 | """ 19 | 20 | from typing import List 21 | 22 | from .keymap import KEYMAP, get_character 23 | 24 | 25 | def mark(key: str): 26 | """ 27 | Mark the function with the key code so it can be handled in the register 28 | """ 29 | 30 | def decorator(func): 31 | handle = getattr(func, "handle_key", []) 32 | handle += [key] 33 | func.handle_key = handle 34 | return func 35 | 36 | return decorator 37 | 38 | 39 | def mark_multiple(*keys: List[str]): 40 | """ 41 | Mark the function with the key codes so it can be handled in the register 42 | """ 43 | 44 | def decorator(func): 45 | handle = getattr(func, "handle_key", []) 46 | handle += keys 47 | func.handle_key = handle 48 | return func 49 | 50 | return decorator 51 | 52 | 53 | class KeyHandler(type): 54 | """ 55 | Metaclass that adds the key handlers to the class 56 | """ 57 | 58 | def __new__(cls, name, bases, attrs): 59 | new_cls = super().__new__(cls, name, bases, attrs) 60 | if not hasattr(new_cls, "key_handler"): 61 | new_cls.key_handler = {} 62 | new_cls.handle_input = KeyHandler.handle_input 63 | 64 | for value in attrs.values(): 65 | handled_keys = getattr(value, "handle_key", []) 66 | for key in handled_keys: 67 | new_cls.key_handler[key] = value 68 | return new_cls 69 | 70 | @staticmethod 71 | def handle_input(cls): 72 | "Finds and returns the selected character if it exists in the handler" 73 | char = get_character() 74 | if char != KEYMAP["undefined"]: 75 | char = ord(char) 76 | handler = cls.key_handler.get(char) 77 | if handler: 78 | cls.current_selection = char 79 | return handler(cls) 80 | else: 81 | return None 82 | 83 | 84 | def register(cls): 85 | """Adds KeyHandler metaclass to the class""" 86 | return KeyHandler(cls.__name__, cls.__bases__, cls.__dict__.copy()) 87 | -------------------------------------------------------------------------------- /utils/stale.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team, the AllenNLP library authors. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Script to close stale issue. Taken in part from the AllenNLP repository. 16 | https://github.com/allenai/allennlp. 17 | """ 18 | 19 | import os 20 | from datetime import datetime as dt 21 | from datetime import timezone 22 | 23 | from github import Github 24 | 25 | 26 | LABELS_TO_EXEMPT = [ 27 | "good first issue", 28 | "feature request", 29 | "wip", 30 | ] 31 | 32 | 33 | def main(): 34 | g = Github(os.environ["GITHUB_TOKEN"]) 35 | repo = g.get_repo("huggingface/accelerate") 36 | open_issues = repo.get_issues(state="open") 37 | 38 | for issue in open_issues: 39 | comments = sorted([comment for comment in issue.get_comments()], key=lambda i: i.created_at, reverse=True) 40 | last_comment = comments[0] if len(comments) > 0 else None 41 | current_time = dt.now(timezone.utc) 42 | days_since_updated = (current_time - issue.updated_at).days 43 | days_since_creation = (current_time - issue.created_at).days 44 | if ( 45 | last_comment is not None 46 | and last_comment.user.login == "github-actions[bot]" 47 | and days_since_updated > 7 48 | and days_since_creation >= 30 49 | and not any(label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels()) 50 | ): 51 | # Close issue since it has been 7 days of inactivity since bot mention. 52 | issue.edit(state="closed") 53 | elif ( 54 | days_since_updated > 23 55 | and days_since_creation >= 30 56 | and not any(label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels()) 57 | ): 58 | # Add stale comment 59 | issue.create_comment( 60 | "This issue has been automatically marked as stale because it has not had " 61 | "recent activity. If you think this still needs to be addressed " 62 | "please comment on this thread.\n\nPlease note that issues that do not follow the " 63 | "[contributing guidelines](https://github.com/huggingface/accelerate/blob/main/CONTRIBUTING.md) " 64 | "are likely to be ignored." 
65 | ) 66 | 67 | 68 | if __name__ == "__main__": 69 | main() 70 | -------------------------------------------------------------------------------- /.github/workflows/build_docker_images.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker images (scheduled) 2 | 3 | on: 4 | workflow_dispatch: 5 | workflow_call: 6 | schedule: 7 | - cron: "0 1 * * *" 8 | 9 | concurrency: 10 | group: docker-image-builds 11 | cancel-in-progress: false 12 | 13 | jobs: 14 | latest-cpu: 15 | name: "Latest Accelerate CPU [dev]" 16 | runs-on: 17 | group: aws-general-8-plus 18 | steps: 19 | - name: Set up Docker Buildx 20 | uses: docker/setup-buildx-action@v2 21 | - name: Login to DockerHub 22 | uses: docker/login-action@v2 23 | with: 24 | username: ${{ secrets.DOCKERHUB_USERNAME }} 25 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 26 | - name: Get current date 27 | id: date 28 | run: | 29 | echo "date=$(date '+%Y-%m-%d')" >> $GITHUB_ENV 30 | - name: Build and Push CPU 31 | uses: docker/build-push-action@v4 32 | with: 33 | file: docker/accelerate-cpu/Dockerfile 34 | push: true 35 | tags: | 36 | huggingface/accelerate:cpu-nightly 37 | huggingface/accelerate:cpu-nightly-${{ env.date }} 38 | 39 | latest-cuda: 40 | name: "Latest Accelerate GPU [dev]" 41 | runs-on: 42 | group: aws-g6-4xlarge-plus 43 | steps: 44 | - name: Set up Docker Buildx 45 | uses: docker/setup-buildx-action@v2 46 | - name: Login to DockerHub 47 | uses: docker/login-action@v2 48 | with: 49 | username: ${{ secrets.DOCKERHUB_USERNAME }} 50 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 51 | - name: Get current date 52 | id: date 53 | run: | 54 | echo "date=$(date '+%Y-%m-%d')" >> $GITHUB_ENV 55 | - name: Build and Push GPU 56 | uses: docker/build-push-action@v4 57 | with: 58 | file: docker/accelerate-gpu/Dockerfile 59 | push: true 60 | tags: | 61 | huggingface/accelerate:gpu-nightly 62 | huggingface/accelerate:gpu-nightly-${{ env.date }} 63 | 64 | latest-cuda-deepspeed: 65 | name: "Latest Accelerate GPU DeepSpeed [dev]" 66 | runs-on: 67 | group: aws-g6-4xlarge-plus 68 | steps: 69 | - name: Set up Docker Buildx 70 | uses: docker/setup-buildx-action@v2 71 | - name: Login to DockerHub 72 | uses: docker/login-action@v2 73 | with: 74 | username: ${{ secrets.DOCKERHUB_USERNAME }} 75 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 76 | - name: Get current date 77 | id: date 78 | run: | 79 | echo "date=$(date '+%Y-%m-%d')" >> $GITHUB_ENV 80 | - name: Build and Push GPU 81 | uses: docker/build-push-action@v4 82 | with: 83 | file: docker/accelerate-gpu-deepspeed/Dockerfile 84 | push: true 85 | tags: | 86 | huggingface/accelerate:gpu-deepspeed-nightly 87 | huggingface/accelerate:gpu-deepspeed-nightly-${{ env.date }} 88 | 89 | -------------------------------------------------------------------------------- /examples/multigpu_remote_launcher.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import argparse 15 | 16 | import runhouse as rh 17 | import torch 18 | from nlp_example import training_function 19 | 20 | from accelerate.utils import PrepareForLaunch, patch_environment 21 | 22 | 23 | def launch_train(*args): 24 | num_processes = torch.cuda.device_count() 25 | print(f"Device count: {num_processes}") 26 | with patch_environment( 27 | world_size=num_processes, master_addr="127.0.0.1", master_port="29500", mixed_precision=args[1].mixed_precision 28 | ): 29 | launcher = PrepareForLaunch(training_function, distributed_type="MULTI_GPU") 30 | torch.multiprocessing.start_processes(launcher, args=args, nprocs=num_processes, start_method="spawn") 31 | 32 | 33 | if __name__ == "__main__": 34 | # Refer to https://runhouse-docs.readthedocs-hosted.com/en/main/rh_primitives/cluster.html#hardware-setup 35 | # for cloud access setup instructions (if using on-demand hardware), and for API specifications. 36 | 37 | # on-demand GPU 38 | # gpu = rh.cluster(name='rh-cluster', instance_type='V100:1', provider='cheapest', use_spot=False) # single GPU 39 | gpu = rh.cluster(name="rh-cluster", instance_type="V100:4", provider="cheapest", use_spot=False) # multi GPU 40 | gpu.up_if_not() 41 | 42 | # on-prem GPU 43 | # gpu = rh.cluster( 44 | # ips=["ip_addr"], ssh_creds={ssh_user:"", ssh_private_key:""}, name="rh-cluster" 45 | # ) 46 | 47 | # Set up remote function 48 | reqs = [ 49 | "pip:./", 50 | "transformers", 51 | "datasets", 52 | "evaluate", 53 | "tqdm", 54 | "scipy", 55 | "scikit-learn", 56 | "tensorboard", 57 | "torch --upgrade --extra-index-url https://download.pytorch.org/whl/cu117", 58 | ] 59 | launch_train_gpu = rh.function(fn=launch_train, system=gpu, reqs=reqs, name="train_bert_glue") 60 | 61 | # Define train args/config, run train function 62 | train_args = argparse.Namespace(cpu=False, mixed_precision="fp16") 63 | config = {"lr": 2e-5, "num_epochs": 3, "seed": 42, "batch_size": 16} 64 | launch_train_gpu(config, train_args, stream_logs=True) 65 | 66 | # Alternatively, we can just run as instructed in the README (but only because there's already a wrapper CLI): 67 | # gpu.install_packages(reqs) 68 | # gpu.run(['accelerate launch --multi_gpu accelerate/examples/nlp_example.py']) 69 | -------------------------------------------------------------------------------- /examples/inference/pippy/t5.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import time 15 | 16 | import torch 17 | from transformers import AutoModelForSeq2SeqLM 18 | 19 | from accelerate import PartialState, prepare_pippy 20 | from accelerate.utils import set_seed 21 | 22 | 23 | # Set the random seed to have reproducable outputs 24 | set_seed(42) 25 | 26 | # Create an example model 27 | model = AutoModelForSeq2SeqLM.from_pretrained("t5-small") 28 | model.eval() 29 | 30 | # Input configs 31 | # Create example inputs for the model 32 | input = torch.randint( 33 | low=0, 34 | high=model.config.vocab_size, 35 | size=(2, 1024), # bs x seq_len 36 | device="cpu", 37 | dtype=torch.int64, 38 | requires_grad=False, 39 | ) 40 | 41 | example_inputs = {"input_ids": input, "decoder_input_ids": input} 42 | 43 | # Create a pipeline stage from the model 44 | # Using `auto` is equivalent to letting `device_map="auto"` figure 45 | # out device mapping and will also split the model according to the 46 | # number of total GPUs available if it fits on one GPU 47 | model = prepare_pippy( 48 | model, 49 | no_split_module_classes=["T5Block"], 50 | example_kwargs=example_inputs, 51 | ) 52 | 53 | # You can pass `gather_output=True` to have the output from the model 54 | # available on all GPUs 55 | # model = prepare_pippy( 56 | # model, 57 | # no_split_module_classes=["T5Block"], 58 | # example_kwargs=example_inputs, 59 | # gather_outputs=True 60 | # ) 61 | 62 | # The model expects a tuple during real inference 63 | # with the data on the first device 64 | args = (example_inputs["input_ids"].to("cuda:0"), example_inputs["decoder_input_ids"].to("cuda:0")) 65 | 66 | # Take an average of 5 times 67 | # Measure first batch 68 | torch.cuda.synchronize() 69 | start_time = time.time() 70 | with torch.no_grad(): 71 | output = model(*args) 72 | torch.cuda.synchronize() 73 | end_time = time.time() 74 | first_batch = end_time - start_time 75 | 76 | # Now that CUDA is init, measure after 77 | torch.cuda.synchronize() 78 | start_time = time.time() 79 | for i in range(5): 80 | with torch.no_grad(): 81 | output = model(*args) 82 | torch.cuda.synchronize() 83 | end_time = time.time() 84 | 85 | # The outputs are only on the final process by default 86 | if PartialState().is_last_process: 87 | output = torch.stack(tuple(output[0])) 88 | print(f"Time of first pass: {first_batch}") 89 | print(f"Average time per batch: {(end_time - start_time) / 5}") 90 | -------------------------------------------------------------------------------- /src/accelerate/utils/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import operator as op 16 | 17 | 18 | SCALER_NAME = "scaler.pt" 19 | MODEL_NAME = "pytorch_model" 20 | SAFE_MODEL_NAME = "model" 21 | RNG_STATE_NAME = "random_states" 22 | OPTIMIZER_NAME = "optimizer" 23 | SCHEDULER_NAME = "scheduler" 24 | SAMPLER_NAME = "sampler" 25 | PROFILE_PATTERN_NAME = "profile_{suffix}.json" 26 | WEIGHTS_NAME = f"{MODEL_NAME}.bin" 27 | WEIGHTS_PATTERN_NAME = "pytorch_model{suffix}.bin" 28 | WEIGHTS_INDEX_NAME = f"{WEIGHTS_NAME}.index.json" 29 | SAFE_WEIGHTS_NAME = f"{SAFE_MODEL_NAME}.safetensors" 30 | SAFE_WEIGHTS_PATTERN_NAME = "model{suffix}.safetensors" 31 | SAFE_WEIGHTS_INDEX_NAME = f"{SAFE_WEIGHTS_NAME}.index.json" 32 | SAGEMAKER_PYTORCH_VERSION = "1.10.2" 33 | SAGEMAKER_PYTHON_VERSION = "py38" 34 | SAGEMAKER_TRANSFORMERS_VERSION = "4.17.0" 35 | SAGEMAKER_PARALLEL_EC2_INSTANCES = ["ml.p3.16xlarge", "ml.p3dn.24xlarge", "ml.p4dn.24xlarge"] 36 | FSDP_SHARDING_STRATEGY = ["FULL_SHARD", "SHARD_GRAD_OP", "NO_SHARD", "HYBRID_SHARD", "HYBRID_SHARD_ZERO2"] 37 | FSDP_AUTO_WRAP_POLICY = ["TRANSFORMER_BASED_WRAP", "SIZE_BASED_WRAP", "NO_WRAP"] 38 | FSDP_BACKWARD_PREFETCH = ["BACKWARD_PRE", "BACKWARD_POST", "NO_PREFETCH"] 39 | FSDP_STATE_DICT_TYPE = ["FULL_STATE_DICT", "LOCAL_STATE_DICT", "SHARDED_STATE_DICT"] 40 | FSDP_PYTORCH_VERSION = "2.1.0" 41 | FSDP_MODEL_NAME = "pytorch_model_fsdp" 42 | DEEPSPEED_MULTINODE_LAUNCHERS = ["pdsh", "standard", "openmpi", "mvapich", "mpich"] 43 | TORCH_DYNAMO_MODES = ["default", "reduce-overhead", "max-autotune"] 44 | ELASTIC_LOG_LINE_PREFIX_TEMPLATE_PYTORCH_VERSION = "2.2.0" 45 | 46 | STR_OPERATION_TO_FUNC = {">": op.gt, ">=": op.ge, "==": op.eq, "!=": op.ne, "<=": op.le, "<": op.lt} 47 | 48 | # These are the args for `torch.distributed.launch` for pytorch < 1.9 49 | TORCH_LAUNCH_PARAMS = [ 50 | "nnodes", 51 | "nproc_per_node", 52 | "rdzv_backend", 53 | "rdzv_endpoint", 54 | "rdzv_id", 55 | "rdzv_conf", 56 | "standalone", 57 | "max_restarts", 58 | "monitor_interval", 59 | "start_method", 60 | "role", 61 | "module", 62 | "m", 63 | "no_python", 64 | "run_path", 65 | "log_dir", 66 | "r", 67 | "redirects", 68 | "t", 69 | "tee", 70 | "node_rank", 71 | "master_addr", 72 | "master_port", 73 | ] 74 | 75 | CUDA_DISTRIBUTED_TYPES = ["DEEPSPEED", "MULTI_GPU", "FSDP", "MEGATRON_LM"] 76 | TORCH_DISTRIBUTED_OPERATION_TYPES = CUDA_DISTRIBUTED_TYPES + [ 77 | "MULTI_NPU", 78 | "MULTI_MLU", 79 | "MULTI_MUSA", 80 | "MULTI_XPU", 81 | "MULTI_CPU", 82 | ] 83 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: quality style test docs utils 2 | 3 | check_dirs := . 
4 | 5 | # Check that source code meets quality standards 6 | 7 | extra_quality_checks: 8 | python utils/check_copies.py 9 | python utils/check_dummies.py 10 | python utils/check_repo.py 11 | doc-builder style src/accelerate docs/source --max_len 119 12 | 13 | # this target runs checks on all files 14 | quality: 15 | ruff check $(check_dirs) 16 | ruff format --check $(check_dirs) 17 | doc-builder style src/accelerate docs/source --max_len 119 --check_only 18 | 19 | # Format source code automatically and check is there are any problems left that need manual fixing 20 | style: 21 | ruff check $(check_dirs) --fix 22 | ruff format $(check_dirs) 23 | doc-builder style src/accelerate docs/source --max_len 119 24 | 25 | # Run tests for the library 26 | test_big_modeling: 27 | python -m pytest -s -v ./tests/test_big_modeling.py ./tests/test_modeling_utils.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_big_modeling.log",) 28 | 29 | test_core: 30 | python -m pytest -s -v ./tests/ --ignore=./tests/test_examples.py --ignore=./tests/deepspeed --ignore=./tests/test_big_modeling.py \ 31 | --ignore=./tests/fsdp --ignore=./tests/test_cli.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_core.log",) 32 | 33 | test_cli: 34 | python -m pytest -s -v ./tests/test_cli.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_cli.log",) 35 | 36 | test_deepspeed: 37 | python -m pytest -s -v ./tests/deepspeed $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_deepspeed.log",) 38 | 39 | test_fsdp: 40 | python -m pytest -s -v ./tests/fsdp $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_fsdp.log",) 41 | 42 | # Since the new version of pytest will *change* how things are collected, we need `deepspeed` to 43 | # run after test_core and test_cli 44 | test: 45 | $(MAKE) test_core 46 | $(MAKE) test_cli 47 | $(MAKE) test_big_modeling 48 | $(MAKE) test_deepspeed 49 | $(MAKE) test_fsdp 50 | 51 | test_examples: 52 | python -m pytest -s -v ./tests/test_examples.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_examples.log",) 53 | 54 | # Broken down example tests for the CI runners 55 | test_integrations: 56 | python -m pytest -s -v ./tests/deepspeed ./tests/fsdp $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_integrations.log",) 57 | 58 | test_example_differences: 59 | python -m pytest -s -v ./tests/test_examples.py::ExampleDifferenceTests $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_example_diff.log",) 60 | 61 | test_checkpoint_epoch: 62 | python -m pytest -s -v ./tests/test_examples.py::FeatureExamplesTests -k "by_epoch" $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_checkpoint_epoch.log",) 63 | 64 | test_checkpoint_step: 65 | python -m pytest -s -v ./tests/test_examples.py::FeatureExamplesTests -k "by_step" $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_checkpoint_step.log",) 66 | 67 | # Same as test but used to install only the base dependencies 68 | test_prod: 69 | $(MAKE) test_core 70 | 71 | test_rest: 72 | python -m pytest -s -v ./tests/test_examples.py::FeatureExamplesTests -k "not by_step and not by_epoch" $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_rest.log",) 73 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Official Hugging Face Accelerate Docker Images 18 | 19 | Accelerate publishes a variety of docker versions as part of our CI that users can also use. 
These are stable images that Accelerate can run off of, which come with a variety of different setup configurations, all of which are officially hosted on [Docker Hub](https://hub.docker.com/r/huggingface/accelerate). 20 | 21 | A breakdown of each is given below 22 | 23 | ## Naming Conventions 24 | 25 | Accelerate docker images follow a tagging convention of: 26 | 27 | ```bash 28 | huggingface/accelerate:{accelerator}-{nightly,release} 29 | ``` 30 | 31 | `accelerator` in this instance is one of many applicable pre-configured backends: 32 | * `gpu`: Comes compiled off of the `nvidia/cuda` image and includes core parts like `bitsandbytes`. Runs on Python 3.9. 33 | * `cpu`: Comes compiled off of `python:3.9-slim` and is designed for non-CUDA based workloads. 34 | * More to come soon 35 | * `gpu-deepspeed`: Comes compiled off of the `nvidia/cuda` image and includes core parts like `bitsandbytes` as well as the latest `deepspeed` version. Runs on Python 3.10. 36 | 37 | ## Nightlies vs Releases 38 | 39 | With each release, a new build is pushed with a version number included in the name. For a GPU-supported image of version 0.28.0 for instance, it would look like the following: 40 | 41 | ```bash 42 | huggingface/accelerate:gpu-release-0.28.0 43 | ``` 44 | 45 | Nightlies contain two different image tags. There is a general `nightly` tag which is built each night, and a `nightly-YYYY-MM-DD` tag which corresponds to a build from a particular date. 46 | 47 | For instance, here is an example nightly CPU image from 3/14/2024: 48 | 49 | ```bash 50 | huggingface/accelerate:cpu-nightly-2024-03-14 51 | ``` 52 | 53 | ## Running the images 54 | 55 | Each image comes compiled with `conda` and an `accelerate` environment that contains all of the installed dependencies. 56 | 57 | To pull down the latest nightly, run: 58 | 59 | ```bash 60 | docker pull huggingface/accelerate:gpu-nightly 61 | ``` 62 | 63 | To then run it in interactive mode with GPU memory available, run: 64 | 65 | ```bash 66 | docker container run --gpus all -it huggingface/accelerate:gpu-nightly 67 | ``` 68 | 69 | ## DEPRECATED IMAGES 70 | 71 | CPU and GPU docker images were hosted at `huggingface/accelerate-gpu` and `huggingface/accelerate-cpu`. These builds are now outdated and will not receive updates. 72 | 73 | The builds at the corresponding `huggingface/accelerate:{gpu,cpu}` contain the same `Dockerfile`, so it's as simple as changing the docker image to the desired ones from above. We will not be deleting these images for posterity, but they will not be receiving updates going forward. -------------------------------------------------------------------------------- /benchmarks/measures_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
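# Example usage (an illustrative sketch of how the helpers below fit together):
#
#     start_measures = start_measure()
#     ...  # run the workload being benchmarked
#     measures = end_measure(start_measures)
#     log_measures(measures, "description of the workload")
#
# `start_measure` snapshots time, CPU RSS and per-GPU allocated memory and starts the CPU peak tracker;
# `end_measure` returns the deltas (in MiB for memory) and `log_measures` prints them.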
14 | import gc 15 | import threading 16 | import time 17 | 18 | import psutil 19 | import torch 20 | 21 | 22 | class PeakCPUMemory: 23 | def __init__(self): 24 | self.process = psutil.Process() 25 | self.peak_monitoring = False 26 | 27 | def peak_monitor(self): 28 | self.cpu_memory_peak = -1 29 | 30 | while True: 31 | self.cpu_memory_peak = max(self.process.memory_info().rss, self.cpu_memory_peak) 32 | 33 | # can't sleep or will not catch the peak right (this comment is here on purpose) 34 | if not self.peak_monitoring: 35 | break 36 | 37 | def start(self): 38 | self.peak_monitoring = True 39 | self.thread = threading.Thread(target=self.peak_monitor) 40 | self.thread.daemon = True 41 | self.thread.start() 42 | 43 | def stop(self): 44 | self.peak_monitoring = False 45 | self.thread.join() 46 | return self.cpu_memory_peak 47 | 48 | 49 | cpu_peak_tracker = PeakCPUMemory() 50 | 51 | 52 | def start_measure(): 53 | # Time 54 | measures = {"time": time.time()} 55 | 56 | gc.collect() 57 | torch.cuda.empty_cache() 58 | 59 | # CPU mem 60 | measures["cpu"] = psutil.Process().memory_info().rss 61 | cpu_peak_tracker.start() 62 | 63 | # GPU mem 64 | for i in range(torch.cuda.device_count()): 65 | measures[str(i)] = torch.cuda.memory_allocated(i) 66 | torch.cuda.reset_peak_memory_stats() 67 | 68 | return measures 69 | 70 | 71 | def end_measure(start_measures): 72 | # Time 73 | measures = {"time": time.time() - start_measures["time"]} 74 | 75 | gc.collect() 76 | torch.cuda.empty_cache() 77 | 78 | # CPU mem 79 | measures["cpu"] = (psutil.Process().memory_info().rss - start_measures["cpu"]) / 2**20 80 | measures["cpu-peak"] = (cpu_peak_tracker.stop() - start_measures["cpu"]) / 2**20 81 | 82 | # GPU mem 83 | for i in range(torch.cuda.device_count()): 84 | measures[str(i)] = (torch.cuda.memory_allocated(i) - start_measures[str(i)]) / 2**20 85 | measures[f"{i}-peak"] = (torch.cuda.max_memory_allocated(i) - start_measures[str(i)]) / 2**20 86 | 87 | return measures 88 | 89 | 90 | def log_measures(measures, description): 91 | print(f"{description}:") 92 | print(f"- Time: {measures['time']:.2f}s") 93 | for i in range(torch.cuda.device_count()): 94 | print(f"- GPU {i} allocated: {measures[str(i)]:.2f}MiB") 95 | peak = measures[f"{i}-peak"] 96 | print(f"- GPU {i} peak: {peak:.2f}MiB") 97 | print(f"- CPU RAM allocated: {measures['cpu']:.2f}MiB") 98 | print(f"- CPU RAM peak: {measures['cpu-peak']:.2f}MiB") 99 | -------------------------------------------------------------------------------- /src/accelerate/commands/config/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2021 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import argparse 18 | import os 19 | 20 | from accelerate.utils import ComputeEnvironment 21 | 22 | from .cluster import get_cluster_input 23 | from .config_args import cache_dir, default_config_file, default_yaml_config_file, load_config_from_file # noqa: F401 24 | from .config_utils import _ask_field, _ask_options, _convert_compute_environment # noqa: F401 25 | from .sagemaker import get_sagemaker_input 26 | 27 | 28 | description = "Launches a series of prompts to create and save a `default_config.yaml` configuration file for your training system. Should always be ran first on your machine" 29 | 30 | 31 | def get_user_input(): 32 | compute_environment = _ask_options( 33 | "In which compute environment are you running?", 34 | ["This machine", "AWS (Amazon SageMaker)"], 35 | _convert_compute_environment, 36 | ) 37 | if compute_environment == ComputeEnvironment.AMAZON_SAGEMAKER: 38 | config = get_sagemaker_input() 39 | else: 40 | config = get_cluster_input() 41 | return config 42 | 43 | 44 | def config_command_parser(subparsers=None): 45 | if subparsers is not None: 46 | parser = subparsers.add_parser("config", description=description) 47 | else: 48 | parser = argparse.ArgumentParser("Accelerate config command", description=description) 49 | 50 | parser.add_argument( 51 | "--config_file", 52 | default=None, 53 | help=( 54 | "The path to use to store the config file. Will default to a file named default_config.yaml in the cache " 55 | "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have " 56 | "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed " 57 | "with 'huggingface'." 58 | ), 59 | ) 60 | 61 | if subparsers is not None: 62 | parser.set_defaults(func=config_command) 63 | return parser 64 | 65 | 66 | def config_command(args): 67 | config = get_user_input() 68 | if args.config_file is not None: 69 | config_file = args.config_file 70 | else: 71 | if not os.path.isdir(cache_dir): 72 | os.makedirs(cache_dir) 73 | config_file = default_yaml_config_file 74 | 75 | if config_file.endswith(".json"): 76 | config.to_json_file(config_file) 77 | else: 78 | config.to_yaml_file(config_file) 79 | print(f"accelerate configuration saved at {config_file}") 80 | 81 | 82 | def main(): 83 | parser = config_command_parser() 84 | args = parser.parse_args() 85 | config_command(args) 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /src/accelerate/test_utils/scripts/test_ddp_comm_hook.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import torch 15 | 16 | from accelerate import Accelerator, DDPCommunicationHookType, DistributedDataParallelKwargs 17 | 18 | 19 | class MockModel(torch.nn.Module): 20 | def __init__(self): 21 | super().__init__() 22 | torch.manual_seed(0) 23 | self.p = torch.nn.Parameter(torch.randn(40, 20)) 24 | 25 | def forward(self, x, rank): 26 | return self.p * (x ** (1 + rank)) 27 | 28 | 29 | def _run_and_get_grads(model, rank): 30 | torch.manual_seed(2024) 31 | input = torch.randn(40, 20) 32 | output = model(input, rank) 33 | output.mean().backward() 34 | param = next(model.parameters()) 35 | return param.grad 36 | 37 | 38 | def test_ddp_comm_hook(comm_hook, comm_wrapper, comm_state_option): 39 | ddp_kwargs = DistributedDataParallelKwargs( 40 | comm_hook=comm_hook, 41 | comm_wrapper=comm_wrapper, 42 | comm_state_option=comm_state_option, 43 | ) 44 | accelerator = Accelerator(kwargs_handlers=[ddp_kwargs]) 45 | 46 | model = accelerator.prepare(MockModel()) 47 | hook_grads = _run_and_get_grads(model, accelerator.local_process_index) 48 | 49 | reference_model = torch.nn.parallel.DistributedDataParallel( 50 | MockModel().to(accelerator.device), 51 | device_ids=[accelerator.local_process_index], 52 | output_device=accelerator.local_process_index, 53 | ) 54 | reference_grads = _run_and_get_grads(reference_model, accelerator.local_process_index) 55 | 56 | torch.testing.assert_close(hook_grads, reference_grads, rtol=1e-2, atol=1e-2) 57 | 58 | 59 | def main(): 60 | for comm_hook, comm_wrapper, comm_state_option in [ 61 | (DDPCommunicationHookType.NO, DDPCommunicationHookType.NO, {}), 62 | (DDPCommunicationHookType.FP16, DDPCommunicationHookType.NO, {}), 63 | (DDPCommunicationHookType.BF16, DDPCommunicationHookType.NO, {}), 64 | (DDPCommunicationHookType.POWER_SGD, DDPCommunicationHookType.NO, {}), 65 | (DDPCommunicationHookType.POWER_SGD, DDPCommunicationHookType.FP16, {}), 66 | (DDPCommunicationHookType.POWER_SGD, DDPCommunicationHookType.BF16, {}), 67 | (DDPCommunicationHookType.POWER_SGD, DDPCommunicationHookType.NO, {"matrix_approximation_rank": 2}), 68 | (DDPCommunicationHookType.BATCHED_POWER_SGD, DDPCommunicationHookType.NO, {}), 69 | (DDPCommunicationHookType.BATCHED_POWER_SGD, DDPCommunicationHookType.FP16, {}), 70 | (DDPCommunicationHookType.BATCHED_POWER_SGD, DDPCommunicationHookType.BF16, {}), 71 | ]: 72 | print(f"Test DDP comm hook: {comm_hook}, comm wrapper: {comm_wrapper}") 73 | test_ddp_comm_hook(comm_hook, comm_wrapper, comm_state_option) 74 | 75 | 76 | if __name__ == "__main__": 77 | main() 78 | -------------------------------------------------------------------------------- /tests/test_optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import pickle 16 | import unittest 17 | 18 | import torch 19 | 20 | from accelerate import Accelerator 21 | from accelerate.state import AcceleratorState 22 | from accelerate.test_utils import require_cpu, require_non_cpu, require_non_xpu 23 | 24 | 25 | @require_cpu 26 | class CPUOptimizerTester(unittest.TestCase): 27 | def test_accelerated_optimizer_pickling(self): 28 | model = torch.nn.Linear(10, 10) 29 | optimizer = torch.optim.SGD(model.parameters(), 0.1) 30 | accelerator = Accelerator() 31 | optimizer = accelerator.prepare(optimizer) 32 | try: 33 | pickle.loads(pickle.dumps(optimizer)) 34 | except Exception as e: 35 | self.fail(f"Accelerated optimizer pickling failed with {e}") 36 | AcceleratorState._reset_state() 37 | 38 | 39 | @require_non_cpu 40 | @require_non_xpu 41 | class OptimizerTester(unittest.TestCase): 42 | def test_accelerated_optimizer_step_was_skipped(self): 43 | model = torch.nn.Linear(5, 5) 44 | optimizer = torch.optim.SGD(model.parameters(), 0.1) 45 | accelerator = Accelerator(mixed_precision="fp16") 46 | model, optimizer = accelerator.prepare(model, optimizer) 47 | 48 | loss = model(torch.randn(2, 5, device=accelerator.device)).sum() 49 | accelerator.backward(loss) 50 | for p in model.parameters(): 51 | # Fake the gradients, as if there's no overflow 52 | p.grad.fill_(0.01) 53 | 54 | optimizer.step() 55 | assert optimizer.step_was_skipped is False 56 | 57 | loss = model(torch.randn(2, 5, device=accelerator.device)).sum() 58 | accelerator.backward(loss) 59 | for p in model.parameters(): 60 | p.grad.fill_(0.01) 61 | # Manually set the gradients to be NaN, as if there's an overflow 62 | p.grad[0] = torch.tensor(float("nan")) 63 | 64 | optimizer.step() 65 | assert optimizer.step_was_skipped is True 66 | 67 | loss = model(torch.randn(2, 5, device=accelerator.device)).sum() 68 | accelerator.backward(loss) 69 | for p in model.parameters(): 70 | p.grad.fill_(0.01) 71 | # Manually set the gradients to be NaN, as if there's an overflow 72 | p.grad[0] = torch.tensor(float("nan")) 73 | 74 | optimizer.step() 75 | assert optimizer.step_was_skipped is True 76 | 77 | loss = model(torch.randn(2, 5, device=accelerator.device)).sum() 78 | accelerator.backward(loss) 79 | for p in model.parameters(): 80 | # Fake the gradients, as if there's no overflow 81 | p.grad.fill_(0.01) 82 | 83 | optimizer.step() 84 | assert optimizer.step_was_skipped is False 85 | 86 | AcceleratorState._reset_state() 87 | -------------------------------------------------------------------------------- /docs/source/usage_guides/mps.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Accelerated PyTorch Training on Mac 17 | 18 | With PyTorch v1.12 release, developers and researchers can take advantage of Apple silicon GPUs for significantly faster model training. 19 | This unlocks the ability to perform machine learning workflows like prototyping and fine-tuning locally, right on Mac. 20 | Apple's Metal Performance Shaders (MPS) as a backend for PyTorch enables this and can be used via the new `"mps"` device. 21 | This will map computational graphs and primitives on the MPS Graph framework and tuned kernels provided by MPS. 22 | For more information please refer official documents [Introducing Accelerated PyTorch Training on Mac](https://pytorch.org/blog/introducing-accelerated-pytorch-training-on-mac/) 23 | and [MPS BACKEND](https://pytorch.org/docs/stable/notes/mps.html). 
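As a quick sanity check, the snippet below (a minimal sketch, not one of the bundled examples; the tiny linear model is just a placeholder) verifies that the MPS backend is built and available, and that 🤗 Accelerate picks it up automatically:

```python
import torch
from accelerate import Accelerator

# Available in PyTorch >= 1.12
print("MPS built:", torch.backends.mps.is_built())
print("MPS available:", torch.backends.mps.is_available())

# On an MPS-enabled Apple Silicon Mac, Accelerate selects the `mps` device by default
accelerator = Accelerator()
print("Accelerate device:", accelerator.device)  # expected: mps

model = torch.nn.Linear(8, 2)
model = accelerator.prepare(model)  # the model is placed on the mps device
print(next(model.parameters()).device)
```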
24 | 25 | ### Benefits of Training and Inference using Apple Silicon Chips 26 | 27 | 1. Enables users to train larger networks or batch sizes locally 28 | 2. Reduces data retrieval latency and provides the GPU with direct access to the full memory store due to the unified memory architecture, 29 | thereby improving end-to-end performance. 30 | 3. Reduces costs associated with cloud-based development or the need for additional local GPUs. 31 | 32 | **Prerequisites**: To install torch with MPS support, 33 | please follow this Medium article [GPU-Acceleration Comes to PyTorch on M1 Macs](https://medium.com/towards-data-science/gpu-acceleration-comes-to-pytorch-on-m1-macs-195c399efcc1). 34 | 35 | 36 | ## How it works out of the box 37 | MPS acceleration is enabled by default on macOS machines with MPS-enabled Apple Silicon GPUs. 38 | To disable it, pass the `--cpu` flag to the `accelerate launch` command or answer the corresponding question in the `accelerate config` questionnaire. 39 | 40 | You can directly run the following script to test it out on MPS-enabled Apple Silicon machines: 41 | ```bash 42 | accelerate launch examples/cv_example.py --data_dir images 43 | ``` 44 | 45 | ## A few caveats to be aware of 46 | 47 | 1. We strongly recommend installing PyTorch >= 1.13 (the nightly version at the time of writing) on your macOS machine. 48 | It has major fixes related to model correctness and performance improvements for transformer-based models. 49 | Please refer to https://github.com/pytorch/pytorch/issues/82707 for more details. 50 | 2. Distributed setups `gloo` and `nccl` do not work with the `mps` device. 51 | This means that currently only a single GPU of the `mps` device type can be used. 52 | 53 | Finally, please remember that 🤗 `Accelerate` only integrates the MPS backend; if you 54 | have any problems or questions regarding MPS backend usage, please file an issue with [PyTorch GitHub](https://github.com/pytorch/pytorch/issues). -------------------------------------------------------------------------------- /src/accelerate/commands/config/config_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2021 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License.
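# Interactive helpers shared by the `accelerate config` questionnaire: `_ask_field` re-prompts
# until the answer converts cleanly, `_ask_options` renders a `BulletMenu` of choices, and the
# `_convert_*` functions map the selected index to the corresponding enum value.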
16 | 17 | import argparse 18 | 19 | from ...utils.dataclasses import ( 20 | ComputeEnvironment, 21 | DistributedType, 22 | DynamoBackend, 23 | PrecisionType, 24 | SageMakerDistributedType, 25 | ) 26 | from ..menu import BulletMenu 27 | 28 | 29 | DYNAMO_BACKENDS = [ 30 | "EAGER", 31 | "AOT_EAGER", 32 | "INDUCTOR", 33 | "AOT_TS_NVFUSER", 34 | "NVPRIMS_NVFUSER", 35 | "CUDAGRAPHS", 36 | "OFI", 37 | "FX2TRT", 38 | "ONNXRT", 39 | "TENSORRT", 40 | "AOT_TORCHXLA_TRACE_ONCE", 41 | "TORHCHXLA_TRACE_ONCE", 42 | "IPEX", 43 | "TVM", 44 | ] 45 | 46 | 47 | def _ask_field(input_text, convert_value=None, default=None, error_message=None): 48 | ask_again = True 49 | while ask_again: 50 | result = input(input_text) 51 | try: 52 | if default is not None and len(result) == 0: 53 | return default 54 | return convert_value(result) if convert_value is not None else result 55 | except Exception: 56 | if error_message is not None: 57 | print(error_message) 58 | 59 | 60 | def _ask_options(input_text, options=[], convert_value=None, default=0): 61 | menu = BulletMenu(input_text, options) 62 | result = menu.run(default_choice=default) 63 | return convert_value(result) if convert_value is not None else result 64 | 65 | 66 | def _convert_compute_environment(value): 67 | value = int(value) 68 | return ComputeEnvironment(["LOCAL_MACHINE", "AMAZON_SAGEMAKER"][value]) 69 | 70 | 71 | def _convert_distributed_mode(value): 72 | value = int(value) 73 | return DistributedType( 74 | ["NO", "MULTI_CPU", "MULTI_XPU", "MULTI_GPU", "MULTI_NPU", "MULTI_MLU", "MULTI_MUSA", "XLA"][value] 75 | ) 76 | 77 | 78 | def _convert_dynamo_backend(value): 79 | value = int(value) 80 | return DynamoBackend(DYNAMO_BACKENDS[value]).value 81 | 82 | 83 | def _convert_mixed_precision(value): 84 | value = int(value) 85 | return PrecisionType(["no", "fp16", "bf16", "fp8"][value]) 86 | 87 | 88 | def _convert_sagemaker_distributed_mode(value): 89 | value = int(value) 90 | return SageMakerDistributedType(["NO", "DATA_PARALLEL", "MODEL_PARALLEL"][value]) 91 | 92 | 93 | def _convert_yes_no_to_bool(value): 94 | return {"yes": True, "no": False}[value.lower()] 95 | 96 | 97 | class SubcommandHelpFormatter(argparse.RawDescriptionHelpFormatter): 98 | """ 99 | A custom formatter that will remove the usage line from the help message for subcommands. 100 | """ 101 | 102 | def _format_usage(self, usage, actions, groups, prefix): 103 | usage = super()._format_usage(usage, actions, groups, prefix) 104 | usage = usage.replace(" [] ", "") 105 | return usage 106 | -------------------------------------------------------------------------------- /tests/test_logging.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
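# Tests for `accelerate.logging.get_logger`: emitted records should point at the caller's file,
# line number and function (i.e. the correct `stacklevel`), including when the logger is wrapped
# in a `logging.LoggerAdapter` that raises `stacklevel` itself.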
14 | import inspect 15 | import logging 16 | import os 17 | 18 | import pytest 19 | 20 | from accelerate import Accelerator 21 | from accelerate.logging import get_logger 22 | 23 | 24 | def current_lineno() -> int: 25 | # A simple helper that returns the lineno of its call-site. 26 | caller_frame = inspect.currentframe().f_back 27 | caller_info = inspect.getframeinfo(caller_frame) 28 | return caller_info.lineno 29 | 30 | 31 | class CustomLogger(logging.LoggerAdapter): 32 | # Mocks a user-defined custom logger wrapper that sets `stacklevel=3`. 33 | def log(self, level, msg, *args, **kwargs): 34 | # E.g. the user wants to modify `stacklevel`, `accelerate.logging` 35 | # should respect the user's `stacklevel`. For the specific value 36 | # of `3`, calling `CustomLogger.log()`, etc., should log that callsite, 37 | # rather than the callsite of the following `self.logger.log()`. 38 | kwargs["stacklevel"] = 3 39 | self.logger.log(level, msg, *args, **kwargs) 40 | 41 | 42 | @pytest.fixture(scope="module") 43 | def accelerator(): 44 | return Accelerator() 45 | 46 | 47 | @pytest.mark.usefixtures("accelerator") 48 | def test_log_stack(caplog): 49 | logger = get_logger(__name__) 50 | logging.basicConfig( 51 | format="%(filename)s:%(name)s:%(lineno)s:%(funcName)s - %(message)s", 52 | datefmt="%m/%d %H:%M:%S", 53 | ) 54 | 55 | message = "Test" 56 | lineno = current_lineno() + 1 # the next line is the actual callsite 57 | logger.warning(message) 58 | 59 | assert len(caplog.records) == 1 60 | rec = caplog.records[0] 61 | assert rec.levelname == logging.getLevelName(logging.WARNING) 62 | assert rec.filename == os.path.basename(__file__) 63 | assert rec.name == __name__ 64 | assert rec.lineno == lineno 65 | assert rec.funcName == test_log_stack.__name__ 66 | assert rec.message == message 67 | 68 | 69 | @pytest.mark.usefixtures("accelerator") 70 | def test_custom_stacklevel(caplog): 71 | wrapped_logger = get_logger(__name__) 72 | logging.basicConfig( 73 | format="%(filename)s:%(name)s:%(lineno)s:%(funcName)s - %(message)s", 74 | datefmt="%m/%d %H:%M:%S", 75 | ) 76 | logger = CustomLogger(wrapped_logger, {}) 77 | 78 | message = "Test" 79 | lineno = current_lineno() + 1 # the next line is the actual callsite 80 | logger.warning(message) 81 | 82 | # `CustomLogger.log` set custom `stacklevel=3`, so `logger.warning` should 83 | # log its callsite (rather than those of the `warpped_logger`). 84 | assert len(caplog.records) == 1 85 | rec = caplog.records[0] 86 | assert rec.levelname == logging.getLevelName(logging.WARNING) 87 | assert rec.filename == os.path.basename(__file__) 88 | assert rec.name == __name__ 89 | assert rec.lineno == lineno 90 | assert rec.funcName == test_custom_stacklevel.__name__ 91 | assert rec.message == message 92 | -------------------------------------------------------------------------------- /docs/source/basic_tutorials/install.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Installation and Configuration 17 | 18 | Before you start, you will need to setup your environment, install the appropriate packages, and configure 🤗 Accelerate. 🤗 Accelerate is tested on **Python 3.8+**. 19 | 20 | ## Installing 🤗 Accelerate 21 | 22 | 🤗 Accelerate is available on pypi and conda, as well as on GitHub. 
Details to install from each are below: 23 | 24 | ### pip 25 | 26 | To install 🤗 Accelerate from pypi, perform: 27 | 28 | ```bash 29 | pip install accelerate 30 | ``` 31 | 32 | ### conda 33 | 34 | 🤗 Accelerate can also be installed with conda with: 35 | 36 | ```bash 37 | conda install -c conda-forge accelerate 38 | ``` 39 | 40 | ### Source 41 | 42 | New features are added every day that haven't been released yet. To try them out yourself, install 43 | from the GitHub repository: 44 | 45 | ```bash 46 | pip install git+https://github.com/huggingface/accelerate 47 | ``` 48 | 49 | If you're working on contributing to the library or wish to play with the source code and see live 50 | results as you run the code, an editable version can be installed from a locally-cloned version of the 51 | repository: 52 | 53 | ```bash 54 | git clone https://github.com/huggingface/accelerate 55 | cd accelerate 56 | pip install -e . 57 | ``` 58 | 59 | ## Configuring 🤗 Accelerate 60 | 61 | After installing, you need to configure 🤗 Accelerate for how the current system is setup for training. 62 | To do so run the following and answer the questions prompted to you: 63 | 64 | ```bash 65 | accelerate config 66 | ``` 67 | 68 | To write a barebones configuration that doesn't include options such as DeepSpeed configuration or running on TPUs, you can quickly run: 69 | 70 | ```bash 71 | python -c "from accelerate.utils import write_basic_config; write_basic_config(mixed_precision='fp16')" 72 | ``` 73 | 🤗 Accelerate will automatically utilize the maximum number of GPUs available and set the mixed precision mode. 74 | 75 | To check that your configuration looks fine, run: 76 | 77 | ```bash 78 | accelerate env 79 | ``` 80 | 81 | An example output is shown below, which describes two GPUs on a single machine with no mixed precision being used: 82 | 83 | ```bash 84 | - `Accelerate` version: 0.11.0.dev0 85 | - Platform: Linux-5.10.0-15-cloud-amd64-x86_64-with-debian-11.3 86 | - Python version: 3.7.12 87 | - Numpy version: 1.19.5 88 | - PyTorch version (GPU?): 1.12.0+cu102 (True) 89 | - `Accelerate` default config: 90 | - compute_environment: LOCAL_MACHINE 91 | - distributed_type: MULTI_GPU 92 | - mixed_precision: no 93 | - use_cpu: False 94 | - num_processes: 2 95 | - machine_rank: 0 96 | - num_machines: 1 97 | - main_process_ip: None 98 | - main_process_port: None 99 | - main_training_function: main 100 | - deepspeed_config: {} 101 | - fsdp_config: {} 102 | ``` -------------------------------------------------------------------------------- /tests/test_imports.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
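# Import-time regression tests: `python -X importtime` profiles of `import accelerate` (and of the
# CLI launch entry point) are compared against a bare `import torch` baseline, failing if they are
# more than 20% slower.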
14 | import subprocess 15 | 16 | from accelerate.test_utils.testing import TempDirTestCase, require_import_timer 17 | from accelerate.utils import is_import_timer_available 18 | 19 | 20 | if is_import_timer_available(): 21 | from import_timer import calculate_total_time, read_import_profile 22 | from import_timer.core import get_paths_above_threshold, sort_nodes_by_total_time 23 | 24 | 25 | def convert_list_to_string(data): 26 | end_result = "" 27 | arrow_right = "->" 28 | for path in data: 29 | end_result += f"{arrow_right.join(path[0])} {path[1]:.3f}s\n" 30 | return end_result 31 | 32 | 33 | def run_import_time(command: str): 34 | output = subprocess.run(["python3", "-X", "importtime", "-c", command], capture_output=True, text=True) 35 | return output.stderr 36 | 37 | 38 | @require_import_timer 39 | class ImportSpeedTester(TempDirTestCase): 40 | """ 41 | Test suite which checks if imports have seen slowdowns 42 | based on a particular baseline. 43 | 44 | If the error messages are not clear enough to get a 45 | full view of what is slowing things down (or to 46 | figure out how deep the initial depth should be), 47 | please view the profile with the `tuna` framework: 48 | `tuna import.log`. 49 | """ 50 | 51 | clear_on_setup = False 52 | 53 | @classmethod 54 | def setUpClass(cls): 55 | super().setUpClass() 56 | output = run_import_time("import torch") 57 | data = read_import_profile(output) 58 | total_time = calculate_total_time(data) 59 | cls.pytorch_time = total_time 60 | 61 | def test_base_import(self): 62 | output = run_import_time("import accelerate") 63 | data = read_import_profile(output) 64 | total_time = calculate_total_time(data) 65 | pct_more = (total_time - self.pytorch_time) / self.pytorch_time * 100 66 | # Base import should never be more than 20% slower than raw torch import 67 | err_msg = f"Base import is more than 20% slower than raw torch import ({pct_more:.2f}%), please check the attached `tuna` profile:\n" 68 | sorted_data = sort_nodes_by_total_time(data) 69 | paths_above_threshold = get_paths_above_threshold(sorted_data, 0.05, max_depth=7) 70 | err_msg += f"\n{convert_list_to_string(paths_above_threshold)}" 71 | self.assertLess(pct_more, 20, err_msg) 72 | 73 | def test_cli_import(self): 74 | output = run_import_time("from accelerate.commands.launch import launch_command_parser") 75 | data = read_import_profile(output) 76 | total_time = calculate_total_time(data) 77 | pct_more = (total_time - self.pytorch_time) / self.pytorch_time * 100 78 | # Base import should never be more than 20% slower than raw torch import 79 | err_msg = f"Base import is more than 20% slower than raw torch import ({pct_more:.2f}%), please check the attached `tuna` profile:\n" 80 | sorted_data = sort_nodes_by_total_time(data) 81 | paths_above_threshold = get_paths_above_threshold(sorted_data, 0.05, max_depth=7) 82 | err_msg += f"\n{convert_list_to_string(paths_above_threshold)}" 83 | self.assertLess(pct_more, 20, err_msg) 84 | -------------------------------------------------------------------------------- /src/accelerate/utils/transformer_engine.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch.nn as nn 16 | 17 | from .imports import is_fp8_available 18 | 19 | 20 | if is_fp8_available(): 21 | import transformer_engine.pytorch as te 22 | 23 | 24 | def convert_model(model, to_transformer_engine=True, _convert_linear=True, _convert_ln=True): 25 | """ 26 | Recursively converts the linear and layernorm layers of a model to their `transformers_engine` counterpart. 27 | """ 28 | if not is_fp8_available(): 29 | raise ImportError("Using `convert_model` requires transformer_engine to be installed.") 30 | for name, module in model.named_children(): 31 | if isinstance(module, nn.Linear) and to_transformer_engine and _convert_linear: 32 | # Return early if the linear layer weights are not multiples of 16 33 | if any(p % 16 != 0 for p in module.weight.shape): 34 | return 35 | has_bias = module.bias is not None 36 | te_module = te.Linear( 37 | module.in_features, module.out_features, bias=has_bias, params_dtype=module.weight.dtype 38 | ) 39 | te_module.weight.copy_(module.weight) 40 | if has_bias: 41 | te_module.bias.copy_(module.bias) 42 | 43 | setattr(model, name, te_module) 44 | elif isinstance(module, nn.LayerNorm) and to_transformer_engine and _convert_ln: 45 | te_module = te.LayerNorm(module.normalized_shape[0], eps=module.eps, params_dtype=module.weight.dtype) 46 | te_module.weight.copy_(module.weight) 47 | te_module.bias.copy_(module.bias) 48 | 49 | setattr(model, name, te_module) 50 | elif isinstance(module, te.Linear) and not to_transformer_engine and _convert_linear: 51 | has_bias = module.bias is not None 52 | new_module = nn.Linear( 53 | module.in_features, module.out_features, bias=has_bias, params_dtype=module.weight.dtype 54 | ) 55 | new_module.weight.copy_(module.weight) 56 | if has_bias: 57 | new_module.bias.copy_(module.bias) 58 | 59 | setattr(model, name, new_module) 60 | elif isinstance(module, te.LayerNorm) and not to_transformer_engine and _convert_ln: 61 | new_module = nn.LayerNorm(module.normalized_shape[0], eps=module.eps, params_dtype=module.weight.dtype) 62 | new_module.weight.copy_(module.weight) 63 | new_module.bias.copy_(module.bias) 64 | 65 | setattr(model, name, new_module) 66 | else: 67 | convert_model( 68 | module, 69 | to_transformer_engine=to_transformer_engine, 70 | _convert_linear=_convert_linear, 71 | _convert_ln=_convert_ln, 72 | ) 73 | 74 | 75 | def has_transformer_engine_layers(model): 76 | """ 77 | Returns whether a given model has some `transformer_engine` layer or not. 78 | """ 79 | if not is_fp8_available(): 80 | raise ImportError("Using `has_transformer_engine_layers` requires transformer_engine to be installed.") 81 | for m in model.modules(): 82 | if isinstance(m, (te.LayerNorm, te.Linear, te.TransformerLayer)): 83 | return True 84 | return False 85 | -------------------------------------------------------------------------------- /examples/inference/distributed/phi2.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Inc. team. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | from transformers import AutoModelForCausalLM, AutoTokenizer 17 | 18 | from accelerate import PartialState 19 | from accelerate.utils import gather_object 20 | 21 | 22 | # Start up the distributed environment without needing the Accelerator. 23 | distributed_state = PartialState() 24 | 25 | # You can change the model to any LLM such as mistralai/Mistral-7B-v0.1 or meta-llama/Llama-2-7b-chat-hf 26 | model_name = "microsoft/phi-2" 27 | model = AutoModelForCausalLM.from_pretrained( 28 | model_name, device_map=distributed_state.device, torch_dtype=torch.float16 29 | ) 30 | 31 | tokenizer = AutoTokenizer.from_pretrained(model_name) 32 | # Need to set the padding token to the eos token for generation 33 | tokenizer.pad_token = tokenizer.eos_token 34 | 35 | prompts = [ 36 | "I would like to", 37 | "hello how are you", 38 | "what is going on", 39 | "roses are red and", 40 | "welcome to the hotel", 41 | ] 42 | 43 | # You can change the batch size depending on your GPU RAM 44 | batch_size = 2 45 | # We set it to 8 since it is better for some hardware. More information here https://github.com/huggingface/tokenizers/issues/991 46 | pad_to_multiple_of = 8 47 | 48 | # Split into batches 49 | # We will get the following results: 50 | # [ ["I would like to", "hello how are you"], [ "what is going on", "roses are red and"], [ "welcome to the hotel"] ] 51 | formatted_prompts = [prompts[i : i + batch_size] for i in range(0, len(prompts), batch_size)] 52 | 53 | # Apply padding on the left since we are doing generation 54 | padding_side_default = tokenizer.padding_side 55 | tokenizer.padding_side = "left" 56 | # Tokenize each batch 57 | tokenized_prompts = [ 58 | tokenizer(formatted_prompt, padding=True, pad_to_multiple_of=pad_to_multiple_of, return_tensors="pt") 59 | for formatted_prompt in formatted_prompts 60 | ] 61 | # Put back the original padding behavior 62 | tokenizer.padding_side = padding_side_default 63 | 64 | completions_per_process = [] 65 | # We automatically split the batched data we passed to it across all the processes. We also set apply_padding=True 66 | # so that the GPUs will have the same number of prompts, and you can then gather the results. 
67 | # For example, if we have 2 GPUs, the distribution will be: 68 | # GPU 0: ["I would like to", "hello how are you"], ["what is going on", "roses are red and"] 69 | # GPU 1: ["welcome to the hotel"], ["welcome to the hotel"] -> this prompt is duplicated to ensure that all GPUs have the same number of prompts 70 | with distributed_state.split_between_processes(tokenized_prompts, apply_padding=True) as batched_prompts: 71 | for batch in batched_prompts: 72 | # Move the batch to the device 73 | batch = batch.to(distributed_state.device) 74 | # We generate the text, decode it and add it to the list completions_per_process 75 | outputs = model.generate(**batch, max_new_tokens=20) 76 | generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True) 77 | completions_per_process.extend(generated_text) 78 | 79 | # We are gathering strings, so we need to use gather_object. 80 | # If you need to gather tensors, you can use gather from accelerate.utils 81 | completions_gather = gather_object(completions_per_process) 82 | 83 | # Drop duplicates produced by apply_padding in split_between_processes 84 | completions = completions_gather[: len(prompts)] 85 | 86 | distributed_state.print(completions) 87 | -------------------------------------------------------------------------------- /docs/source/usage_guides/checkpoint.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Checkpointing 17 | 18 | When training a PyTorch model with 🤗 Accelerate, you may often want to save and later resume a given state of training. Doing so requires 19 | saving and loading the model, optimizer, RNG generators, and the GradScaler. Inside 🤗 Accelerate are two convenience functions to achieve this quickly: 20 | - Use [`~Accelerator.save_state`] for saving everything mentioned above to a folder location 21 | - Use [`~Accelerator.load_state`] for loading everything stored from an earlier `save_state` 22 | 23 | To further customize where and how states are saved through [`~Accelerator.save_state`], the [`~utils.ProjectConfiguration`] class can be used. For example, 24 | if `automatic_checkpoint_naming` is enabled, each saved checkpoint will then be located at `Accelerator.project_dir/checkpoints/checkpoint_{checkpoint_number}` (a minimal sketch of this option is shown below). 25 | 26 | It should be noted that the expectation is that those states come from the same training script; they should not come from two separate scripts. 27 | 28 | - By using [`~Accelerator.register_for_checkpointing`], you can register custom objects to be automatically stored or loaded from the two prior functions, 29 | so long as the object has a `state_dict` **and** a `load_state_dict` functionality. This could include objects such as a learning rate scheduler.
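As a quick illustration of the [`~utils.ProjectConfiguration`] option mentioned above, here is a minimal sketch (the path is just a placeholder) of enabling `automatic_checkpoint_naming`:

```python
from accelerate import Accelerator
from accelerate.utils import ProjectConfiguration

# Checkpoints will land in my/save/path/checkpoints/checkpoint_{i}
project_config = ProjectConfiguration(project_dir="my/save/path", automatic_checkpoint_naming=True)
accelerator = Accelerator(project_config=project_config)

# ... prepare your model, optimizer and dataloader as usual ...
accelerator.save_state()  # writes my/save/path/checkpoints/checkpoint_0
accelerator.save_state()  # writes my/save/path/checkpoints/checkpoint_1
```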
30 | 31 | 32 | Below is a brief example using checkpointing to save and reload a state during training: 33 | 34 | ```python 35 | from accelerate import Accelerator 36 | import torch 37 | 38 | accelerator = Accelerator(project_dir="my/save/path") 39 | 40 | my_scheduler = torch.optim.lr_scheduler.StepLR(my_optimizer, step_size=1, gamma=0.99) 41 | my_model, my_optimizer, my_training_dataloader = accelerator.prepare(my_model, my_optimizer, my_training_dataloader) 42 | 43 | # Register the LR scheduler 44 | accelerator.register_for_checkpointing(my_scheduler) 45 | 46 | # Save the starting state 47 | accelerator.save_state() 48 | 49 | device = accelerator.device 50 | my_model.to(device) 51 | 52 | # Perform training 53 | for epoch in range(num_epochs): 54 | for batch in my_training_dataloader: 55 | my_optimizer.zero_grad() 56 | inputs, targets = batch 57 | inputs = inputs.to(device) 58 | targets = targets.to(device) 59 | outputs = my_model(inputs) 60 | loss = my_loss_function(outputs, targets) 61 | accelerator.backward(loss) 62 | my_optimizer.step() 63 | my_scheduler.step() 64 | 65 | # Restore the previous state 66 | accelerator.load_state("my/save/path/checkpointing/checkpoint_0") 67 | ``` 68 | 69 | ## Restoring the state of the DataLoader 70 | 71 | After resuming from a checkpoint, it may also be desirable to resume from a particular point in the active `DataLoader` if 72 | the state was saved during the middle of an epoch. You can use [`~Accelerator.skip_first_batches`] to do so. 73 | 74 | ```python 75 | from accelerate import Accelerator 76 | 77 | accelerator = Accelerator(project_dir="my/save/path") 78 | 79 | train_dataloader = accelerator.prepare(train_dataloader) 80 | accelerator.load_state("my_state") 81 | 82 | # Assume the checkpoint was saved 100 steps into the epoch 83 | skipped_dataloader = accelerator.skip_first_batches(train_dataloader, 100) 84 | 85 | # After the first iteration, go back to `train_dataloader` 86 | 87 | # First epoch 88 | for batch in skipped_dataloader: 89 | # Do something 90 | pass 91 | 92 | # Second epoch 93 | for batch in train_dataloader: 94 | # Do something 95 | pass 96 | ``` 97 | -------------------------------------------------------------------------------- /docs/source/concept_guides/internal_mechanism.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # 🤗 Accelerate's internal mechanisms 17 | 18 | Internally, 🤗 Accelerate works by first analyzing the environment in which the script is launched to determine which 19 | kind of distributed setup is used, how many different processes there are and which one the current script is in. All 20 | that information is stored in the [`~AcceleratorState`]. 21 | 22 | This class is initialized the first time you instantiate an [`~Accelerator`] as well as performing any 23 | specific initialization your distributed setup needs. Its state is then uniquely shared through all instances of 24 | [`~state.AcceleratorState`]. 
(The same can also be done with the [`PartialState`], a more barebones version that it inherits from.) 25 | 26 | Then, when calling [`~Accelerator.prepare`], the library: 27 | 28 | - wraps your model(s) in the container adapted for the distributed setup, 29 | - wraps your optimizer(s) in an [`~optimizer.AcceleratedOptimizer`], 30 | - wraps your scheduler(s) in an [`~scheduler.AcceleratedScheduler`] 31 | - creates a new version of your dataloader(s) in a [`~data_loader.DataLoaderShard`] or [`~data_loader.DataLoaderDispatcher`] 32 | 33 | While the model(s), optimizer(s), and scheduler(s) are just put in simple wrappers, the dataloader(s) are re-created. This is mostly 34 | because PyTorch does not let the user change the `batch_sampler` of a dataloader once it's been created and the 35 | library handles the sharding of your data between processes by changing that `batch_sampler` to yield every other 36 | `num_processes` batches (if enabled). 37 | 38 | The [`~data_loader.DataLoaderShard`] subclasses `DataLoader` to add the following functionality: 39 | 40 | - it synchronizes the appropriate random number generator of all processes at each new iteration, to ensure any 41 | randomization (like shuffling) is done the exact same way across processes. 42 | - it puts the batches on the proper device before yielding them (unless you have opted out of 43 | `device_placement=True`). 44 | 45 | The [`~data_loader.DataLoaderDispatcher`] subclass differs from the [`~data_loader.DataLoaderShard`] in that, when iterating through the `DataLoader`, the data all starts from process 0 and is *then* split and sent off to each process, rather than the split happening at the dataset level. 46 | 47 | The random number generator synchronization will by default synchronize: 48 | 49 | - the `generator` attribute of a given sampler (like the PyTorch `RandomSampler`) for PyTorch >= 1.6 50 | - the main random number generator in PyTorch <=1.5.1 51 | 52 | You can choose which random number generator(s) to synchronize with the `rng_types` argument of the main 53 | [`Accelerator`]. In PyTorch >= 1.6, it is recommended to rely on a local `generator` to avoid 54 | setting the same seed in the main random number generator in all processes. 55 | 56 | 57 | 58 | Synchronization of the main torch (or CUDA or XLA) random number generator will affect any other potential random 59 | artifacts you could have in your dataset (like random data augmentation) in the sense that all processes will get 60 | the same random numbers from the torch random modules (so will apply the same random data augmentation if it's 61 | controlled by torch). 62 | 63 | 64 | 65 | 66 | 67 | The randomization part of your custom sampler, batch sampler or iterable dataset should be done using a local 68 | `torch.Generator` object (in PyTorch >= 1.6); see the traditional `RandomSampler` as an example. 69 | 70 | 71 | 72 | For more details about the internals, see the [Internals page](package_reference/torch_wrappers). 73 | -------------------------------------------------------------------------------- /src/accelerate/test_utils/scripts/test_notebook.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Test file to ensure that in general certain situational setups for notebooks work. 16 | """ 17 | 18 | import os 19 | import time 20 | from multiprocessing import Queue 21 | 22 | from pytest import mark, raises 23 | from torch.distributed.elastic.multiprocessing.errors import ChildFailedError 24 | 25 | from accelerate import PartialState, notebook_launcher 26 | from accelerate.test_utils import require_bnb 27 | from accelerate.utils import is_bnb_available 28 | 29 | 30 | def basic_function(): 31 | # Just prints the PartialState 32 | print(f"PartialState:\n{PartialState()}") 33 | 34 | 35 | def tough_nut_function(queue: Queue): 36 | if queue.empty(): 37 | return 38 | trial = queue.get() 39 | if trial > 0: 40 | queue.put(trial - 1) 41 | raise RuntimeError("The nut hasn't cracked yet! Try again.") 42 | 43 | print(f"PartialState:\n{PartialState()}") 44 | 45 | 46 | def bipolar_sleep_function(sleep_sec: int): 47 | state = PartialState() 48 | if state.process_index % 2 == 0: 49 | raise RuntimeError("I'm an even process. I don't like to sleep.") 50 | else: 51 | time.sleep(sleep_sec) 52 | 53 | 54 | NUM_PROCESSES = int(os.environ.get("ACCELERATE_NUM_PROCESSES", 1)) 55 | 56 | 57 | def test_can_initialize(): 58 | notebook_launcher(basic_function, (), num_processes=NUM_PROCESSES) 59 | 60 | 61 | @mark.skipif(NUM_PROCESSES < 2, reason="Need at least 2 processes to test static rendezvous backends") 62 | def test_static_rdzv_backend(): 63 | notebook_launcher(basic_function, (), num_processes=NUM_PROCESSES, rdzv_backend="static") 64 | 65 | 66 | @mark.skipif(NUM_PROCESSES < 2, reason="Need at least 2 processes to test c10d rendezvous backends") 67 | def test_c10d_rdzv_backend(): 68 | notebook_launcher(basic_function, (), num_processes=NUM_PROCESSES, rdzv_backend="c10d") 69 | 70 | 71 | @mark.skipif(NUM_PROCESSES < 2, reason="Need at least 2 processes to test fault tolerance") 72 | def test_fault_tolerant(max_restarts: int = 3): 73 | queue = Queue() 74 | queue.put(max_restarts) 75 | notebook_launcher(tough_nut_function, (queue,), num_processes=NUM_PROCESSES, max_restarts=max_restarts) 76 | 77 | 78 | @mark.skipif(NUM_PROCESSES < 2, reason="Need at least 2 processes to test monitoring") 79 | def test_monitoring(monitor_interval: float = 0.01, sleep_sec: int = 100): 80 | start_time = time.time() 81 | with raises(ChildFailedError, match="I'm an even process. I don't like to sleep."): 82 | notebook_launcher( 83 | bipolar_sleep_function, 84 | (sleep_sec,), 85 | num_processes=NUM_PROCESSES, 86 | monitor_interval=monitor_interval, 87 | ) 88 | assert time.time() - start_time < sleep_sec, "Monitoring did not stop the process in time." 
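# Importing bitsandbytes before calling `notebook_launcher` is expected to fail with a RuntimeError
# asking to keep such imports inside the launched function, likely because the import initializes
# CUDA state in the parent process.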
89 | 90 | 91 | @require_bnb 92 | def test_problematic_imports(): 93 | with raises(RuntimeError, match="Please keep these imports"): 94 | import bitsandbytes as bnb # noqa: F401 95 | 96 | notebook_launcher(basic_function, (), num_processes=NUM_PROCESSES) 97 | 98 | 99 | def main(): 100 | print("Test basic notebook can be ran") 101 | test_can_initialize() 102 | print("Test static rendezvous backend") 103 | test_static_rdzv_backend() 104 | print("Test c10d rendezvous backend") 105 | test_c10d_rdzv_backend() 106 | print("Test fault tolerant") 107 | test_fault_tolerant() 108 | print("Test monitoring") 109 | test_monitoring() 110 | if is_bnb_available(): 111 | print("Test problematic imports (bnb)") 112 | test_problematic_imports() 113 | 114 | 115 | if __name__ == "__main__": 116 | main() 117 | -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Accelerate 17 | 18 | 🤗 Accelerate is a library that enables the same PyTorch code to be run across any distributed configuration by adding just four lines of code! In short, training and inference at scale made simple, efficient and adaptable. 19 | 20 | ```diff 21 | + from accelerate import Accelerator 22 | + accelerator = Accelerator() 23 | 24 | + model, optimizer, training_dataloader, scheduler = accelerator.prepare( 25 | + model, optimizer, training_dataloader, scheduler 26 | + ) 27 | 28 | for batch in training_dataloader: 29 | optimizer.zero_grad() 30 | inputs, targets = batch 31 | inputs = inputs.to(device) 32 | targets = targets.to(device) 33 | outputs = model(inputs) 34 | loss = loss_function(outputs, targets) 35 | + accelerator.backward(loss) 36 | optimizer.step() 37 | scheduler.step() 38 | ``` 39 | 40 | Built on `torch_xla` and `torch.distributed`, 🤗 Accelerate takes care of the heavy lifting, so you don't have to write any custom code to adapt to these platforms. 41 | Convert existing codebases to utilize [DeepSpeed](usage_guides/deepspeed), perform [fully sharded data parallelism](usage_guides/fsdp), and have automatic support for mixed-precision training! 42 | 43 | 44 | 45 | To get a better idea of this process, make sure to check out the [Tutorials](basic_tutorials/overview)! 46 | 47 | 48 | 49 | 50 | This code can then be launched on any system through Accelerate's CLI interface: 51 | ```bash 52 | accelerate launch {my_script.py} 53 | ``` 54 | 55 | 75 | --------------------------------------------------------------------------------