├── .github ├── ISSUE_TEMPLATE │ ├── bug_template.md │ ├── doc_template.md │ └── feature_template.md ├── PULL_REQUEST_TEMPLATE │ └── pull_request_template.md └── workflows │ ├── doc-build.yml │ ├── main_cpp.yml │ ├── main_distributed.yaml │ └── main_python.yml ├── .gitignore ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── cpp ├── .clang-format ├── autograd │ ├── CMakeLists.txt │ ├── README.md │ └── autograd.cpp ├── custom-dataset │ ├── CMakeLists.txt │ ├── README.md │ ├── custom-dataset.cpp │ └── info.txt ├── dcgan │ ├── CMakeLists.txt │ ├── README.md │ ├── dcgan.cpp │ └── display_samples.py ├── distributed │ ├── CMakeLists.txt │ ├── README.md │ └── dist-mnist.cpp ├── mnist │ ├── CMakeLists.txt │ ├── README.md │ └── mnist.cpp ├── regression │ ├── CMakeLists.txt │ ├── README.md │ └── regression.cpp ├── tools │ ├── InstallingOpenCV.md │ └── download_mnist.py └── transfer-learning │ ├── CMakeLists.txt │ ├── README.md │ ├── classify.cpp │ ├── convert.py │ ├── main.cpp │ └── main.h ├── dcgan ├── .gitignore ├── README.md ├── main.py └── requirements.txt ├── distributed ├── FSDP │ ├── .gitignore │ ├── README.md │ ├── T5_training.py │ ├── configs │ │ ├── __init__.py │ │ ├── fsdp.py │ │ └── training.py │ ├── download_dataset.sh │ ├── model_checkpointing │ │ ├── __init__.py │ │ └── checkpoint_handler.py │ ├── policies │ │ ├── __init__.py │ │ ├── activation_checkpointing_functions.py │ │ ├── mixed_precision.py │ │ └── wrapping.py │ ├── requirements.txt │ ├── summarization_dataset.py │ └── utils │ │ ├── __init__.py │ │ ├── environment.py │ │ └── train_utils.py ├── ddp-tutorial-series │ ├── README.md │ ├── datautils.py │ ├── multigpu.py │ ├── multigpu_torchrun.py │ ├── multinode.py │ ├── requirements.txt │ ├── single_gpu.py │ └── slurm │ │ ├── config.yaml.template │ │ ├── sbatch_run.sh │ │ └── setup_pcluster_slurm.md ├── ddp │ ├── README.md │ ├── example.py │ ├── main.py │ └── requirements.txt ├── minGPT-ddp │ ├── README.md │ ├── mingpt │ │ ├── char_dataset.py │ │ ├── data │ │ │ └── input.txt │ │ ├── gpt2_train_cfg.yaml │ │ ├── main.py │ │ ├── model.py │ │ ├── slurm │ │ │ ├── config.yaml.template │ │ │ ├── sbatch_run.sh │ │ │ └── setup_pcluster_slurm.md │ │ └── trainer.py │ └── requirements.txt ├── rpc │ ├── batch │ │ ├── README.md │ │ ├── parameter_server.py │ │ ├── reinforce.py │ │ └── requirements.txt │ ├── ddp_rpc │ │ ├── README.md │ │ ├── main.py │ │ └── requirements.txt │ ├── parameter_server │ │ ├── README.md │ │ └── rpc_parameter_server.py │ ├── pipeline │ │ ├── README.md │ │ ├── main.py │ │ └── requirements.txt │ ├── rl │ │ ├── README.md │ │ ├── main.py │ │ └── requirements.txt │ └── rnn │ │ ├── README.md │ │ ├── main.py │ │ ├── requirements.txt │ │ └── rnn.py └── tensor_parallelism │ ├── README.md │ ├── fsdp_tp_example.py │ ├── llama2_model.py │ ├── log_utils.py │ ├── requirements.txt │ ├── run_example.sh │ ├── sequence_parallel_example.py │ └── tensor_parallel_example.py ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── conf.py │ └── index.rst ├── fast_neural_style ├── README.md ├── download_saved_models.py ├── images │ ├── content-images │ │ └── amber.jpg │ ├── output-images │ │ ├── amber-candy.jpg │ │ ├── amber-mosaic.jpg │ │ ├── amber-rain-princess.jpg │ │ └── amber-udnie.jpg │ └── style-images │ │ ├── candy.jpg │ │ ├── mosaic.jpg │ │ ├── rain-princess-cropped.jpg │ │ ├── rain-princess.jpg │ │ └── udnie.jpg └── neural_style │ ├── __init__.py │ ├── neural_style.py │ ├── transformer_net.py │ ├── utils.py │ └── vgg.py ├── fx ├── 
README.md ├── custom_tracer.py ├── inline_function.py ├── invert.py ├── module_tracer.py ├── native_interpreter │ ├── CMakeLists.txt │ ├── README.md │ ├── interpreter.cpp │ └── use_interpreter.py ├── primitive_library.py ├── profiling_tracer.py ├── proxy_based_graph_creation.py ├── replace_op.py ├── subgraph_rewriter_basic_use.py └── wrap_output_dynamically.py ├── gat ├── README.md ├── main.py └── requirements.txt ├── gcn ├── README.md ├── main.py └── requirements.txt ├── imagenet ├── README.md ├── extract_ILSVRC.sh ├── main.py └── requirements.txt ├── language_translation ├── README.md ├── main.py ├── requirements.txt └── src │ ├── data.py │ └── model.py ├── legacy └── snli │ ├── README.md │ ├── model.py │ ├── requirements.txt │ ├── train.py │ └── util.py ├── mnist ├── README.md ├── main.py └── requirements.txt ├── mnist_forward_forward ├── README.md ├── main.py └── requirements.txt ├── mnist_hogwild ├── README.md ├── main.py ├── requirements.txt └── train.py ├── mnist_rnn ├── README.md ├── main.py └── requirements.txt ├── regression ├── README.md └── main.py ├── reinforcement_learning ├── README.md ├── actor_critic.py ├── reinforce.py └── requirements.txt ├── run_cpp_examples.sh ├── run_distributed_examples.sh ├── run_python_examples.sh ├── runtime.txt ├── siamese_network ├── README.md ├── main.py └── requirements.txt ├── super_resolution ├── README.md ├── data.py ├── dataset.py ├── main.py ├── model.py └── super_resolve.py ├── time_sequence_prediction ├── README.md ├── generate_sine_wave.py ├── requirements.txt └── train.py ├── utils.sh ├── vae ├── README.md ├── main.py ├── requirements.txt └── results │ └── .gitignore └── word_language_model ├── README.md ├── data.py ├── data └── wikitext-2 │ ├── README │ ├── test.txt │ ├── train.txt │ └── valid.txt ├── generate.py ├── main.py ├── model.py └── requirements.txt /.github/ISSUE_TEMPLATE/bug_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Bug report" 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | Your issue may already be reported! 8 | Please search on the [issue tracker](https://github.com/pytorch/examples/issues) before creating one. 9 | 10 | ## Context 11 | 12 | 13 | * Pytorch version: 14 | * Operating System and version: 15 | 16 | ## Your Environment 17 | 18 | * Installed using source? [yes/no]: 19 | * Are you planning to deploy it using docker container? [yes/no]: 20 | * Is it a CPU or GPU environment?: 21 | * Which example are you using: 22 | * Link to code or data to repro [if any]: 23 | 24 | ## Expected Behavior 25 | 26 | 27 | ## Current Behavior 28 | 29 | 30 | ## Possible Solution 31 | 32 | 33 | ## Steps to Reproduce 34 | 35 | 36 | 1. 37 | 2. 38 | ... 39 | 40 | ## Failure Logs [if any] 41 | 42 | 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/doc_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F4DA Documentation" 3 | about: Report a documentation related issue 4 | 5 | --- 6 | 7 | ## 📚 Documentation 8 | 9 | 12 | 13 | ## Is your feature request related to a problem? Please describe. 
14 | 15 | 16 | ## Describe the solution 17 | 18 | 19 | ## Describe alternatives solution 20 | 21 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Pull Request" 3 | about: Fix a bug or create new example 4 | 5 | --- 6 | 7 | ## Description 8 | 9 | Please include a summary of the newly proposed example or issue being fixed. Please also include relevant motivation, context. 10 | 11 | If this is a new example, how is your example different enough from the remaining examples in the repo. 12 | 13 | If this is a bug fix please link the issue you are fixing. Fixes #(issue) 14 | 15 | ## Type of change 16 | 17 | Please delete options that are not relevant. 18 | 19 | - [ ] Bug fix (non-breaking change which fixes an issue) 20 | - [ ] New Example (new example contribution) 21 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 22 | - [ ] This change requires a documentation update 23 | 24 | ## Feature/Issue validation/testing 25 | 26 | Please describe the tests [UT/IT] that you ran to verify your changes and relevant result summary. If this is a bug fix please run `run_python_examples.sh` before and after your change locally to make sure it works and add the logs here. 27 | 28 | - [ ] Logs before change 29 | - [ ] Logs after change 30 | 31 | - Logs 32 | 33 | If this is a new example please add a corresponding test in `run_python_examples.sh` 34 | 35 | - [ ] Test Added 36 | 37 | ## Checklist: 38 | 39 | - [ ] Have you added tests that prove your fix is effective or that this example works? 40 | - [ ] Has code been commented, particularly in hard-to-understand areas? 41 | - [ ] Have you made corresponding changes to the documentation? 42 | -------------------------------------------------------------------------------- /.github/workflows/doc-build.yml: -------------------------------------------------------------------------------- 1 | name: Doc Build 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | workflow_dispatch: 8 | 9 | jobs: 10 | build_docs_job: 11 | runs-on: ubuntu-latest 12 | # Grant write permission here so that the doc can be pushed to gh-pages branch 13 | permissions: 14 | contents: write 15 | strategy: 16 | matrix: 17 | python-version: [3.9] 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v2 21 | - name: Dependencies 22 | run: | 23 | echo `python3 --version` 24 | sudo apt-get install -y python-setuptools 25 | sudo apt-get install -y python3-sphinx 26 | python3 -m pip install --upgrade pip 27 | python3 -m pip install setuptools 28 | id: build 29 | - name: Build the docset 30 | run: | 31 | cd docs 32 | pip install -r requirements.txt 33 | make html 34 | - name: Get output time 35 | run: echo "The time was ${{ steps.build.outputs.time }}" 36 | - name: Deploy 37 | uses: JamesIves/github-pages-deploy-action@releases/v3 38 | with: 39 | ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }} 40 | BRANCH: gh-pages # The branch the action should deploy to. 41 | FOLDER: ./docs/build/html # The folder the action should deploy. 
42 | -------------------------------------------------------------------------------- /.github/workflows/main_cpp.yml: -------------------------------------------------------------------------------- 1 | name: Run CPP Examples 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | schedule: 9 | # Every day at 3:00am 10 | - cron: '0 3 * * *' 11 | 12 | 13 | jobs: 14 | test: 15 | 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | - name: Set up Python 3.11 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: 3.11 24 | 25 | - name: Install Cmake, Make, g++, MKL 26 | run: | 27 | sudo apt update && sudo apt upgrade 28 | sudo apt install cmake g++ make 29 | sudo apt-get -y install intel-mkl 30 | - name: Install OpenCV 31 | run: | 32 | sudo apt -y install libtbb-dev 33 | sudo apt install libopencv-dev 34 | - name: Install argparse 35 | run: | 36 | git clone https://github.com/p-ranav/argparse 37 | cd argparse 38 | mkdir build 39 | cd build 40 | cmake -DARGPARSE_BUILD_SAMPLES=off -DARGPARSE_BUILD_TESTS=off .. 41 | sudo make install 42 | # Alternatively, you can install OpenCV from source 43 | # - name: Install OpenCV from source 44 | # run: | 45 | # wget -O opencv.zip https://github.com/opencv/opencv/archive/4.x.zip 46 | # unzip opencv.zip 47 | # mkdir -p build && cd build 48 | # cmake ../opencv-4.x 49 | # cmake --build . 50 | # sudo make install 51 | 52 | - name: Run Cpp Tests 53 | run: | 54 | chmod +x ./run_cpp_examples.sh 55 | ./run_cpp_examples.sh "get_libtorch,run_all,clean" 56 | -------------------------------------------------------------------------------- /.github/workflows/main_distributed.yaml: -------------------------------------------------------------------------------- 1 | name: Run Distributed Examples 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | schedule: 9 | # Every day at 3:00am 10 | - cron: '0 3 * * *' 11 | 12 | 13 | jobs: 14 | test: 15 | 16 | runs-on: 4-core-ubuntu-gpu-t4 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | - name: Set up Python 3.8 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: 3.8 24 | - name: Install PyTorch 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install --pre torch -f https://download.pytorch.org/whl/nightly/cu118/torch_nightly.html 28 | - name: Run Tests 29 | run: | 30 | ./run_distributed_examples.sh "run_all,clean" 31 | - name: Open issue on failure 32 | if: ${{ failure() && github.event_name == 'schedule' }} 33 | uses: rishabhgupta/git-action-issue@v2 34 | with: 35 | token: ${{ secrets.GITHUB_TOKEN }} 36 | title: Daily CI failed 37 | body: Commit ${{ github.sha }} daily scheduled [CI run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) failed, please check why 38 | assignees: '' 39 | -------------------------------------------------------------------------------- /.github/workflows/main_python.yml: -------------------------------------------------------------------------------- 1 | name: Run Python Examples 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | schedule: 9 | # Every day at 3:00am 10 | - cron: '0 3 * * *' 11 | 12 | 13 | jobs: 14 | test: 15 | 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Set up Python 3.10 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: '3.10' 24 | - name: Install PyTorch 25 | run: | 26 | python -m pip install --upgrade pip 27 
| # Install CPU-based pytorch 28 | pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html 29 | # Maybe use the CUDA 10.2 version instead? 30 | # pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html 31 | - name: Run Tests 32 | run: | 33 | ./run_python_examples.sh "install_deps,run_all,clean" 34 | - name: Open issue on failure 35 | if: ${{ failure() && github.event_name == 'schedule' }} 36 | uses: rishabhgupta/git-action-issue@v2 37 | with: 38 | token: ${{ secrets.GITHUB_TOKEN }} 39 | title: Daily CI failed 40 | body: Commit ${{ github.sha }} daily scheduled [CI run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) failed, please check why 41 | assignees: '' 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dcgan/data 2 | data 3 | *.pyc 4 | OpenNMT/data 5 | cpp/mnist/build 6 | cpp/dcgan/build 7 | dcgan/*.png 8 | dcgan/*.pth 9 | snli/.data 10 | snli/.vector_cache 11 | snli/results 12 | word_language_model/model.pt 13 | fast_neural_style/saved_models 14 | fast_neural_style/saved_models.zip 15 | gcn/cora/ 16 | gat/cora/ 17 | docs/build 18 | docs/venv 19 | 20 | # vi backups 21 | *~ 22 | .*.swp 23 | 24 | # development 25 | .vscode 26 | **/.DS_Store 27 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | # This is a comment. 2 | # Each line is a file pattern followed by one or more owners. 3 | 4 | # Github Actions, tests and CI 5 | ./github/ @msaroufim 6 | run_python_examples.sh @msaroufim 7 | 8 | # Distributed examples 9 | # Can also add the distributed oncall 10 | ./distributed/ @mrshenli @pritamdamania87 @rohan-varma @H-Huang 11 | ./mnist_hogwild/ @mrshenli @pritamdamania87 @rohan-varma @H-Huang 12 | 13 | # FX examples 14 | ./fx/ @jamesr66a @Chillee 15 | 16 | # Domain Examples 17 | ./reinforcement_learning/ @msaroufim 18 | ./word_language_model/ @msaroufim 19 | 20 | # Need an owner 21 | ./regression/ 22 | ./mnist/ 23 | ./imagenet/ 24 | ./super_resolution/ 25 | ./time_sequence_prediction/ 26 | ./vae/ 27 | 28 | # Legacy examples 29 | ./cpp/ 30 | ./legacy/snli/ 31 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | - Using welcoming and inclusive language 18 | - Being respectful of differing viewpoints and experiences 19 | - Gracefully accepting constructive criticism 20 | - Focusing on what is best for the community 21 | - Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | - The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | - Trolling, insulting/derogatory comments, and personal or political attacks 28 | - Public or private harassment 29 | - Publishing other's private information, such as physical or electronic 30 | address, without explicit permission 31 | - Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to examples 2 | 3 | We want to make contributing to this project as easy and transparent as 4 | possible. 5 | 6 | ## Pull Requests 7 | 8 | We actively welcome your pull requests. 
9 | 10 | If you're new, we encourage you to take a look at issues tagged with [good first issue](https://github.com/pytorch/examples/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) 11 | 12 | ### For new examples 13 | 14 | 0. Create a GitHub issue proposing a new example and make sure it's substantially different from an existing one. 15 | 1. Fork the repo and create your branch from `main`. 16 | 2. If you've added code that should be tested, add tests to `run_python_examples.sh`. 17 | 3. Create a `README.md`. 18 | 4. Add a card with a brief description of your example and link to the repo to 19 | the `docs/source/index.rst` file and build the docs by running: 20 | 21 | ``` 22 | cd docs 23 | virtualenv venv 24 | source venv/bin/activate 25 | pip install -r requirements.txt 26 | make html 27 | ``` 28 | 29 | When done working with `virtualenv`, run `deactivate`. 30 | 31 | 5. Verify that there are no issues in your doc build. You can check the preview locally 32 | by installing [sphinx-serve](https://pypi.org/project/sphinx-serve/) 33 | then running `sphinx-serve -b build`. 34 | 6. Ensure your test passes locally. 35 | 7. If you haven't already, complete the Contributor License Agreement ("CLA"). 36 | 8. Address any feedback in code review promptly. 37 | 38 | ## For bug fixes 39 | 40 | 1. Fork the repo and create your branch from `main`. 41 | 2. Make sure you have a GPU-enabled machine, either locally or in the cloud. `g4dn.4xlarge` is a good starting point on AWS. 42 | 3. Make your code change. 43 | 4. First, install all dependencies with `./run_python_examples.sh "install_deps"`. 44 | 5. Then, make sure that `./run_python_examples.sh` passes locally by running the script end to end. 45 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 46 | 7. Address any feedback in code review promptly. 47 | 48 | ## Contributor License Agreement ("CLA") 49 | 50 | To accept your pull request, we need you to submit a CLA. You only need 51 | to do this once to work on any of Facebook's open source projects. 52 | 53 | Complete your CLA here: 54 | 55 | ## Issues 56 | 57 | We use GitHub issues to track public bugs. Please ensure your description is 58 | clear and has sufficient instructions to be able to reproduce the issue. 59 | 60 | ## License 61 | 62 | By contributing to examples, you agree that your contributions will be licensed 63 | under the LICENSE file in the root directory of this source tree. 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Pytorch contributors 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch Examples 2 | 3 | ![Run Examples](https://github.com/pytorch/examples/workflows/Run%20Examples/badge.svg) 4 | 5 | https://pytorch.org/examples/ 6 | 7 | `pytorch/examples` is a repository showcasing examples of using [PyTorch](https://github.com/pytorch/pytorch). The goal is to have curated, short, few/no dependencies _high quality_ examples that are substantially different from each other that can be emulated in your existing work. 8 | 9 | - For tutorials: https://github.com/pytorch/tutorials 10 | - For changes to pytorch.org: https://github.com/pytorch/pytorch.github.io 11 | - For a general model hub: https://pytorch.org/hub/ or https://huggingface.co/models 12 | - For recipes on how to run PyTorch in production: https://github.com/facebookresearch/recipes 13 | - For general Q&A and support: https://discuss.pytorch.org/ 14 | 15 | ## Available models 16 | 17 | - [Image classification (MNIST) using Convnets](./mnist/README.md) 18 | - [Word-level Language Modeling using RNN and Transformer](./word_language_model/README.md) 19 | - [Training Imagenet Classifiers with Popular Networks](./imagenet/README.md) 20 | - [Generative Adversarial Networks (DCGAN)](./dcgan/README.md) 21 | - [Variational Auto-Encoders](./vae/README.md) 22 | - [Superresolution using an efficient sub-pixel convolutional neural network](./super_resolution/README.md) 23 | - [Hogwild training of shared ConvNets across multiple processes on MNIST](mnist_hogwild) 24 | - [Training a CartPole to balance in OpenAI Gym with actor-critic](./reinforcement_learning/README.md) 25 | - [Natural Language Inference (SNLI) with GloVe vectors, LSTMs, and torchtext](snli) 26 | - [Time sequence prediction - use an LSTM to learn Sine waves](./time_sequence_prediction/README.md) 27 | - [Implement the Neural Style Transfer algorithm on images](./fast_neural_style/README.md) 28 | - [Reinforcement Learning with Actor Critic and REINFORCE algorithms on OpenAI gym](./reinforcement_learning/README.md) 29 | - [PyTorch Module Transformations using fx](./fx/README.md) 30 | - Distributed PyTorch examples with [Distributed Data Parallel](./distributed/ddp/README.md) and [RPC](./distributed/rpc) 31 | - [Several examples illustrating the C++ Frontend](cpp) 32 | - [Image Classification Using Forward-Forward](./mnist_forward_forward/README.md) 33 | - [Language Translation using Transformers](./language_translation/README.md) 34 | 35 | 36 | 37 | Additionally, a list of good examples hosted in their own repositories: 38 | 39 | - [Neural Machine Translation using 
sequence-to-sequence RNN with attention (OpenNMT)](https://github.com/OpenNMT/OpenNMT-py) 40 | 41 | ## Contributing 42 | 43 | If you'd like to contribute your own example or fix a bug please make sure to take a look at [CONTRIBUTING.md](CONTRIBUTING.md). 44 | -------------------------------------------------------------------------------- /cpp/.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | AccessModifierOffset: -1 3 | AlignAfterOpenBracket: AlwaysBreak 4 | AlignConsecutiveAssignments: false 5 | AlignConsecutiveDeclarations: false 6 | AlignEscapedNewlinesLeft: true 7 | AlignOperands: false 8 | AlignTrailingComments: false 9 | AllowAllParametersOfDeclarationOnNextLine: false 10 | AllowShortBlocksOnASingleLine: false 11 | AllowShortCaseLabelsOnASingleLine: false 12 | AllowShortFunctionsOnASingleLine: Empty 13 | AllowShortIfStatementsOnASingleLine: false 14 | AllowShortLoopsOnASingleLine: false 15 | AlwaysBreakAfterReturnType: None 16 | AlwaysBreakBeforeMultilineStrings: true 17 | AlwaysBreakTemplateDeclarations: true 18 | BinPackArguments: false 19 | BinPackParameters: false 20 | BraceWrapping: 21 | AfterClass: false 22 | AfterControlStatement: false 23 | AfterEnum: false 24 | AfterFunction: false 25 | AfterNamespace: false 26 | AfterObjCDeclaration: false 27 | AfterStruct: false 28 | AfterUnion: false 29 | BeforeCatch: false 30 | BeforeElse: false 31 | IndentBraces: false 32 | BreakBeforeBinaryOperators: None 33 | BreakBeforeBraces: Attach 34 | BreakBeforeTernaryOperators: true 35 | BreakConstructorInitializersBeforeComma: false 36 | BreakAfterJavaFieldAnnotations: false 37 | BreakStringLiterals: false 38 | ColumnLimit: 80 39 | CommentPragmas: '^ IWYU pragma:' 40 | CompactNamespaces: false 41 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 42 | ConstructorInitializerIndentWidth: 4 43 | ContinuationIndentWidth: 4 44 | Cpp11BracedListStyle: true 45 | DerivePointerAlignment: false 46 | DisableFormat: false 47 | ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ] 48 | IncludeCategories: 49 | - Regex: '^<.*\.h(pp)?>' 50 | Priority: 1 51 | - Regex: '^<.*' 52 | Priority: 2 53 | - Regex: '.*' 54 | Priority: 3 55 | IndentCaseLabels: true 56 | IndentWidth: 2 57 | IndentWrappedFunctionNames: false 58 | KeepEmptyLinesAtTheStartOfBlocks: false 59 | MacroBlockBegin: '' 60 | MacroBlockEnd: '' 61 | MaxEmptyLinesToKeep: 1 62 | NamespaceIndentation: None 63 | ObjCBlockIndentWidth: 2 64 | ObjCSpaceAfterProperty: false 65 | ObjCSpaceBeforeProtocolList: false 66 | PenaltyBreakBeforeFirstCallParameter: 1 67 | PenaltyBreakComment: 300 68 | PenaltyBreakFirstLessLess: 120 69 | PenaltyBreakString: 1000 70 | PenaltyExcessCharacter: 1000000 71 | PenaltyReturnTypeOnItsOwnLine: 2000000 72 | PointerAlignment: Left 73 | ReflowComments: true 74 | SortIncludes: true 75 | SpaceAfterCStyleCast: false 76 | SpaceBeforeAssignmentOperators: true 77 | SpaceBeforeParens: ControlStatements 78 | SpaceInEmptyParentheses: false 79 | SpacesBeforeTrailingComments: 1 80 | SpacesInAngles: false 81 | SpacesInContainerLiterals: true 82 | SpacesInCStyleCastParentheses: false 83 | SpacesInParentheses: false 84 | SpacesInSquareBrackets: false 85 | Standard: Cpp11 86 | TabWidth: 8 87 | UseTab: Never 88 | ... 
89 | -------------------------------------------------------------------------------- /cpp/autograd/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | 3 | project(autograd) 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | find_package(Torch REQUIRED) 7 | 8 | add_executable(${PROJECT_NAME} "autograd.cpp") 9 | target_link_libraries(${PROJECT_NAME} "${TORCH_LIBRARIES}") 10 | 11 | # The following code block is suggested to be used on Windows. 12 | # According to https://github.com/pytorch/pytorch/issues/25457, 13 | # the DLLs need to be copied to avoid memory errors. 14 | if (MSVC) 15 | file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") 16 | add_custom_command(TARGET ${PROJECT_NAME} 17 | POST_BUILD 18 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 19 | ${TORCH_DLLS} 20 | $) 21 | endif (MSVC) 22 | -------------------------------------------------------------------------------- /cpp/autograd/README.md: -------------------------------------------------------------------------------- 1 | # C++ autograd example 2 | 3 | `autograd.cpp` contains several examples of doing autograd in PyTorch C++ frontend. 4 | 5 | To build the code, run the following commands from your terminal: 6 | 7 | ```shell 8 | $ cd autograd 9 | $ mkdir build 10 | $ cd build 11 | $ cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch .. 12 | $ make 13 | ``` 14 | 15 | where `/path/to/libtorch` should be the path to the unzipped _LibTorch_ 16 | distribution, which you can get from the [PyTorch 17 | homepage](https://pytorch.org/get-started/locally/). 18 | 19 | Execute the compiled binary to run: 20 | 21 | ```shell 22 | $ ./autograd 23 | ====== Running: "Basic autograd operations" ====== 24 | 1 1 25 | 1 1 26 | [ CPUFloatType{2,2} ] 27 | 3 3 28 | 3 3 29 | [ CPUFloatType{2,2} ] 30 | AddBackward1 31 | 27 27 32 | 27 27 33 | [ CPUFloatType{2,2} ] 34 | MulBackward1 35 | 27 36 | [ CPUFloatType{} ] 37 | MeanBackward0 38 | false 39 | true 40 | SumBackward0 41 | 4.5000 4.5000 42 | 4.5000 4.5000 43 | [ CPUFloatType{2,2} ] 44 | 813.6625 45 | 1015.0142 46 | -664.8849 47 | [ CPUFloatType{3} ] 48 | MulBackward1 49 | 204.8000 50 | 2048.0000 51 | 0.2048 52 | [ CPUFloatType{3} ] 53 | true 54 | true 55 | false 56 | true 57 | false 58 | true 59 | 60 | ====== Running "Computing higher-order gradients in C++" ====== 61 | 0.0025 0.0946 0.1474 0.1387 62 | 0.0238 -0.0018 0.0259 0.0094 63 | 0.0513 -0.0549 -0.0604 0.0210 64 | [ CPUFloatType{3,4} ] 65 | 66 | ====== Running "Using custom autograd function in C++" ====== 67 | -3.5513 3.7160 3.6477 68 | -3.5513 3.7160 3.6477 69 | [ CPUFloatType{2,3} ] 70 | 0.3095 1.4035 -0.0349 71 | 0.3095 1.4035 -0.0349 72 | 0.3095 1.4035 -0.0349 73 | 0.3095 1.4035 -0.0349 74 | [ CPUFloatType{4,3} ] 75 | 5.5000 76 | 5.5000 77 | [ CPUFloatType{2} ] 78 | ``` 79 | -------------------------------------------------------------------------------- /cpp/custom-dataset/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | 3 | project(custom-dataset) 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | find_package(Torch REQUIRED) 7 | find_package(OpenCV REQUIRED COMPONENTS core imgproc imgcodecs) 8 | 9 | message(STATUS "OpenCV include dirs: ${OpenCV_INCLUDE_DIRS}") 10 | message(STATUS "OpenCV libraries: ${OpenCV_LIBS}") 11 | 12 | 13 | include_directories(${OpenCV_INCLUDE_DIRS}) 14 | add_executable(${PROJECT_NAME} "custom-dataset.cpp") 15 | target_link_libraries(${PROJECT_NAME} 
"${OpenCV_LIBS}") 16 | target_link_libraries(${PROJECT_NAME} "${TORCH_LIBRARIES}") 17 | 18 | configure_file("info.txt" "info.txt" COPYONLY) 19 | 20 | # The following code block is suggested to be used on Windows. 21 | # According to https://github.com/pytorch/pytorch/issues/25457, 22 | # the DLLs need to be copied to avoid memory errors. 23 | if (MSVC) 24 | file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") 25 | add_custom_command(TARGET ${PROJECT_NAME} 26 | POST_BUILD 27 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 28 | ${TORCH_DLLS} 29 | $) 30 | endif (MSVC) 31 | -------------------------------------------------------------------------------- /cpp/custom-dataset/README.md: -------------------------------------------------------------------------------- 1 | # Custom Dataset Example with the PyTorch C++ Frontend 2 | 3 | This folder contains an example of loading a custom image dataset with OpenCV and training a model to label images, using the PyTorch C++ frontend. 4 | 5 | The dataset used here is [Caltech 101](https://data.caltech.edu/records/mzrjq-6wc02) dataset. 6 | 7 | The entire training code is contained in custom-data.cpp. 8 | 9 | You can find instructions on how to install OpenCV [here](../tools/InstallingOpenCV.md). 10 | 11 | To build the code, run the following commands from your terminal: 12 | 13 | ```shell 14 | $ cd custom-dataset 15 | $ mkdir build 16 | $ cd build 17 | $ cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch .. 18 | $ make 19 | ``` 20 | 21 | where /path/to/libtorch should be the path to the unzipped LibTorch distribution, which you can get from the [PyTorch homepage](https://pytorch.org/get-started/locally/). 22 | 23 | If you see an error like `undefined reference to cv::imread(std::string const&, int)` when running the `make` command, you should build LibTorch from source using the instructions [here](https://github.com/pytorch/pytorch#from-source), and then set `CMAKE_PREFIX_PATH` to that PyTorch source directory. An alternative solution is to use `libtorch-cxx11-abi-shared-with-deps` instead of `libtorch-shared-with-deps` as the latter is not compatible with openCV (reported [here](https://discuss.pytorch.org/t/library-conflict-between-libtorch-and-opencv/64489)). 24 | 25 | The build directory should look like this: 26 | 27 | ``` 28 | . 29 | ├── custom-dataset 30 | ├── dataset 31 | │   ├── accordion 32 | │   │   ├── image_0001.jpg 33 | │   │   ├── ... 34 | │   ├── airplanes 35 | │   │   ├── ... 36 | │   ├── ... 37 | ├── info.txt 38 | └── Makefile 39 | └── ... 40 | ``` 41 | 42 | `info.txt` file gets copied from source directory during build. 43 | 44 | Execute the compiled binary to train the model: 45 | 46 | ```shell 47 | ./custom-dataset 48 | Running on: CUDA 49 | Train Epoch: 1 16/7281 Loss: 0.314655 Acc: 0 50 | Train Epoch: 1 176/7281 Loss: 0.532111 Acc: 0.0681818 51 | Train Epoch: 1 336/7281 Loss: 0.538482 Acc: 0.0714286 52 | Train Epoch: 1 496/7281 Loss: 0.535302 Acc: 0.0705645 53 | Train Epoch: 1 656/7281 Loss: 0.536113 Acc: 0.0716463 54 | Train Epoch: 1 816/7281 Loss: 0.537626 Acc: 0.0784314 55 | Train Epoch: 1 976/7281 Loss: 0.537055 Acc: 0.079918 56 | ... 
57 | 58 | ``` -------------------------------------------------------------------------------- /cpp/dcgan/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(dcgan) 3 | 4 | find_package(Torch REQUIRED) 5 | 6 | option(DOWNLOAD_MNIST "Download the MNIST dataset from the internet" ON) 7 | if (DOWNLOAD_MNIST) 8 | message(STATUS "Downloading MNIST dataset") 9 | execute_process( 10 | COMMAND python ${CMAKE_CURRENT_LIST_DIR}/../tools/download_mnist.py 11 | -d ${CMAKE_BINARY_DIR}/data 12 | ERROR_VARIABLE DOWNLOAD_ERROR) 13 | if (DOWNLOAD_ERROR) 14 | message(FATAL_ERROR "Error downloading MNIST dataset: ${DOWNLOAD_ERROR}") 15 | endif() 16 | endif() 17 | 18 | add_executable(dcgan dcgan.cpp) 19 | target_link_libraries(dcgan "${TORCH_LIBRARIES}") 20 | set_property(TARGET dcgan PROPERTY CXX_STANDARD 17) 21 | 22 | if (MSVC) 23 | file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") 24 | add_custom_command(TARGET dcgan 25 | POST_BUILD 26 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 27 | ${TORCH_DLLS} 28 | $) 29 | endif (MSVC) 30 | -------------------------------------------------------------------------------- /cpp/dcgan/README.md: -------------------------------------------------------------------------------- 1 | # DCGAN Example with the PyTorch C++ Frontend 2 | 3 | This folder contains an example of training a DCGAN to generate MNIST digits 4 | with the PyTorch C++ frontend. 5 | 6 | The entire training code is contained in `dcgan.cpp`. 7 | 8 | You can find the commands to install argparse [here](https://github.com/pytorch/examples/blob/main/.github/workflows/main_cpp.yml#L34). 9 | 10 | To build the code, run the following commands from your terminal: 11 | 12 | ```shell 13 | $ cd dcgan 14 | $ mkdir build 15 | $ cd build 16 | $ cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch .. 17 | $ make 18 | ``` 19 | 20 | where `/path/to/libtorch` should be the path to the unzipped _LibTorch_ 21 | distribution, which you can get from the [PyTorch 22 | homepage](https://pytorch.org/get-started/locally/). 23 | 24 | Execute the compiled binary to train the model: 25 | 26 | ```shell 27 | $ ./dcgan 28 | [ 1/30][200/938] D_loss: 0.4953 | G_loss: 4.0195 29 | -> checkpoint 1 30 | [ 1/30][400/938] D_loss: 0.3610 | G_loss: 4.8148 31 | -> checkpoint 2 32 | [ 1/30][600/938] D_loss: 0.4072 | G_loss: 4.36760 33 | -> checkpoint 3 34 | [ 1/30][800/938] D_loss: 0.4444 | G_loss: 4.0250 35 | -> checkpoint 4 36 | [ 2/30][200/938] D_loss: 0.3761 | G_loss: 3.8790 37 | -> checkpoint 5 38 | [ 2/30][400/938] D_loss: 0.3977 | G_loss: 3.3315 39 | -> checkpoint 6 40 | [ 2/30][600/938] D_loss: 0.3815 | G_loss: 3.5696 41 | -> checkpoint 7 42 | [ 2/30][800/938] D_loss: 0.4039 | G_loss: 3.2759 43 | -> checkpoint 8 44 | [ 3/30][200/938] D_loss: 0.4236 | G_loss: 4.5132 45 | -> checkpoint 9 46 | [ 3/30][400/938] D_loss: 0.3645 | G_loss: 3.9759 47 | -> checkpoint 10 48 | ... 49 | ``` 50 | 51 | We can also specify the `--epochs` to change the number of epochs to train as follows: 52 | 53 | ```shell 54 | $ ./dcgan --epochs 10 55 | ``` 56 | Without specifying the `--epochs` flag, the default number of epochs to train is 30. 57 | 58 | 59 | The training script periodically generates image samples. Use the 60 | `display_samples.py` script situated in this folder to generate a plot image. 
61 | For example: 62 | 63 | ```shell 64 | $ python display_samples.py -i dcgan-sample-10.pt 65 | Saved out.png 66 | ``` 67 | -------------------------------------------------------------------------------- /cpp/dcgan/display_samples.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | import argparse 5 | 6 | import matplotlib.pyplot as plt 7 | import torch 8 | 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("-i", "--sample-file", required=True) 12 | parser.add_argument("-o", "--out-file", default="out.png") 13 | parser.add_argument("-d", "--dimension", type=int, default=3) 14 | options = parser.parse_args() 15 | 16 | module = torch.jit.load(options.sample_file) 17 | images = list(module.parameters())[0] 18 | 19 | for index in range(options.dimension * options.dimension): 20 | image = images[index].detach().cpu().reshape(28, 28).mul(255).to(torch.uint8) 21 | array = image.numpy() 22 | axis = plt.subplot(options.dimension, options.dimension, 1 + index) 23 | plt.imshow(array, cmap="gray") 24 | axis.get_xaxis().set_visible(False) 25 | axis.get_yaxis().set_visible(False) 26 | 27 | plt.savefig(options.out_file) 28 | print("Saved ", options.out_file) 29 | -------------------------------------------------------------------------------- /cpp/distributed/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(dist-mnist) 3 | 4 | find_package(Torch REQUIRED) 5 | 6 | find_package(MPI REQUIRED) 7 | 8 | include_directories(SYSTEM ${MPI_C_INCLUDE_PATH} ${MPI_CXX_INCLUDE_PATH}) 9 | 10 | add_executable(dist-mnist dist-mnist.cpp) 11 | target_link_libraries(dist-mnist ${TORCH_LIBRARIES}) 12 | target_link_libraries(dist-mnist ${MPI_LIBRARIES}) 13 | target_link_libraries(dist-mnist ${CMAKE_PREFIX_PATH}/lib/libc10d.a) 14 | 15 | if(MPI_COMPILE_FLAGS) 16 | set_target_properties(dist-mnist PROPERTIES 17 | COMPILE_FLAGS "${MPI_COMPILE_FLAGS}") 18 | endif() 19 | 20 | if(MPI_LINK_FLAGS) 21 | set_target_properties(dist-mnist PROPERTIES 22 | LINK_FLAGS "${MPI_LINK_FLAGS}") 23 | endif() 24 | -------------------------------------------------------------------------------- /cpp/distributed/README.md: -------------------------------------------------------------------------------- 1 | # Distributed Training on MNIST using PyTorch C++ Frontend (Libtorch) 2 | 3 | This folder contains an example of data-parallel training of a convolutional neural network on the MNIST dataset. For parallelization, Message Passing Interface (MPI) is used. 4 | 5 | The entire code is contained in `dist-mnist.cpp`. 6 | 7 | You can find instructions on how to install MPI [here](https://www.open-mpi.org/faq/?category=building). This code was tested with Open MPI, but it should also run on other MPI implementations such as MPICH and MVAPICH. 8 | 9 | To build the code, run the following commands from the terminal: 10 | 11 | ```shell 12 | $ cd distributed 13 | $ mkdir build 14 | $ cd build 15 | $ cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch .. 16 | $ make 17 | ``` 18 | 19 | where /path/to/libtorch should be the path to the unzipped LibTorch distribution. Note that the LibTorch from the [PyTorch homepage](https://pytorch.org/get-started/locally/) does not include MPI headers and cannot be used for this example.
You have to compile LibTorch manually; a set of guidelines is provided [here](https://gist.github.com/lasagnaphil/3e0099816837318e8e8bcab7edcfd5d9), though the exact steps may vary between systems. 20 | 21 | To run the code: 22 | 23 | ```shell 24 | mpirun -np {NUM-PROCS} ./dist-mnist 25 | ``` 26 | -------------------------------------------------------------------------------- /cpp/mnist/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(mnist) 3 | set(CMAKE_CXX_STANDARD 17) 4 | 5 | find_package(Torch REQUIRED) 6 | 7 | option(DOWNLOAD_MNIST "Download the MNIST dataset from the internet" ON) 8 | if (DOWNLOAD_MNIST) 9 | message(STATUS "Downloading MNIST dataset") 10 | execute_process( 11 | COMMAND python ${CMAKE_CURRENT_LIST_DIR}/../tools/download_mnist.py 12 | -d ${CMAKE_BINARY_DIR}/data 13 | ERROR_VARIABLE DOWNLOAD_ERROR) 14 | if (DOWNLOAD_ERROR) 15 | message(FATAL_ERROR "Error downloading MNIST dataset: ${DOWNLOAD_ERROR}") 16 | endif() 17 | endif() 18 | 19 | add_executable(mnist mnist.cpp) 20 | target_compile_features(mnist PUBLIC cxx_range_for) 21 | target_link_libraries(mnist ${TORCH_LIBRARIES}) 22 | 23 | if (MSVC) 24 | file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") 25 | add_custom_command(TARGET mnist 26 | POST_BUILD 27 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 28 | ${TORCH_DLLS} 29 | $<TARGET_FILE_DIR:mnist>) 30 | endif (MSVC) 31 | -------------------------------------------------------------------------------- /cpp/mnist/README.md: -------------------------------------------------------------------------------- 1 | # MNIST Example with the PyTorch C++ Frontend 2 | 3 | This folder contains an example of training a computer vision model to recognize 4 | digits in images from the MNIST dataset, using the PyTorch C++ frontend. 5 | 6 | The entire training code is contained in `mnist.cpp`. 7 | 8 | To build the code, run the following commands from your terminal: 9 | 10 | ```shell 11 | $ cd mnist 12 | $ mkdir build 13 | $ cd build 14 | $ cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch .. 15 | $ make 16 | ``` 17 | 18 | where `/path/to/libtorch` should be the path to the unzipped _LibTorch_ 19 | distribution, which you can get from the [PyTorch 20 | homepage](https://pytorch.org/get-started/locally/). 21 | 22 | Execute the compiled binary to train the model: 23 | 24 | ```shell 25 | $ ./mnist 26 | Train Epoch: 1 [59584/60000] Loss: 0.4232 27 | Test set: Average loss: 0.1989 | Accuracy: 0.940 28 | Train Epoch: 2 [59584/60000] Loss: 0.1926 29 | Test set: Average loss: 0.1338 | Accuracy: 0.959 30 | Train Epoch: 3 [59584/60000] Loss: 0.1390 31 | Test set: Average loss: 0.0997 | Accuracy: 0.969 32 | Train Epoch: 4 [59584/60000] Loss: 0.1239 33 | Test set: Average loss: 0.0875 | Accuracy: 0.972 34 | ... 35 | ``` 36 | -------------------------------------------------------------------------------- /cpp/regression/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | 3 | project(regression) 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | find_package(Torch REQUIRED) 7 | 8 | add_executable(${PROJECT_NAME} "regression.cpp") 9 | target_link_libraries(${PROJECT_NAME} "${TORCH_LIBRARIES}") 10 | 11 | # The following code block is suggested to be used on Windows. 12 | # According to https://github.com/pytorch/pytorch/issues/25457, 13 | # the DLLs need to be copied to avoid memory errors.
14 | if (MSVC) 15 | file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") 16 | add_custom_command(TARGET ${PROJECT_NAME} 17 | POST_BUILD 18 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 19 | ${TORCH_DLLS} 20 | $<TARGET_FILE_DIR:${PROJECT_NAME}>) 21 | endif (MSVC) 22 | -------------------------------------------------------------------------------- /cpp/regression/README.md: -------------------------------------------------------------------------------- 1 | # Linear regression example 2 | 3 | Trains a single fully-connected layer to fit a 4th degree polynomial. 4 | 5 | To build the code, run the following commands from your terminal: 6 | 7 | ```shell 8 | $ cd regression 9 | $ mkdir build 10 | $ cd build 11 | $ cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch .. 12 | $ make 13 | ``` 14 | 15 | where `/path/to/libtorch` should be the path to the unzipped _LibTorch_ 16 | distribution, which you can get from the [PyTorch 17 | homepage](https://pytorch.org/get-started/locally/). 18 | 19 | Execute the compiled binary to run: 20 | 21 | ```shell 22 | $ ./regression 23 | Loss: 0.000301158 after 584 batches 24 | ==> Learned function: y = 11.6441 x^4 -3.10164 x^3 2.19786 x^2 -3.83606 x^1 + 4.37066 25 | ==> Actual function: y = 11.669 x^4 -3.16023 x^3 2.19182 x^2 -3.81505 x^1 + 4.38219 26 | ... 27 | ``` 28 | -------------------------------------------------------------------------------- /cpp/regression/regression.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | #include <iostream> 3 | #include <sstream> 4 | #include <string> 5 | #include <vector> 6 | 7 | #define POLY_DEGREE 4 8 | 9 | // Builds features i.e. a matrix with columns [x, x^2, x^3, x^4]. 10 | torch::Tensor make_features(torch::Tensor x) { 11 | x = x.unsqueeze(1); 12 | std::vector<torch::Tensor> xs; 13 | for (int64_t i = 0; i < POLY_DEGREE; ++i) 14 | xs.push_back(x.pow(i + 1)); 15 | return torch::cat(xs, 1); 16 | } 17 | 18 | // Approximated function. 19 | torch::Tensor f( 20 | torch::Tensor x, 21 | torch::Tensor W_target, 22 | torch::Tensor b_target) { 23 | return x.mm(W_target) + b_target.item(); 24 | } 25 | 26 | // Creates a string description of a polynomial. 27 | std::string poly_desc(torch::Tensor W, torch::Tensor b) { 28 | auto size = W.size(0); 29 | std::ostringstream stream; 30 | 31 | stream << "y = "; 32 | for (int64_t i = 0; i < size; ++i) 33 | stream << W[i].item<float>() << " x^" << size - i << " "; 34 | stream << "+ " << b[0].item<float>(); 35 | return stream.str(); 36 | } 37 | 38 | // Builds a batch i.e. (x, f(x)) pair.
39 | std::pair<torch::Tensor, torch::Tensor> get_batch( 40 | torch::Tensor W_target, 41 | torch::Tensor b_target, 42 | int64_t batch_size = 32) { 43 | auto random = torch::randn({batch_size}); 44 | auto x = make_features(random); 45 | auto y = f(x, W_target, b_target); 46 | return std::make_pair(x, y); 47 | } 48 | 49 | int main() { 50 | auto W_target = torch::randn({POLY_DEGREE, 1}) * 5; 51 | auto b_target = torch::randn({1}) * 5; 52 | 53 | // Define the model and optimizer 54 | auto fc = torch::nn::Linear(W_target.size(0), 1); 55 | torch::optim::SGD optim(fc->parameters(), .1); 56 | 57 | float loss = 0; 58 | int64_t batch_idx = 0; 59 | 60 | while (++batch_idx) { 61 | // Get data 62 | torch::Tensor batch_x, batch_y; 63 | std::tie(batch_x, batch_y) = get_batch(W_target, b_target); 64 | 65 | // Reset gradients 66 | optim.zero_grad(); 67 | 68 | // Forward pass 69 | auto output = torch::smooth_l1_loss(fc(batch_x), batch_y); 70 | loss = output.item<float>(); 71 | 72 | // Backward pass 73 | output.backward(); 74 | 75 | // Apply gradients 76 | optim.step(); 77 | 78 | // Stop criterion 79 | if (loss < 1e-3f) 80 | break; 81 | } 82 | 83 | std::cout << "Loss: " << loss << " after " << batch_idx << " batches" 84 | << std::endl; 85 | std::cout << "==> Learned function:\t" 86 | << poly_desc(fc->weight.view({-1}), fc->bias) << std::endl; 87 | std::cout << "==> Actual function:\t" 88 | << poly_desc(W_target.view({-1}), b_target) << std::endl; 89 | 90 | return 0; 91 | } 92 | -------------------------------------------------------------------------------- /cpp/tools/InstallingOpenCV.md: -------------------------------------------------------------------------------- 1 | # Installing OpenCV 2 | 3 | ## Linux with Package Manager 4 | 5 | ### Arch Linux 6 | 7 | ```shell 8 | pacman -Syu base-devel opencv 9 | ``` 10 | 11 | ### Fedora 12 | 13 | ```shell 14 | sudo dnf install opencv opencv-devel 15 | ``` 16 | 17 | ## Linux From Source 18 | 19 | Required Packages: 20 | 21 | ```shell 22 | sudo apt-get install build-essential cmake git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev 23 | ``` 24 | 25 | Optional Packages: 26 | 27 | ```shell 28 | sudo apt-get install python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libjasper-dev libdc1394-22-dev 29 | ``` 30 | 31 | Building from Source: 32 | 33 | ```shell 34 | git clone https://github.com/opencv/opencv.git 35 | git clone https://github.com/opencv/opencv_contrib.git 36 | 37 | cd opencv && mkdir build && cd build 38 | cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local .. 39 | make -j8 # runs 8 jobs in parallel 40 | sudo make install 41 | ``` 42 | 43 | ## Windows 44 | 45 | You can download the pre-built libraries from [OpenCV releases](https://github.com/opencv/opencv/releases) and install them easily.
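Whichever installation route you take, it is worth confirming that the OpenCV development files are discoverable before configuring the C++ examples that depend on them. A minimal sanity check, assuming the install produced the `opencv4.pc` pkg-config file (package-manager installs normally ship it; source builds need `-DOPENCV_GENERATE_PKGCONFIG=ON` added to the `cmake` line above):

```shell
# Prints the installed OpenCV version, e.g. 4.x.y.
# A "not found" error means the development headers/metadata are not on the search path.
pkg-config --modversion opencv4
```

If the check fails even though OpenCV is installed, you can instead point CMake directly at the install by passing `-DOpenCV_DIR=/path/to/dir/containing/OpenCVConfig.cmake` when configuring the example.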
46 | -------------------------------------------------------------------------------- /cpp/tools/download_mnist.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import argparse 5 | import gzip 6 | import os 7 | import sys 8 | import urllib 9 | 10 | try: 11 | from urllib.error import URLError 12 | from urllib.request import urlretrieve 13 | except ImportError: 14 | from urllib2 import URLError 15 | from urllib import urlretrieve 16 | 17 | RESOURCES = [ 18 | 'train-images-idx3-ubyte.gz', 19 | 'train-labels-idx1-ubyte.gz', 20 | 't10k-images-idx3-ubyte.gz', 21 | 't10k-labels-idx1-ubyte.gz', 22 | ] 23 | 24 | 25 | def report_download_progress(chunk_number, chunk_size, file_size): 26 | if file_size != -1: 27 | percent = min(1, (chunk_number * chunk_size) / file_size) 28 | bar = '#' * int(64 * percent) 29 | sys.stdout.write('\r0% |{:<64}| {}%'.format(bar, int(percent * 100))) 30 | 31 | 32 | def download(destination_path, url, quiet): 33 | if os.path.exists(destination_path): 34 | if not quiet: 35 | print('{} already exists, skipping ...'.format(destination_path)) 36 | else: 37 | print('Downloading {} ...'.format(url)) 38 | try: 39 | hook = None if quiet else report_download_progress 40 | urlretrieve(url, destination_path, reporthook=hook) 41 | except URLError: 42 | raise RuntimeError('Error downloading resource!') 43 | finally: 44 | if not quiet: 45 | # Just a newline. 46 | print() 47 | 48 | 49 | def unzip(zipped_path, quiet): 50 | unzipped_path = os.path.splitext(zipped_path)[0] 51 | if os.path.exists(unzipped_path): 52 | if not quiet: 53 | print('{} already exists, skipping ... '.format(unzipped_path)) 54 | return 55 | with gzip.open(zipped_path, 'rb') as zipped_file: 56 | with open(unzipped_path, 'wb') as unzipped_file: 57 | unzipped_file.write(zipped_file.read()) 58 | if not quiet: 59 | print('Unzipped {} ...'.format(zipped_path)) 60 | 61 | 62 | def main(): 63 | parser = argparse.ArgumentParser( 64 | description='Download the MNIST dataset from the internet') 65 | parser.add_argument( 66 | '-d', '--destination', default='.', help='Destination directory') 67 | parser.add_argument( 68 | '-q', 69 | '--quiet', 70 | action='store_true', 71 | help="Don't report about progress") 72 | options = parser.parse_args() 73 | 74 | if not os.path.exists(options.destination): 75 | os.makedirs(options.destination) 76 | 77 | try: 78 | for resource in RESOURCES: 79 | path = os.path.join(options.destination, resource) 80 | # url = 'http://yann.lecun.com/exdb/mnist/{}'.format(resource) 81 | url = 'https://ossci-datasets.s3.amazonaws.com/mnist/{}'.format(resource) 82 | download(path, url, options.quiet) 83 | unzip(path, options.quiet) 84 | except KeyboardInterrupt: 85 | print('Interrupted') 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /cpp/transfer-learning/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(example) 3 | 4 | find_package(Torch REQUIRED) 5 | find_package(OpenCV 4.1.0 REQUIRED) 6 | 7 | include_directories(${OpenCV_INCLUDE_DIRS}) 8 | 9 | add_executable(example main.cpp main.h) 10 | add_executable(classify classify.cpp) 11 | 12 | target_link_libraries(example ${OpenCV_LIBS}) 13 | target_link_libraries(example "${TORCH_LIBRARIES}") 14 | target_link_libraries(classify ${OpenCV_LIBS}) 15 | 
target_link_libraries(classify "${TORCH_LIBRARIES}") 16 | 17 | set_property(TARGET classify PROPERTY CXX_STANDARD 17) 18 | set_property(TARGET example PROPERTY CXX_STANDARD 17) 19 | -------------------------------------------------------------------------------- /cpp/transfer-learning/README.md: -------------------------------------------------------------------------------- 1 | # Transfer Learning on Dogs vs Cats Dataset using Libtorch and OpenCV 2 | 3 | Transfer Learning on Dogs vs Cats dataset using PyTorch C++ API. 4 | 5 | ## Usage 6 | 7 | For **training**: 8 | 9 | 1. Remove final layer of `ResNet18` pre-trained model and convert to `torch.jit` module: `python3 convert.py`. 10 | 2. Create build directory: `mkdir build && cd build` 11 | 3. `cmake -DCMAKE_PREFIX_PATH=/absolute/path/to/libtorch ..` 12 | 4. `make` 13 | 5. Run training code: `./example ` 14 | 15 | For **prediction**: 16 | 17 | 1. `cd build` 18 | 2. `./classify ` : `./classify ../resnet18_without_last_layer.pt model_linear.pt` 19 | 20 | Detailed blog on applying Transfer Learning using Libtorch: https://krshrimali.github.io/Applying-Transfer-Learning-Dogs-Cats/. 21 | -------------------------------------------------------------------------------- /cpp/transfer-learning/classify.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // classify.cpp 3 | // transfer-learning 4 | // 5 | // Created by Kushashwa Ravi Shrimali on 15/08/19. 6 | // 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | // Utility function to load image from given folder 15 | // File type accepted: .jpg 16 | std::vector load_images(std::string folder_name) { 17 | std::vector list_images; 18 | std::string base_name = folder_name; 19 | DIR* dir; 20 | struct dirent *ent; 21 | if((dir = opendir(base_name.c_str())) != NULL) { 22 | while((ent = readdir(dir)) != NULL) { 23 | std::string filename = ent->d_name; 24 | if(filename.length() > 4 && filename.substr(filename.length() - 3) == "jpg") { 25 | std::string newf = base_name + filename; 26 | list_images.push_back(newf); 27 | } 28 | } 29 | } 30 | return list_images; 31 | } 32 | 33 | void print_probabilities(std::string loc, std::string model_path, std::string model_path_linear) { 34 | // Load image with OpenCV. 35 | cv::Mat img = cv::imread(loc); 36 | cv::resize(img, img, cv::Size(224, 224), cv::INTER_CUBIC); 37 | // Convert the image and label to a tensor. 38 | torch::Tensor img_tensor = torch::from_blob(img.data, {1, img.rows, img.cols, 3}, torch::kByte); 39 | img_tensor = img_tensor.permute({0, 3, 1, 2}); // convert to CxHxW 40 | img_tensor = img_tensor.to(torch::kF32); 41 | 42 | // Load the model. 43 | torch::jit::script::Module model; 44 | model = torch::jit::load(model_path); 45 | 46 | torch::nn::Linear model_linear(512, 2); 47 | torch::load(model_linear, model_path_linear); 48 | 49 | // Predict the probabilities for the classes. 50 | std::vector input; 51 | input.push_back(img_tensor); 52 | torch::Tensor prob = model.forward(input).toTensor(); 53 | prob = prob.view({prob.size(0), -1}); 54 | prob = model_linear(prob); 55 | 56 | std::cout << "Printing for location: " << loc << std::endl; 57 | std::cout << "Cat prob: " << *(prob.data())*100. << std::endl; 58 | std::cout << "Dog prob: " << *(prob.data()+1)*100. 
<< std::endl; 59 | } 60 | 61 | int main(int argc, char** argv) 62 | { 63 | // argv[1] is the path to the test image 64 | std::string location = argv[1]; 65 | 66 | // argv[2] contains the pre-trained model without the last layer 67 | // argv[3] contains the trained last FC layer 68 | std::string model_path = argv[2]; 69 | std::string model_path_linear = argv[3]; 70 | 71 | // Load the model. 72 | // You can also use: auto model = torch::jit::load(model_path); 73 | torch::jit::script::Module model = torch::jit::load(model_path); 74 | 75 | // Print probabilities for dog and cat classes 76 | print_probabilities(location, model_path, model_path_linear); 77 | return 0; 78 | } 79 | -------------------------------------------------------------------------------- /cpp/transfer-learning/convert.py: -------------------------------------------------------------------------------- 1 | """ 2 | This Python script converts the pre-trained network into a TorchScript module. 3 | """ 4 | import torch 5 | from torchvision import models 6 | 7 | # Download and load the pre-trained model 8 | model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1) 9 | 10 | # Freeze the pre-trained parameters (no gradient updates needed) 11 | for param in model.parameters(): 12 | param.requires_grad = False 13 | 14 | # Save the model except the final FC Layer 15 | resnet18 = torch.nn.Sequential(*list(model.children())[:-1]) 16 | 17 | example_input = torch.rand(1, 3, 224, 224) 18 | script_module = torch.jit.trace(resnet18, example_input) 19 | script_module.save('resnet18_without_last_layer.pt') 20 | -------------------------------------------------------------------------------- /cpp/transfer-learning/main.h: -------------------------------------------------------------------------------- 1 | // 2 | // main.h 3 | // transfer-learning 4 | // 5 | // Created by Kushashwa Ravi Shrimali on 15/08/19.
6 | // 7 | 8 | #ifndef main_h 9 | #define main_h 10 | 11 | #include <torch/torch.h> 12 | #include <torch/script.h> 13 | #include <opencv2/opencv.hpp> 14 | #include <dirent.h> 15 | #include <iostream> 16 | 17 | // Function to return image read at location given as type torch::Tensor 18 | // Resizes image to (224, 224, 3) 19 | torch::Tensor read_data(std::string location); 20 | 21 | // Function to return label from int (0, 1 for binary and 0, 1, ..., n-1 for n-class classification) as type torch::Tensor 22 | torch::Tensor read_label(int label); 23 | 24 | // Function returns vector of tensors (images) read from the list of images in a folder 25 | std::vector<torch::Tensor> process_images(std::vector<std::string> list_images); 26 | 27 | // Function returns vector of tensors (labels) read from the list of labels 28 | std::vector<torch::Tensor> process_labels(std::vector<int> list_labels); 29 | 30 | // Function to load data from given folder(s) name(s) (folders_name) 31 | // Returns pair of vectors of string (image locations) and int (respective labels) 32 | std::pair<std::vector<std::string>, std::vector<int>> load_data_from_folder(std::vector<std::string> folders_name); 33 | 34 | // Function to train the network on train data 35 | template<typename Dataloader> 36 | void train(torch::jit::script::Module net, torch::nn::Linear lin, Dataloader& data_loader, torch::optim::Optimizer& optimizer, size_t dataset_size); 37 | 38 | // Function to test the network on test data 39 | template<typename Dataloader> 40 | void test(torch::jit::script::Module network, torch::nn::Linear lin, Dataloader& loader, size_t data_size); 41 | 42 | // Custom Dataset class 43 | class CustomDataset : public torch::data::Dataset<CustomDataset> { 44 | private: 45 | /* data */ 46 | // Should be 2 tensors 47 | std::vector<torch::Tensor> states, labels; 48 | size_t ds_size; 49 | public: 50 | CustomDataset(std::vector<std::string> list_images, std::vector<int> list_labels) { 51 | states = process_images(list_images); 52 | labels = process_labels(list_labels); 53 | ds_size = states.size(); 54 | }; 55 | 56 | torch::data::Example<> get(size_t index) override { 57 | /* This should return {torch::Tensor, torch::Tensor} */ 58 | torch::Tensor sample_img = states.at(index); 59 | torch::Tensor sample_label = labels.at(index); 60 | return {sample_img.clone(), sample_label.clone()}; 61 | }; 62 | 63 | torch::optional<size_t> size() const override { 64 | return ds_size; 65 | }; 66 | }; 67 | 68 | #endif /* main_h */ 69 | -------------------------------------------------------------------------------- /dcgan/.gitignore: -------------------------------------------------------------------------------- 1 | lsun 2 | -------------------------------------------------------------------------------- /dcgan/README.md: -------------------------------------------------------------------------------- 1 | # Deep Convolutional Generative Adversarial Networks 2 | 3 | This example implements the paper [Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks](http://arxiv.org/abs/1511.06434) 4 | 5 | The implementation is very close to the Torch implementation [dcgan.torch](https://github.com/soumith/dcgan.torch) 6 | 7 | After every 100 training iterations, the files `real_samples.png` and `fake_samples.png` are written to disk 8 | with the samples from the generative model.
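A minimal sketch (not the exact code in `main.py`; `netG` below is only a stand-in generator) of how such sample grids can be written to disk with `torchvision.utils.save_image`:

```python
import torch
import torch.nn as nn
import torchvision.utils as vutils

# stand-in generator; the real netG is built in main.py from the --nz/--ngf options
netG = nn.Sequential(nn.ConvTranspose2d(100, 3, 64), nn.Tanh())

fixed_noise = torch.randn(64, 100, 1, 1)
fake = netG(fixed_noise)
vutils.save_image(fake.detach(), 'fake_samples.png', normalize=True)
```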
9 | 10 | After every epoch, models are saved to: `netG_epoch_%d.pth` and `netD_epoch_%d.pth` 11 | 12 | ## Downloading the dataset 13 | 14 | You can download the LSUN dataset by cloning [this repo](https://github.com/fyu/lsun) and running 15 | 16 | ``` 17 | python download.py -c bedroom 18 | ``` 19 | 20 | ## Usage 21 | 22 | ``` 23 | usage: main.py [-h] --dataset DATASET --dataroot DATAROOT [--workers WORKERS] 24 | [--batchSize BATCHSIZE] [--imageSize IMAGESIZE] [--nz NZ] 25 | [--ngf NGF] [--ndf NDF] [--niter NITER] [--lr LR] 26 | [--beta1 BETA1] [--cuda] [--ngpu NGPU] [--netG NETG] 27 | [--netD NETD] [--mps] 28 | 29 | optional arguments: 30 | -h, --help show this help message and exit 31 | --dataset DATASET cifar10 | lsun | mnist |imagenet | folder | lfw | fake 32 | --dataroot DATAROOT path to dataset 33 | --workers WORKERS number of data loading workers 34 | --batchSize BATCHSIZE input batch size 35 | --imageSize IMAGESIZE the height / width of the input image to network 36 | --nz NZ size of the latent z vector 37 | --ngf NGF number of filters in the generator 38 | --ndf NDF number of filters in the discriminator 39 | --niter NITER number of epochs to train for 40 | --lr LR learning rate, default=0.0002 41 | --beta1 BETA1 beta1 for adam. default=0.5 42 | --cuda enables cuda 43 | --mps enables macOS GPU 44 | --ngpu NGPU number of GPUs to use 45 | --netG NETG path to netG (to continue training) 46 | --netD NETD path to netD (to continue training) 47 | --outf OUTF folder to output images and model checkpoints 48 | --manualSeed SEED manual seed 49 | --classes CLASSES comma separated list of classes for the lsun data set 50 | ``` 51 | -------------------------------------------------------------------------------- /dcgan/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision==0.20.0 3 | lmdb 4 | -------------------------------------------------------------------------------- /distributed/FSDP/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pt 3 | *.csv -------------------------------------------------------------------------------- /distributed/FSDP/README.md: -------------------------------------------------------------------------------- 1 | ## FSDP T5 2 | 3 | To run the T5 example with FSDP for text summarization: 4 | 5 | ## Get the wikihow dataset 6 | ```bash 7 | 8 | sh download_dataset.sh 9 | 10 | ``` 11 | 12 | ## Install the requirements: 13 | ~~~ 14 | pip install -r requirements.txt 15 | ~~~ 16 | ## Ensure you are running a recent version of PyTorch: 17 | see https://pytorch.org to install at least 1.12 and ideally a current nightly build. 
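For orientation, a minimal sketch (assuming a `torchrun` launch so the process-group environment variables are set) of the core FSDP wrapping this example performs; the real setup in `T5_training.py` additionally applies the mixed-precision and activation-checkpointing policies defined under `policies/`:

```python
import functools
import os

import torch
import torch.distributed as dist
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy
from transformers import T5ForConditionalGeneration
from transformers.models.t5.modeling_t5 import T5Block

dist.init_process_group("nccl")
torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))

# shard each T5Block into its own FSDP unit (see policies/wrapping.py)
wrap_policy = functools.partial(transformer_auto_wrap_policy, transformer_layer_cls={T5Block})
model = T5ForConditionalGeneration.from_pretrained("t5-base").cuda()
model = FSDP(model, auto_wrap_policy=wrap_policy)
```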
18 | 19 | Start the training with Torchrun (adjust nproc_per_node to your GPU count): 20 | 21 | ``` 22 | torchrun --nnodes 1 --nproc_per_node 4 T5_training.py 23 | 24 | ``` 25 | -------------------------------------------------------------------------------- /distributed/FSDP/configs/__init__.py: -------------------------------------------------------------------------------- 1 | from .fsdp import fsdp_config 2 | from .training import train_config 3 | -------------------------------------------------------------------------------- /distributed/FSDP/configs/fsdp.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import ClassVar 3 | from torch.distributed.fsdp import ShardingStrategy 4 | from torch.distributed.fsdp.fully_sharded_data_parallel import StateDictType 5 | 6 | @dataclass 7 | class fsdp_config: 8 | mixed_precision: bool=True 9 | use_fp16: bool=False 10 | seed: int=42 11 | fsdp_activation_checkpointing: bool=False 12 | limit_all_gathers: bool=True 13 | sharding_strategy: ShardingStrategy = ShardingStrategy.FULL_SHARD #HYBRID_SHARD, SHARD_GRAD_OP 14 | checkpoint_type: StateDictType = StateDictType.FULL_STATE_DICT # alternatively can use SHARDED_STATE_DICT to avoid OOMs 15 | save_optimizer: bool=False 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /distributed/FSDP/configs/training.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import ClassVar 3 | 4 | 5 | @dataclass 6 | class train_config: 7 | model_name: str="t5-base" 8 | run_validation: bool=True 9 | batch_size_training: int=4 10 | num_workers_dataloader: int=2 11 | lr: float=0.002 12 | weight_decay: float=0.0 13 | gamma: float= 0.85 14 | use_fp16: bool=False 15 | mixed_precision: bool=True 16 | save_model: bool=False 17 | 18 | 19 | -------------------------------------------------------------------------------- /distributed/FSDP/download_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create the "data" folder if it doesn't exist 4 | mkdir -p data 5 | 6 | # Download the files into the "data" folder 7 | wget -P data https://public-nlp-datasets.s3.us-west-2.amazonaws.com/wikihowAll.csv 8 | wget -P data https://public-nlp-datasets.s3.us-west-2.amazonaws.com/wikihowSep.csv 9 | -------------------------------------------------------------------------------- /distributed/FSDP/model_checkpointing/__init__.py: -------------------------------------------------------------------------------- 1 | from .checkpoint_handler import ( 2 | load_model_checkpoint, 3 | save_model_checkpoint, 4 | save_distributed_model_checkpoint, 5 | load_distributed_model_checkpoint, 6 | load_optimizer_checkpoint, 7 | save_optimizer_checkpoint, 8 | save_model_and_optimizer_sharded, 9 | load_model_sharded, 10 | ) 11 | -------------------------------------------------------------------------------- /distributed/FSDP/policies/__init__.py: -------------------------------------------------------------------------------- 1 | from .mixed_precision import * 2 | from .wrapping import * 3 | from .activation_checkpointing_functions import apply_fsdp_checkpointing 4 | -------------------------------------------------------------------------------- /distributed/FSDP/policies/activation_checkpointing_functions.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import torch.distributed as dist 4 | from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import ( 5 | checkpoint_wrapper, 6 | CheckpointImpl, 7 | apply_activation_checkpointing, 8 | ) 9 | 10 | from transformers.models.t5.modeling_t5 import T5Block 11 | 12 | from functools import partial 13 | 14 | non_reentrant_wrapper = partial( 15 | checkpoint_wrapper, 16 | offload_to_cpu=False, 17 | checkpoint_impl=CheckpointImpl.NO_REENTRANT, 18 | ) 19 | 20 | check_fn = lambda submodule: isinstance(submodule, T5Block) 21 | 22 | 23 | def apply_fsdp_checkpointing(model): 24 | """apply activation checkpointing to model 25 | returns None as model is updated directly 26 | """ 27 | print(f"--> applying fdsp activation checkpointing...") 28 | 29 | apply_activation_checkpointing( 30 | model, checkpoint_wrapper_fn=non_reentrant_wrapper, check_fn=check_fn 31 | ) 32 | -------------------------------------------------------------------------------- /distributed/FSDP/policies/mixed_precision.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch.distributed.fsdp import ( 4 | # FullyShardedDataParallel as FSDP, 5 | # CPUOffload, 6 | MixedPrecision, 7 | # BackwardPrefetch, 8 | # ShardingStrategy, 9 | ) 10 | 11 | # requires grad scaler in main loop 12 | fpSixteen = MixedPrecision( 13 | param_dtype=torch.float16, 14 | # Gradient communication precision. 15 | reduce_dtype=torch.float16, 16 | # Buffer precision. 17 | buffer_dtype=torch.float16, 18 | ) 19 | 20 | bfSixteen = MixedPrecision( 21 | param_dtype=torch.bfloat16, 22 | # Gradient communication precision. 23 | reduce_dtype=torch.bfloat16, 24 | # Buffer precision. 
25 | buffer_dtype=torch.bfloat16, 26 | ) 27 | 28 | bfSixteen_working = MixedPrecision( 29 | param_dtype=torch.float32, 30 | reduce_dtype=torch.bfloat16, 31 | buffer_dtype=torch.bfloat16, 32 | ) 33 | 34 | fp32_policy = MixedPrecision( 35 | param_dtype=torch.float32, 36 | reduce_dtype=torch.float32, 37 | buffer_dtype=torch.float32, 38 | ) 39 | -------------------------------------------------------------------------------- /distributed/FSDP/policies/wrapping.py: -------------------------------------------------------------------------------- 1 | # holds various wrapping policies for fsdp 2 | 3 | 4 | import torch.distributed as dist 5 | import torch.nn as nn 6 | import torch 7 | 8 | from transformers.models.t5.modeling_t5 import T5Block 9 | 10 | from torch.distributed.fsdp.fully_sharded_data_parallel import ( 11 | FullyShardedDataParallel as FSDP, 12 | CPUOffload, 13 | BackwardPrefetch, 14 | MixedPrecision, 15 | ) 16 | from torch.distributed.fsdp.wrap import ( 17 | transformer_auto_wrap_policy, 18 | size_based_auto_wrap_policy, 19 | enable_wrap, 20 | wrap, 21 | ) 22 | 23 | import functools 24 | from typing import Type 25 | 26 | 27 | def get_size_policy(min_params=1e8): 28 | num_wrap_policy = functools.partial( 29 | size_based_auto_wrap_policy, min_num_params=min_params 30 | ) 31 | return num_wrap_policy 32 | 33 | 34 | def get_t5_wrapper(): 35 | """we register our main layer class and use the fsdp transformer wrapping policy 36 | ensures embedding layers are in the root fsdp unit for shared access and that fsdp units map to transformer layers 37 | """ 38 | # ==== use new transformer wrapper 39 | 40 | t5_auto_wrap_policy = functools.partial( 41 | transformer_auto_wrap_policy, 42 | transformer_layer_cls={ 43 | T5Block, 44 | }, 45 | ) 46 | 47 | return t5_auto_wrap_policy 48 | -------------------------------------------------------------------------------- /distributed/FSDP/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | datasets 3 | tqdm 4 | protobuf 5 | SentencePiece 6 | nlp 7 | -------------------------------------------------------------------------------- /distributed/FSDP/summarization_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import json 5 | import time 6 | import logging 7 | import random 8 | import re 9 | from itertools import chain 10 | from string import punctuation 11 | 12 | import pandas as pd 13 | import numpy as np 14 | import torch 15 | from torch.utils.data import Dataset, DataLoader 16 | 17 | from nlp import load_dataset 18 | 19 | from transformers import ( 20 | AdamW, 21 | T5ForConditionalGeneration, 22 | T5Tokenizer, 23 | get_linear_schedule_with_warmup 24 | ) 25 | 26 | class wikihow(Dataset): 27 | def __init__(self, tokenizer, type_path, num_samples, input_length, output_length, print_text=False): 28 | self.dataset = load_dataset('wikihow', 'all', data_dir='data/', split=type_path) 29 | if num_samples: 30 | self.dataset = self.dataset.select(list(range(0, num_samples))) 31 | self.input_length = input_length 32 | self.tokenizer = tokenizer 33 | self.output_length = output_length 34 | self.print_text = print_text 35 | 36 | def __len__(self): 37 | return self.dataset.shape[0] 38 | 39 | def clean_text(self, text): 40 | text = text.replace('Example of text:', '') 41 | text = text.replace('Example of Summary:', '') 42 | text = text.replace('\n','') 43 | text = text.replace('``', '') 44 | text = 
text.replace('"', '') 45 | 46 | return text 47 | 48 | 49 | def convert_to_features(self, example_batch): 50 | # Tokenize contexts and questions (as pairs of inputs) 51 | 52 | if self.print_text: 53 | print("Input Text: ", self.clean_text(example_batch['text'])) 54 | # input_ = self.clean_text(example_batch['text']) + " " 55 | # target_ = self.clean_text(example_batch['headline']) + " " 56 | 57 | input_ = self.clean_text(example_batch['text']) 58 | target_ = self.clean_text(example_batch['headline']) 59 | 60 | source = self.tokenizer.batch_encode_plus([input_], max_length=self.input_length, 61 | padding='max_length', truncation=True, return_tensors="pt") 62 | 63 | targets = self.tokenizer.batch_encode_plus([target_], max_length=self.output_length, 64 | padding='max_length', truncation=True, return_tensors="pt") 65 | 66 | 67 | return source, targets 68 | 69 | def __getitem__(self, index): 70 | source, targets = self.convert_to_features(self.dataset[index]) 71 | 72 | source_ids = source["input_ids"].squeeze() 73 | target_ids = targets["input_ids"].squeeze() 74 | 75 | src_mask = source["attention_mask"].squeeze() 76 | target_mask = targets["attention_mask"].squeeze() 77 | 78 | return {"source_ids": source_ids, "source_mask": src_mask, "target_ids": target_ids, "target_mask": target_mask} 79 | 80 | def get_dataset(tokenizer, type_path, num_samples, args): 81 | return wikihow(tokenizer=tokenizer, type_path=type_path, num_samples=num_samples, input_length=max_input_length, 82 | output_length=max_output_length) 83 | -------------------------------------------------------------------------------- /distributed/FSDP/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .environment import bfloat_support 2 | from .train_utils import setup, cleanup, get_date_of_run, format_metrics_to_gb, train, validation,setup_model 3 | 4 | -------------------------------------------------------------------------------- /distributed/FSDP/utils/environment.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Meta Platforms, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the Apache-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # This is a simple check to confirm that your current server has full bfloat support - 8 | # both GPU native support, and Network communication support. 9 | 10 | # Be warned that if you run on V100 without a check like this, you will be running without native Bfloat16 11 | # support and will find significant performance degradation (but it will not complain via an error). 12 | # Hence the reason for a checker! 
13 | 14 | from pkg_resources import packaging 15 | import torch 16 | import torch.cuda.nccl as nccl 17 | import torch.distributed as dist 18 | 19 | # global flag that confirms ampere architecture, cuda version and 20 | # nccl version to verify bfloat16 native support is ready 21 | 22 | def bfloat_support(): 23 | return ( 24 | torch.version.cuda 25 | and torch.cuda.is_bf16_supported() 26 | and packaging.version.parse(torch.version.cuda).release >= (11, 0) 27 | and dist.is_nccl_available() 28 | and nccl.version() >= (2, 10) 29 | ) 30 | -------------------------------------------------------------------------------- /distributed/FSDP/utils/train_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.distributed as dist 4 | from datetime import datetime 5 | import tqdm 6 | from transformers import AutoTokenizer, GPT2TokenizerFast 7 | from transformers import T5Tokenizer, T5ForConditionalGeneration 8 | 9 | g_gigabyte = 1024**3 10 | 11 | def setup(): 12 | # initialize the process group 13 | dist.init_process_group("nccl") 14 | 15 | 16 | def cleanup(): 17 | dist.destroy_process_group() 18 | 19 | def get_date_of_run(): 20 | """create date and time for file save uniqueness 21 | example: 2022-05-07-08:31:12_PM' 22 | """ 23 | date_of_run = datetime.now().strftime("%Y-%m-%d-%I:%M:%S_%p") 24 | print(f"--> current date and time of run = {date_of_run}") 25 | return date_of_run 26 | 27 | 28 | 29 | def format_metrics_to_gb(item): 30 | """quick function to format numbers to gigabyte and round to 4 digit precision""" 31 | metric_num = item / g_gigabyte 32 | metric_num = round(metric_num, ndigits=4) 33 | return metric_num 34 | 35 | def train(args, model, rank, world_size, train_loader, optimizer, epoch, sampler=None): 36 | model.train() 37 | local_rank = int(os.environ['LOCAL_RANK']) 38 | fsdp_loss = torch.zeros(2).to(local_rank) 39 | 40 | if sampler: 41 | sampler.set_epoch(epoch) 42 | if rank==0: 43 | inner_pbar = tqdm.tqdm( 44 | range(len(train_loader)), colour="blue", desc="r0 Training Epoch" 45 | ) 46 | for batch in train_loader: 47 | for key in batch.keys(): 48 | batch[key] = batch[key].to(local_rank) 49 | optimizer.zero_grad() 50 | output = model(input_ids=batch["source_ids"],attention_mask=batch["source_mask"],labels=batch["target_ids"] ) 51 | loss = output["loss"] 52 | loss.backward() 53 | optimizer.step() 54 | fsdp_loss[0] += loss.item() 55 | fsdp_loss[1] += len(batch) 56 | if rank==0: 57 | inner_pbar.update(1) 58 | 59 | dist.all_reduce(fsdp_loss, op=dist.ReduceOp.SUM) 60 | train_accuracy = fsdp_loss[0] / fsdp_loss[1] 61 | 62 | 63 | if rank == 0: 64 | inner_pbar.close() 65 | print( 66 | f"Train Epoch: \t{epoch}, Loss: \t{train_accuracy:.4f}" 67 | ) 68 | return train_accuracy 69 | 70 | 71 | def validation(model, rank, world_size, val_loader): 72 | model.eval() 73 | correct = 0 74 | local_rank = int(os.environ['LOCAL_RANK']) 75 | fsdp_loss = torch.zeros(2).to(local_rank) 76 | if rank == 0: 77 | inner_pbar = tqdm.tqdm( 78 | range(len(val_loader)), colour="green", desc="Validation Epoch" 79 | ) 80 | with torch.no_grad(): 81 | for batch in val_loader: 82 | for key in batch.keys(): 83 | batch[key] = batch[key].to(local_rank) 84 | output = model(input_ids=batch["source_ids"],attention_mask=batch["source_mask"],labels=batch["target_ids"]) 85 | fsdp_loss[0] += output["loss"].item() # sum up batch loss 86 | fsdp_loss[1] += len(batch) 87 | 88 | if rank==0: 89 | inner_pbar.update(1) 90 | 91 | dist.all_reduce(fsdp_loss, 
op=dist.ReduceOp.SUM) 92 | val_loss = fsdp_loss[0] / fsdp_loss[1] 93 | if rank == 0: 94 | inner_pbar.close() 95 | print(f"Validation Loss: {val_loss:.4f}") 96 | return val_loss 97 | 98 | 99 | def setup_model(model_name): 100 | model = T5ForConditionalGeneration.from_pretrained(model_name) 101 | tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False) 102 | return model, tokenizer 103 | -------------------------------------------------------------------------------- /distributed/ddp-tutorial-series/README.md: -------------------------------------------------------------------------------- 1 | # distributed-pytorch 2 | 3 | Code for the DDP tutorial series at https://pytorch.org/tutorials/beginner/ddp_series_intro.html 4 | 5 | Each code file extends upon the previous one. The series starts with a non-distributed script that runs on a single GPU and incrementally updates to end with multinode training on a Slurm cluster. 6 | 7 | ## Files 8 | * [single_gpu.py](single_gpu.py): Non-distributed training script 9 | 10 | * [multigpu.py](multigpu.py): DDP on a single node 11 | 12 | * [multigpu_torchrun.py](multigpu_torchrun.py): DDP on a single node using Torchrun 13 | 14 | * [multinode.py](multinode.py): DDP on multiple nodes using Torchrun (and optionally Slurm) 15 | * [slurm/setup_pcluster_slurm.md](slurm/setup_pcluster_slurm.md): instructions to set up an AWS cluster 16 | * [slurm/config.yaml.template](slurm/config.yaml.template): configuration to set up an AWS cluster 17 | * [slurm/sbatch_run.sh](slurm/sbatch_run.sh): slurm script to launch the training job 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /distributed/ddp-tutorial-series/datautils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | 4 | class MyTrainDataset(Dataset): 5 | def __init__(self, size): 6 | self.size = size 7 | self.data = [(torch.rand(20), torch.rand(1)) for _ in range(size)] 8 | 9 | def __len__(self): 10 | return self.size 11 | 12 | def __getitem__(self, index): 13 | return self.data[index] -------------------------------------------------------------------------------- /distributed/ddp-tutorial-series/multigpu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.utils.data import Dataset, DataLoader 4 | from datautils import MyTrainDataset 5 | 6 | import torch.multiprocessing as mp 7 | from torch.utils.data.distributed import DistributedSampler 8 | from torch.nn.parallel import DistributedDataParallel as DDP 9 | from torch.distributed import init_process_group, destroy_process_group 10 | import os 11 | 12 | 13 | def ddp_setup(rank, world_size): 14 | """ 15 | Args: 16 | rank: Unique identifier of each process 17 | world_size: Total number of processes 18 | """ 19 | os.environ["MASTER_ADDR"] = "localhost" 20 | os.environ["MASTER_PORT"] = "12355" 21 | torch.cuda.set_device(rank) 22 | init_process_group(backend="nccl", rank=rank, world_size=world_size) 23 | 24 | class Trainer: 25 | def __init__( 26 | self, 27 | model: torch.nn.Module, 28 | train_data: DataLoader, 29 | optimizer: torch.optim.Optimizer, 30 | gpu_id: int, 31 | save_every: int, 32 | ) -> None: 33 | self.gpu_id = gpu_id 34 | self.model = model.to(gpu_id) 35 | self.train_data = train_data 36 | self.optimizer = optimizer 37 | self.save_every = save_every 38 | self.model = DDP(model, device_ids=[gpu_id]) 
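        # Wrapping with DDP broadcasts the model's parameters from rank 0 to every rank
        # and registers hooks that all-reduce gradients during backward(), so all ranks
        # step with identical weights.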
39 | 40 | def _run_batch(self, source, targets): 41 | self.optimizer.zero_grad() 42 | output = self.model(source) 43 | loss = F.cross_entropy(output, targets) 44 | loss.backward() 45 | self.optimizer.step() 46 | 47 | def _run_epoch(self, epoch): 48 | b_sz = len(next(iter(self.train_data))[0]) 49 | print(f"[GPU{self.gpu_id}] Epoch {epoch} | Batchsize: {b_sz} | Steps: {len(self.train_data)}") 50 | self.train_data.sampler.set_epoch(epoch) 51 | for source, targets in self.train_data: 52 | source = source.to(self.gpu_id) 53 | targets = targets.to(self.gpu_id) 54 | self._run_batch(source, targets) 55 | 56 | def _save_checkpoint(self, epoch): 57 | ckp = self.model.module.state_dict() 58 | PATH = "checkpoint.pt" 59 | torch.save(ckp, PATH) 60 | print(f"Epoch {epoch} | Training checkpoint saved at {PATH}") 61 | 62 | def train(self, max_epochs: int): 63 | for epoch in range(max_epochs): 64 | self._run_epoch(epoch) 65 | if self.gpu_id == 0 and epoch % self.save_every == 0: 66 | self._save_checkpoint(epoch) 67 | 68 | 69 | def load_train_objs(): 70 | train_set = MyTrainDataset(2048) # load your dataset 71 | model = torch.nn.Linear(20, 1) # load your model 72 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 73 | return train_set, model, optimizer 74 | 75 | 76 | def prepare_dataloader(dataset: Dataset, batch_size: int): 77 | return DataLoader( 78 | dataset, 79 | batch_size=batch_size, 80 | pin_memory=True, 81 | shuffle=False, 82 | sampler=DistributedSampler(dataset) 83 | ) 84 | 85 | 86 | def main(rank: int, world_size: int, save_every: int, total_epochs: int, batch_size: int): 87 | ddp_setup(rank, world_size) 88 | dataset, model, optimizer = load_train_objs() 89 | train_data = prepare_dataloader(dataset, batch_size) 90 | trainer = Trainer(model, train_data, optimizer, rank, save_every) 91 | trainer.train(total_epochs) 92 | destroy_process_group() 93 | 94 | 95 | if __name__ == "__main__": 96 | import argparse 97 | parser = argparse.ArgumentParser(description='simple distributed training job') 98 | parser.add_argument('total_epochs', type=int, help='Total epochs to train the model') 99 | parser.add_argument('save_every', type=int, help='How often to save a snapshot') 100 | parser.add_argument('--batch_size', default=32, type=int, help='Input batch size on each device (default: 32)') 101 | args = parser.parse_args() 102 | 103 | world_size = torch.cuda.device_count() 104 | mp.spawn(main, args=(world_size, args.save_every, args.total_epochs, args.batch_size), nprocs=world_size) 105 | -------------------------------------------------------------------------------- /distributed/ddp-tutorial-series/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.11.0 -------------------------------------------------------------------------------- /distributed/ddp-tutorial-series/single_gpu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.utils.data import Dataset, DataLoader 4 | from datautils import MyTrainDataset 5 | 6 | 7 | class Trainer: 8 | def __init__( 9 | self, 10 | model: torch.nn.Module, 11 | train_data: DataLoader, 12 | optimizer: torch.optim.Optimizer, 13 | gpu_id: int, 14 | save_every: int, 15 | ) -> None: 16 | self.gpu_id = gpu_id 17 | self.model = model.to(gpu_id) 18 | self.train_data = train_data 19 | self.optimizer = optimizer 20 | self.save_every = save_every 21 | 22 | def _run_batch(self, source, targets): 23 | 
self.optimizer.zero_grad() 24 | output = self.model(source) 25 | loss = F.cross_entropy(output, targets) 26 | loss.backward() 27 | self.optimizer.step() 28 | 29 | def _run_epoch(self, epoch): 30 | b_sz = len(next(iter(self.train_data))[0]) 31 | print(f"[GPU{self.gpu_id}] Epoch {epoch} | Batchsize: {b_sz} | Steps: {len(self.train_data)}") 32 | for source, targets in self.train_data: 33 | source = source.to(self.gpu_id) 34 | targets = targets.to(self.gpu_id) 35 | self._run_batch(source, targets) 36 | 37 | def _save_checkpoint(self, epoch): 38 | ckp = self.model.state_dict() 39 | PATH = "checkpoint.pt" 40 | torch.save(ckp, PATH) 41 | print(f"Epoch {epoch} | Training checkpoint saved at {PATH}") 42 | 43 | def train(self, max_epochs: int): 44 | for epoch in range(max_epochs): 45 | self._run_epoch(epoch) 46 | if epoch % self.save_every == 0: 47 | self._save_checkpoint(epoch) 48 | 49 | 50 | def load_train_objs(): 51 | train_set = MyTrainDataset(2048) # load your dataset 52 | model = torch.nn.Linear(20, 1) # load your model 53 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 54 | return train_set, model, optimizer 55 | 56 | 57 | def prepare_dataloader(dataset: Dataset, batch_size: int): 58 | return DataLoader( 59 | dataset, 60 | batch_size=batch_size, 61 | pin_memory=True, 62 | shuffle=True 63 | ) 64 | 65 | 66 | def main(device, total_epochs, save_every, batch_size): 67 | dataset, model, optimizer = load_train_objs() 68 | train_data = prepare_dataloader(dataset, batch_size) 69 | trainer = Trainer(model, train_data, optimizer, device, save_every) 70 | trainer.train(total_epochs) 71 | 72 | 73 | if __name__ == "__main__": 74 | import argparse 75 | parser = argparse.ArgumentParser(description='simple distributed training job') 76 | parser.add_argument('total_epochs', type=int, help='Total epochs to train the model') 77 | parser.add_argument('save_every', type=int, help='How often to save a snapshot') 78 | parser.add_argument('--batch_size', default=32, type=int, help='Input batch size on each device (default: 32)') 79 | args = parser.parse_args() 80 | 81 | device = 0 # shorthand for cuda:0 82 | main(device, args.total_epochs, args.save_every, args.batch_size) 83 | -------------------------------------------------------------------------------- /distributed/ddp-tutorial-series/slurm/config.yaml.template: -------------------------------------------------------------------------------- 1 | Region: us-east-1 2 | 3 | Image: 4 | Os: ubuntu1804 5 | 6 | SharedStorage: 7 | - MountDir: /shared 8 | Name: shared-fs 9 | StorageType: FsxLustre 10 | FsxLustreSettings: 11 | StorageCapacity: 1200 12 | DeploymentType: SCRATCH_1 13 | StorageType: SSD 14 | 15 | HeadNode: 16 | InstanceType: c5.xlarge 17 | Networking: 18 | SubnetId: subnet-xxxxxxx 19 | Ssh: 20 | KeyName: your-keyname-file 21 | 22 | Scheduling: 23 | Scheduler: slurm 24 | SlurmQueues: 25 | - Name: train 26 | ComputeResources: 27 | - Name: p32xlarge 28 | InstanceType: p3.2xlarge 29 | MinCount: 0 30 | MaxCount: 5 31 | Networking: 32 | SubnetIds: 33 | - subnet-xxxxxxx 34 | -------------------------------------------------------------------------------- /distributed/ddp-tutorial-series/slurm/sbatch_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --job-name=multinode-example 4 | #SBATCH --nodes=4 5 | #SBATCH --ntasks=4 6 | #SBATCH --gpus-per-task=1 7 | #SBATCH --cpus-per-task=4 8 | 9 | nodes=( $( scontrol show hostnames $SLURM_JOB_NODELIST ) ) 10 | nodes_array=($nodes) 11 | 
head_node=${nodes_array[0]} 12 | head_node_ip=$(srun --nodes=1 --ntasks=1 -w "$head_node" hostname --ip-address) 13 | 14 | echo Node IP: $head_node_ip 15 | export LOGLEVEL=INFO 16 | 17 | srun torchrun \ 18 | --nnodes 4 \ 19 | --nproc_per_node 1 \ 20 | --rdzv_id $RANDOM \ 21 | --rdzv_backend c10d \ 22 | --rdzv_endpoint $head_node_ip:29500 \ 23 | /shared/examples/multinode_torchrun.py 50 10 -------------------------------------------------------------------------------- /distributed/ddp-tutorial-series/slurm/setup_pcluster_slurm.md: -------------------------------------------------------------------------------- 1 | # Setup AWS cluster with pcluster 2 | 3 | ## 1. Sign in to an AWS instance 4 | 5 | ## 2. Install pcluster 6 | ``` 7 | pip3 install awscli -U --user 8 | pip3 install "aws-parallelcluster" --upgrade --user 9 | ``` 10 | 11 | ## 3. Create a cluster config file 12 | ``` 13 | pcluster configure --config config.yaml 14 | ``` 15 | See config.yaml.template for an example 16 | 17 | 18 | ## 4. Create the cluster 19 | ``` 20 | pcluster create-cluster --cluster-name dist-ml --cluster-configuration config.yaml 21 | ``` 22 | 23 | ### 4a. Track progress 24 | ``` 25 | pcluster list-clusters 26 | ``` 27 | 28 | ## 5. Login to cluster headnode 29 | ``` 30 | pcluster ssh --cluster-name dist-ml -i your-keyname-file 31 | ``` 32 | 33 | ## 6. Install dependencies 34 | ``` 35 | sudo apt-get update 36 | sudo apt-get install -y python3-venv 37 | python3 -m venv /shared/venv/ 38 | source /shared/venv/bin/activate 39 | pip install wheel 40 | echo 'source /shared/venv/bin/activate' >> ~/.bashrc 41 | ``` 42 | 43 | ## 7. Download training code and install requirements 44 | ``` 45 | cd /shared 46 | git clone --depth 1 https://github.com/pytorch/examples; 47 | cd /shared/examples 48 | git filter-branch --prune-empty --subdirectory-filter distributed/ddp-tutorial-series 49 | python3 -m pip install setuptools==59.5.0 50 | pip install -r requirements.txt 51 | ``` -------------------------------------------------------------------------------- /distributed/ddp/example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | import tempfile 5 | from urllib.parse import urlparse 6 | 7 | import torch 8 | import torch.distributed as dist 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | 12 | from torch.nn.parallel import DistributedDataParallel as DDP 13 | 14 | class ToyModel(nn.Module): 15 | def __init__(self): 16 | super(ToyModel, self).__init__() 17 | self.net1 = nn.Linear(10, 10) 18 | self.relu = nn.ReLU() 19 | self.net2 = nn.Linear(10, 5) 20 | 21 | def forward(self, x): 22 | return self.net2(self.relu(self.net1(x))) 23 | 24 | 25 | def demo_basic(local_world_size, local_rank): 26 | 27 | # setup devices for this process. For local_world_size = 2, num_gpus = 8, 28 | # rank 0 uses GPUs [0, 1, 2, 3] and 29 | # rank 1 uses GPUs [4, 5, 6, 7]. 
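    # Each process therefore drives n of the node's GPUs, and DDP replicates the
    # model across the devices listed in device_ids for that process.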
30 | n = torch.cuda.device_count() // local_world_size 31 | device_ids = list(range(local_rank * n, (local_rank + 1) * n)) 32 | 33 | print( 34 | f"[{os.getpid()}] rank = {dist.get_rank()}, " 35 | + f"world_size = {dist.get_world_size()}, n = {n}, device_ids = {device_ids} \n", end='' 36 | ) 37 | 38 | model = ToyModel().cuda(device_ids[0]) 39 | ddp_model = DDP(model, device_ids) 40 | 41 | loss_fn = nn.MSELoss() 42 | optimizer = optim.SGD(ddp_model.parameters(), lr=0.001) 43 | 44 | optimizer.zero_grad() 45 | outputs = ddp_model(torch.randn(20, 10)) 46 | labels = torch.randn(20, 5).to(device_ids[0]) 47 | loss_fn(outputs, labels).backward() 48 | optimizer.step() 49 | 50 | 51 | def spmd_main(local_world_size, local_rank): 52 | # These are the parameters used to initialize the process group 53 | env_dict = { 54 | key: os.environ[key] 55 | for key in ("MASTER_ADDR", "MASTER_PORT", "RANK", "WORLD_SIZE") 56 | } 57 | 58 | if sys.platform == "win32": 59 | # Distributed package only covers collective communications with Gloo 60 | # backend and FileStore on Windows platform. Set init_method parameter 61 | # in init_process_group to a local file. 62 | if "INIT_METHOD" in os.environ.keys(): 63 | print(f"init_method is {os.environ['INIT_METHOD']}") 64 | url_obj = urlparse(os.environ["INIT_METHOD"]) 65 | if url_obj.scheme.lower() != "file": 66 | raise ValueError("Windows only supports FileStore") 67 | else: 68 | init_method = os.environ["INIT_METHOD"] 69 | else: 70 | # It is a example application, For convience, we create a file in temp dir. 71 | temp_dir = tempfile.gettempdir() 72 | init_method = f"file:///{os.path.join(temp_dir, 'ddp_example')}" 73 | dist.init_process_group(backend="gloo", init_method=init_method, rank=int(env_dict["RANK"]), world_size=int(env_dict["WORLD_SIZE"])) 74 | else: 75 | print(f"[{os.getpid()}] Initializing process group with: {env_dict}") 76 | dist.init_process_group(backend="nccl") 77 | 78 | print( 79 | f"[{os.getpid()}]: world_size = {dist.get_world_size()}, " 80 | + f"rank = {dist.get_rank()}, backend={dist.get_backend()} \n", end='' 81 | ) 82 | 83 | demo_basic(local_world_size, local_rank) 84 | 85 | # Tear down the process group 86 | dist.destroy_process_group() 87 | 88 | 89 | if __name__ == "__main__": 90 | parser = argparse.ArgumentParser() 91 | # This is passed in via launch.py 92 | parser.add_argument("--local_rank", type=int, default=0) 93 | # This needs to be explicitly passed in 94 | parser.add_argument("--local_world_size", type=int, default=1) 95 | args = parser.parse_args() 96 | # The main entry point is called directly without using subprocess 97 | spmd_main(args.local_world_size, args.local_rank) 98 | -------------------------------------------------------------------------------- /distributed/ddp/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | -------------------------------------------------------------------------------- /distributed/minGPT-ddp/README.md: -------------------------------------------------------------------------------- 1 | # minGPT-DDP 2 | 3 | Code accompanying the tutorial at https://pytorch.org/tutorials/intermediate/ddp_series_minGPT.html for training a GPT-like model with Distributed Data Parallel (DDP) in PyTorch. 4 | 5 | Files marked with an asterisk (*) are adapted from the minGPT repo (https://github.com/karpathy/minGPT). 
6 | 7 | - [trainer.py](mingpt/trainer.py) includes the Trainer class that runs the distributed training iterations on the model with the provided dataset. 8 | - [model.py *](mingpt/model.py) defines the model architecture. 9 | - [char_dataset.py *](mingpt/char_dataset.py) contains the `Dataset`class for a character-level dataset. 10 | - [gpt2_train_cfg.yaml](mingpt/gpt2_train_cfg.yaml) contains the configurations for data, model, optimizer and training run. 11 | - [main.py](mingpt/main.py) is the entry point to the trainig job. It sets up the DDP process group, reads all the configurations and runs the training job. 12 | - [slurm/](mingpt/slurm) contains files for setting up an AWS cluster and the slurm script to run multinode training. -------------------------------------------------------------------------------- /distributed/minGPT-ddp/mingpt/char_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | import fsspec 4 | from dataclasses import dataclass 5 | 6 | """ 7 | Adapted from https://github.com/karpathy/minGPT/blob/master/projects/chargpt/chargpt.py 8 | """ 9 | 10 | @dataclass 11 | class DataConfig: 12 | path: str = None 13 | block_size: int = None 14 | train_split: float = None 15 | truncate: float = 1.0 16 | 17 | class CharDataset(Dataset): 18 | 19 | def __init__(self, data_cfg: DataConfig): #data_path: str, block_size): 20 | data = fsspec.open(data_cfg.path).open().read().decode('utf-8') 21 | data = data[ : int(len(data) * data_cfg.truncate)] 22 | 23 | chars = sorted(list(set(data))) 24 | data_size, vocab_size = len(data), len(chars) 25 | print('Data has %d characters, %d unique.' % (data_size, vocab_size)) 26 | 27 | self.stoi = {ch: i for i, ch in enumerate(chars)} 28 | self.itos = {i: ch for i, ch in enumerate(chars)} 29 | self.block_size = data_cfg.block_size 30 | self.vocab_size = vocab_size 31 | self.data = data 32 | 33 | def __len__(self): 34 | return len(self.data) - self.block_size 35 | 36 | def __getitem__(self, idx): 37 | # grab a chunk of (block_size + 1) characters from the data 38 | chunk = self.data[idx:idx + self.block_size + 1] 39 | # encode every character to an integer 40 | dix = [self.stoi[s] for s in chunk] 41 | x = torch.tensor(dix[:-1], dtype=torch.long) 42 | y = torch.tensor(dix[1:], dtype=torch.long) 43 | return x, y 44 | -------------------------------------------------------------------------------- /distributed/minGPT-ddp/mingpt/gpt2_train_cfg.yaml: -------------------------------------------------------------------------------- 1 | data_config: 2 | path: https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt 3 | block_size: 128 4 | train_split: 0.9 5 | truncate: 0.05 6 | gpt_config: 7 | n_layer: 8 8 | n_head: 8 9 | n_embd: 512 10 | trainer_config: 11 | max_epochs: 10 12 | batch_size: 216 13 | data_loader_workers: 4 14 | grad_norm_clip: 1.0 15 | snapshot_path: gpt_snapshot.pt 16 | save_every: 3 17 | use_amp: True 18 | optimizer_config: 19 | weight_decay: 0.1 20 | learning_rate: 0.0003 21 | 22 | hydra: 23 | run: 24 | dir: ./ 25 | -------------------------------------------------------------------------------- /distributed/minGPT-ddp/mingpt/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.data import random_split 4 | from torch.distributed import init_process_group, destroy_process_group 5 | from model import GPT, GPTConfig, 
OptimizerConfig, create_optimizer 6 | from trainer import Trainer, TrainerConfig 7 | from char_dataset import CharDataset, DataConfig 8 | from omegaconf import DictConfig 9 | import hydra 10 | 11 | 12 | def ddp_setup(): 13 | init_process_group(backend="nccl") 14 | torch.cuda.set_device(int(os.environ["LOCAL_RANK"])) 15 | 16 | def get_train_objs(gpt_cfg: GPTConfig, opt_cfg: OptimizerConfig, data_cfg: DataConfig): 17 | dataset = CharDataset(data_cfg) 18 | train_len = int(len(dataset) * data_cfg.train_split) 19 | train_set, test_set = random_split(dataset, [train_len, len(dataset) - train_len]) 20 | 21 | gpt_cfg.vocab_size = dataset.vocab_size 22 | gpt_cfg.block_size = dataset.block_size 23 | model = GPT(gpt_cfg) 24 | optimizer = create_optimizer(model, opt_cfg) 25 | 26 | return model, optimizer, train_set, test_set 27 | 28 | @hydra.main(version_base=None, config_path=".", config_name="gpt2_train_cfg") 29 | def main(cfg: DictConfig): 30 | ddp_setup() 31 | 32 | gpt_cfg = GPTConfig(**cfg['gpt_config']) 33 | opt_cfg = OptimizerConfig(**cfg['optimizer_config']) 34 | data_cfg = DataConfig(**cfg['data_config']) 35 | trainer_cfg = TrainerConfig(**cfg['trainer_config']) 36 | 37 | model, optimizer, train_data, test_data = get_train_objs(gpt_cfg, opt_cfg, data_cfg) 38 | trainer = Trainer(trainer_cfg, model, optimizer, train_data, test_data) 39 | trainer.train() 40 | 41 | destroy_process_group() 42 | 43 | 44 | if __name__ == "__main__": 45 | main() 46 | -------------------------------------------------------------------------------- /distributed/minGPT-ddp/mingpt/slurm/config.yaml.template: -------------------------------------------------------------------------------- 1 | Region: us-east-1 2 | 3 | Image: 4 | Os: ubuntu1804 5 | 6 | SharedStorage: 7 | - MountDir: /shared 8 | Name: shared-fs 9 | StorageType: FsxLustre 10 | FsxLustreSettings: 11 | StorageCapacity: 1200 12 | DeploymentType: SCRATCH_1 13 | StorageType: SSD 14 | 15 | HeadNode: 16 | InstanceType: c5.xlarge 17 | Networking: 18 | SubnetId: subnet-xxxxxxx 19 | Ssh: 20 | KeyName: your-keyname-file 21 | 22 | Scheduling: 23 | Scheduler: slurm 24 | SlurmQueues: 25 | - Name: train 26 | ComputeResources: 27 | - Name: p32xlarge 28 | InstanceType: p3.2xlarge 29 | MinCount: 0 30 | MaxCount: 5 31 | Networking: 32 | SubnetIds: 33 | - subnet-xxxxxxx 34 | -------------------------------------------------------------------------------- /distributed/minGPT-ddp/mingpt/slurm/sbatch_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --job-name=multinode-example 4 | #SBATCH --nodes=2 5 | #SBATCH --ntasks=2 6 | #SBATCH --gpus-per-task=1 7 | #SBATCH --cpus-per-task=4 8 | 9 | nodes=( $( scontrol show hostnames $SLURM_JOB_NODELIST ) ) 10 | nodes_array=($nodes) 11 | head_node=${nodes_array[0]} 12 | head_node_ip=$(srun --nodes=1 --ntasks=1 -w "$head_node" hostname --ip-address) 13 | 14 | echo Node IP: $head_node_ip 15 | export LOGLEVEL=INFO 16 | 17 | srun torchrun \ 18 | --nnodes 2 \ 19 | --nproc_per_node 1 \ 20 | --rdzv_id $RANDOM \ 21 | --rdzv_backend c10d \ 22 | --rdzv_endpoint $head_node_ip:29500 \ 23 | /shared/examples/mingpt/main.py 24 | 25 | 26 | -------------------------------------------------------------------------------- /distributed/minGPT-ddp/mingpt/slurm/setup_pcluster_slurm.md: -------------------------------------------------------------------------------- 1 | # Setup AWS cluster with pcluster 2 | Refer https://www.hpcworkshops.com/04-pcluster-cli.html 3 | 4 | ## 1. 
Sign in to an AWS instance 5 | 6 | ## 2. Install pcluster 7 | ``` 8 | pip3 install awscli -U --user 9 | pip3 install "aws-parallelcluster" --upgrade --user 10 | ``` 11 | 12 | ## 3. Create a cluster config file 13 | ``` 14 | pcluster configure --config config.yaml 15 | ``` 16 | See config.yaml.template for an example. Ensure you have a valid EC2 key-pair file 17 | 18 | 19 | ## 4. Create the cluster 20 | ``` 21 | pcluster create-cluster --cluster-name dist-ml --cluster-configuration config.yaml 22 | ``` 23 | 24 | ### 4a. Track progress 25 | ``` 26 | pcluster list-clusters 27 | ``` 28 | 29 | ## 5. Login to cluster headnode 30 | ``` 31 | pcluster ssh --cluster-name dist-ml -i your-keypair-file 32 | ``` 33 | 34 | ## 6. Install dependencies 35 | ``` 36 | sudo apt-get update 37 | sudo apt-get install -y python3-venv 38 | python3 -m venv /shared/venv/ 39 | source /shared/venv/bin/activate 40 | pip install wheel 41 | echo 'source /shared/venv/bin/activate' >> ~/.bashrc 42 | ``` 43 | 44 | ## 7. Download training code and install requirements 45 | ``` 46 | cd /shared 47 | git clone --depth 1 https://github.com/pytorch/examples; 48 | cd /shared/examples 49 | git filter-branch --prune-empty --subdirectory-filter distributed/minGPT-ddp 50 | python3 -m pip install setuptools==59.5.0 51 | pip install -r requirements.txt 52 | ``` 53 | -------------------------------------------------------------------------------- /distributed/minGPT-ddp/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.11.0 2 | fsspec 3 | boto3 4 | hydra-core 5 | requests 6 | aiohttp 7 | -------------------------------------------------------------------------------- /distributed/rpc/batch/README.md: -------------------------------------------------------------------------------- 1 | # Examples For Asynchronous RPC User Functions 2 | 3 | This folder contains two examples for [`@rpc.functions.async_execution`](https://pytorch.org/docs/master/rpc.html#torch.distributed.rpc.functions.async_execution): 4 | 5 | 1. Synchronized Batch Update Parameter Server: uses `@rpc.functions.async_execution` 6 | for parameter update and retrieving. This serves as a simple starter example 7 | for batch RPC. 8 | ``` 9 | pip install -r requirements.txt 10 | python parameter_server.py 11 | ``` 12 | 2. Multi-Observer with Batch-Processing Agent: uses `@rpc.functions.async_execution` 13 | to run multiple observed states through the policy to get actions. 14 | ``` 15 | pip install -r requirements.txt 16 | python reinforce.py 17 | ``` 18 | -------------------------------------------------------------------------------- /distributed/rpc/batch/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.9.0 2 | torchvision==0.7.0 3 | numpy 4 | gym 5 | -------------------------------------------------------------------------------- /distributed/rpc/ddp_rpc/README.md: -------------------------------------------------------------------------------- 1 | Distributed DataParallel + Distributed RPC Framework Example 2 | 3 | The example shows how to combine Distributed DataParallel with the Distributed 4 | RPC Framework. There are two trainer nodes, 1 master node and 1 parameter 5 | server in the example. 6 | 7 | The master node creates an embedding table on the parameter server and drives 8 | the training loop on the trainers. 
The model consists of a dense part 9 | (nn.Linear) replicated on the trainers via Distributed DataParallel and a 10 | sparse part (nn.EmbeddingBag) which resides on the parameter server. Each 11 | trainer performs an embedding lookup on the parameter server (using the 12 | Distributed RPC Framework) and then executes its local nn.Linear module. 13 | During the backward pass, the gradients for the dense part are aggregated via 14 | allreduce by DDP and the distributed backward pass updates the parameters for 15 | the embedding table on the parameter server. 16 | 17 | 18 | ``` 19 | pip install -r requirements.txt 20 | python main.py 21 | ``` 22 | -------------------------------------------------------------------------------- /distributed/rpc/ddp_rpc/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.6.0 2 | -------------------------------------------------------------------------------- /distributed/rpc/parameter_server/README.md: -------------------------------------------------------------------------------- 1 | ### RPC-based distributed training 2 | 3 | This is a basic example of RPC-based training that uses several trainers to remotely train a model hosted on a server. 4 | 5 | To run the example locally, run the following command for the server and for each worker you wish to spawn, in separate terminal windows: 6 | `python rpc_parameter_server.py --world_size=WORLD_SIZE --rank=RANK`. For example, for a master node with world size of 2, the command would be `python rpc_parameter_server.py --world_size=2 --rank=0`. The trainer can then be launched with the command `python rpc_parameter_server.py --world_size=2 --rank=1` in a separate window, and this will begin training with one server and a single trainer. 7 | 8 | Note that for demonstration purposes, this example supports only between 0 and 2 GPUs, although the pattern can be extended to make use of additional GPUs. To configure the number of GPUs, pass in `--num_gpus=N` to your training command. 9 | 10 | You can pass in the command line arguments `--master_addr=
` and `master_port=PORT` to indicate the address:port that the master worker is listening on. All workers will contact the master for rendezvous during worker discovery. By default, `master_addr` will be `localhost` and `master_port` will be 29500. 11 | -------------------------------------------------------------------------------- /distributed/rpc/pipeline/README.md: -------------------------------------------------------------------------------- 1 | Distributed Pipeline Parallel Example 2 | 3 | This example shows how to distribute a ResNet50 model on two RPC workers and 4 | then implement distributed pipeline parallelism using RPC. With pipeline 5 | parallelism, every input batch is divided into micro-batches and these 6 | micro-batches are fed into the model in a pipelined fashion to increase the 7 | amortized device utilization. Note that this example only parallelizes the 8 | forward pass, which can be viewed as the distributed counterpart of the 9 | [single machine pipeline parallel](https://pytorch.org/tutorials/intermediate/model_parallel_tutorial.html#speed-up-by-pipelining-inputs) 10 | example. 11 | 12 | ``` 13 | pip install -r requirements.txt 14 | python main.py 15 | ``` 16 | -------------------------------------------------------------------------------- /distributed/rpc/pipeline/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.9.0 2 | torchvision==0.7.0 -------------------------------------------------------------------------------- /distributed/rpc/rl/README.md: -------------------------------------------------------------------------------- 1 | Distributed Multi-Observer Single-Agent Reinforcement Learning Example 2 | 3 | This example demonstrates the `torch.distributed.rpc` API using a CartPole 4 | reinforcement learning example. Please note that the goal is to present the RPC 5 | API instead of building the best CartPole solver. 6 | 7 | ``` 8 | pip install -r requirements.txt 9 | python main.py 10 | ``` 11 | -------------------------------------------------------------------------------- /distributed/rpc/rnn/README.md: -------------------------------------------------------------------------------- 1 | Distributed RNN Model Parallel Example 2 | 3 | This example shows how to build an RNN model using RPC where different 4 | components of the RNN model can be placed on different workers. 5 | 6 | ``` 7 | pip install -r requirements.txt 8 | python main.py 9 | ``` 10 | -------------------------------------------------------------------------------- /distributed/rpc/rnn/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.distributed.autograd as dist_autograd 5 | import torch.distributed.rpc as rpc 6 | import torch.multiprocessing as mp 7 | import torch.optim as optim 8 | from torch.distributed.optim import DistributedOptimizer 9 | 10 | import rnn 11 | 12 | 13 | def _run_trainer(): 14 | r""" 15 | The trainer creates a distributed RNNModel and a DistributedOptimizer. Then, 16 | it performs training using random input data.
17 | """ 18 | batch = 5 19 | ntoken = 7 20 | ninp = 2 21 | 22 | nhid = 3 23 | nindices = 6 24 | nlayers = 4 25 | hidden = ( 26 | torch.randn(nlayers, nindices, nhid), 27 | torch.randn(nlayers, nindices, nhid) 28 | ) 29 | 30 | model = rnn.RNNModel('ps', ntoken, ninp, nhid, nlayers) 31 | 32 | # setup distributed optimizer 33 | opt = DistributedOptimizer( 34 | optim.SGD, 35 | model.parameter_rrefs(), 36 | lr=0.05, 37 | ) 38 | 39 | criterion = torch.nn.CrossEntropyLoss() 40 | 41 | def get_next_batch(): 42 | for _ in range(5): 43 | data = torch.LongTensor(batch, nindices) % ntoken 44 | target = torch.LongTensor(batch, ntoken) % nindices 45 | yield data, target 46 | 47 | # train for 10 iterations 48 | for epoch in range(10): 49 | # create distributed autograd context 50 | for data, target in get_next_batch(): 51 | with dist_autograd.context() as context_id: 52 | hidden[0].detach_() 53 | hidden[1].detach_() 54 | output, hidden = model(data, hidden) 55 | loss = criterion(output, target) 56 | # run distributed backward pass 57 | dist_autograd.backward(context_id, [loss]) 58 | # run distributed optimizer 59 | opt.step(context_id) 60 | # not necessary to zero grads as each iteration creates a different 61 | # distributed autograd context which hosts different grads 62 | print("Training epoch {}".format(epoch)) 63 | 64 | 65 | def run_worker(rank, world_size): 66 | r""" 67 | A wrapper function that initializes RPC, calls the function, and shuts down 68 | RPC. 69 | """ 70 | os.environ['MASTER_ADDR'] = 'localhost' 71 | os.environ['MASTER_PORT'] = '29500' 72 | if rank == 1: 73 | rpc.init_rpc("trainer", rank=rank, world_size=world_size) 74 | _run_trainer() 75 | else: 76 | rpc.init_rpc("ps", rank=rank, world_size=world_size) 77 | # parameter server does nothing 78 | pass 79 | 80 | # block until all rpcs finish 81 | rpc.shutdown() 82 | 83 | 84 | if __name__ == "__main__": 85 | world_size = 2 86 | mp.spawn(run_worker, args=(world_size, ), nprocs=world_size, join=True) 87 | -------------------------------------------------------------------------------- /distributed/rpc/rnn/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | -------------------------------------------------------------------------------- /distributed/rpc/rnn/rnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.distributed.rpc as rpc 4 | from torch.distributed.rpc import RRef 5 | 6 | 7 | def _call_method(method, rref, *args, **kwargs): 8 | r""" 9 | a helper function to call a method on the given RRef 10 | """ 11 | return method(rref.local_value(), *args, **kwargs) 12 | 13 | 14 | def _remote_method(method, rref, *args, **kwargs): 15 | r""" 16 | a helper function to run method on the owner of rref and fetch back the 17 | result using RPC 18 | """ 19 | return rpc.rpc_sync( 20 | rref.owner(), 21 | _call_method, 22 | args=[method, rref] + list(args), 23 | kwargs=kwargs 24 | ) 25 | 26 | 27 | def _parameter_rrefs(module): 28 | r""" 29 | Create one RRef for each parameter in the given local module, and return a 30 | list of RRefs. 
31 | """ 32 | param_rrefs = [] 33 | for param in module.parameters(): 34 | param_rrefs.append(RRef(param)) 35 | return param_rrefs 36 | 37 | 38 | class EmbeddingTable(nn.Module): 39 | r""" 40 | Encoding layers of the RNNModel 41 | """ 42 | def __init__(self, ntoken, ninp, dropout): 43 | super(EmbeddingTable, self).__init__() 44 | self.drop = nn.Dropout(dropout) 45 | self.encoder = nn.Embedding(ntoken, ninp) 46 | if torch.cuda.is_available(): 47 | self.encoder = self.encoder.cuda() 48 | nn.init.uniform_(self.encoder.weight, -0.1, 0.1) 49 | 50 | def forward(self, input): 51 | if torch.cuda.is_available(): 52 | input = input.cuda() 53 | return self.drop(self.encoder(input)).cpu() 54 | 55 | 56 | class Decoder(nn.Module): 57 | r""" 58 | Decoding layers of the RNNModel 59 | """ 60 | def __init__(self, ntoken, nhid, dropout): 61 | super(Decoder, self).__init__() 62 | self.drop = nn.Dropout(dropout) 63 | self.decoder = nn.Linear(nhid, ntoken) 64 | nn.init.zeros_(self.decoder.bias) 65 | nn.init.uniform_(self.decoder.weight, -0.1, 0.1) 66 | 67 | def forward(self, output): 68 | return self.decoder(self.drop(output)) 69 | 70 | 71 | class RNNModel(nn.Module): 72 | r""" 73 | A distributed RNN model which puts embedding table and decoder parameters on 74 | a remote parameter server, and locally holds parameters for the LSTM module. 75 | The structure of the RNN model is borrowed from the word language model 76 | example. See https://github.com/pytorch/examples/blob/main/word_language_model/model.py 77 | """ 78 | def __init__(self, ps, ntoken, ninp, nhid, nlayers, dropout=0.5): 79 | super(RNNModel, self).__init__() 80 | 81 | # setup embedding table remotely 82 | self.emb_table_rref = rpc.remote(ps, EmbeddingTable, args=(ntoken, ninp, dropout)) 83 | # setup LSTM locally 84 | self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout) 85 | # setup decoder remotely 86 | self.decoder_rref = rpc.remote(ps, Decoder, args=(ntoken, nhid, dropout)) 87 | 88 | def forward(self, input, hidden): 89 | # pass input to the remote embedding table and fetch emb tensor back 90 | emb = _remote_method(EmbeddingTable.forward, self.emb_table_rref, input) 91 | output, hidden = self.rnn(emb, hidden) 92 | # pass output to the remote decoder and get the decoded output back 93 | decoded = _remote_method(Decoder.forward, self.decoder_rref, output) 94 | return decoded, hidden 95 | 96 | def parameter_rrefs(self): 97 | remote_params = [] 98 | # get RRefs of embedding table 99 | remote_params.extend(_remote_method(_parameter_rrefs, self.emb_table_rref)) 100 | # create RRefs for local parameters 101 | remote_params.extend(_parameter_rrefs(self.rnn)) 102 | # get RRefs of decoder 103 | remote_params.extend(_remote_method(_parameter_rrefs, self.decoder_rref)) 104 | return remote_params 105 | -------------------------------------------------------------------------------- /distributed/tensor_parallelism/README.md: -------------------------------------------------------------------------------- 1 | # PyTorch native Tensor Parallel for distributed training 2 | 3 | This example demonstrates SPMD Megatron-LM style Tensor Parallel by using 4 | PyTorch native Tensor Parallel APIs, which include: 5 | 6 | 1. Simple module-level Tensor Parallelism on a dummy MLP model. 7 | 2. Simple module-level Tensor Parallelism with Sequence Parallel inputs/outputs on a dummy MLP model. 8 | 3. A E2E demo of Fully Sharded Data Parallel + Tensor Parallel (with Sequence Parallel) on a example Llama2 model. 
9 | 10 | More details about the PyTorch native Tensor Parallel APIs, please see PyTorch docs: 11 | https://pytorch.org/docs/stable/distributed.tensor.parallel.html 12 | 13 | ``` 14 | pip install -r requirements.txt 15 | python example.py 16 | ``` 17 | -------------------------------------------------------------------------------- /distributed/tensor_parallelism/log_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | 4 | logging.basicConfig( 5 | format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p", level=logging.INFO 6 | ) 7 | 8 | def get_logger(): 9 | return logging.getLogger(__name__) 10 | 11 | 12 | def rank_log(_rank, logger, msg): 13 | """helper function to log only on global rank 0""" 14 | if _rank == 0: 15 | logger.info(f" {msg}") 16 | 17 | 18 | def verify_min_gpu_count(min_gpus: int = 2) -> bool: 19 | """ verification that we have at least 2 gpus to run dist examples """ 20 | has_cuda = torch.cuda.is_available() 21 | gpu_count = torch.cuda.device_count() 22 | return has_cuda and gpu_count >= min_gpus 23 | -------------------------------------------------------------------------------- /distributed/tensor_parallelism/requirements.txt: -------------------------------------------------------------------------------- 1 | # Python dependencies required for running the example 2 | 3 | --pre 4 | --extra-index-url https://download.pytorch.org/whl/nightly/cu118 5 | --extra-index-url https://download.pytorch.org/whl/nightly/cu121 6 | torch >= 2.3.0.dev0; sys_platform == "linux" 7 | -------------------------------------------------------------------------------- /distributed/tensor_parallelism/run_example.sh: -------------------------------------------------------------------------------- 1 | 2 | # To run samples: 3 | # bash run_example.sh {file_to_run.py} {num_gpus} 4 | # where file_to_run = example to launch. Default = 'fsdp_tp_example.py' 5 | # num_gpus = num local gpus to use (must be at least 2). Default = 4 6 | 7 | # samples to run include: 8 | # sequence_parallel_example.py 9 | # tensor_parallel_example.py 10 | # fsdp_tp_example.py 11 | 12 | echo "Launching ${1:-fsdp_tp_example.py} with ${2:-4} gpus" 13 | torchrun --nnodes=1 --nproc_per_node=${2:-4} --rdzv_id=101 --rdzv_endpoint="localhost:5972" ${1:-fsdp_tp_example.py} 14 | -------------------------------------------------------------------------------- /distributed/tensor_parallelism/sequence_parallel_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import torch.nn as nn 5 | 6 | from torch.distributed._tensor import Shard 7 | 8 | from torch.distributed.tensor.parallel import ( 9 | parallelize_module, 10 | ColwiseParallel, 11 | RowwiseParallel, 12 | ) 13 | 14 | from log_utils import rank_log, get_logger, verify_min_gpu_count 15 | 16 | 17 | # ---- GPU check ------------ 18 | _min_gpu_count = 2 19 | 20 | if not verify_min_gpu_count(min_gpus=_min_gpu_count): 21 | print(f"Unable to locate sufficient {_min_gpu_count} gpus to run this example. Exiting.") 22 | sys.exit() 23 | # --------------------------- 24 | 25 | 26 | from torch.distributed._tensor.device_mesh import init_device_mesh 27 | 28 | 29 | 30 | """ 31 | This is the script to test Sequence Parallel(SP) on a toy model in a 32 | Megetron-LM SPMD style. We show an E2E working flow from forward, 33 | backward and optimization. 
34 | 35 | We use the example of two `nn.Linear` layers with an element-wise `nn.RELU` 36 | in between to show an example of sequence parallel, which was proposed in paper: 37 | 38 | https://arxiv.org/pdf/2205.05198.pdf. 39 | 40 | Like tensor parallel, we parallelize the first linear layer by column 41 | and also parallelize the second linear layer by row. But the input in each rank 42 | now is different so that we need one all-gather for input and one reduce-scatter 43 | in the end of the second linear layer. 44 | """ 45 | 46 | 47 | class ToyModel(nn.Module): 48 | """MLP based model""" 49 | 50 | def __init__(self): 51 | super().__init__() 52 | self.in_proj = nn.Linear(10, 32) 53 | self.relu = nn.ReLU() 54 | self.out_proj = nn.Linear(32, 5) 55 | 56 | def forward(self, x): 57 | return self.out_proj(self.relu(self.in_proj(x))) 58 | 59 | 60 | """ 61 | Main body of the demo of a basic version of sequence parallel by using 62 | PyTorch native APIs. 63 | """ 64 | logger = get_logger() 65 | 66 | # create a device mesh based on the given world_size. 67 | device_mesh = init_device_mesh( 68 | device_type="cuda", mesh_shape=(int(os.environ["WORLD_SIZE"]),) 69 | ) 70 | 71 | _rank = device_mesh.get_rank() 72 | 73 | print(f"Starting PyTorch Sequence Parallel example on rank {_rank}.") 74 | 75 | rank_log(_rank, logger, f"Device Mesh created: {device_mesh=}") 76 | 77 | # create model and move it to GPU. Init_device_mesh has already assigned gpu ids... 78 | model = ToyModel().to("cuda") 79 | 80 | # Custom parallelization plan for the model 81 | sp_model = parallelize_module( 82 | module=model, 83 | device_mesh=device_mesh, 84 | parallelize_plan={ 85 | "in_proj": ColwiseParallel(input_layouts=Shard(0)), 86 | "out_proj": RowwiseParallel(output_layouts=Shard(0)), 87 | }, 88 | ) 89 | 90 | 91 | # Create a optimizer for the parallelized module. 92 | lr = 0.25 93 | optimizer = torch.optim.AdamW(sp_model.parameters(), lr=lr, foreach=True) 94 | 95 | 96 | # Perform a num of iterations of forward/backward 97 | # and optimizations for the sharded module. 98 | num_iters = 10 99 | rank_log(_rank, logger, "Sequence Parallel training starting...") 100 | 101 | for i in range(num_iters): 102 | # For SP, input can be different across all ranks. 103 | inp = torch.rand(20, 10, device="cuda") 104 | output = sp_model(inp) 105 | output.sum().backward() 106 | optimizer.step() 107 | rank_log(_rank, logger, f"Sequence Parallel iter {i} completed") 108 | 109 | rank_log(_rank, logger, "Sequence Parallel training completed!") 110 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.https://www.sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | # torch 3 | # PyTorch Theme 4 | -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme 5 | sphinx-panels 6 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the BSD-style license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | # Configuration file for the Sphinx documentation builder. 9 | # 10 | # This file only contains a selection of the most common options. For a full 11 | # list see the documentation: 12 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 13 | 14 | # -- Path setup -------------------------------------------------------------- 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | 23 | import pytorch_sphinx_theme 24 | 25 | current_dir = os.path.dirname(__file__) 26 | target_dir = os.path.abspath(os.path.join(current_dir, "../..")) 27 | sys.path.insert(0, target_dir) 28 | print(target_dir) 29 | 30 | # -- Project information ----------------------------------------------------- 31 | 32 | project = "PyTorchExamples" 33 | copyright = "2022, Meta" 34 | author = "Meta" 35 | 36 | # The full version, including alpha/beta/rc tags 37 | release = "1.11" 38 | 39 | # -- General configuration --------------------------------------------------- 40 | 41 | # Add any Sphinx extension module names here, as strings. They can be 42 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 43 | # ones. 44 | extensions = ["sphinx.ext.napoleon", "sphinx.ext.autodoc", 'sphinx_panels'] 45 | panels_add_bootstrap_css = False 46 | 47 | # Add any paths that contain templates here, relative to this directory. 
48 | templates_path = ["_templates"] 49 | 50 | # List of patterns, relative to source directory, that match files and 51 | # directories to ignore when looking for source files. 52 | # This pattern also affects html_static_path and html_extra_path. 53 | exclude_patterns = [] 54 | 55 | # -- Options for HTML output ------------------------------------------------- 56 | 57 | # The theme to use for HTML and HTML Help pages. See the documentation for 58 | # a list of builtin themes. 59 | # 60 | # html_theme = 'alabaster' 61 | html_theme = "pytorch_sphinx_theme" 62 | html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] 63 | 64 | # Add any paths that contain custom static files (such as style sheets) here, 65 | # relative to this directory. They are copied after the builtin static files, 66 | # so a file named "default.css" will overwrite the builtin "default.css". 67 | 68 | html_static_path = ["_static"] 69 | panels_add_fontawesome_latex = True 70 | 71 | html_theme_options = { 72 | 'pytorch_project': 'examples', 73 | 'collapse_navigation': False, 74 | 'display_version': True, 75 | 'logo_only': False, 76 | 'analytics_id': 'UA-117752657-2', 77 | } 78 | -------------------------------------------------------------------------------- /fast_neural_style/README.md: -------------------------------------------------------------------------------- 1 | # fast-neural-style :city_sunrise: :rocket: 2 | 3 | This repository contains a pytorch implementation of an algorithm for artistic style transfer. The algorithm can be used to mix the content of an image with the style of another image. For example, here is a photograph of a door arch rendered in the style of a stained glass painting. 4 | 5 | The model uses the method described in [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://arxiv.org/abs/1603.08155) along with [Instance Normalization](https://arxiv.org/pdf/1607.08022.pdf). The saved-models for examples shown in the README can be downloaded from [here](https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=0). 6 | 7 |

8 | 9 | 10 | 11 |

12 | 13 | ## Requirements 14 | 15 | The program is written in Python, and uses [pytorch](http://pytorch.org/), [scipy](https://www.scipy.org). A GPU is not necessary, but can provide a significant speed up especially for training a new model. Regular sized images can be styled on a laptop or desktop using saved models. 16 | 17 | ## Usage 18 | 19 | Stylize image 20 | 21 | ``` 22 | python neural_style/neural_style.py eval --content-image --model --output-image --cuda 0 23 | ``` 24 | 25 | - `--content-image`: path to content image you want to stylize. 26 | - `--model`: saved model to be used for stylizing the image (eg: `mosaic.pth`) 27 | - `--output-image`: path for saving the output image. 28 | - `--content-scale`: factor for scaling down the content image if memory is an issue (eg: value of 2 will halve the height and width of content-image) 29 | - `--cuda`: set it to 1 for running on GPU, 0 for CPU. 30 | - `--mps`: set it to 1 for running on macOS GPU 31 | 32 | Train model 33 | 34 | ```bash 35 | python neural_style/neural_style.py train --dataset --style-image --save-model-dir --epochs 2 --cuda 1 36 | ``` 37 | 38 | There are several command line arguments, the important ones are listed below 39 | 40 | - `--dataset`: path to training dataset, the path should point to a folder containing another folder with all the training images. I used COCO 2014 Training images dataset [80K/13GB] [(download)](https://cocodataset.org/#download). 41 | - `--style-image`: path to style-image. 42 | - `--save-model-dir`: path to folder where trained model will be saved. 43 | - `--cuda`: set it to 1 for running on GPU, 0 for CPU. 44 | - `--mps`: set it to 1 for running on macOS GPU 45 | 46 | Refer to `neural_style/neural_style.py` for other command line arguments. For training new models you might have to tune the values of `--content-weight` and `--style-weight`. The mosaic style model shown above was trained with `--content-weight 1e5` and `--style-weight 1e10`. The remaining 3 models were also trained with similar order of weight parameters with slight variation in the `--style-weight` (`5e10` or `1e11`). 47 | 48 | ## Models 49 | 50 | Models for the examples shown below can be downloaded from [here](https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=0) or by running the script `download_saved_models.py`. 51 | 52 |
53 | 54 |
55 | 56 |
57 | 58 | 59 | 60 | 61 |
62 | 63 | 64 | 65 | 66 |
67 | -------------------------------------------------------------------------------- /fast_neural_style/download_saved_models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | # PyTorch 1.1 moves _download_url_to_file 5 | # from torch.utils.model_zoo to torch.hub 6 | # PyTorch 1.0 exists another _download_url_to_file 7 | # 2 argument 8 | # TODO: If you remove support PyTorch 1.0 or older, 9 | # You should remove torch.utils.model_zoo 10 | # Ref. PyTorch #18758 11 | # https://github.com/pytorch/pytorch/pull/18758/commits 12 | try: 13 | from torch.utils.model_zoo import _download_url_to_file 14 | except ImportError: 15 | try: 16 | from torch.hub import download_url_to_file as _download_url_to_file 17 | except ImportError: 18 | from torch.hub import _download_url_to_file 19 | 20 | 21 | def unzip(source_filename, dest_dir): 22 | with zipfile.ZipFile(source_filename) as zf: 23 | zf.extractall(path=dest_dir) 24 | 25 | 26 | if __name__ == '__main__': 27 | _download_url_to_file('https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=1', 'saved_models.zip', None, True) 28 | unzip('saved_models.zip', '.') 29 | -------------------------------------------------------------------------------- /fast_neural_style/images/content-images/amber.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geohot/examples/07267b7138142b2979589a69a4d64470e29022d5/fast_neural_style/images/content-images/amber.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/output-images/amber-candy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geohot/examples/07267b7138142b2979589a69a4d64470e29022d5/fast_neural_style/images/output-images/amber-candy.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/output-images/amber-mosaic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geohot/examples/07267b7138142b2979589a69a4d64470e29022d5/fast_neural_style/images/output-images/amber-mosaic.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/output-images/amber-rain-princess.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geohot/examples/07267b7138142b2979589a69a4d64470e29022d5/fast_neural_style/images/output-images/amber-rain-princess.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/output-images/amber-udnie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geohot/examples/07267b7138142b2979589a69a4d64470e29022d5/fast_neural_style/images/output-images/amber-udnie.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/candy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geohot/examples/07267b7138142b2979589a69a4d64470e29022d5/fast_neural_style/images/style-images/candy.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/mosaic.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/geohot/examples/07267b7138142b2979589a69a4d64470e29022d5/fast_neural_style/images/style-images/mosaic.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/rain-princess-cropped.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geohot/examples/07267b7138142b2979589a69a4d64470e29022d5/fast_neural_style/images/style-images/rain-princess-cropped.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/rain-princess.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geohot/examples/07267b7138142b2979589a69a4d64470e29022d5/fast_neural_style/images/style-images/rain-princess.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/udnie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geohot/examples/07267b7138142b2979589a69a4d64470e29022d5/fast_neural_style/images/style-images/udnie.jpg -------------------------------------------------------------------------------- /fast_neural_style/neural_style/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geohot/examples/07267b7138142b2979589a69a4d64470e29022d5/fast_neural_style/neural_style/__init__.py -------------------------------------------------------------------------------- /fast_neural_style/neural_style/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from PIL import Image 3 | 4 | 5 | def load_image(filename, size=None, scale=None): 6 | img = Image.open(filename).convert('RGB') 7 | if size is not None: 8 | img = img.resize((size, size), Image.ANTIALIAS) 9 | elif scale is not None: 10 | img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS) 11 | return img 12 | 13 | 14 | def save_image(filename, data): 15 | img = data.clone().clamp(0, 255).numpy() 16 | img = img.transpose(1, 2, 0).astype("uint8") 17 | img = Image.fromarray(img) 18 | img.save(filename) 19 | 20 | 21 | def gram_matrix(y): 22 | (b, ch, h, w) = y.size() 23 | features = y.view(b, ch, w * h) 24 | features_t = features.transpose(1, 2) 25 | gram = features.bmm(features_t) / (ch * h * w) 26 | return gram 27 | 28 | 29 | def normalize_batch(batch): 30 | # normalize using imagenet mean and std 31 | mean = batch.new_tensor([0.485, 0.456, 0.406]).view(-1, 1, 1) 32 | std = batch.new_tensor([0.229, 0.224, 0.225]).view(-1, 1, 1) 33 | batch = batch.div_(255.0) 34 | return (batch - mean) / std 35 | -------------------------------------------------------------------------------- /fast_neural_style/neural_style/vgg.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | import torch 4 | from torchvision import models 5 | 6 | 7 | class Vgg16(torch.nn.Module): 8 | def __init__(self, requires_grad=False): 9 | super(Vgg16, self).__init__() 10 | vgg_pretrained_features = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1).features 11 | self.slice1 = torch.nn.Sequential() 12 | self.slice2 = torch.nn.Sequential() 13 | self.slice3 = 
torch.nn.Sequential() 14 | self.slice4 = torch.nn.Sequential() 15 | for x in range(4): 16 | self.slice1.add_module(str(x), vgg_pretrained_features[x]) 17 | for x in range(4, 9): 18 | self.slice2.add_module(str(x), vgg_pretrained_features[x]) 19 | for x in range(9, 16): 20 | self.slice3.add_module(str(x), vgg_pretrained_features[x]) 21 | for x in range(16, 23): 22 | self.slice4.add_module(str(x), vgg_pretrained_features[x]) 23 | if not requires_grad: 24 | for param in self.parameters(): 25 | param.requires_grad = False 26 | 27 | def forward(self, X): 28 | h = self.slice1(X) 29 | h_relu1_2 = h 30 | h = self.slice2(h) 31 | h_relu2_2 = h 32 | h = self.slice3(h) 33 | h_relu3_3 = h 34 | h = self.slice4(h) 35 | h_relu4_3 = h 36 | vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3']) 37 | out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3) 38 | return out 39 | -------------------------------------------------------------------------------- /fx/README.md: -------------------------------------------------------------------------------- 1 | # FX Examples 2 | 3 | This folder contains several examples of program transformations implemented using `torch.fx`. More information about FX can be found in the [documentation](https://pytorch.org/docs/master/fx.html). 4 | 5 | Note that all examples should be runnable as standalone Python files. In the case of an exception, the example will appear in a subfolder with a `README.md` file explaining how to run the example. 6 | 7 | As FX is currently in a Beta release, the API or these examples are subject to change. 8 | -------------------------------------------------------------------------------- /fx/inline_function.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.fx import Proxy, symbolic_trace 3 | from torch.fx.node import map_arg 4 | 5 | 6 | ''' 7 | How to Inline a Function Into an Existing Graph 8 | 9 | One reason you might want to inline a function is to get around FX's 10 | default tracing behavior. For example, unless you've defined a custom 11 | Tracer, the out-of-the-box implementation of ``symbolic_trace`` causes 12 | references to ``torch.nn`` module instances to appear as 13 | ``call_module`` calls rather than being traced through. Let's say this 14 | behavior is almost what you need; the only problem is that there's a 15 | single module call that you want to replace with an inlined trace of the 16 | function. Creating a custom Tracer would be too much. Instead, you can 17 | accomplish this using Proxies. 18 | 19 | The following code demonstrates how to trace a module and inline it 20 | into an existing Graph using Proxy. We'll trace our Graph, then iterate 21 | through its Nodes until we find the right place to swap out the 22 | ``call_module`` Node with an inlined trace. At that point, we'll create 23 | Proxies from the Node's args and kwargs. Finally, we'll call the 24 | function we want to replace with those Proxies--which will, in essence, 25 | "trace" that function. Finally, we'll insert the result of that call 26 | into our Graph. (This last step will automatically inline the function.) 27 | ''' 28 | 29 | 30 | # Sample module 31 | class M(torch.nn.Module): 32 | def __init__(self): 33 | super().__init__() 34 | self.relu = torch.nn.ReLU() 35 | 36 | def forward(self, x): 37 | return self.relu(x) + 1.0 38 | 39 | # Symbolically trace an instance of `M`. After tracing, `self.relu` is 40 | # represented as a `call_module` Node. 
The full operation in the 41 | # generated `forward` function's code will appear as `self.relu(x)` 42 | m = symbolic_trace(M()) 43 | 44 | # Insert nodes from the ReLU graph in place of the original call to 45 | # `self.relu` 46 | # create a graph-appending tracer pointing to the original graph 47 | tracer = torch.fx.proxy.GraphAppendingTracer(m.graph) 48 | for node in m.graph.nodes: 49 | # Find `call_module` Node in `m` that corresponds to `self.relu`. 50 | # This is the Node we want to swap out for an inlined version of the 51 | # same call 52 | if (node.op, node.target) == ("call_module", "relu"): 53 | with m.graph.inserting_before(node): 54 | # Create a Proxy from each Node in the current Node's 55 | # args/kwargs 56 | proxy_args = map_arg(node.args, lambda n: Proxy(n, tracer)) 57 | proxy_kwargs = map_arg(node.kwargs, lambda n: Proxy(n, tracer)) 58 | # Call `m.relu` with the newly-created Proxy arguments. 59 | # `m.relu` is the generic version of the function; by 60 | # calling it with Proxies created from Nodes in `m`, we're 61 | # emitting Nodes that reference exiting values in the IR. 62 | # The result of this call is another Proxy, which we can 63 | # hook into our existing Graph to complete the function 64 | # inlining. 65 | proxy_output = m.relu(*proxy_args, **proxy_kwargs) 66 | # Replace the relu `call_module` node with the inlined 67 | # version of the function 68 | node.replace_all_uses_with(proxy_output.node) 69 | # Make sure that the old relu Node is erased 70 | m.graph.erase_node(node) 71 | -------------------------------------------------------------------------------- /fx/invert.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.fx as fx 3 | 4 | # An inverse mapping is one that takes a function f(x) and returns a function g 5 | # such that f(g(x)) == x. For example,since log(exp(x)) == x, exp and log are 6 | # inverses. 7 | 8 | invert_mapping = {} 9 | def add_inverse(a, b): 10 | invert_mapping[a] = b 11 | invert_mapping[b] = a 12 | inverses = [ 13 | (torch.sin, torch.arcsin), 14 | (torch.cos, torch.arccos), 15 | (torch.tan, torch.arctan), 16 | (torch.exp, torch.log), 17 | ] 18 | for a, b in inverses: 19 | add_inverse(a, b) 20 | 21 | # The general strategy is that we walk the graph backwards, transforming each 22 | # node into its inverse. To do so, we swap the outputs and inputs of the 23 | # functions, and then we look up its inverse in `invert_mapping`. Note that 24 | # this transform assumes that all operations take in only one input and return 25 | # one output. 26 | def invert(model: torch.nn.Module) -> torch.nn.Module: 27 | fx_model = fx.symbolic_trace(model) 28 | new_graph = fx.Graph() # As we're building up a new graph 29 | env = {} 30 | for node in reversed(fx_model.graph.nodes): 31 | if node.op == 'call_function': 32 | # This creates a node in the new graph with the inverse function, 33 | # and passes `env[node.name]` (i.e. the previous output node) as 34 | # input. 
35 | new_node = new_graph.call_function(invert_mapping[node.target], (env[node.name],)) 36 | env[node.args[0].name] = new_node 37 | elif node.op == 'output': 38 | # We turn the output into an input placeholder 39 | new_node = new_graph.placeholder(node.name) 40 | env[node.args[0].name] = new_node 41 | elif node.op == 'placeholder': 42 | # We turn the input placeholder into an output 43 | new_graph.output(env[node.name]) 44 | else: 45 | raise RuntimeError("Not implemented") 46 | 47 | new_graph.lint() 48 | return fx.GraphModule(fx_model, new_graph) 49 | 50 | 51 | def f(x): 52 | return torch.exp(torch.tan(x)) 53 | 54 | res = invert(f) 55 | print(res.code) 56 | """ 57 | def forward(self, output): 58 | log_1 = torch.log(output); output = None 59 | arctan_1 = torch.arctan(log_1); log_1 = None 60 | return arctan_1 61 | """ 62 | print(f(res((torch.arange(5) + 1)))) # [1., 2., 3., 4, 5.] 63 | -------------------------------------------------------------------------------- /fx/native_interpreter/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1 FATAL_ERROR) 2 | project(interpreter) 3 | 4 | find_package(Torch REQUIRED) 5 | 6 | # Define our library target 7 | add_library(interpreter SHARED interpreter.cpp) 8 | set(CMAKE_CXX_STANDARD 17) 9 | # Link against LibTorch 10 | target_link_libraries(interpreter "${TORCH_LIBRARIES}") 11 | -------------------------------------------------------------------------------- /fx/native_interpreter/README.md: -------------------------------------------------------------------------------- 1 | # Converting PyTorch Code to a Native Runtime With FX and TorchScript Custom Classes 2 | 3 | In this example, we are going to build a pipeline that does the following things: 4 | 5 | 1. Converts (or “lowers”) code in a PyTorch module into another representation (we will define the representation within the example) 6 | 2. Registers an interpreter for that code representation that can be used in TorchScript or Python 7 | 3. Wrap the converted code into a format that can still be used in TorchScript compilation. 8 | 9 | We are going to build up a trivial interpreter for this example, but you can imagine extending the same process to work with more sophisticated backends, ones which may do code optimization or offloading to an accelerator. 10 | 11 | We will be using [TorchScript custom classes](https://pytorch.org/tutorials/advanced/torch_script_custom_classes.html) to expose this Interpreter to Python and TorchScript. You may want to review that tutorial and documentation before reading this example project. 12 | 13 | ### Defining the Interpreter 14 | 15 | We define the interpreter in `interpreter.cpp`. This interpreter is very limited: it only supports two element-wise operations (`add` and `mul`) and it only supports `Tensor` values. When this interpreter runs code, it iterates through the list of instructions and simply calls the appropriate PyTorch operator from C++. 16 | 17 | To build the interpreter into a shared-object file to be loaded in for use, use the following commands from this example’s root: 18 | 19 | 20 | ``` 21 | $ mkdir build 22 | $ cd build 23 | $ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" .. 24 | $ make -j 25 | ``` 26 | 27 | After the build finishes, you should see `build/libinterpreter.so` (or with a different extension depending on your OS). 
We will use this dynamic library next when we load it up into a process to be used in execution. 28 | 29 | ### Defining the Transformation 30 | 31 | We define the code that transforms a `PyTorch` module to the format the interpreter understands in `use_interpreter.py`. Note that that file loads in the shared object we built in the previous step via a `torch.classes.load_library` call. `use_interpreter.py` contains driver code and the end that can be directly run to test the lowering transformation. 32 | 33 | ### Questions, Comments, Feedback 34 | 35 | Please direct questions and discussion to the [PyTorch forums](https://discuss.pytorch.org/). To report any issues with PyTorch (including FX and custom classes), please use the [issue tracker](https://github.com/pytorch/pytorch/issues). 36 | -------------------------------------------------------------------------------- /fx/proxy_based_graph_creation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.fx import Proxy, Graph, GraphModule 3 | 4 | 5 | ''' 6 | How to Create a Graph Using Proxy Objects Instead of Tracing 7 | 8 | It's possible to directly create a Proxy object around a raw Node. This 9 | can be used to create a Graph independently of symbolic tracing. 10 | 11 | The following code demonstrates how to use Proxy with a raw Node to 12 | append operations to a fresh Graph. We'll create two parameters (``x`` 13 | and ``y``), perform some operations on those parameters, then add 14 | everything we created to the new Graph. We'll then wrap that Graph in 15 | a GraphModule. Doing so creates a runnable instance of ``nn.Module`` 16 | where previously-created operations are represented in the Module's 17 | ``forward`` function. 18 | 19 | By the end of the tutorial, we'll have added the following method to an 20 | empty ``nn.Module`` class. 21 | 22 | .. code-block:: python 23 | 24 | def forward(self, x, y): 25 | cat_1 = torch.cat([x, y]); x = y = None 26 | tanh_1 = torch.tanh(cat_1); cat_1 = None 27 | neg_1 = torch.neg(tanh_1); tanh_1 = None 28 | return neg_1 29 | 30 | ''' 31 | 32 | 33 | # Create a graph independently of symbolic tracing 34 | graph = Graph() 35 | tracer = torch.fx.proxy.GraphAppendingTracer(graph) 36 | 37 | # Create raw Nodes 38 | raw1 = graph.placeholder('x') 39 | raw2 = graph.placeholder('y') 40 | 41 | # Initialize Proxies using the raw Nodes and graph's default tracer 42 | y = Proxy(raw1, tracer) 43 | z = Proxy(raw2, tracer) 44 | # y = Proxy(raw1) 45 | # z = Proxy(raw2) 46 | 47 | # Create other operations using the Proxies `y` and `z` 48 | a = torch.cat([y, z]) 49 | b = torch.tanh(a) 50 | c = torch.neg(b) 51 | # By using the graph's own appending tracer to create Proxies, 52 | # notice we can now use n-ary operators on operations without 53 | # multiple tracers being created at run-time (line 52) which leads 54 | # to errors # To try this out for yourself, replace lines 42, 43 55 | # with 44, 45 56 | z = torch.add(b, c) 57 | 58 | # Create a new output Node and add it to the Graph. 
By doing this, the 59 | # Graph will contain all the Nodes we just created (since they're all 60 | # linked to the output Node) 61 | graph.output(c.node) 62 | 63 | # Wrap our created Graph in a GraphModule to get a final, runnable 64 | # `nn.Module` instance 65 | mod = GraphModule(torch.nn.Module(), graph) 66 | -------------------------------------------------------------------------------- /fx/replace_op.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.fx import symbolic_trace 3 | import operator 4 | 5 | """ 6 | How to Replace One Op With Another 7 | 8 | 1. Iterate through all Nodes in your GraphModule's Graph. 9 | 2. Determine if the current Node should be replaced. (Suggested: match 10 | on the Node's ``target`` attribute). 11 | 3. Create a replacement Node and add it to the Graph. 12 | 4. Use the FX built-in ``replace_all_uses_with`` to replace all uses of 13 | the current Node with the replacement. 14 | 5. Delete the old Node from the graph. 15 | 6. Call ``recompile`` on the GraphModule. This updates the generated 16 | Python code to reflect the new Graph state. 17 | 18 | Currently, FX does not provide any way to guarantee that replaced 19 | operators are syntactically valid. It's up to the user to confirm that 20 | any new operators will work with the existing operands. 21 | 22 | The following code demonstrates an example of replacing any instance of 23 | addition with a bitwise AND. 24 | 25 | To examine how the Graph evolves during op replacement, add the 26 | statement `print(traced.graph)` after the line you want to inspect. 27 | Alternatively, call `traced.graph.print_tabular()` to see the IR in a 28 | tabular format. 29 | """ 30 | 31 | # Sample module 32 | class M(torch.nn.Module): 33 | def forward(self, x, y): 34 | return x + y, torch.add(x, y), x.add(y) 35 | 36 | # Symbolically trace an instance of the module 37 | traced = symbolic_trace(M()) 38 | 39 | # As demonstrated in the above example, there are several different ways 40 | # to denote addition. The possible cases are: 41 | # 1. `x + y` - A `call_function` Node with target `operator.add`. 42 | # We can match for equality on that `operator.add` directly. 43 | # 2. `torch.add(x, y)` - A `call_function` Node with target 44 | # `torch.add`. Similarly, we can match this function directly. 45 | # 3. `x.add(y)` - The Tensor method call, whose target we can match 46 | # as a string. 47 | 48 | patterns = set([operator.add, torch.add, "add"]) 49 | 50 | # Go through all the nodes in the Graph 51 | for n in traced.graph.nodes: 52 | # If the target matches one of the patterns 53 | if any(n.target == pattern for pattern in patterns): 54 | # Set the insert point, add the new node, and replace all uses 55 | # of `n` with the new node 56 | with traced.graph.inserting_after(n): 57 | new_node = traced.graph.call_function(torch.bitwise_and, n.args, n.kwargs) 58 | n.replace_all_uses_with(new_node) 59 | # Remove the old node from the graph 60 | traced.graph.erase_node(n) 61 | 62 | # Don't forget to recompile! 
63 | traced.recompile() 64 | -------------------------------------------------------------------------------- /fx/subgraph_rewriter_basic_use.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.fx import symbolic_trace, replace_pattern 3 | 4 | 5 | ''' 6 | How to Use the FX Subgraph Rewriter 7 | 8 | For easy subgraph rewriting, FX exposes the utility function: 9 | 10 | replace_pattern(gm : GraphModule, 11 | pattern : Callable, 12 | replacement : Callable) 13 | -> None 14 | 15 | `replace_pattern` matches all possible non-overlapping sets of operators 16 | and their data dependencies (`pattern`) in the Graph of a GraphModule 17 | (`gm`), then replaces each of these matched subgraphs with another 18 | subgraph (`replacement). 19 | 20 | The docstring for `replace_pattern` (located in `subgraph_rewriter.py`) 21 | gives an in-depth explanation as to how `pattern` and `replacement` 22 | should be specified, what happens during pattern matching, and other 23 | important technical details. This tutorial, therefore, is only meant to 24 | give an overview as to the FX Subgraph Rewriter's basic functionality. 25 | Let's go rewrite a Graph! 26 | ''' 27 | 28 | # Sample module 29 | class M(torch.nn.Module): 30 | def __init__(self): 31 | super().__init__() 32 | 33 | def forward(self, x, w1, w2): 34 | val1 = torch.neg(w1) 35 | m1 = torch.cat([val1, w2]).sum() 36 | val2 = torch.neg(w1) 37 | m2 = torch.cat([val2, w2]).sum() 38 | return x + torch.max(m1) + torch.max(m2) 39 | 40 | # Symbolically trace an instance of `M` 41 | traced = symbolic_trace(M()) 42 | 43 | # Define the pattern. The FX Subgraph Rewriter will match all 44 | # non-overlapping instances of the pattern in the larger graph. 45 | # Note that Pattern-matching is done based on data dependencies, 46 | # not Node names. Even though we're operating on Nodes named `a1` and 47 | # `a2` instead of `w1` and `w2`, the pattern is still a valid match 48 | # for the two instances of `torch.cat([w1, w2]).sum()` above. Only 49 | # operations that contribute to the single output value of the pattern 50 | # are considered 51 | def pattern(a1, a2): 52 | val1 = torch.neg(a1) 53 | return torch.cat([val1, a2]).sum() 54 | 55 | # Define the replacement (same rules as the pattern) 56 | def replacement(w1, w2): 57 | return torch.stack([w1, w2]) 58 | 59 | # Replace `pattern` with `replacement` in `traced` 60 | replace_pattern(traced, pattern, replacement) 61 | 62 | # After calling `replace_pattern`, the generated code is: 63 | ''' 64 | def forward(self, x, w1, w2): 65 | stack = torch.stack([w1, w2]) 66 | max_1 = torch.max(stack); stack = None 67 | add = x + max_1; x = max_1 = None 68 | stack_1 = torch.stack([w1, w2]); w1 = w2 = None 69 | max_2 = torch.max(stack_1); stack_1 = None 70 | add_1 = add + max_2; add = max_2 = None 71 | return add_1 72 | ''' 73 | -------------------------------------------------------------------------------- /fx/wrap_output_dynamically.py: -------------------------------------------------------------------------------- 1 | 2 | from enum import Enum, auto 3 | 4 | import torch 5 | from torch.fx import GraphModule, Node, Proxy, symbolic_trace 6 | 7 | ''' 8 | Wrap Graph Output Dynamically 9 | 10 | The following code demonstrates how change an existing Graph based on 11 | parameters specified at runtime. We'll let the user specify an 12 | activation function from a predefined Enum list, then we'll symbolically 13 | trace it. 
Next, we'll create a Proxy from the last operation in the 14 | Graph. We'll call our traced activation function with this Proxy and 15 | insert the ``output`` Node from that call into our Graph. (This final 16 | step will automatically inline the entire traced function.) 17 | ''' 18 | 19 | 20 | # Sample module 21 | class M(torch.nn.Module): 22 | def __init__(self): 23 | super().__init__() 24 | 25 | def forward(self, x, y): 26 | y = torch.cat([x, y]) 27 | return y 28 | 29 | # Symbolically trace an instance of `M` 30 | traced = symbolic_trace(M()) 31 | 32 | # Selected activation functions 33 | class ActivationFunction(Enum): 34 | RELU = auto() 35 | LEAKY_RELU = auto() 36 | PRELU = auto() 37 | 38 | # Map activation function names to their implementation 39 | activation_functions = { 40 | ActivationFunction.RELU: torch.nn.ReLU(), 41 | ActivationFunction.LEAKY_RELU: torch.nn.LeakyReLU(), 42 | ActivationFunction.PRELU: torch.nn.PReLU(), 43 | } 44 | 45 | def wrap_in_activation_function(m: GraphModule, fn: ActivationFunction) -> GraphModule: 46 | # Get output node 47 | output_node: Optional[Node] = None 48 | for n in reversed(m.graph.nodes): 49 | if n.op == "output": 50 | output_node = n 51 | break 52 | assert output_node 53 | 54 | # Get the actual output (the "input" of the output node). This is 55 | # the Node we want to wrap in a user-specified activation function 56 | assert len(output_node.all_input_nodes) == 1 57 | wrap_node = output_node.all_input_nodes[0] 58 | 59 | # Wrap the actual output in a Proxy 60 | wrap_proxy = Proxy(wrap_node) 61 | 62 | # Get the implementation of the specified activation function and 63 | # symbolically trace it 64 | fn_impl = activation_functions[fn] 65 | fn_impl_traced = symbolic_trace(fn_impl) 66 | 67 | # Call the specified activation function using the Proxy wrapper for 68 | # `output_op`. The result of this call is another Proxy, which we 69 | # can hook into our existing Graph. 70 | with traced.graph.inserting_after(wrap_node): 71 | fn_impl_output_node = fn_impl_traced(wrap_proxy) 72 | new_args = (fn_impl_output_node.node,) 73 | output_node.args = new_args 74 | 75 | m.recompile() 76 | 77 | 78 | # Example call 79 | x, y = torch.randn(5, 3), torch.randn(5, 3) 80 | orig_output = traced(x, y) 81 | 82 | wrap_in_activation_function(traced, ActivationFunction.LEAKY_RELU) 83 | new_output = traced(x, y) 84 | 85 | torch.testing.assert_close(new_output, torch.nn.LeakyReLU()(orig_output)) 86 | -------------------------------------------------------------------------------- /gat/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | requests 3 | numpy<2 4 | -------------------------------------------------------------------------------- /gcn/README.md: -------------------------------------------------------------------------------- 1 | # Graph Convolutional Network 2 | 3 | This repository contains an implementation of Graph Convolutional Networks (GCN) based on the paper "Semi-Supervised Classification with Graph Convolutional Networks" by Thomas N. Kipf and Max Welling. 4 | 5 | ## Overview 6 | This project implements the GCN model proposed in the paper for semi-supervised node classification on graph-structured data. GCN leverages graph convolutions to aggregate information from neighboring nodes and learn node representations for downstream tasks. The implementation provides a flexible and efficient GCN model for graph-based machine learning tasks. 
7 | 8 | # Requirements 9 | - Python 3.7 or higher 10 | - PyTorch 2.0 or higher 11 | - Requests 2.31 or higher 12 | - NumPy 1.24 or higher 13 | 14 | 15 | # Installation 16 | ```bash 17 | pip install -r requirements.txt 18 | python main.py 19 | ``` 20 | 21 | # Dataset 22 | The implementation includes support for the Cora dataset, a standard benchmark dataset for graph-based machine learning tasks. The Cora dataset consists of scientific publications, where nodes represent papers and edges represent citation relationships. Each paper is associated with a binary label indicating one of seven classes. The dataset is downloaded, preprocessed and ready to use. 23 | 24 | ## Model Architecture 25 | The GCN model architecture follows the details provided in the paper. It consists of multiple graph convolutional layers with ReLU activation, followed by a final softmax layer for classification. The implementation supports customizable hyperparameters such as the number of hidden units, the number of layers, and dropout rate. 26 | 27 | ## Usage 28 | To train and evaluate the GCN model on the Cora dataset, use the following command: 29 | ```bash 30 | python train.py --epochs 200 --lr 0.01 --l2 5e-4 --dropout-p 0.5 --hidden-dim 16 --val-every 20 --include-bias False --no-cuda False 31 | ``` 32 | 33 | # Results 34 | The model achieves a classification accuracy of 82.5% on the test set of the Cora dataset after 200 epochs of training. This result is comparable to the performance reported in the original paper. However, the results can vary due to the randomness of the train/val/test split. 35 | 36 | References 37 | Thomas N. Kipf and Max Welling. "Semi-Supervised Classification with Graph Convolutional Networks." Link to the paper 38 | 39 | Original paper repository: [https://github.com/tkipf/gcn](https://github.com/tkipf/gcn) -------------------------------------------------------------------------------- /gcn/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision==0.20.0 3 | requests 4 | numpy<2 5 | -------------------------------------------------------------------------------- /imagenet/extract_ILSVRC.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # script to extract ImageNet dataset 4 | # ILSVRC2012_img_train.tar (about 138 GB) 5 | # ILSVRC2012_img_val.tar (about 6.3 GB) 6 | # make sure ILSVRC2012_img_train.tar & ILSVRC2012_img_val.tar in your current directory 7 | # 8 | # Adapted from: 9 | # https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md 10 | # https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4 11 | # 12 | # imagenet/train/ 13 | # ├── n01440764 14 | # │ ├── n01440764_10026.JPEG 15 | # │ ├── n01440764_10027.JPEG 16 | # │ ├── ...... 17 | # ├── ...... 18 | # imagenet/val/ 19 | # ├── n01440764 20 | # │ ├── ILSVRC2012_val_00000293.JPEG 21 | # │ ├── ILSVRC2012_val_00002138.JPEG 22 | # │ ├── ...... 23 | # ├── ...... 
24 | # 25 | # 26 | # Make imagnet directory 27 | # 28 | mkdir imagenet 29 | # 30 | # Extract the training data: 31 | # 32 | # Create train directory; move .tar file; change directory 33 | mkdir imagenet/train && mv ILSVRC2012_img_train.tar imagenet/train/ && cd imagenet/train 34 | # Extract training set; remove compressed file 35 | tar -xvf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar 36 | # 37 | # At this stage imagenet/train will contain 1000 compressed .tar files, one for each category 38 | # 39 | # For each .tar file: 40 | # 1. create directory with same name as .tar file 41 | # 2. extract and copy contents of .tar file into directory 42 | # 3. remove .tar file 43 | find . -name "*.tar" | while read NAME ; do mkdir -p "${NAME%.tar}"; tar -xvf "${NAME}" -C "${NAME%.tar}"; rm -f "${NAME}"; done 44 | # 45 | # This results in a training directory like so: 46 | # 47 | # imagenet/train/ 48 | # ├── n01440764 49 | # │ ├── n01440764_10026.JPEG 50 | # │ ├── n01440764_10027.JPEG 51 | # │ ├── ...... 52 | # ├── ...... 53 | # 54 | # Change back to original directory 55 | cd ../.. 56 | # 57 | # Extract the validation data and move images to subfolders: 58 | # 59 | # Create validation directory; move .tar file; change directory; extract validation .tar; remove compressed file 60 | mkdir imagenet/val && mv ILSVRC2012_img_val.tar imagenet/val/ && cd imagenet/val && tar -xvf ILSVRC2012_img_val.tar && rm -f ILSVRC2012_img_val.tar 61 | # get script from soumith and run; this script creates all class directories and moves images into corresponding directories 62 | wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash 63 | # 64 | # This results in a validation directory like so: 65 | # 66 | # imagenet/val/ 67 | # ├── n01440764 68 | # │ ├── ILSVRC2012_val_00000293.JPEG 69 | # │ ├── ILSVRC2012_val_00002138.JPEG 70 | # │ ├── ...... 71 | # ├── ...... 72 | # 73 | # 74 | # Check total files after extract 75 | # 76 | # $ find train/ -name "*.JPEG" | wc -l 77 | # 1281167 78 | # $ find val/ -name "*.JPEG" | wc -l 79 | # 50000 80 | # 81 | -------------------------------------------------------------------------------- /imagenet/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision==0.20.0 3 | -------------------------------------------------------------------------------- /language_translation/README.md: -------------------------------------------------------------------------------- 1 | # Language Translation 2 | 3 | This example shows how one might use transformers for language translation. In particular, this implementation is loosely based on the [Attention is All You Need paper](https://arxiv.org/abs/1706.03762). 4 | 5 | ## Requirements 6 | 7 | We will need a tokenizer for our languages. Torchtext does include a tokenizer for English, but unfortunately, we will need more languages then that. We can get these tokenizers via ```spacy``` 8 | 9 | ```bash 10 | python3 -m spacy download 11 | python3 -m spacy download en 12 | python3 -m spacy download de 13 | ``` 14 | 15 | Spacy supports many languages. For a full accounting of supported languages, please look [here](https://spacy.io/usage/models). This example will default from German to English. 
16 | 17 | Torchtext is also required: 18 | ```bash 19 | pip install torchtext 20 | ``` 21 | 22 | Just running these commands will get you started: 23 | ```bash 24 | pip install -r requirements.txt 25 | python3 -m spacy download 26 | ``` 27 | 28 | ## Usage 29 | 30 | This example contains a lot of flags that you can set to change the behavior / training of the module. You can see all of them by running: 31 | 32 | ```bash 33 | python3 main.py -h 34 | ``` 35 | 36 | But in general, all of the settings have "sensible" defaults; however, the default translation is to translate from German to English. To *train* the model, you only need to run the following command, but there is also an example for how to use any language you want: 37 | 38 | ```bash 39 | python3 main.py 40 | python3 main.py --src en --tgt fr # For english to french translation 41 | ``` 42 | 43 | For model inference, you can use this command: 44 | 45 | ```bash 46 | python3 main.py --inference --model_path 47 | ``` 48 | 49 | After some loading time, this will open an interactive interface where you can type in whatever sentence you are interested in translating. 50 | -------------------------------------------------------------------------------- /language_translation/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchtext 3 | torchdata==0.9.0 4 | spacy 5 | portalocker 6 | -------------------------------------------------------------------------------- /language_translation/src/model.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.nn import functional as F 5 | from torch import nn 6 | 7 | class PositionalEncoding(nn.Module): 8 | def __init__( 9 | self, 10 | emb_size, 11 | dropout, 12 | maxlen=5000 13 | ): 14 | super(PositionalEncoding, self).__init__() 15 | den = torch.exp(- torch.arange(0, emb_size, 2)* math.log(10000) / emb_size) 16 | pos = torch.arange(0, maxlen).reshape(maxlen, 1) 17 | pos_embedding = torch.zeros((maxlen, emb_size)) 18 | pos_embedding[:, 0::2] = torch.sin(pos * den) 19 | pos_embedding[:, 1::2] = torch.cos(pos * den) 20 | pos_embedding = pos_embedding.unsqueeze(-2) 21 | 22 | self.dropout = nn.Dropout(dropout) 23 | self.register_buffer('pos_embedding', pos_embedding) 24 | 25 | def forward(self, token_embedding): 26 | return self.dropout(token_embedding + self.pos_embedding[:token_embedding.size(0), :]) 27 | 28 | class Translator(nn.Module): 29 | def __init__( 30 | self, 31 | num_encoder_layers, 32 | num_decoder_layers, 33 | embed_size, 34 | num_heads, 35 | src_vocab_size, 36 | tgt_vocab_size, 37 | dim_feedforward, 38 | dropout 39 | ): 40 | super(Translator, self).__init__() 41 | 42 | # Output of embedding must be equal (embed_size) 43 | self.src_embedding = nn.Embedding(src_vocab_size, embed_size) 44 | self.tgt_embedding = nn.Embedding(tgt_vocab_size, embed_size) 45 | 46 | self.pos_enc = PositionalEncoding(embed_size, dropout) 47 | 48 | self.transformer = nn.Transformer( 49 | d_model=embed_size, 50 | nhead=num_heads, 51 | num_encoder_layers=num_encoder_layers, 52 | num_decoder_layers=num_decoder_layers, 53 | dim_feedforward=dim_feedforward, 54 | dropout=dropout 55 | ) 56 | 57 | self.ff = nn.Linear(embed_size, tgt_vocab_size) 58 | 59 | self._init_weights() 60 | 61 | def _init_weights(self): 62 | for p in self.parameters(): 63 | if p.dim() > 1: 64 | nn.init.xavier_uniform_(p) 65 | 66 | def forward(self, src, trg, src_mask, tgt_mask, src_padding_mask, 
tgt_padding_mask, memory_key_padding_mask): 67 | 68 | src_emb = self.pos_enc(self.src_embedding(src)) 69 | tgt_emb = self.pos_enc(self.tgt_embedding(trg)) 70 | 71 | outs = self.transformer( 72 | src_emb, 73 | tgt_emb, 74 | src_mask, 75 | tgt_mask, 76 | None, 77 | src_padding_mask, 78 | tgt_padding_mask, 79 | memory_key_padding_mask 80 | ) 81 | 82 | return self.ff(outs) 83 | 84 | def encode(self, src, src_mask): 85 | 86 | embed = self.src_embedding(src) 87 | 88 | pos_enc = self.pos_enc(embed) 89 | 90 | return self.transformer.encoder(pos_enc, src_mask) 91 | 92 | def decode(self, tgt, memory, tgt_mask): 93 | 94 | embed = self.tgt_embedding(tgt) 95 | 96 | pos_enc = self.pos_enc(embed) 97 | 98 | return self.transformer.decoder(pos_enc, memory, tgt_mask) 99 | -------------------------------------------------------------------------------- /legacy/snli/README.md: -------------------------------------------------------------------------------- 1 | # PyTorch-based NLI Training with SNLI 2 | 3 | ## 📝 Overview 4 | 5 | This repository contains Python scripts to train a Natural Language Inference (NLI) model, specifically the `SNLIClassifier`, using the Stanford Natural Language Inference (SNLI) corpus. The trained model predicts textual entailment, identifying if a statement is entailed, contradicted, or neither by another statement. 6 | 7 | ## ⚙️ Dependencies 8 | 9 | Install the necessary Python libraries with: 10 | 11 | ```bash 12 | pip install -r requirements.txt 13 | ``` 14 | 15 | The `requirements.txt` file includes: 16 | 17 | ``` 18 | torch 19 | torchtext 20 | spacy 21 | ``` 22 | 23 | ## 💻 Usage 24 | 25 | Start the training process with: 26 | 27 | ```bash 28 | python train.py --lower --word-vectors [PATH_TO_WORD_VECTORS] --vector-cache [PATH_TO_VECTOR_CACHE] --epochs [NUMBER_OF_EPOCHS] --batch-size [BATCH_SIZE] --save-path [PATH_TO_SAVE_MODEL] --gpu [GPU_NUMBER] 29 | ``` 30 | 31 | ## 🏋️‍♀️ Training 32 | 33 | The script trains the model on mini-batches of data across a specified number of epochs. It saves the best-performing model on the validation set as a `.pt` file in the specified directory. 34 | 35 | ## 📚 Scripts 36 | 37 | - `model.py`: Defines the `SNLIClassifier` model and auxiliary classes. 38 | - `util.py`: Contains utility functions for directory creation and command-line argument parsing. 39 | 40 | ## 📣 Note 41 | 42 | Ensure the `model.py` and `util.py` scripts are available in your working directory. 
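As a quick smoke test, the classifier can also be instantiated directly with a hand-built configuration object. The sketch below is illustrative only and is not part of the original scripts; the hyperparameter values are assumptions, and the real `train.py` builds this configuration from the command-line arguments and the torchtext vocabulary.

```python
from types import SimpleNamespace

import torch

from model import SNLIClassifier

# Hand-built stand-in for the argparse namespace; all values are illustrative.
config = SimpleNamespace(
    n_embed=100,   # vocabulary size
    d_embed=100,   # embedding dimension
    d_proj=300,    # projection dimension
    projection=True,
    d_hidden=300,
    n_layers=1,
    n_cells=2,     # n_layers * number of LSTM directions (2 because birnn=True)
    birnn=True,
    dp_ratio=0.2,
    fix_emb=True,
    d_out=3,       # entailment / contradiction / neutral
)

model = SNLIClassifier(config)

# Random token ids shaped (sequence_length, batch_size), as torchtext would supply them.
batch = SimpleNamespace(
    premise=torch.randint(0, config.n_embed, (12, 4)),
    hypothesis=torch.randint(0, config.n_embed, (10, 4)),
)
print(model(batch).shape)  # torch.Size([4, 3])
```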
-------------------------------------------------------------------------------- /legacy/snli/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Bottle(nn.Module): 6 | 7 | def forward(self, input): 8 | if len(input.size()) <= 2: 9 | return super(Bottle, self).forward(input) 10 | size = input.size()[:2] 11 | out = super(Bottle, self).forward(input.view(size[0]*size[1], -1)) 12 | return out.view(size[0], size[1], -1) 13 | 14 | 15 | class Linear(Bottle, nn.Linear): 16 | pass 17 | 18 | 19 | class Encoder(nn.Module): 20 | 21 | def __init__(self, config): 22 | super(Encoder, self).__init__() 23 | self.config = config 24 | input_size = config.d_proj if config.projection else config.d_embed 25 | dropout = 0 if config.n_layers == 1 else config.dp_ratio 26 | self.rnn = nn.LSTM(input_size=input_size, hidden_size=config.d_hidden, 27 | num_layers=config.n_layers, dropout=dropout, 28 | bidirectional=config.birnn) 29 | 30 | def forward(self, inputs): 31 | batch_size = inputs.size()[1] 32 | state_shape = self.config.n_cells, batch_size, self.config.d_hidden 33 | h0 = c0 = inputs.new_zeros(state_shape) 34 | outputs, (ht, ct) = self.rnn(inputs, (h0, c0)) 35 | return ht[-1] if not self.config.birnn else ht[-2:].transpose(0, 1).contiguous().view(batch_size, -1) 36 | 37 | 38 | class SNLIClassifier(nn.Module): 39 | 40 | def __init__(self, config): 41 | super(SNLIClassifier, self).__init__() 42 | self.config = config 43 | self.embed = nn.Embedding(config.n_embed, config.d_embed) 44 | self.projection = Linear(config.d_embed, config.d_proj) 45 | self.encoder = Encoder(config) 46 | self.dropout = nn.Dropout(p=config.dp_ratio) 47 | self.relu = nn.ReLU() 48 | seq_in_size = 2*config.d_hidden 49 | if self.config.birnn: 50 | seq_in_size *= 2 51 | lin_config = [seq_in_size]*2 52 | self.out = nn.Sequential( 53 | Linear(*lin_config), 54 | self.relu, 55 | self.dropout, 56 | Linear(*lin_config), 57 | self.relu, 58 | self.dropout, 59 | Linear(*lin_config), 60 | self.relu, 61 | self.dropout, 62 | Linear(seq_in_size, config.d_out)) 63 | 64 | def forward(self, batch): 65 | prem_embed = self.embed(batch.premise) 66 | hypo_embed = self.embed(batch.hypothesis) 67 | if self.config.fix_emb: 68 | prem_embed = prem_embed.detach() 69 | hypo_embed = hypo_embed.detach() 70 | if self.config.projection: 71 | prem_embed = self.relu(self.projection(prem_embed)) 72 | hypo_embed = self.relu(self.projection(hypo_embed)) 73 | premise = self.encoder(prem_embed) 74 | hypothesis = self.encoder(hypo_embed) 75 | scores = self.out(torch.cat([premise, hypothesis], 1)) 76 | return scores 77 | -------------------------------------------------------------------------------- /legacy/snli/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchtext 3 | spacy 4 | -------------------------------------------------------------------------------- /legacy/snli/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | from argparse import ArgumentParser 3 | 4 | def makedirs(name): 5 | """helper function for python 2 and 3 to call os.makedirs() 6 | avoiding an error if the directory to be created already exists""" 7 | 8 | import os, errno 9 | 10 | try: 11 | os.makedirs(name) 12 | except OSError as ex: 13 | if ex.errno == errno.EEXIST and os.path.isdir(name): 14 | # ignore existing directory 15 | pass 16 | else: 17 | # a different error happened 18 | raise 
19 | 20 | 21 | def get_args(): 22 | parser = ArgumentParser(description='PyTorch/torchtext SNLI example') 23 | parser.add_argument('--epochs', type=int, default=50, 24 | help='the number of total epochs to run.') 25 | parser.add_argument('--batch_size', type=int, default=128, 26 | help='batch size. (default: 128)') 27 | parser.add_argument('--d_embed', type=int, default=100, 28 | help='the size of each embedding vector.') 29 | parser.add_argument('--d_proj', type=int, default=300, 30 | help='the size of each projection layer.') 31 | parser.add_argument('--d_hidden', type=int, default=300, 32 | help='the number of features in the hidden state.') 33 | parser.add_argument('--n_layers', type=int, default=1, 34 | help='the number of recurrent layers. (default: 1)') 35 | parser.add_argument('--log_every', type=int, default=50, 36 | help='iteration period to output log.') 37 | parser.add_argument('--lr', type=float, default=.001, 38 | help='initial learning rate.') 39 | parser.add_argument('--dev_every', type=int, default=1000, 40 | help='log period of validation results.') 41 | parser.add_argument('--save_every', type=int, default=1000, 42 | help='model checkpoint period.') 43 | parser.add_argument('--dp_ratio', type=float, default=0.2, 44 | help='probability of an element to be zeroed.') 45 | parser.add_argument('--no-bidirectional', action='store_false', dest='birnn', 46 | help='disable bidirectional LSTM.') 47 | parser.add_argument('--preserve-case', action='store_false', dest='lower', 48 | help='case-sensitivity.') 49 | parser.add_argument('--no-projection', action='store_false', dest='projection', 50 | help='disable projection layer.') 51 | parser.add_argument('--train_embed', action='store_false', dest='fix_emb', 52 | help='enable embedding word training.') 53 | parser.add_argument('--gpu', type=int, default=0, 54 | help='gpu id to use. (default: 0)') 55 | parser.add_argument('--save_path', type=str, default='results', 56 | help='save path of results.') 57 | parser.add_argument('--vector_cache', type=str, default=os.path.join(os.getcwd(), '.vector_cache/input_vectors.pt'), 58 | help='name of vector cache directory, which saved input word-vectors.') 59 | parser.add_argument('--word_vectors', type=str, default='glove.6B.100d', 60 | help='one of or a list containing instantiations of the GloVe, CharNGram, or Vectors classes. ' 61 | 'Alternatively, one of or a list of available pretrained vectors: ' 62 | 'charngram.100d fasttext.en.300d fasttext.simple.300d ' 63 | 'glove.42B.300d glove.840B.300d glove.twitter.27B.25d ' 64 | 'glove.twitter.27B.50d glove.twitter.27B.100d glove.twitter.27B.200d ' 65 | 'glove.6B.50d glove.6B.100d glove.6B.200d glove.6B.300d') 66 | parser.add_argument('--resume_snapshot', type=str, default='', 67 | help='model snapshot to resume.') 68 | parser.add_argument('--dry-run', action='store_true', 69 | help='run only a few iterations') 70 | args = parser.parse_args() 71 | return args 72 | -------------------------------------------------------------------------------- /mnist/README.md: -------------------------------------------------------------------------------- 1 | # Basic MNIST Example 2 | 3 | ```bash 4 | pip install -r requirements.txt 5 | python main.py 6 | # CUDA_VISIBLE_DEVICES=2 python main.py # to specify GPU id to ex.
2 7 | ``` 8 | -------------------------------------------------------------------------------- /mnist/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision==0.20.0 3 | -------------------------------------------------------------------------------- /mnist_forward_forward/README.md: -------------------------------------------------------------------------------- 1 | # Basic Forward-Forward Example 2 | 3 | This example implements the paper [The Forward-Forward Algorithm: Some Preliminary Investigations](https://arxiv.org/abs/2212.13345) by Geoffrey Hinton. 4 | 5 | The aim of this paper is to introduce a new learning procedure for neural networks, one that replaces the forward and backward passes of backpropagation with two forward passes. 6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | python main.py 10 | ``` 11 | 12 | The main.py script accepts the following arguments: 13 | 14 | ```bash 15 | optional arguments: 16 | -h, --help show this help message and exit 17 | --epochs EPOCHS number of epochs to train (default: 1000) 18 | --lr LR learning rate (default: 0.03) 19 | --no_cuda disables CUDA training 20 | --no_mps disables MPS training 21 | --seed SEED random seed (default: 1) 22 | --save_model For saving the current Model 23 | --train_size TRAIN_SIZE 24 | size of training set 25 | --threshold THRESHOLD 26 | threshold for training 27 | --test_size TEST_SIZE 28 | size of test set 29 | --save-model For Saving the current Model 30 | --log-interval LOG_INTERVAL 31 | logging training status interval 32 | ``` 33 | -------------------------------------------------------------------------------- /mnist_forward_forward/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision==0.20.0 3 | -------------------------------------------------------------------------------- /mnist_hogwild/README.md: -------------------------------------------------------------------------------- 1 | # MNIST Hogwild Example 2 | 3 | ```bash 4 | pip install -r requirements.txt 5 | python main.py 6 | ``` 7 | 8 | The main.py script accepts the following arguments: 9 | 10 | ```bash 11 | optional arguments: 12 | -h, --help show this help message and exit 13 | --batch_size input batch size for training (default: 64) 14 | --testing_batch_size input batch size for testing (default: 1000) 15 | --epochs EPOCHS number of epochs to train (default: 1000) 16 | --lr LR learning rate (default: 0.03) 17 | --momentum SGD momentum (default: 0.5) 18 | --seed SEED random seed (default: 1) 19 | --mps enables macOS GPU training 20 | --save_model For saving the current Model 21 | --log_interval how many batches to wait before logging training status 22 | --num_process how many training processes to use (default: 2) 23 | --cuda enables CUDA training 24 | --dry-run quickly check a single pass 25 | --save-model For Saving the current Model 26 | ``` 27 | -------------------------------------------------------------------------------- /mnist_hogwild/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision==0.20.0 3 | -------------------------------------------------------------------------------- /mnist_hogwild/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | 6 | 7 | def train(rank, args, model, device, dataset,
dataloader_kwargs): 8 | torch.manual_seed(args.seed + rank) 9 | 10 | train_loader = torch.utils.data.DataLoader(dataset, **dataloader_kwargs) 11 | 12 | optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) 13 | for epoch in range(1, args.epochs + 1): 14 | train_epoch(epoch, args, model, device, train_loader, optimizer) 15 | 16 | 17 | def test(args, model, device, dataset, dataloader_kwargs): 18 | torch.manual_seed(args.seed) 19 | 20 | test_loader = torch.utils.data.DataLoader(dataset, **dataloader_kwargs) 21 | 22 | test_epoch(model, device, test_loader) 23 | 24 | 25 | def train_epoch(epoch, args, model, device, data_loader, optimizer): 26 | model.train() 27 | pid = os.getpid() 28 | for batch_idx, (data, target) in enumerate(data_loader): 29 | optimizer.zero_grad() 30 | output = model(data.to(device)) 31 | loss = F.nll_loss(output, target.to(device)) 32 | loss.backward() 33 | optimizer.step() 34 | if batch_idx % args.log_interval == 0: 35 | print('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 36 | pid, epoch, batch_idx * len(data), len(data_loader.dataset), 37 | 100. * batch_idx / len(data_loader), loss.item())) 38 | if args.dry_run: 39 | break 40 | 41 | 42 | def test_epoch(model, device, data_loader): 43 | model.eval() 44 | test_loss = 0 45 | correct = 0 46 | with torch.no_grad(): 47 | for data, target in data_loader: 48 | output = model(data.to(device)) 49 | test_loss += F.nll_loss(output, target.to(device), reduction='sum').item() # sum up batch loss 50 | pred = output.max(1)[1] # get the index of the max log-probability 51 | correct += pred.eq(target.to(device)).sum().item() 52 | 53 | test_loss /= len(data_loader.dataset) 54 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 55 | test_loss, correct, len(data_loader.dataset), 56 | 100. * correct / len(data_loader.dataset))) 57 | -------------------------------------------------------------------------------- /mnist_rnn/README.md: -------------------------------------------------------------------------------- 1 | # Example of MNIST using RNN 2 | 3 | ## Motivation 4 | Create pytorch example similar to Official Tensorflow Keras RNN example using MNIST [here](https://www.tensorflow.org/guide/keras/rnn) 5 | 6 | ```bash 7 | pip install -r requirements.txt 8 | python main.py 9 | # CUDA_VISIBLE_DEVICES=2 python main.py # to specify GPU id to ex. 2 10 | ``` 11 | -------------------------------------------------------------------------------- /mnist_rnn/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision==0.20.0 3 | -------------------------------------------------------------------------------- /regression/README.md: -------------------------------------------------------------------------------- 1 | # Linear regression example 2 | 3 | Trains a single fully-connected layer to fit a 4th degree polynomial. 4 | -------------------------------------------------------------------------------- /regression/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | from itertools import count 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | POLY_DEGREE = 4 9 | W_target = torch.randn(POLY_DEGREE, 1) * 5 10 | b_target = torch.randn(1) * 5 11 | 12 | 13 | def make_features(x): 14 | """Builds features i.e. 
a matrix with columns [x, x^2, x^3, x^4].""" 15 | x = x.unsqueeze(1) 16 | return torch.cat([x ** i for i in range(1, POLY_DEGREE+1)], 1) 17 | 18 | 19 | def f(x): 20 | """Approximated function.""" 21 | return x.mm(W_target) + b_target.item() 22 | 23 | 24 | def poly_desc(W, b): 25 | """Creates a string description of a polynomial.""" 26 | result = 'y = ' 27 | for i, w in enumerate(W): 28 | result += '{:+.2f} x^{} '.format(w, i + 1) 29 | result += '{:+.2f}'.format(b[0]) 30 | return result 31 | 32 | 33 | def get_batch(batch_size=32): 34 | """Builds a batch i.e. (x, f(x)) pair.""" 35 | random = torch.randn(batch_size) 36 | x = make_features(random) 37 | y = f(x) 38 | return x, y 39 | 40 | 41 | # Define model 42 | fc = torch.nn.Linear(W_target.size(0), 1) 43 | 44 | for batch_idx in count(1): 45 | # Get data 46 | batch_x, batch_y = get_batch() 47 | 48 | # Reset gradients 49 | fc.zero_grad() 50 | 51 | # Forward pass 52 | output = F.smooth_l1_loss(fc(batch_x), batch_y) 53 | loss = output.item() 54 | 55 | # Backward pass 56 | output.backward() 57 | 58 | # Apply gradients 59 | for param in fc.parameters(): 60 | param.data.add_(-0.1 * param.grad) 61 | 62 | # Stop criterion 63 | if loss < 1e-3: 64 | break 65 | 66 | print('Loss: {:.6f} after {} batches'.format(loss, batch_idx)) 67 | print('==> Learned function:\t' + poly_desc(fc.weight.view(-1), fc.bias)) 68 | print('==> Actual function:\t' + poly_desc(W_target.view(-1), b_target)) 69 | -------------------------------------------------------------------------------- /reinforcement_learning/README.md: -------------------------------------------------------------------------------- 1 | # Reinforcement learning training example 2 | 3 | ```bash 4 | pip install -r requirements.txt 5 | # For REINFORCE: 6 | python reinforce.py 7 | # For actor critic: 8 | python actor_critic.py 9 | ``` 10 | -------------------------------------------------------------------------------- /reinforcement_learning/reinforce.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gym 3 | import numpy as np 4 | from itertools import count 5 | from collections import deque 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import torch.optim as optim 10 | from torch.distributions import Categorical 11 | 12 | 13 | parser = argparse.ArgumentParser(description='PyTorch REINFORCE example') 14 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G', 15 | help='discount factor (default: 0.99)') 16 | parser.add_argument('--seed', type=int, default=543, metavar='N', 17 | help='random seed (default: 543)') 18 | parser.add_argument('--render', action='store_true', 19 | help='render the environment') 20 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 21 | help='interval between training status logs (default: 10)') 22 | args = parser.parse_args() 23 | 24 | 25 | env = gym.make('CartPole-v1') 26 | env.reset(seed=args.seed) 27 | torch.manual_seed(args.seed) 28 | 29 | 30 | class Policy(nn.Module): 31 | def __init__(self): 32 | super(Policy, self).__init__() 33 | self.affine1 = nn.Linear(4, 128) 34 | self.dropout = nn.Dropout(p=0.6) 35 | self.affine2 = nn.Linear(128, 2) 36 | 37 | self.saved_log_probs = [] 38 | self.rewards = [] 39 | 40 | def forward(self, x): 41 | x = self.affine1(x) 42 | x = self.dropout(x) 43 | x = F.relu(x) 44 | action_scores = self.affine2(x) 45 | return F.softmax(action_scores, dim=1) 46 | 47 | 48 | policy = Policy() 49 | optimizer = 
optim.Adam(policy.parameters(), lr=1e-2) 50 | eps = np.finfo(np.float32).eps.item() 51 | 52 | 53 | def select_action(state): 54 | state = torch.from_numpy(state).float().unsqueeze(0) 55 | probs = policy(state) 56 | m = Categorical(probs) 57 | action = m.sample() 58 | policy.saved_log_probs.append(m.log_prob(action)) 59 | return action.item() 60 | 61 | 62 | def finish_episode(): 63 | R = 0 64 | policy_loss = [] 65 | returns = deque() 66 | for r in policy.rewards[::-1]: 67 | R = r + args.gamma * R 68 | returns.appendleft(R) 69 | returns = torch.tensor(returns) 70 | returns = (returns - returns.mean()) / (returns.std() + eps) 71 | for log_prob, R in zip(policy.saved_log_probs, returns): 72 | policy_loss.append(-log_prob * R) 73 | optimizer.zero_grad() 74 | policy_loss = torch.cat(policy_loss).sum() 75 | policy_loss.backward() 76 | optimizer.step() 77 | del policy.rewards[:] 78 | del policy.saved_log_probs[:] 79 | 80 | 81 | def main(): 82 | running_reward = 10 83 | for i_episode in count(1): 84 | state, _ = env.reset() 85 | ep_reward = 0 86 | for t in range(1, 10000): # Don't infinite loop while learning 87 | action = select_action(state) 88 | state, reward, done, _, _ = env.step(action) 89 | if args.render: 90 | env.render() 91 | policy.rewards.append(reward) 92 | ep_reward += reward 93 | if done: 94 | break 95 | 96 | running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward 97 | finish_episode() 98 | if i_episode % args.log_interval == 0: 99 | print('Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}'.format( 100 | i_episode, ep_reward, running_reward)) 101 | if running_reward > env.spec.reward_threshold: 102 | print("Solved! Running reward is now {} and " 103 | "the last episode runs to {} time steps!".format(running_reward, t)) 104 | break 105 | 106 | 107 | if __name__ == '__main__': 108 | main() 109 | -------------------------------------------------------------------------------- /reinforcement_learning/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | numpy 3 | gym 4 | pygame 5 | -------------------------------------------------------------------------------- /run_distributed_examples.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # This script runs through the code in each of the python examples. 4 | # The purpose is just as an integration test, not to actually train models in any meaningful way. 5 | # For that reason, most of these set epochs = 1 and --dry-run. 6 | # 7 | # Optionally specify a comma separated list of examples to run. 8 | # can be run as: 9 | # ./run_python_examples.sh "install_deps,run_all,clean" 10 | # to pip install dependencies (other than pytorch), run all examples, and remove temporary/changed data files. 11 | # Expects pytorch, torchvision to be installed. 
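# As a concrete (purely illustrative) invocation of this distributed runner:
#   ./run_distributed_examples.sh "distributed,clean"
# where "distributed" and "clean" are functions defined further down in this file.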
12 | 13 | BASE_DIR="$(pwd)/$(dirname $0)" 14 | source $BASE_DIR/utils.sh 15 | 16 | USE_CUDA=$(python -c "import torch; print(torch.cuda.is_available())") 17 | case $USE_CUDA in 18 | "True") 19 | echo "using cuda" 20 | CUDA=1 21 | CUDA_FLAG="--cuda" 22 | ;; 23 | "False") 24 | echo "not using cuda" 25 | CUDA=0 26 | CUDA_FLAG="" 27 | ;; 28 | "") 29 | exit 1; 30 | ;; 31 | esac 32 | 33 | function distributed() { 34 | start 35 | bash tensor_parallelism/run_example.sh tensor_parallelism/tensor_parallel_example.py || error "tensor parallel example failed" 36 | bash tensor_parallelism/run_example.sh tensor_parallelism/sequence_parallel_example.py || error "sequence parallel example failed" 37 | bash tensor_parallelism/run_example.sh tensor_parallelism/fsdp_tp_example.py || error "2D parallel example failed" 38 | python ddp/main.py || error "ddp example failed" 39 | } 40 | 41 | function clean() { 42 | cd $BASE_DIR 43 | echo "running clean to remove cruft" 44 | } 45 | 46 | function run_all() { 47 | distributed 48 | } 49 | 50 | # by default, run all examples 51 | if [ "" == "$EXAMPLES" ]; then 52 | run_all 53 | else 54 | for i in $(echo $EXAMPLES | sed "s/,/ /g") 55 | do 56 | echo "Starting $i" 57 | $i 58 | echo "Finished $i, status $?" 59 | done 60 | fi 61 | 62 | if [ "" == "$ERRORS" ]; then 63 | echo "Completed successfully with status $?" 64 | else 65 | echo "Some distributed examples failed:" 66 | printf "$ERRORS\n" 67 | #Exit with error (0-255) in case of failure in one of the tests. 68 | exit 1 69 | 70 | fi 71 | -------------------------------------------------------------------------------- /runtime.txt: -------------------------------------------------------------------------------- 1 | 3.8 2 | -------------------------------------------------------------------------------- /siamese_network/README.md: -------------------------------------------------------------------------------- 1 | # Siamese Network Example 2 | 3 | ```bash 4 | pip install -r requirements.txt 5 | python main.py 6 | # CUDA_VISIBLE_DEVICES=2 python main.py # to specify GPU id to ex. 2 7 | ``` 8 | -------------------------------------------------------------------------------- /siamese_network/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision==0.20.0 3 | -------------------------------------------------------------------------------- /super_resolution/README.md: -------------------------------------------------------------------------------- 1 | # Superresolution using an efficient sub-pixel convolutional neural network 2 | 3 | This example illustrates how to use the efficient sub-pixel convolution layer described in ["Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network" - Shi et al.](https://arxiv.org/abs/1609.05158) for increasing spatial resolution within your network for tasks such as superresolution. 4 | 5 | ``` 6 | usage: main.py [-h] --upscale_factor UPSCALE_FACTOR [--batchSize BATCHSIZE] 7 | [--testBatchSize TESTBATCHSIZE] [--nEpochs NEPOCHS] [--lr LR] 8 | [--cuda] [--threads THREADS] [--seed SEED] 9 | 10 | PyTorch Super Res Example 11 | 12 | optional arguments: 13 | -h, --help show this help message and exit 14 | --upscale_factor super resolution upscale factor 15 | --batchSize training batch size 16 | --testBatchSize testing batch size 17 | --nEpochs number of epochs to train for 18 | --lr Learning Rate. 
Default=0.01 19 | --cuda use cuda 20 | --mps enable GPU on macOS 21 | --threads number of threads for data loader to use Default=4 22 | --seed random seed to use. Default=123 23 | ``` 24 | 25 | This example trains a super-resolution network on the [BSD300 dataset](https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/), using crops from the 200 training images, and evaluating on crops of the 100 test images. A snapshot of the model is saved after every epoch with the filename `model_epoch_<epoch>.pth`. 26 | 27 | ## Example Usage: 28 | 29 | ### Train 30 | 31 | `python main.py --upscale_factor 3 --batchSize 4 --testBatchSize 100 --nEpochs 30 --lr 0.001` 32 | 33 | ### Super Resolve 34 | 35 | `python super_resolve.py --input_image dataset/BSDS300/images/test/16077.jpg --model model_epoch_500.pth --output_filename out.png` 36 | -------------------------------------------------------------------------------- /super_resolution/data.py: -------------------------------------------------------------------------------- 1 | from os.path import exists, join, basename 2 | from os import makedirs, remove 3 | from six.moves import urllib 4 | import tarfile 5 | from torchvision.transforms import Compose, CenterCrop, ToTensor, Resize 6 | 7 | from dataset import DatasetFromFolder 8 | 9 | 10 | def download_bsd300(dest="dataset"): 11 | output_image_dir = join(dest, "BSDS300/images") 12 | 13 | if not exists(output_image_dir): 14 | makedirs(dest) 15 | url = "http://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/BSDS300-images.tgz" 16 | print("downloading url ", url) 17 | 18 | data = urllib.request.urlopen(url) 19 | 20 | file_path = join(dest, basename(url)) 21 | with open(file_path, 'wb') as f: 22 | f.write(data.read()) 23 | 24 | print("Extracting data") 25 | with tarfile.open(file_path) as tar: 26 | for item in tar: 27 | tar.extract(item, dest) 28 | 29 | remove(file_path) 30 | 31 | return output_image_dir 32 | 33 | 34 | def calculate_valid_crop_size(crop_size, upscale_factor): 35 | return crop_size - (crop_size % upscale_factor) 36 | 37 | 38 | def input_transform(crop_size, upscale_factor): 39 | return Compose([ 40 | CenterCrop(crop_size), 41 | Resize(crop_size // upscale_factor), 42 | ToTensor(), 43 | ]) 44 | 45 | 46 | def target_transform(crop_size): 47 | return Compose([ 48 | CenterCrop(crop_size), 49 | ToTensor(), 50 | ]) 51 | 52 | 53 | def get_training_set(upscale_factor): 54 | root_dir = download_bsd300() 55 | train_dir = join(root_dir, "train") 56 | crop_size = calculate_valid_crop_size(256, upscale_factor) 57 | 58 | return DatasetFromFolder(train_dir, 59 | input_transform=input_transform(crop_size, upscale_factor), 60 | target_transform=target_transform(crop_size)) 61 | 62 | 63 | def get_test_set(upscale_factor): 64 | root_dir = download_bsd300() 65 | test_dir = join(root_dir, "test") 66 | crop_size = calculate_valid_crop_size(256, upscale_factor) 67 | 68 | return DatasetFromFolder(test_dir, 69 | input_transform=input_transform(crop_size, upscale_factor), 70 | target_transform=target_transform(crop_size)) 71 | -------------------------------------------------------------------------------- /super_resolution/dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | 3 | from os import listdir 4 | from os.path import join 5 | from PIL import Image 6 | 7 | 8 | def is_image_file(filename): 9 | return any(filename.endswith(extension) for extension in [".png", ".jpg", ".jpeg"]) 10 | 11 | 12 | def load_img(filepath): 13 | img =
Image.open(filepath).convert('YCbCr') 14 | y, _, _ = img.split() 15 | return y 16 | 17 | 18 | class DatasetFromFolder(data.Dataset): 19 | def __init__(self, image_dir, input_transform=None, target_transform=None): 20 | super(DatasetFromFolder, self).__init__() 21 | self.image_filenames = [join(image_dir, x) for x in listdir(image_dir) if is_image_file(x)] 22 | 23 | self.input_transform = input_transform 24 | self.target_transform = target_transform 25 | 26 | def __getitem__(self, index): 27 | input = load_img(self.image_filenames[index]) 28 | target = input.copy() 29 | if self.input_transform: 30 | input = self.input_transform(input) 31 | if self.target_transform: 32 | target = self.target_transform(target) 33 | 34 | return input, target 35 | 36 | def __len__(self): 37 | return len(self.image_filenames) 38 | -------------------------------------------------------------------------------- /super_resolution/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | from math import log10 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from torch.utils.data import DataLoader 9 | from model import Net 10 | from data import get_training_set, get_test_set 11 | 12 | # Training settings 13 | parser = argparse.ArgumentParser(description='PyTorch Super Res Example') 14 | parser.add_argument('--upscale_factor', type=int, required=True, help="super resolution upscale factor") 15 | parser.add_argument('--batchSize', type=int, default=64, help='training batch size') 16 | parser.add_argument('--testBatchSize', type=int, default=10, help='testing batch size') 17 | parser.add_argument('--nEpochs', type=int, default=2, help='number of epochs to train for') 18 | parser.add_argument('--lr', type=float, default=0.01, help='Learning Rate. Default=0.01') 19 | parser.add_argument('--cuda', action='store_true', help='use cuda?') 20 | parser.add_argument('--mps', action='store_true', default=False, help='enables macOS GPU training') 21 | parser.add_argument('--threads', type=int, default=4, help='number of threads for data loader to use') 22 | parser.add_argument('--seed', type=int, default=123, help='random seed to use. 
Default=123') 23 | opt = parser.parse_args() 24 | 25 | print(opt) 26 | 27 | if opt.cuda and not torch.cuda.is_available(): 28 | raise Exception("No GPU found, please run without --cuda") 29 | if not opt.mps and torch.backends.mps.is_available(): 30 | raise Exception("Found mps device, please run with --mps to enable macOS GPU") 31 | 32 | torch.manual_seed(opt.seed) 33 | use_mps = opt.mps and torch.backends.mps.is_available() 34 | 35 | if opt.cuda: 36 | device = torch.device("cuda") 37 | elif use_mps: 38 | device = torch.device("mps") 39 | else: 40 | device = torch.device("cpu") 41 | 42 | print('===> Loading datasets') 43 | train_set = get_training_set(opt.upscale_factor) 44 | test_set = get_test_set(opt.upscale_factor) 45 | training_data_loader = DataLoader(dataset=train_set, num_workers=opt.threads, batch_size=opt.batchSize, shuffle=True) 46 | testing_data_loader = DataLoader(dataset=test_set, num_workers=opt.threads, batch_size=opt.testBatchSize, shuffle=False) 47 | 48 | print('===> Building model') 49 | model = Net(upscale_factor=opt.upscale_factor).to(device) 50 | criterion = nn.MSELoss() 51 | 52 | optimizer = optim.Adam(model.parameters(), lr=opt.lr) 53 | 54 | 55 | def train(epoch): 56 | epoch_loss = 0 57 | for iteration, batch in enumerate(training_data_loader, 1): 58 | input, target = batch[0].to(device), batch[1].to(device) 59 | 60 | optimizer.zero_grad() 61 | loss = criterion(model(input), target) 62 | epoch_loss += loss.item() 63 | loss.backward() 64 | optimizer.step() 65 | 66 | print("===> Epoch[{}]({}/{}): Loss: {:.4f}".format(epoch, iteration, len(training_data_loader), loss.item())) 67 | 68 | print("===> Epoch {} Complete: Avg. Loss: {:.4f}".format(epoch, epoch_loss / len(training_data_loader))) 69 | 70 | 71 | def test(): 72 | avg_psnr = 0 73 | with torch.no_grad(): 74 | for batch in testing_data_loader: 75 | input, target = batch[0].to(device), batch[1].to(device) 76 | 77 | prediction = model(input) 78 | mse = criterion(prediction, target) 79 | psnr = 10 * log10(1 / mse.item()) 80 | avg_psnr += psnr 81 | print("===> Avg. 
PSNR: {:.4f} dB".format(avg_psnr / len(testing_data_loader))) 82 | 83 | 84 | def checkpoint(epoch): 85 | model_out_path = "model_epoch_{}.pth".format(epoch) 86 | torch.save(model, model_out_path) 87 | print("Checkpoint saved to {}".format(model_out_path)) 88 | 89 | for epoch in range(1, opt.nEpochs + 1): 90 | train(epoch) 91 | test() 92 | checkpoint(epoch) 93 | -------------------------------------------------------------------------------- /super_resolution/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.init as init 4 | 5 | 6 | class Net(nn.Module): 7 | def __init__(self, upscale_factor): 8 | super(Net, self).__init__() 9 | 10 | self.relu = nn.ReLU() 11 | self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2)) 12 | self.conv2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1)) 13 | self.conv3 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1)) 14 | self.conv4 = nn.Conv2d(32, upscale_factor ** 2, (3, 3), (1, 1), (1, 1)) 15 | self.pixel_shuffle = nn.PixelShuffle(upscale_factor) 16 | 17 | self._initialize_weights() 18 | 19 | def forward(self, x): 20 | x = self.relu(self.conv1(x)) 21 | x = self.relu(self.conv2(x)) 22 | x = self.relu(self.conv3(x)) 23 | x = self.pixel_shuffle(self.conv4(x)) 24 | return x 25 | 26 | def _initialize_weights(self): 27 | init.orthogonal_(self.conv1.weight, init.calculate_gain('relu')) 28 | init.orthogonal_(self.conv2.weight, init.calculate_gain('relu')) 29 | init.orthogonal_(self.conv3.weight, init.calculate_gain('relu')) 30 | init.orthogonal_(self.conv4.weight) 31 | -------------------------------------------------------------------------------- /super_resolution/super_resolve.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | from PIL import Image 5 | from torchvision.transforms import ToTensor 6 | 7 | import numpy as np 8 | 9 | # Training settings 10 | parser = argparse.ArgumentParser(description='PyTorch Super Res Example') 11 | parser.add_argument('--input_image', type=str, required=True, help='input image to use') 12 | parser.add_argument('--model', type=str, required=True, help='model file to use') 13 | parser.add_argument('--output_filename', type=str, help='where to save the output image') 14 | parser.add_argument('--cuda', action='store_true', help='use cuda') 15 | opt = parser.parse_args() 16 | 17 | print(opt) 18 | img = Image.open(opt.input_image).convert('YCbCr') 19 | y, cb, cr = img.split() 20 | 21 | model = torch.load(opt.model) 22 | img_to_tensor = ToTensor() 23 | input = img_to_tensor(y).view(1, -1, y.size[1], y.size[0]) 24 | 25 | if opt.cuda: 26 | model = model.cuda() 27 | input = input.cuda() 28 | 29 | out = model(input) 30 | out = out.cpu() 31 | out_img_y = out[0].detach().numpy() 32 | out_img_y *= 255.0 33 | out_img_y = out_img_y.clip(0, 255) 34 | out_img_y = Image.fromarray(np.uint8(out_img_y[0]), mode='L') 35 | 36 | out_img_cb = cb.resize(out_img_y.size, Image.BICUBIC) 37 | out_img_cr = cr.resize(out_img_y.size, Image.BICUBIC) 38 | out_img = Image.merge('YCbCr', [out_img_y, out_img_cb, out_img_cr]).convert('RGB') 39 | 40 | out_img.save(opt.output_filename) 41 | print('output image saved to ', opt.output_filename) 42 | -------------------------------------------------------------------------------- /time_sequence_prediction/README.md: -------------------------------------------------------------------------------- 1 | # Time Sequence 
Prediction 2 | 3 | This is a toy example for beginners to start with. It helps learn both PyTorch and time sequence prediction. Two LSTMCell units are used in this example to learn some sine wave signals starting at different phases. After learning the sine waves, the network tries to predict the signal values in the future. The results are shown in the picture below. 4 | 5 | ## Usage 6 | 7 | ``` 8 | python generate_sine_wave.py 9 | python train.py 10 | ``` 11 | 12 | ## Result 13 | 14 | The initial signal and the predicted results are shown in the image. We first give some initial signals (full line). The network will subsequently give some predicted results (dash line). It can be concluded that the network can generate new sine waves. 15 | ![image](https://cloud.githubusercontent.com/assets/1419566/24184438/e24f5280-0f08-11e7-8f8b-4d972b527a81.png) 16 | -------------------------------------------------------------------------------- /time_sequence_prediction/generate_sine_wave.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | np.random.seed(2) 5 | 6 | T = 20 7 | L = 1000 8 | N = 100 9 | 10 | x = np.empty((N, L), 'int64') 11 | x[:] = np.array(range(L)) + np.random.randint(-4 * T, 4 * T, N).reshape(N, 1) 12 | data = np.sin(x / 1.0 / T).astype('float64') 13 | torch.save(data, open('traindata.pt', 'wb')) 14 | -------------------------------------------------------------------------------- /time_sequence_prediction/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | matplotlib 3 | -------------------------------------------------------------------------------- /time_sequence_prediction/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | import numpy as np 7 | import matplotlib 8 | matplotlib.use('Agg') 9 | import matplotlib.pyplot as plt 10 | 11 | class Sequence(nn.Module): 12 | def __init__(self): 13 | super(Sequence, self).__init__() 14 | self.lstm1 = nn.LSTMCell(1, 51) 15 | self.lstm2 = nn.LSTMCell(51, 51) 16 | self.linear = nn.Linear(51, 1) 17 | 18 | def forward(self, input, future = 0): 19 | outputs = [] 20 | h_t = torch.zeros(input.size(0), 51, dtype=torch.double) 21 | c_t = torch.zeros(input.size(0), 51, dtype=torch.double) 22 | h_t2 = torch.zeros(input.size(0), 51, dtype=torch.double) 23 | c_t2 = torch.zeros(input.size(0), 51, dtype=torch.double) 24 | 25 | for input_t in input.split(1, dim=1): 26 | h_t, c_t = self.lstm1(input_t, (h_t, c_t)) 27 | h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2)) 28 | output = self.linear(h_t2) 29 | outputs += [output] 30 | for i in range(future):# if we should predict the future 31 | h_t, c_t = self.lstm1(output, (h_t, c_t)) 32 | h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2)) 33 | output = self.linear(h_t2) 34 | outputs += [output] 35 | outputs = torch.cat(outputs, dim=1) 36 | return outputs 37 | 38 | 39 | if __name__ == '__main__': 40 | parser = argparse.ArgumentParser() 41 | parser.add_argument('--steps', type=int, default=15, help='steps to run') 42 | opt = parser.parse_args() 43 | # set random seed to 0 44 | np.random.seed(0) 45 | torch.manual_seed(0) 46 | # load data and make training set 47 | data = torch.load('traindata.pt') 48 | input = torch.from_numpy(data[3:, :-1]) 49 | target = torch.from_numpy(data[3:, 1:]) 50 | test_input = 
torch.from_numpy(data[:3, :-1]) 51 | test_target = torch.from_numpy(data[:3, 1:]) 52 | # build the model 53 | seq = Sequence() 54 | seq.double() 55 | criterion = nn.MSELoss() 56 | # use LBFGS as optimizer since we can load the whole data to train 57 | optimizer = optim.LBFGS(seq.parameters(), lr=0.8) 58 | # begin to train 59 | for i in range(opt.steps): 60 | print('STEP: ', i) 61 | def closure(): 62 | optimizer.zero_grad() 63 | out = seq(input) 64 | loss = criterion(out, target) 65 | print('loss:', loss.item()) 66 | loss.backward() 67 | return loss 68 | optimizer.step(closure) 69 | # begin to predict, no need to track gradient here 70 | with torch.no_grad(): 71 | future = 1000 72 | pred = seq(test_input, future=future) 73 | loss = criterion(pred[:, :-future], test_target) 74 | print('test loss:', loss.item()) 75 | y = pred.detach().numpy() 76 | # draw the result 77 | plt.figure(figsize=(30,10)) 78 | plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize=30) 79 | plt.xlabel('x', fontsize=20) 80 | plt.ylabel('y', fontsize=20) 81 | plt.xticks(fontsize=20) 82 | plt.yticks(fontsize=20) 83 | def draw(yi, color): 84 | plt.plot(np.arange(input.size(1)), yi[:input.size(1)], color, linewidth = 2.0) 85 | plt.plot(np.arange(input.size(1), input.size(1) + future), yi[input.size(1):], color + ':', linewidth = 2.0) 86 | draw(y[0], 'r') 87 | draw(y[1], 'g') 88 | draw(y[2], 'b') 89 | plt.savefig('predict%d.pdf'%i) 90 | plt.close() 91 | -------------------------------------------------------------------------------- /utils.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script contains utility functions and initialization shared by the example runner scripts, 3 | # e.g. run_python_examples.sh and run_distributed_examples.sh 4 | 5 | BASE_DIR="$(pwd)/$(dirname $0)" 6 | EXAMPLES=$(echo $1 | sed -e 's/ //g') 7 | 8 | # Redirect 'python' calls to 'python3' 9 | python() { 10 | command python3 "$@" 11 | } 12 | 13 | ERRORS=${ERRORS-""} 14 | 15 | function error() { 16 | ERR=$1 17 | if [ "" == "$ERRORS" ]; then 18 | ERRORS="$ERR" 19 | else 20 | ERRORS="$ERRORS\n$ERR" 21 | fi 22 | } 23 | 24 | function install_deps() { 25 | echo "installing requirements" 26 | cat $BASE_DIR/*/requirements.txt | \ 27 | sort -u | \ 28 | # testing the installed version of torch, so don't pip install it. 29 | grep -vE '^torch$' | \ 30 | pip install -r /dev/stdin || \ 31 | { error "failed to install dependencies"; exit 1; } 32 | } 33 | 34 | function start() { 35 | EXAMPLE=${FUNCNAME[1]} 36 | cd $BASE_DIR/$EXAMPLE 37 | echo "Running example: $EXAMPLE" 38 | } 39 | -------------------------------------------------------------------------------- /vae/README.md: -------------------------------------------------------------------------------- 1 | # Basic VAE Example 2 | 3 | This is an improved implementation of the paper [Auto-Encoding Variational Bayes](http://arxiv.org/abs/1312.6114) by Kingma and Welling. 4 | It uses ReLUs and the Adam optimizer, instead of sigmoids and Adagrad. These changes make the network converge much faster.
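For readers who want the training objective spelled out, the loss optimized for a VAE combines a reconstruction term with a KL-divergence regularizer. The sketch below states the standard formulation for MNIST-sized inputs; it is an illustration and is not copied from this example's `main.py` (not reproduced here).

```python
import torch
import torch.nn.functional as F

def vae_loss(recon_x, x, mu, logvar):
    # Reconstruction term: how well the decoder reproduces the input pixels
    # (784 = 28 * 28 flattened MNIST pixels).
    bce = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum')
    # KL divergence between the approximate posterior N(mu, sigma^2) and N(0, I):
    # -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return bce + kld
```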
5 | 6 | ```bash 7 | pip install -r requirements.txt 8 | python main.py 9 | ``` 10 | 11 | The main.py script accepts the following arguments: 12 | 13 | ```bash 14 | optional arguments: 15 | --batch-size input batch size for training (default: 128) 16 | --epochs number of epochs to train (default: 10) 17 | --no-cuda disables CUDA training 18 | --mps enables GPU on macOS 19 | --seed random seed (default: 1) 20 | --log-interval how many batches to wait before logging training status 21 | ``` -------------------------------------------------------------------------------- /vae/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision==0.20.0 3 | tqdm 4 | six 5 | -------------------------------------------------------------------------------- /vae/results/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | -------------------------------------------------------------------------------- /word_language_model/README.md: -------------------------------------------------------------------------------- 1 | # Word-level Language Modeling using RNN and Transformer 2 | 3 | This example trains a multi-layer RNN (Elman, GRU, or LSTM) or Transformer on a language modeling task. By default, the training script uses the Wikitext-2 dataset, which is provided with the example. 4 | The trained model can then be used by the generate script to generate new text. 5 | 6 | ```bash 7 | python main.py --cuda --epochs 6 # Train an LSTM on Wikitext-2 with CUDA. 8 | python main.py --cuda --epochs 6 --tied # Train a tied LSTM on Wikitext-2 with CUDA. 9 | python main.py --cuda --tied # Train a tied LSTM on Wikitext-2 with CUDA for 40 epochs. 10 | python main.py --cuda --epochs 6 --model Transformer --lr 5 11 | # Train a Transformer model on Wikitext-2 with CUDA. 12 | 13 | python generate.py # Generate samples from the default model checkpoint. 14 | ``` 15 | 16 | The model uses the `nn.RNN` module (and its sister modules `nn.GRU` and `nn.LSTM`) or Transformer module (`nn.TransformerEncoder` and `nn.TransformerEncoderLayer`) which will automatically use the cuDNN backend if run on CUDA with cuDNN installed. 17 | 18 | During training, if a keyboard interrupt (Ctrl-C) is received, training is stopped and the current model is evaluated against the test dataset. 19 | 20 | The `main.py` script accepts the following arguments: 21 | 22 | ```bash 23 | optional arguments: 24 | -h, --help show this help message and exit 25 | --data DATA location of the data corpus 26 | --model MODEL type of network (RNN_TANH, RNN_RELU, LSTM, GRU, Transformer) 27 | --emsize EMSIZE size of word embeddings 28 | --nhid NHID number of hidden units per layer 29 | --nlayers NLAYERS number of layers 30 | --lr LR initial learning rate 31 | --clip CLIP gradient clipping 32 | --epochs EPOCHS upper epoch limit 33 | --batch_size N batch size 34 | --bptt BPTT sequence length 35 | --dropout DROPOUT dropout applied to layers (0 = no dropout) 36 | --tied tie the word embedding and softmax weights 37 | --seed SEED random seed 38 | --cuda use CUDA 39 | --mps enable GPU on macOS 40 | --log-interval N report interval 41 | --save SAVE path to save the final model 42 | --onnx-export ONNX_EXPORT 43 | path to export the final model in onnx format 44 | --nhead NHEAD the number of heads in the encoder/decoder of the transformer model 45 | --dry-run verify the code and the model 46 | ``` 47 | 48 | With these arguments, a variety of models can be tested.
49 | As an example, the following arguments produce slower but better models: 50 | 51 | ```bash 52 | python main.py --cuda --emsize 650 --nhid 650 --dropout 0.5 --epochs 40 53 | python main.py --cuda --emsize 650 --nhid 650 --dropout 0.5 --epochs 40 --tied 54 | python main.py --cuda --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40 55 | python main.py --cuda --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40 --tied 56 | ``` 57 | -------------------------------------------------------------------------------- /word_language_model/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | from io import open 3 | import torch 4 | 5 | class Dictionary(object): 6 | def __init__(self): 7 | self.word2idx = {} 8 | self.idx2word = [] 9 | 10 | def add_word(self, word): 11 | if word not in self.word2idx: 12 | self.idx2word.append(word) 13 | self.word2idx[word] = len(self.idx2word) - 1 14 | return self.word2idx[word] 15 | 16 | def __len__(self): 17 | return len(self.idx2word) 18 | 19 | 20 | class Corpus(object): 21 | def __init__(self, path): 22 | self.dictionary = Dictionary() 23 | self.train = self.tokenize(os.path.join(path, 'train.txt')) 24 | self.valid = self.tokenize(os.path.join(path, 'valid.txt')) 25 | self.test = self.tokenize(os.path.join(path, 'test.txt')) 26 | 27 | def tokenize(self, path): 28 | """Tokenizes a text file.""" 29 | assert os.path.exists(path) 30 | # Add words to the dictionary 31 | with open(path, 'r', encoding="utf8") as f: 32 | for line in f: 33 | words = line.split() + ['<eos>'] 34 | for word in words: 35 | self.dictionary.add_word(word) 36 | 37 | # Tokenize file content 38 | with open(path, 'r', encoding="utf8") as f: 39 | idss = [] 40 | for line in f: 41 | words = line.split() + ['<eos>'] 42 | ids = [] 43 | for word in words: 44 | ids.append(self.dictionary.word2idx[word]) 45 | idss.append(torch.tensor(ids).type(torch.int64)) 46 | ids = torch.cat(idss) 47 | 48 | return ids 49 | -------------------------------------------------------------------------------- /word_language_model/data/wikitext-2/README: -------------------------------------------------------------------------------- 1 | This is raw data from the wikitext-2 dataset. 2 | 3 | See https://www.salesforce.com/products/einstein/ai-research/the-wikitext-dependency-language-modeling-dataset/ 4 | -------------------------------------------------------------------------------- /word_language_model/generate.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Language Modeling on Wikitext-2 3 | # 4 | # This file generates new sentences sampled from the language model. 5 | # 6 | ############################################################################### 7 | import argparse 8 | import torch 9 | 10 | import data 11 | 12 | parser = argparse.ArgumentParser(description='PyTorch Wikitext-2 Language Model') 13 | # Model parameters.
14 | parser.add_argument('--data', type=str, default='./data/wikitext-2', 15 | help='location of the data corpus') 16 | parser.add_argument('--checkpoint', type=str, default='./model.pt', 17 | help='model checkpoint to use') 18 | parser.add_argument('--outf', type=str, default='generated.txt', 19 | help='output file for generated text') 20 | parser.add_argument('--words', type=int, default='1000', 21 | help='number of words to generate') 22 | parser.add_argument('--seed', type=int, default=1111, 23 | help='random seed') 24 | parser.add_argument('--cuda', action='store_true', 25 | help='use CUDA') 26 | parser.add_argument('--mps', action='store_true', default=False, 27 | help='enables macOS GPU training') 28 | parser.add_argument('--temperature', type=float, default=1.0, 29 | help='temperature - higher will increase diversity') 30 | parser.add_argument('--log-interval', type=int, default=100, 31 | help='reporting interval') 32 | args = parser.parse_args() 33 | 34 | # Set the random seed manually for reproducibility. 35 | torch.manual_seed(args.seed) 36 | if torch.cuda.is_available(): 37 | if not args.cuda: 38 | print("WARNING: You have a CUDA device, so you should probably run with --cuda.") 39 | if torch.backends.mps.is_available(): 40 | if not args.mps: 41 | print("WARNING: You have mps device, to enable macOS GPU run with --mps.") 42 | 43 | use_mps = args.mps and torch.backends.mps.is_available() 44 | if args.cuda: 45 | device = torch.device("cuda") 46 | elif use_mps: 47 | device = torch.device("mps") 48 | else: 49 | device = torch.device("cpu") 50 | 51 | if args.temperature < 1e-3: 52 | parser.error("--temperature has to be greater or equal 1e-3.") 53 | 54 | with open(args.checkpoint, 'rb') as f: 55 | model = torch.load(f, map_location=device) 56 | model.eval() 57 | 58 | corpus = data.Corpus(args.data) 59 | ntokens = len(corpus.dictionary) 60 | 61 | is_transformer_model = hasattr(model, 'model_type') and model.model_type == 'Transformer' 62 | if not is_transformer_model: 63 | hidden = model.init_hidden(1) 64 | input = torch.randint(ntokens, (1, 1), dtype=torch.long).to(device) 65 | 66 | with open(args.outf, 'w') as outf: 67 | with torch.no_grad(): # no tracking history 68 | for i in range(args.words): 69 | if is_transformer_model: 70 | output = model(input, False) 71 | word_weights = output[-1].squeeze().div(args.temperature).exp().cpu() 72 | word_idx = torch.multinomial(word_weights, 1)[0] 73 | word_tensor = torch.Tensor([[word_idx]]).long().to(device) 74 | input = torch.cat([input, word_tensor], 0) 75 | else: 76 | output, hidden = model(input, hidden) 77 | word_weights = output.squeeze().div(args.temperature).exp().cpu() 78 | word_idx = torch.multinomial(word_weights, 1)[0] 79 | input.fill_(word_idx) 80 | 81 | word = corpus.dictionary.idx2word[word_idx] 82 | 83 | outf.write(word + ('\n' if i % 20 == 19 else ' ')) 84 | 85 | if i % args.log_interval == 0: 86 | print('| Generated {}/{} words'.format(i, args.words)) 87 | -------------------------------------------------------------------------------- /word_language_model/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | --------------------------------------------------------------------------------
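As a closing note on `generate.py` above: the per-step sampling it performs reduces to dividing the model's output scores by a temperature, exponentiating, and drawing from the resulting weights. A standalone sketch (illustrative only, not a file from this repository):

```python
import torch

def sample_next_word(scores: torch.Tensor, temperature: float = 1.0) -> int:
    # Higher temperatures flatten the distribution and increase diversity;
    # torch.multinomial accepts unnormalized non-negative weights.
    weights = scores.squeeze().div(temperature).exp()
    return torch.multinomial(weights, num_samples=1).item()

scores = torch.log_softmax(torch.tensor([2.0, 1.0, 0.1]), dim=0)
print(sample_next_word(scores, temperature=0.7))
print(sample_next_word(scores, temperature=2.0))
```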