├── LICENSE ├── PPO_practice.ipynb ├── README.md ├── code ├── LICENSE ├── README.md ├── benchmarks │ ├── README.md │ ├── benchmark_gpt_dummy.py │ ├── benchmark_gpt_dummy.sh │ └── benchmark_opt_lora_dummy.py ├── chatgpt │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── dataset │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── reward_dataset.cpython-310.pyc │ │ │ └── utils.cpython-310.pyc │ │ ├── reward_dataset.py │ │ └── utils.py │ ├── experience_maker │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── base.cpython-310.pyc │ │ │ └── naive.cpython-310.pyc │ │ ├── base.py │ │ └── naive.py │ ├── models │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── generation.cpython-310.pyc │ │ │ ├── generation_utils.cpython-310.pyc │ │ │ ├── lora.cpython-310.pyc │ │ │ ├── loss.cpython-310.pyc │ │ │ └── utils.cpython-310.pyc │ │ ├── base │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── actor.cpython-310.pyc │ │ │ │ ├── critic.cpython-310.pyc │ │ │ │ └── reward_model.cpython-310.pyc │ │ │ ├── actor.py │ │ │ ├── critic.py │ │ │ └── reward_model.py │ │ ├── bloom │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── bloom_actor.cpython-310.pyc │ │ │ │ ├── bloom_critic.cpython-310.pyc │ │ │ │ └── bloom_rm.cpython-310.pyc │ │ │ ├── bloom_actor.py │ │ │ ├── bloom_critic.py │ │ │ └── bloom_rm.py │ │ ├── generation.py │ │ ├── generation_utils.py │ │ ├── gpt │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── gpt_actor.cpython-310.pyc │ │ │ │ ├── gpt_critic.cpython-310.pyc │ │ │ │ └── gpt_rm.cpython-310.pyc │ │ │ ├── gpt_actor.py │ │ │ ├── gpt_critic.py │ │ │ └── gpt_rm.py │ │ ├── lora.py │ │ ├── loss.py │ │ ├── opt │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── opt_actor.cpython-310.pyc │ │ │ │ ├── opt_critic.cpython-310.pyc │ │ │ │ └── opt_rm.cpython-310.pyc │ │ │ ├── opt_actor.py │ │ │ ├── opt_critic.py │ │ │ └── opt_rm.py │ │ └── utils.py │ ├── replay_buffer │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── base.cpython-310.pyc │ │ │ ├── naive.cpython-310.pyc │ │ │ └── utils.cpython-310.pyc │ │ ├── base.py │ │ ├── naive.py │ │ └── utils.py │ └── trainer │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── base.cpython-310.pyc │ │ ├── ppo.cpython-310.pyc │ │ ├── rm.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ │ ├── base.py │ │ ├── callbacks │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── base.cpython-310.pyc │ │ │ ├── performance_evaluator.cpython-310.pyc │ │ │ └── save_checkpoint.cpython-310.pyc │ │ ├── base.py │ │ ├── performance_evaluator.py │ │ └── save_checkpoint.py │ │ ├── ppo.py │ │ ├── rm.py │ │ ├── strategies │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── base.cpython-310.pyc │ │ │ ├── colossalai.cpython-310.pyc │ │ │ ├── ddp.cpython-310.pyc │ │ │ ├── naive.cpython-310.pyc │ │ │ └── sampler.cpython-310.pyc │ │ ├── base.py │ │ ├── colossalai.py │ │ ├── ddp.py │ │ ├── naive.py │ │ └── sampler.py │ │ └── utils.py ├── examples │ ├── README.md │ ├── inference.py │ ├── requirements.txt │ ├── test_ci.sh │ ├── train_dummy.py │ ├── train_dummy.sh │ ├── train_prompts.py │ ├── train_prompts.sh │ ├── train_reward_model.py │ └── train_rm.sh ├── pytest.ini ├── requirements-test.txt ├── 
requirements.txt ├── setup.py ├── tests │ ├── __init__.py │ ├── test_checkpoint.py │ └── test_data.py ├── utils.py └── version.txt ├── data ├── stage1. domain_adaptive_pretraining │ ├── BTS.csv │ ├── domain_adaptive_kuksundo_pretrain.jsonl │ ├── domain_adaptive_pretrain_ive.jsonl │ ├── 국선도.csv │ ├── 아이브.csv │ └── 템플릿.csv ├── stage1. domain_instruction_tuning │ ├── ive_instruction_test.jsonl │ ├── ive_instruction_train.jsonl │ ├── kuksundo_instruction_test.jsonl │ └── kuksundo_instruction_train.jsonl ├── stage2. RM │ ├── ive_test_rm.jsonl │ ├── ive_train_rm.jsonl │ ├── kuksundo_test_rm.jsonl │ └── kuksundo_train_rm.jsonl └── stage3. PPO │ ├── ive_test_ppo.jsonl │ ├── ive_train_ppo.jsonl │ ├── kuksundo_test_ppo.jsonl │ └── kuksundo_train_ppo.jsonl ├── mygpt_실습.ipynb └── requirements.txt /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 oglee815 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Update 2 | - 2024-6-13: Merged the data generation and training notebooks into a single Colab notebook 3 | - 2023-10-24: Added line numbers and cleaned up the code 4 | 5 | # mygpt-lecture 6 | This repository contains the materials for the lecture 'Building MyGPT with Your Own Data' (나만의 데이터로 만드는 MyGPT 강의). 7 | 8 | It was created with reference to the material below.
9 | https://github.com/airobotlab/KoChatGPT 10 | 11 | # Full Table of Contents 12 | 13 | 14 | # Hands-on Session Order 15 | 16 | 17 | # Training Code 18 | [mygpt_실습.ipynb](https://colab.research.google.com/github/oglee815/mygpt-lecture/blob/main/mygpt_실습.ipynb) 19 | 20 | # Data Generation Code 21 | [mygpt_실습.ipynb](https://colab.research.google.com/github/oglee815/mygpt-lecture/blob/main/mygpt_실습.ipynb) 22 | - Generates data automatically via the ChatGPT API 23 | 24 | 25 | # PPO Reinforcement Learning Practice Code (Lunar Lander v2) 26 | - [PPO_practice.ipynb](https://colab.research.google.com/github/oglee815/mygpt-lecture/blob/main/PPO_practice.ipynb) 27 | 28 | 29 | # Example Training Results 30 | - Trained ChatGPT Stages 1, 2, and 3 on SKT-KoGPT2 with data from the Namuwiki 'IVE' (아이브) category, then compared the results against the Stage 1 SFT model 31 | 32 | - Perhaps thanks to the KL penalty, the two models surprisingly often produce identical outputs 33 | 34 | # Questions About These Materials 35 | - 이현제, oglee815@gmail.com 36 | - h8.lee@samsung.com 37 | -------------------------------------------------------------------------------- /code/README.md: -------------------------------------------------------------------------------- 1 | # RLHF - Colossal-AI 2 | 3 | ## Table of Contents 4 | 5 | - [What is RLHF - Colossal-AI?](#intro) 6 | - [How to Install?](#install) 7 | - [The Plan](#the-plan) 8 | - [How can you participate in open source?](#invitation-to-open-source-contribution) 9 | --- 10 | ## Intro 11 | Implementation of RLHF (Reinforcement Learning from Human Feedback) powered by Colossal-AI. It supports distributed training and offloading, which can fit extremely large models. More details can be found in the [blog](https://www.hpc-ai.tech/blog/colossal-ai-chatgpt). 12 | 13 |
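Before the architecture diagrams, it helps to pin down the central quantity: the reward PPO optimizes is the reward-model score shaped by a KL penalty against the frozen initial model (see `compute_reward` as used in `chatgpt/experience_maker/naive.py` below). A minimal sketch of that shaping, assuming the common approximate-KL form; the function name and exact averaging here are illustrative, not the repo's exact implementation:

```python
import torch

def shaped_reward(r: torch.Tensor,              # (B,)  reward-model score per sequence
                  log_probs: torch.Tensor,      # (B, A) actor log-probs of generated tokens
                  base_log_probs: torch.Tensor, # (B, A) initial-model log-probs, same tokens
                  kl_coef: float = 0.1) -> torch.Tensor:
    # Approximate KL(actor || initial) per sequence over the generated actions,
    # then penalize the scalar reward: r - kl_coef * KL. This is what keeps the
    # tuned policy close to the SFT model during PPO.
    approx_kl = (log_probs - base_log_probs).mean(dim=-1)
    return r - kl_coef * approx_kl
```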

14 | 15 |

16 | 17 | ## Training process (step 3) 18 |
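In outline, each step-3 episode alternates rollouts (making experience) with PPO updates. A conceptual sketch of the loop, assuming the `ExperienceMaker` and replay-buffer interfaces defined under `chatgpt/`; `prompt_loader` and `train_on_batch` are hypothetical stand-ins, not trainer internals:

```python
def step3_loop(experience_maker, replay_buffer, prompt_loader, train_on_batch,
               num_episodes: int, max_timesteps: int, update_timesteps: int,
               max_epochs: int) -> None:
    # Conceptual sketch of the step-3 loop (not the exact PPOTrainer internals):
    # the actor generates sequences; the frozen initial model, reward model and
    # critic score them into an Experience; every `update_timesteps` rollouts,
    # the actor/critic are optimized for `max_epochs` PPO epochs.
    for _ in range(num_episodes):
        for timestep in range(1, max_timesteps + 1):
            prompts = next(prompt_loader)                   # prompt iterator (hypothetical)
            experience = experience_maker.make_experience(prompts, do_sample=True)
            replay_buffer.append(experience)
            if timestep % update_timesteps == 0:
                for _ in range(max_epochs):
                    for batch in replay_buffer:             # PPO policy + value losses
                        train_on_batch(batch)               # update step (hypothetical)
                replay_buffer.clear()
```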

19 | 20 |

21 |

22 | 23 |

24 | 25 | 26 | ## Install 27 | ```shell 28 | pip install . 29 | ``` 30 | 31 | ## Usage 32 | 33 | The main entrypoint is `Trainer`. Only the PPO trainer is supported for now. Several training strategies are available: 34 | 35 | - NaiveStrategy: the simplest strategy. Trains on a single GPU. 36 | - DDPStrategy: uses `torch.nn.parallel.DistributedDataParallel`. Trains on multiple GPUs. 37 | - ColossalAIStrategy: uses ColossalAI's Gemini and ZeRO. It eliminates model duplication on each GPU and supports offloading. It's very useful when training large models on multiple GPUs. 38 | 39 | Simplest usage: 40 | 41 | ```python 42 | from chatgpt.trainer import PPOTrainer 43 | from chatgpt.trainer.strategies import ColossalAIStrategy 44 | from chatgpt.models.gpt import GPTActor, GPTCritic 45 | from chatgpt.models.base import RewardModel 46 | from copy import deepcopy 47 | from colossalai.nn.optimizer import HybridAdam 48 | 49 | strategy = ColossalAIStrategy() 50 | 51 | with strategy.model_init_context(): 52 | # init your model here 53 | # load pretrained gpt2 54 | actor = GPTActor(pretrained='gpt2') 55 | critic = GPTCritic() 56 | initial_model = deepcopy(actor).cuda() 57 | reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).cuda() 58 | 59 | actor_optim = HybridAdam(actor.parameters(), lr=5e-6) 60 | critic_optim = HybridAdam(critic.parameters(), lr=5e-6) 61 | 62 | # prepare models and optimizers 63 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare( 64 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model) 65 | 66 | # load saved model checkpoint after preparing 67 | strategy.load_model(actor, 'actor_checkpoint.pt', strict=False) 68 | # load saved optimizer checkpoint after preparing 69 | strategy.load_optimizer(actor_optim, 'actor_optim_checkpoint.pt') 70 | 71 | trainer = PPOTrainer(strategy, 72 | actor, 73 | critic, 74 | reward_model, 75 | initial_model, 76 | actor_optim, 77 | critic_optim, 78 | ...) 79 | 80 | trainer.fit(dataset, ...) 81 | 82 | # save model checkpoint after fitting on only rank0 83 | strategy.save_model(actor, 'actor_checkpoint.pt', only_rank0=True) 84 | # save optimizer checkpoint on all ranks 85 | strategy.save_optimizer(actor_optim, 'actor_optim_checkpoint.pt', only_rank0=False) 86 | ``` 87 | 88 | For more details, see `examples/`. 89 | 90 | We also support training the reward model on real-world data. See `examples/train_reward_model.py`. 91 | 92 | ## FAQ 93 | 94 | ### How to save/load checkpoints 95 | 96 | To load a pretrained model, you can simply use Hugging Face pretrained models: 97 | 98 | ```python 99 | # load OPT-350m pretrained model 100 | actor = OPTActor(pretrained='facebook/opt-350m') 101 | ``` 102 | 103 | To save a model checkpoint: 104 | 105 | ```python 106 | # save model checkpoint on rank0 only 107 | strategy.save_model(actor, 'actor_checkpoint.pt', only_rank0=True) 108 | ``` 109 | 110 | This function must be called after `strategy.prepare()`. 111 | 112 | For the DDP strategy, model weights are replicated on all ranks. For the ColossalAI strategy, model weights may be sharded, but an all-gather is applied before returning the state dict. You can set `only_rank0=True` for both, which saves the checkpoint only on rank 0 and reduces disk usage. The checkpoint is saved in float32.
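Putting the model side together before the optimizer discussion below, a minimal save/load round trip; the per-rank optimizer filename is a hypothetical convention for the sharded ColossalAI case, not an API requirement:

```python
import torch.distributed as dist

# Model: (all-gathered if sharded, then) written once, on rank 0 only.
strategy.save_model(actor, 'actor_checkpoint.pt', only_rank0=True)

# Optimizer: under the ColossalAI strategy the states stay sharded, so every
# rank writes its own file; a rank suffix keeps the shards apart (hypothetical
# naming scheme chosen for this sketch).
rank = dist.get_rank() if dist.is_initialized() else 0
strategy.save_optimizer(actor_optim, f'actor_optim_checkpoint_rank{rank}.pt', only_rank0=False)

# Later, after strategy.prepare(...), each rank reloads its own part.
strategy.load_model(actor, 'actor_checkpoint.pt', strict=False)
strategy.load_optimizer(actor_optim, f'actor_optim_checkpoint_rank{rank}.pt')
```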
113 | 114 | To save an optimizer checkpoint: 115 | 116 | ```python 117 | # save optimizer checkpoint on all ranks 118 | strategy.save_optimizer(actor_optim, 'actor_optim_checkpoint.pt', only_rank0=False) 119 | ``` 120 | 121 | For the DDP strategy, optimizer states are replicated on all ranks, so you can set `only_rank0=True`. But for the ColossalAI strategy, optimizer states are sharded over all ranks and no all-gather is applied, so you must set `only_rank0=False`. That is to say, each rank will save a checkpoint; when loading, each rank should load the corresponding part. 122 | 123 | Note that different strategies may produce optimizer checkpoints of different shapes. 124 | 125 | To load a model checkpoint: 126 | 127 | ```python 128 | # load saved model checkpoint after preparing 129 | strategy.load_model(actor, 'actor_checkpoint.pt', strict=False) 130 | ``` 131 | 132 | To load an optimizer checkpoint: 133 | 134 | ```python 135 | # load saved optimizer checkpoint after preparing 136 | strategy.load_optimizer(actor_optim, 'actor_optim_checkpoint.pt') 137 | ``` 138 | 139 | ## The Plan 140 | 141 | - [x] implement PPO fine-tuning 142 | - [x] implement reward model training 143 | - [x] support LoRA 144 | - [x] support inference 145 | - [ ] open-source the reward model weights 146 | - [ ] support llama from [facebook](https://github.com/facebookresearch/llama) 147 | - [ ] support BoN (best-of-N sampling) 148 | - [ ] implement PPO-ptx fine-tuning 149 | - [ ] integrate with Ray 150 | - [ ] support more RL paradigms, like Implicit Language Q-Learning (ILQL) 151 | - [ ] support chain of thought via [langchain](https://github.com/hwchase17/langchain) 152 | 153 | ### Real-time progress 154 | You can follow our progress on the GitHub project board: 155 | 156 | [Open ChatGPT](https://github.com/orgs/hpcaitech/projects/17/views/1) 157 | 158 | ## Invitation to open-source contribution 159 | Following the successful examples of [BLOOM](https://bigscience.huggingface.co/) and [Stable Diffusion](https://en.wikipedia.org/wiki/Stable_Diffusion), any and all developers and partners with computing power, datasets, or models are welcome to join and build the Colossal-AI community, working towards the era of big AI models starting from the replication of ChatGPT! 160 | 161 | You may contact us or participate in the following ways: 162 | 1. [Leaving a Star ⭐](https://github.com/hpcaitech/ColossalAI/stargazers) to show your support. Thanks! 163 | 2. Posting an [issue](https://github.com/hpcaitech/ColossalAI/issues/new/choose) or submitting a PR on GitHub, following the guidelines in [Contributing](https://github.com/hpcaitech/ColossalAI/blob/main/CONTRIBUTING.md). 164 | 3. Joining the Colossal-AI community on 165 | [Slack](https://join.slack.com/t/colossalaiworkspace/shared_invite/zt-z7b26eeb-CBp7jouvu~r0~lcFzX832w) 166 | and [WeChat(微信)](https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png "qrcode") to share your ideas. 167 | 4. Sending your official proposal by email to contact@hpcaitech.com 168 | 169 | Thanks so much to all of our amazing contributors! 170 | 171 | ## Quick Preview 172 |

173 | 174 |

175 | 176 | - Up to 7.73 times faster for single server training and 1.42 times faster for single-GPU inference 177 | 178 |

179 | 180 |

181 | 182 | - Up to 10.3x growth in model capacity on one GPU 183 | - A mini demo training process requires only 1.62GB of GPU memory (any consumer-grade GPU) 184 | 185 |

186 | 187 |

188 | 189 | - Increases the capacity of the fine-tuned model by up to 3.7 times on a single GPU 190 | - While keeping a sufficiently high running speed 191 | 192 | ## Citations 193 | 194 | ```bibtex 195 | @article{Hu2021LoRALA, 196 | title = {LoRA: Low-Rank Adaptation of Large Language Models}, 197 | author = {Edward J. Hu and Yelong Shen and Phillip Wallis and Zeyuan Allen-Zhu and Yuanzhi Li and Shean Wang and Weizhu Chen}, 198 | journal = {ArXiv}, 199 | year = {2021}, 200 | volume = {abs/2106.09685} 201 | } 202 | 203 | @article{ouyang2022training, 204 | title={Training language models to follow instructions with human feedback}, 205 | author={Ouyang, Long and Wu, Jeff and Jiang, Xu and Almeida, Diogo and Wainwright, Carroll L and Mishkin, Pamela and Zhang, Chong and Agarwal, Sandhini and Slama, Katarina and Ray, Alex and others}, 206 | journal={arXiv preprint arXiv:2203.02155}, 207 | year={2022} 208 | } 209 | ``` 210 | -------------------------------------------------------------------------------- /code/benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarks 2 | 3 | ## Benchmark GPT on dummy prompt data 4 | 5 | We provide various GPT models (the string in parentheses is the corresponding model name used in this script): 6 | 7 | - GPT2-S (s) 8 | - GPT2-M (m) 9 | - GPT2-L (l) 10 | - GPT2-XL (xl) 11 | - GPT2-4B (4b) 12 | - GPT2-6B (6b) 13 | - GPT2-8B (8b) 14 | - GPT2-10B (10b) 15 | - GPT2-12B (12b) 16 | - GPT2-15B (15b) 17 | - GPT2-18B (18b) 18 | - GPT2-20B (20b) 19 | - GPT2-24B (24b) 20 | - GPT2-28B (28b) 21 | - GPT2-32B (32b) 22 | - GPT2-36B (36b) 23 | - GPT2-40B (40b) 24 | - GPT3 (175b) 25 | 26 | We also provide various training strategies: 27 | 28 | - ddp: torch DDP 29 | - colossalai_gemini: ColossalAI GeminiDDP with `placement_policy="cuda"`, like zero3 30 | - colossalai_gemini_cpu: ColossalAI GeminiDDP with `placement_policy="cpu"`, like zero3-offload 31 | - colossalai_zero2: ColossalAI zero2 32 | - colossalai_zero2_cpu: ColossalAI zero2-offload 33 | - colossalai_zero1: ColossalAI zero1 34 | - colossalai_zero1_cpu: ColossalAI zero1-offload 35 | 36 | Currently, only launching with `torchrun` is supported, e.g.: 37 | 38 | ```shell 39 | # run GPT2-S on single-node single-GPU with min batch size 40 | torchrun --standalone --nproc_per_node 1 benchmark_gpt_dummy.py --model s --strategy ddp --experience_batch_size 1 --train_batch_size 1 41 | # run GPT2-XL on single-node 4-GPU 42 | torchrun --standalone --nproc_per_node 4 benchmark_gpt_dummy.py --model xl --strategy colossalai_zero2 43 | # run GPT3 on 8-node 8-GPU 44 | torchrun --nnodes 8 --nproc_per_node 8 \ 45 | --rdzv_id=$JOB_ID --rdzv_backend=c10d --rdzv_endpoint=$HOST_NODE_ADDR \ 46 | benchmark_gpt_dummy.py --model 175b --strategy colossalai_gemini 47 | ``` 48 | 49 | > ⚠ Batch sizes in CLI args and the reported throughput/TFLOPS are all per-GPU values. 50 | 51 | In this benchmark, we assume the model architectures/sizes of the actor and critic are the same for simplicity. But in practice, to reduce training cost, we may use a smaller critic. 52 | 53 | We also provide a simple shell script to run a set of benchmarks. It only supports benchmarking on a single node, but it is easy to run on multiple nodes by modifying the launch command in the script.
54 | 55 | Usage: 56 | 57 | ```shell 58 | # run for GPUS=(1 2 4 8) x strategy=("ddp" "colossalai_zero2" "colossalai_gemini" "colossalai_zero2_cpu" "colossalai_gemini_cpu") x model=("s" "m" "l" "xl" "2b" "4b" "6b" "8b" "10b") x batch_size=(1 2 4 8 16 32 64 128 256) 59 | ./benchmark_gpt_dummy.sh 60 | # run for GPUS=2 x strategy=("ddp" "colossalai_zero2" "colossalai_gemini" "colossalai_zero2_cpu" "colossalai_gemini_cpu") x model=("s" "m" "l" "xl" "2b" "4b" "6b" "8b" "10b") x batch_size=(1 2 4 8 16 32 64 128 256) 61 | ./benchmark_gpt_dummy.sh 2 62 | # run for GPUS=2 x strategy=ddp x model=("s" "m" "l" "xl" "2b" "4b" "6b" "8b" "10b") x batch_size=(1 2 4 8 16 32 64 128 256) 63 | ./benchmark_gpt_dummy.sh 2 ddp 64 | # run for GPUS=2 x strategy=ddp x model=l x batch_size=(1 2 4 8 16 32 64 128 256) 65 | ./benchmark_gpt_dummy.sh 2 ddp l 66 | ``` 67 | 68 | ## Benchmark OPT with LoRA on dummy prompt data 69 | 70 | We provide various OPT models (the string in parentheses is the corresponding model name used in this script): 71 | 72 | - OPT-125M (125m) 73 | - OPT-350M (350m) 74 | - OPT-700M (700m) 75 | - OPT-1.3B (1.3b) 76 | - OPT-2.7B (2.7b) 77 | - OPT-3.5B (3.5b) 78 | - OPT-5.5B (5.5b) 79 | - OPT-6.7B (6.7b) 80 | - OPT-10B (10b) 81 | - OPT-13B (13b) 82 | 83 | Currently, only launching with `torchrun` is supported, e.g.: 84 | 85 | ```shell 86 | # run OPT-125M with no lora (lora_rank=0) on single-node single-GPU with min batch size 87 | torchrun --standalone --nproc_per_node 1 benchmark_opt_lora_dummy.py --model 125m --strategy ddp --experience_batch_size 1 --train_batch_size 1 --lora_rank 0 88 | # run OPT-350M with lora_rank=4 on single-node 4-GPU 89 | torchrun --standalone --nproc_per_node 4 benchmark_opt_lora_dummy.py --model 350m --strategy colossalai_zero2 --lora_rank 4 90 | ``` 91 | 92 | > ⚠ Batch sizes in CLI args and the reported throughput/TFLOPS are all per-GPU values. 93 | 94 | In this benchmark, we assume the model architectures/sizes of the actor and critic are the same for simplicity. But in practice, to reduce training cost, we may use a smaller critic.
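To sanity-check the reported per-GPU numbers, a rough conversion from token throughput to TFLOPS using the common ~6·N FLOPs-per-token estimate for a forward+backward pass (a back-of-the-envelope approximation, not necessarily how `PerformanceEvaluator` counts FLOPs):

```python
def estimate_tflops_per_gpu(num_params: float, tokens_per_sec_per_gpu: float) -> float:
    # ~6 FLOPs per parameter per token: ~2N forward + ~4N backward.
    return 6 * num_params * tokens_per_sec_per_gpu / 1e12

# e.g. a 10B-parameter model processing 500 tokens/s on one GPU:
print(f"{estimate_tflops_per_gpu(10e9, 500):.1f} TFLOPS")  # -> 30.0
```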
95 | -------------------------------------------------------------------------------- /code/benchmarks/benchmark_gpt_dummy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from copy import deepcopy 3 | 4 | import torch 5 | import torch.distributed as dist 6 | import torch.nn as nn 7 | from chatgpt.models.base import RewardModel 8 | from chatgpt.models.gpt import GPTActor, GPTCritic 9 | from chatgpt.trainer import PPOTrainer 10 | from chatgpt.trainer.callbacks import PerformanceEvaluator 11 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, Strategy 12 | from torch.optim import Adam 13 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config 14 | from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer 15 | 16 | from colossalai.nn.optimizer import HybridAdam 17 | 18 | 19 | def get_model_numel(model: nn.Module, strategy: Strategy) -> int: 20 | numel = sum(p.numel() for p in model.parameters()) 21 | if isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3 and strategy.shard_init: 22 | numel *= dist.get_world_size() 23 | return numel 24 | 25 | 26 | def preprocess_batch(samples) -> dict: 27 | input_ids = torch.stack(samples) 28 | attention_mask = torch.ones_like(input_ids, dtype=torch.long) 29 | return {'input_ids': input_ids, 'attention_mask': attention_mask} 30 | 31 | 32 | def print_rank_0(*args, **kwargs) -> None: 33 | if dist.get_rank() == 0: 34 | print(*args, **kwargs) 35 | 36 | 37 | def print_model_numel(model_dict: dict) -> None: 38 | B = 1024**3 39 | M = 1024**2 40 | K = 1024 41 | outputs = '' 42 | for name, numel in model_dict.items(): 43 | outputs += f'{name}: ' 44 | if numel >= B: 45 | outputs += f'{numel / B:.2f} B\n' 46 | elif numel >= M: 47 | outputs += f'{numel / M:.2f} M\n' 48 | elif numel >= K: 49 | outputs += f'{numel / K:.2f} K\n' 50 | else: 51 | outputs += f'{numel}\n' 52 | print_rank_0(outputs) 53 | 54 | 55 | def get_gpt_config(model_name: str) -> GPT2Config: 56 | model_map = { 57 | 's': GPT2Config(), 58 | 'm': GPT2Config(n_embd=1024, n_layer=24, n_head=16), 59 | 'l': GPT2Config(n_embd=1280, n_layer=36, n_head=20), 60 | 'xl': GPT2Config(n_embd=1600, n_layer=48, n_head=25), 61 | '2b': GPT2Config(n_embd=2048, n_layer=40, n_head=16), 62 | '4b': GPT2Config(n_embd=2304, n_layer=64, n_head=16), 63 | '6b': GPT2Config(n_embd=4096, n_layer=30, n_head=16), 64 | '8b': GPT2Config(n_embd=4096, n_layer=40, n_head=16), 65 | '10b': GPT2Config(n_embd=4096, n_layer=50, n_head=16), 66 | '12b': GPT2Config(n_embd=4096, n_layer=60, n_head=16), 67 | '15b': GPT2Config(n_embd=4096, n_layer=78, n_head=16), 68 | '18b': GPT2Config(n_embd=4096, n_layer=90, n_head=16), 69 | '20b': GPT2Config(n_embd=8192, n_layer=25, n_head=16), 70 | '24b': GPT2Config(n_embd=8192, n_layer=30, n_head=16), 71 | '28b': GPT2Config(n_embd=8192, n_layer=35, n_head=16), 72 | '32b': GPT2Config(n_embd=8192, n_layer=40, n_head=16), 73 | '36b': GPT2Config(n_embd=8192, n_layer=45, n_head=16), 74 | '40b': GPT2Config(n_embd=8192, n_layer=50, n_head=16), 75 | '175b': GPT2Config(n_positions=2048, n_embd=12288, n_layer=96, n_head=96), 76 | } 77 | try: 78 | return model_map[model_name] 79 | except KeyError: 80 | raise ValueError(f'Unknown model "{model_name}"') 81 | 82 | 83 | def main(args): 84 | if args.strategy == 'ddp': 85 | strategy = DDPStrategy() 86 | elif args.strategy == 'colossalai_gemini': 87 | strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) 88 | elif args.strategy == 
'colossalai_gemini_cpu': 89 | strategy = ColossalAIStrategy(stage=3, placement_policy='cpu', initial_scale=2**5) 90 | elif args.strategy == 'colossalai_zero2': 91 | strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') 92 | elif args.strategy == 'colossalai_zero2_cpu': 93 | strategy = ColossalAIStrategy(stage=2, placement_policy='cpu') 94 | elif args.strategy == 'colossalai_zero1': 95 | strategy = ColossalAIStrategy(stage=1, placement_policy='cuda') 96 | elif args.strategy == 'colossalai_zero1_cpu': 97 | strategy = ColossalAIStrategy(stage=1, placement_policy='cpu') 98 | else: 99 | raise ValueError(f'Unsupported strategy "{args.strategy}"') 100 | 101 | model_config = get_gpt_config(args.model) 102 | 103 | with strategy.model_init_context(): 104 | actor = GPTActor(config=model_config).cuda() 105 | critic = GPTCritic(config=model_config).cuda() 106 | 107 | initial_model = deepcopy(actor).cuda() 108 | reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).cuda() 109 | 110 | actor_numel = get_model_numel(actor, strategy) 111 | critic_numel = get_model_numel(critic, strategy) 112 | initial_model_numel = get_model_numel(initial_model, strategy) 113 | reward_model_numel = get_model_numel(reward_model, strategy) 114 | print_model_numel({ 115 | 'Actor': actor_numel, 116 | 'Critic': critic_numel, 117 | 'Initial model': initial_model_numel, 118 | 'Reward model': reward_model_numel 119 | }) 120 | performance_evaluator = PerformanceEvaluator(actor_numel, 121 | critic_numel, 122 | initial_model_numel, 123 | reward_model_numel, 124 | enable_grad_checkpoint=False, 125 | ignore_episodes=1) 126 | 127 | if args.strategy.startswith('colossalai'): 128 | actor_optim = HybridAdam(actor.parameters(), lr=5e-6) 129 | critic_optim = HybridAdam(critic.parameters(), lr=5e-6) 130 | else: 131 | actor_optim = Adam(actor.parameters(), lr=5e-6) 132 | critic_optim = Adam(critic.parameters(), lr=5e-6) 133 | 134 | tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 135 | tokenizer.pad_token = tokenizer.eos_token 136 | 137 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare( 138 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model) 139 | 140 | trainer = PPOTrainer(strategy, 141 | actor, 142 | critic, 143 | reward_model, 144 | initial_model, 145 | actor_optim, 146 | critic_optim, 147 | max_epochs=args.max_epochs, 148 | train_batch_size=args.train_batch_size, 149 | experience_batch_size=args.experience_batch_size, 150 | tokenizer=preprocess_batch, 151 | max_length=512, 152 | do_sample=True, 153 | temperature=1.0, 154 | top_k=50, 155 | pad_token_id=tokenizer.pad_token_id, 156 | eos_token_id=tokenizer.eos_token_id, 157 | callbacks=[performance_evaluator]) 158 | 159 | random_prompts = torch.randint(tokenizer.vocab_size, (1000, 400), device=torch.cuda.current_device()) 160 | trainer.fit(random_prompts, 161 | num_episodes=args.num_episodes, 162 | max_timesteps=args.max_timesteps, 163 | update_timesteps=args.update_timesteps) 164 | 165 | print_rank_0(f'Peak CUDA mem: {torch.cuda.max_memory_allocated()/1024**3:.2f} GB') 166 | 167 | 168 | if __name__ == '__main__': 169 | parser = argparse.ArgumentParser() 170 | parser.add_argument('--model', default='s') 171 | parser.add_argument('--strategy', 172 | choices=[ 173 | 'ddp', 'colossalai_gemini', 'colossalai_gemini_cpu', 'colossalai_zero2', 174 | 'colossalai_zero2_cpu', 'colossalai_zero1', 'colossalai_zero1_cpu' 175 | ], 176 | default='ddp') 177 | parser.add_argument('--num_episodes', 
type=int, default=3) 178 | parser.add_argument('--max_timesteps', type=int, default=8) 179 | parser.add_argument('--update_timesteps', type=int, default=8) 180 | parser.add_argument('--max_epochs', type=int, default=3) 181 | parser.add_argument('--train_batch_size', type=int, default=8) 182 | parser.add_argument('--experience_batch_size', type=int, default=8) 183 | args = parser.parse_args() 184 | main(args) 185 | -------------------------------------------------------------------------------- /code/benchmarks/benchmark_gpt_dummy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Usage: $0 3 | set -xu 4 | 5 | BASE=$(realpath $(dirname $0)) 6 | 7 | 8 | PY_SCRIPT=${BASE}/benchmark_gpt_dummy.py 9 | export OMP_NUM_THREADS=8 10 | 11 | function tune_batch_size() { 12 | # we found when experience batch size is equal to train batch size 13 | # peak CUDA memory usage of making experience phase is less than or equal to that of training phase 14 | # thus, experience batch size can be larger than or equal to train batch size 15 | for bs in 1 2 4 8 16 32 64 128 256; do 16 | torchrun --standalone --nproc_per_node $1 $PY_SCRIPT --model $2 --strategy $3 --experience_batch_size $bs --train_batch_size $bs || return 1 17 | done 18 | } 19 | 20 | if [ $# -eq 0 ]; then 21 | num_gpus=(1 2 4 8) 22 | else 23 | num_gpus=($1) 24 | fi 25 | 26 | if [ $# -le 1 ]; then 27 | strategies=("ddp" "colossalai_zero2" "colossalai_gemini" "colossalai_zero2_cpu" "colossalai_gemini_cpu") 28 | else 29 | strategies=($2) 30 | fi 31 | 32 | if [ $# -le 2 ]; then 33 | models=("s" "m" "l" "xl" "2b" "4b" "6b" "8b" "10b") 34 | else 35 | models=($3) 36 | fi 37 | 38 | 39 | for num_gpu in ${num_gpus[@]}; do 40 | for strategy in ${strategies[@]}; do 41 | for model in ${models[@]}; do 42 | tune_batch_size $num_gpu $model $strategy || break 43 | done 44 | done 45 | done 46 | -------------------------------------------------------------------------------- /code/benchmarks/benchmark_opt_lora_dummy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from copy import deepcopy 3 | 4 | import torch 5 | import torch.distributed as dist 6 | import torch.nn as nn 7 | from chatgpt.models.base import RewardModel 8 | from chatgpt.models.opt import OPTActor, OPTCritic 9 | from chatgpt.trainer import PPOTrainer 10 | from chatgpt.trainer.callbacks import PerformanceEvaluator 11 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, Strategy 12 | from torch.optim import Adam 13 | from transformers import AutoTokenizer 14 | from transformers.models.opt.configuration_opt import OPTConfig 15 | 16 | from colossalai.nn.optimizer import HybridAdam 17 | 18 | 19 | def get_model_numel(model: nn.Module, strategy: Strategy) -> int: 20 | numel = sum(p.numel() for p in model.parameters()) 21 | if isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3 and strategy.shard_init: 22 | numel *= dist.get_world_size() 23 | return numel 24 | 25 | 26 | def preprocess_batch(samples) -> dict: 27 | input_ids = torch.stack(samples) 28 | attention_mask = torch.ones_like(input_ids, dtype=torch.long) 29 | return {'input_ids': input_ids, 'attention_mask': attention_mask} 30 | 31 | 32 | def print_rank_0(*args, **kwargs) -> None: 33 | if dist.get_rank() == 0: 34 | print(*args, **kwargs) 35 | 36 | 37 | def print_model_numel(model_dict: dict) -> None: 38 | B = 1024**3 39 | M = 1024**2 40 | K = 1024 41 | outputs = '' 42 | for name, numel in 
model_dict.items(): 43 | outputs += f'{name}: ' 44 | if numel >= B: 45 | outputs += f'{numel / B:.2f} B\n' 46 | elif numel >= M: 47 | outputs += f'{numel / M:.2f} M\n' 48 | elif numel >= K: 49 | outputs += f'{numel / K:.2f} K\n' 50 | else: 51 | outputs += f'{numel}\n' 52 | print_rank_0(outputs) 53 | 54 | 55 | def get_gpt_config(model_name: str) -> OPTConfig: 56 | model_map = { 57 | '125m': OPTConfig.from_pretrained('facebook/opt-125m'), 58 | '350m': OPTConfig(hidden_size=1024, ffn_dim=4096, num_hidden_layers=24, num_attention_heads=16), 59 | '700m': OPTConfig(hidden_size=1280, ffn_dim=5120, num_hidden_layers=36, num_attention_heads=20), 60 | '1.3b': OPTConfig.from_pretrained('facebook/opt-1.3b'), 61 | '2.7b': OPTConfig.from_pretrained('facebook/opt-2.7b'), 62 | '3.5b': OPTConfig(hidden_size=3072, ffn_dim=12288, num_hidden_layers=32, num_attention_heads=32), 63 | '5.5b': OPTConfig(hidden_size=3840, ffn_dim=15360, num_hidden_layers=32, num_attention_heads=32), 64 | '6.7b': OPTConfig.from_pretrained('facebook/opt-6.7b'), 65 | '10b': OPTConfig(hidden_size=5120, ffn_dim=20480, num_hidden_layers=32, num_attention_heads=32), 66 | '13b': OPTConfig.from_pretrained('facebook/opt-13b'), 67 | } 68 | try: 69 | return model_map[model_name] 70 | except KeyError: 71 | raise ValueError(f'Unknown model "{model_name}"') 72 | 73 | 74 | def main(args): 75 | if args.strategy == 'ddp': 76 | strategy = DDPStrategy() 77 | elif args.strategy == 'colossalai_gemini': 78 | strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) 79 | elif args.strategy == 'colossalai_gemini_cpu': 80 | strategy = ColossalAIStrategy(stage=3, placement_policy='cpu', initial_scale=2**5) 81 | elif args.strategy == 'colossalai_zero2': 82 | strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') 83 | elif args.strategy == 'colossalai_zero2_cpu': 84 | strategy = ColossalAIStrategy(stage=2, placement_policy='cpu') 85 | elif args.strategy == 'colossalai_zero1': 86 | strategy = ColossalAIStrategy(stage=1, placement_policy='cuda') 87 | elif args.strategy == 'colossalai_zero1_cpu': 88 | strategy = ColossalAIStrategy(stage=1, placement_policy='cpu') 89 | else: 90 | raise ValueError(f'Unsupported strategy "{args.strategy}"') 91 | 92 | torch.cuda.set_per_process_memory_fraction(args.cuda_mem_frac) 93 | 94 | model_config = get_gpt_config(args.model) 95 | 96 | with strategy.model_init_context(): 97 | actor = OPTActor(config=model_config, lora_rank=args.lora_rank).cuda() 98 | critic = OPTCritic(config=model_config, lora_rank=args.lora_rank).cuda() 99 | 100 | initial_model = deepcopy(actor).cuda() 101 | reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).cuda() 102 | 103 | actor_numel = get_model_numel(actor, strategy) 104 | critic_numel = get_model_numel(critic, strategy) 105 | initial_model_numel = get_model_numel(initial_model, strategy) 106 | reward_model_numel = get_model_numel(reward_model, strategy) 107 | print_model_numel({ 108 | 'Actor': actor_numel, 109 | 'Critic': critic_numel, 110 | 'Initial model': initial_model_numel, 111 | 'Reward model': reward_model_numel 112 | }) 113 | performance_evaluator = PerformanceEvaluator(actor_numel, 114 | critic_numel, 115 | initial_model_numel, 116 | reward_model_numel, 117 | enable_grad_checkpoint=False, 118 | ignore_episodes=1) 119 | 120 | if args.strategy.startswith('colossalai'): 121 | actor_optim = HybridAdam(actor.parameters(), lr=5e-6) 122 | critic_optim = HybridAdam(critic.parameters(), lr=5e-6) 123 | else: 124 | actor_optim = 
Adam(actor.parameters(), lr=5e-6) 125 | critic_optim = Adam(critic.parameters(), lr=5e-6) 126 | 127 | tokenizer = AutoTokenizer.from_pretrained('facebook/opt-350m') 128 | tokenizer.pad_token = tokenizer.eos_token 129 | 130 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare( 131 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model) 132 | 133 | trainer = PPOTrainer(strategy, 134 | actor, 135 | critic, 136 | reward_model, 137 | initial_model, 138 | actor_optim, 139 | critic_optim, 140 | max_epochs=args.max_epochs, 141 | train_batch_size=args.train_batch_size, 142 | experience_batch_size=args.experience_batch_size, 143 | tokenizer=preprocess_batch, 144 | max_length=512, 145 | do_sample=True, 146 | temperature=1.0, 147 | top_k=50, 148 | pad_token_id=tokenizer.pad_token_id, 149 | eos_token_id=tokenizer.eos_token_id, 150 | callbacks=[performance_evaluator]) 151 | 152 | random_prompts = torch.randint(tokenizer.vocab_size, (1000, 400), device=torch.cuda.current_device()) 153 | trainer.fit(random_prompts, 154 | num_episodes=args.num_episodes, 155 | max_timesteps=args.max_timesteps, 156 | update_timesteps=args.update_timesteps) 157 | 158 | print_rank_0(f'Peak CUDA mem: {torch.cuda.max_memory_allocated()/1024**3:.2f} GB') 159 | 160 | 161 | if __name__ == '__main__': 162 | parser = argparse.ArgumentParser() 163 | parser.add_argument('--model', default='125m') 164 | parser.add_argument('--strategy', 165 | choices=[ 166 | 'ddp', 'colossalai_gemini', 'colossalai_gemini_cpu', 'colossalai_zero2', 167 | 'colossalai_zero2_cpu', 'colossalai_zero1', 'colossalai_zero1_cpu' 168 | ], 169 | default='ddp') 170 | parser.add_argument('--num_episodes', type=int, default=3) 171 | parser.add_argument('--max_timesteps', type=int, default=8) 172 | parser.add_argument('--update_timesteps', type=int, default=8) 173 | parser.add_argument('--max_epochs', type=int, default=3) 174 | parser.add_argument('--train_batch_size', type=int, default=8) 175 | parser.add_argument('--experience_batch_size', type=int, default=8) 176 | parser.add_argument('--lora_rank', type=int, default=4) 177 | parser.add_argument('--cuda_mem_frac', type=float, default=1.0) 178 | args = parser.parse_args() 179 | main(args) 180 | -------------------------------------------------------------------------------- /code/chatgpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/__init__.py -------------------------------------------------------------------------------- /code/chatgpt/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .reward_dataset import RewardDataset 2 | from .utils import is_rank_0 3 | 4 | __all__ = ['RewardDataset', 'is_rank_0'] 5 | -------------------------------------------------------------------------------- /code/chatgpt/dataset/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/dataset/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/dataset/__pycache__/reward_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/dataset/__pycache__/reward_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/dataset/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/dataset/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/dataset/reward_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | from torch.utils.data import Dataset 4 | from tqdm import tqdm 5 | 6 | from .utils import is_rank_0 7 | 8 | 9 | class RewardDataset(Dataset): 10 | """ 11 | Dataset for reward model 12 | 13 | Args: 14 | dataset: dataset for reward model 15 | tokenizer: tokenizer for reward model 16 | max_length: max length of input 17 | """ 18 | 19 | def __init__(self, dataset, tokenizer: Callable, max_length: int) -> None: 20 | super().__init__() 21 | self.chosen = [] 22 | self.reject = [] 23 | for data in tqdm(dataset, disable=not is_rank_0()): 24 | prompt = data['prompt'] 25 | 26 | chosen = prompt + data['chosen'] + tokenizer.eos_token #"<|endoftext|>" 27 | chosen_token = tokenizer(chosen, 28 | max_length=max_length, 29 | padding="max_length", 30 | truncation=True, 31 | return_tensors="pt") 32 | self.chosen.append({ 33 | "input_ids": chosen_token['input_ids'], 34 | "attention_mask": chosen_token['attention_mask'] 35 | }) 36 | 37 | reject = prompt + data['rejected'] + tokenizer.eos_token 38 | reject_token = tokenizer(reject, 39 | max_length=max_length, 40 | padding="max_length", 41 | truncation=True, 42 | return_tensors="pt") 43 | self.reject.append({ 44 | "input_ids": reject_token['input_ids'], 45 | "attention_mask": reject_token['attention_mask'] 46 | }) 47 | 48 | def __len__(self): 49 | length = len(self.chosen) 50 | return length 51 | 52 | def __getitem__(self, idx): 53 | return self.chosen[idx]["input_ids"], self.chosen[idx]["attention_mask"], self.reject[idx]["input_ids"], self.reject[idx]["attention_mask"] 54 | -------------------------------------------------------------------------------- /code/chatgpt/dataset/utils.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | 3 | 4 | def is_rank_0() -> bool: 5 | return not dist.is_initialized() or dist.get_rank() == 0 6 | -------------------------------------------------------------------------------- /code/chatgpt/experience_maker/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Experience, ExperienceMaker 2 | from .naive import NaiveExperienceMaker 3 | 4 | __all__ = ['Experience', 'ExperienceMaker', 'NaiveExperienceMaker'] 5 | -------------------------------------------------------------------------------- /code/chatgpt/experience_maker/__pycache__/__init__.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/experience_maker/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/experience_maker/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/experience_maker/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/experience_maker/__pycache__/naive.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/experience_maker/__pycache__/naive.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/experience_maker/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from dataclasses import dataclass 3 | from typing import Optional 4 | 5 | import torch 6 | import torch.nn as nn 7 | from chatgpt.models.base import Actor 8 | 9 | 10 | @dataclass 11 | class Experience: 12 | """Experience is a batch of data. 13 | These data should have the same sequence length and number of actions. 14 | Left padding for sequences is applied. 15 | 16 | Shapes of each tensor: 17 | sequences: (B, S) 18 | action_log_probs: (B, A) 19 | values: (B) 20 | reward: (B) 21 | advantages: (B) 22 | attention_mask: (B, S) 23 | action_mask: (B, A) 24 | 25 | "A" is the number of actions.
26 | """ 27 | sequences: torch.Tensor 28 | action_log_probs: torch.Tensor 29 | values: torch.Tensor 30 | reward: torch.Tensor 31 | advantages: torch.Tensor 32 | attention_mask: Optional[torch.LongTensor] 33 | action_mask: Optional[torch.BoolTensor] 34 | 35 | @torch.no_grad() 36 | def to_device(self, device: torch.device) -> None: 37 | self.sequences = self.sequences.to(device) 38 | self.action_log_probs = self.action_log_probs.to(device) 39 | self.values = self.values.to(device) 40 | self.reward = self.reward.to(device) 41 | self.advantages = self.advantages.to(device) 42 | if self.attention_mask is not None: 43 | self.attention_mask = self.attention_mask.to(device) 44 | if self.action_mask is not None: 45 | self.action_mask = self.action_mask.to(device) 46 | 47 | def pin_memory(self): 48 | self.sequences = self.sequences.pin_memory() 49 | self.action_log_probs = self.action_log_probs.pin_memory() 50 | self.values = self.values.pin_memory() 51 | self.reward = self.reward.pin_memory() 52 | self.advantages = self.advantages.pin_memory() 53 | if self.attention_mask is not None: 54 | self.attention_mask = self.attention_mask.pin_memory() 55 | if self.action_mask is not None: 56 | self.action_mask = self.action_mask.pin_memory() 57 | return self 58 | 59 | 60 | class ExperienceMaker(ABC): 61 | 62 | def __init__(self, 63 | actor: Actor, 64 | critic: nn.Module, 65 | reward_model: nn.Module, 66 | initial_model: Actor, 67 | kl_coef: float = 0.1) -> None: 68 | super().__init__() 69 | self.actor = actor 70 | self.critic = critic 71 | self.reward_model = reward_model 72 | self.initial_model = initial_model 73 | self.kl_coef = kl_coef 74 | 75 | @abstractmethod 76 | def make_experience(self, input_ids: torch.Tensor, **generate_kwargs) -> Experience: 77 | pass 78 | -------------------------------------------------------------------------------- /code/chatgpt/experience_maker/naive.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from chatgpt.models.utils import compute_reward, normalize 3 | 4 | from .base import Experience, ExperienceMaker 5 | 6 | 7 | class NaiveExperienceMaker(ExperienceMaker): 8 | """ 9 | Naive experience maker. 
10 | """ 11 | 12 | @torch.no_grad() 13 | def make_experience(self, input_ids: torch.Tensor, **generate_kwargs) -> Experience: 14 | self.actor.eval() 15 | self.critic.eval() 16 | self.initial_model.eval() 17 | self.reward_model.eval() 18 | 19 | sequences, attention_mask, action_mask = self.actor.generate(input_ids, 20 | return_action_mask=True, 21 | **generate_kwargs) 22 | num_actions = action_mask.size(1) 23 | 24 | action_log_probs = self.actor(sequences, num_actions, attention_mask) 25 | base_action_log_probs = self.initial_model(sequences, num_actions, attention_mask) 26 | value = self.critic(sequences, action_mask, attention_mask) 27 | r = self.reward_model(sequences, attention_mask) 28 | 29 | reward = compute_reward(r, self.kl_coef, action_log_probs, base_action_log_probs, action_mask=action_mask) 30 | 31 | advantage = reward - value 32 | # TODO(ver217): maybe normalize adv 33 | if advantage.ndim == 1: 34 | advantage = advantage.unsqueeze(-1) 35 | 36 | return Experience(sequences, action_log_probs, value, reward, advantage, attention_mask, action_mask) 37 | -------------------------------------------------------------------------------- /code/chatgpt/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Actor, Critic, RewardModel 2 | from .loss import PairWiseLoss, PolicyLoss, PPOPtxActorLoss, ValueLoss 3 | 4 | __all__ = ['Actor', 'Critic', 'RewardModel', 'PolicyLoss', 'ValueLoss', 'PPOPtxActorLoss', 'PairWiseLoss'] 5 | -------------------------------------------------------------------------------- /code/chatgpt/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/__pycache__/generation.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/__pycache__/generation.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/__pycache__/generation_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/__pycache__/generation_utils.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/__pycache__/lora.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/__pycache__/lora.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/__pycache__/loss.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/__pycache__/loss.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/__pycache__/utils.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .actor import Actor 2 | from .critic import Critic 3 | from .reward_model import RewardModel 4 | 5 | __all__ = ['Actor', 'Critic', 'RewardModel'] 6 | -------------------------------------------------------------------------------- /code/chatgpt/models/base/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/base/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/base/__pycache__/actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/base/__pycache__/actor.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/base/__pycache__/critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/base/__pycache__/critic.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/base/__pycache__/reward_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/base/__pycache__/reward_model.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/base/actor.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple, Union 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from ..generation import generate 8 | from ..lora import LoRAModule 9 | from ..utils import log_probs_from_logits 10 | 11 | 12 | class Actor(LoRAModule): 13 | """ 14 | Actor model base class. 15 | 16 | Args: 17 | model (nn.Module): Actor Model. 18 | lora_rank (int): LoRA rank. 19 | lora_train_bias (str): LoRA bias training mode. 
20 | """ 21 | 22 | def __init__(self, model: nn.Module, lora_rank: int = 0, lora_train_bias: str = 'none') -> None: 23 | super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias) 24 | self.model = model 25 | self.convert_to_lora() 26 | 27 | @torch.no_grad() 28 | def generate( 29 | self, 30 | input_ids: torch.Tensor, 31 | return_action_mask: bool = True, 32 | **kwargs 33 | ) -> Union[Tuple[torch.LongTensor, torch.LongTensor], Tuple[torch.LongTensor, torch.LongTensor, torch.BoolTensor]]: 34 | sequences = generate(self.model, input_ids, **kwargs) 35 | attention_mask = None 36 | pad_token_id = kwargs.get('pad_token_id', None) 37 | if pad_token_id is not None: 38 | attention_mask = sequences.not_equal(pad_token_id).to(dtype=torch.long, device=sequences.device) 39 | if not return_action_mask: 40 | return sequences, attention_mask, None 41 | input_len = input_ids.size(1) 42 | eos_token_id = kwargs.get('eos_token_id', None) 43 | if eos_token_id is None: 44 | action_mask = torch.ones_like(sequences, dtype=torch.bool) 45 | else: 46 | # left padding may be applied, only mask action 47 | action_mask = (sequences[:, input_len:] == eos_token_id).cumsum(dim=-1) == 0 48 | action_mask = F.pad(action_mask, (1 + input_len, -1), value=True) # include eos token and input 49 | action_mask[:, :input_len] = False 50 | action_mask = action_mask[:, 1:] 51 | return sequences, attention_mask, action_mask[:, -(sequences.size(1) - input_len):] 52 | 53 | def forward(self, 54 | sequences: torch.LongTensor, 55 | num_actions: int, 56 | attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor: 57 | """Returns action log probs 58 | """ 59 | output = self.model(sequences, attention_mask=attention_mask) 60 | logits = output['logits'] 61 | log_probs = log_probs_from_logits(logits[:, :-1, :], sequences[:, 1:]) 62 | return log_probs[:, -num_actions:] 63 | -------------------------------------------------------------------------------- /code/chatgpt/models/base/critic.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from ..lora import LoRAModule 7 | from ..utils import masked_mean 8 | 9 | 10 | class Critic(LoRAModule): 11 | """ 12 | Critic model base class. 13 | 14 | Args: 15 | model (nn.Module): Critic model. 16 | value_head (nn.Module): Value head to get value. 17 | lora_rank (int): LoRA rank. 18 | lora_train_bias (str): LoRA bias training mode. 
19 | """ 20 | 21 | def __init__( 22 | self, 23 | model: nn.Module, 24 | value_head: nn.Module, 25 | lora_rank: int = 0, 26 | lora_train_bias: str = 'none', 27 | use_action_mask: bool = False, 28 | ) -> None: 29 | 30 | super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias) 31 | self.model = model 32 | self.value_head = value_head 33 | self.use_action_mask = use_action_mask 34 | self.convert_to_lora() 35 | 36 | def forward(self, 37 | sequences: torch.LongTensor, 38 | action_mask: Optional[torch.Tensor] = None, 39 | attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor: 40 | outputs = self.model(sequences, attention_mask=attention_mask) 41 | last_hidden_states = outputs['last_hidden_state'] 42 | 43 | values = self.value_head(last_hidden_states).squeeze(-1) 44 | 45 | if action_mask is not None and self.use_action_mask: 46 | num_actions = action_mask.size(1) 47 | prompt_mask = attention_mask[:, :-num_actions] 48 | values = values[:, :-num_actions] 49 | value = masked_mean(values, prompt_mask, dim=1) 50 | return value 51 | 52 | values = values[:, :-1] 53 | value = values.mean(dim=1) 54 | return value 55 | -------------------------------------------------------------------------------- /code/chatgpt/models/base/reward_model.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from ..lora import LoRAModule 7 | 8 | 9 | class RewardModel(LoRAModule): 10 | """ 11 | Reward model base class. 12 | 13 | Args: 14 | model (nn.Module): Reward model. 15 | value_head (nn.Module): Value head to get reward score. 16 | lora_rank (int): LoRA rank. 17 | lora_train_bias (str): LoRA bias training mode. 18 | """ 19 | 20 | def __init__(self, 21 | model: nn.Module, 22 | value_head: Optional[nn.Module] = None, 23 | lora_rank: int = 0, 24 | lora_train_bias: str = 'none') -> None: 25 | super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias) 26 | self.model = model 27 | self.convert_to_lora() 28 | 29 | if value_head is not None: 30 | if value_head.out_features != 1: 31 | raise ValueError("The value head of reward model's output dim should be 1!") 32 | self.value_head = value_head 33 | else: 34 | self.value_head = nn.Linear(model.config.n_embd, 1) 35 | 36 | def forward(self, sequences: torch.LongTensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor: 37 | outputs = self.model(sequences, attention_mask=attention_mask) 38 | last_hidden_states = outputs['last_hidden_state'] 39 | values = self.value_head(last_hidden_states)[:, :-1] 40 | value = values.mean(dim=1).squeeze(1) # ensure shape is (B) 41 | return value 42 | -------------------------------------------------------------------------------- /code/chatgpt/models/bloom/__init__.py: -------------------------------------------------------------------------------- 1 | from .bloom_actor import BLOOMActor 2 | from .bloom_critic import BLOOMCritic 3 | from .bloom_rm import BLOOMRM 4 | 5 | __all__ = ['BLOOMActor', 'BLOOMCritic', 'BLOOMRM'] 6 | -------------------------------------------------------------------------------- /code/chatgpt/models/bloom/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/bloom/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- 
/code/chatgpt/models/bloom/__pycache__/bloom_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/bloom/__pycache__/bloom_actor.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/bloom/__pycache__/bloom_critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/bloom/__pycache__/bloom_critic.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/bloom/__pycache__/bloom_rm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/bloom/__pycache__/bloom_rm.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/bloom/bloom_actor.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from transformers import BloomConfig, BloomForCausalLM, BloomModel 5 | 6 | from ..base import Actor 7 | 8 | 9 | class BLOOMActor(Actor): 10 | """ 11 | BLOOM Actor model. 12 | 13 | Args: 14 | pretrained (str): Pretrained model name or path. 15 | config (BloomConfig): Model config. 16 | checkpoint (bool): Enable gradient checkpointing. 17 | lora_rank (int): LoRA rank. 18 | lora_train_bias (str): LoRA bias training mode. 19 | """ 20 | 21 | def __init__(self, 22 | pretrained: str = None, 23 | config: Optional[BloomConfig] = None, 24 | checkpoint: bool = False, 25 | lora_rank: int = 0, 26 | lora_train_bias: str = 'none') -> None: 27 | if pretrained is not None: 28 | model = BloomForCausalLM.from_pretrained(pretrained) 29 | elif config is not None: 30 | model = BloomForCausalLM(config) 31 | else: 32 | model = BloomForCausalLM(BloomConfig()) 33 | if checkpoint: 34 | model.gradient_checkpointing_enable() 35 | super().__init__(model, lora_rank, lora_train_bias) 36 | -------------------------------------------------------------------------------- /code/chatgpt/models/bloom/bloom_critic.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | import torch.nn as nn 5 | from transformers import BloomConfig, BloomForCausalLM, BloomModel 6 | 7 | from ..base import Critic 8 | 9 | 10 | class BLOOMCritic(Critic): 11 | """ 12 | BLOOM Critic model. 13 | 14 | Args: 15 | pretrained (str): Pretrained model name or path. 16 | config (BloomConfig): Model config. 17 | checkpoint (bool): Enable gradient checkpointing. 18 | lora_rank (int): LoRA rank. 19 | lora_train_bias (str): LoRA bias training mode. 
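    Example (illustrative sketch, not part of the original file; the tiny config
    values are assumptions chosen only to keep the model small):

        >>> config = BloomConfig(hidden_size=64, n_layer=2, n_head=2)
        >>> critic = BLOOMCritic(config=config, lora_rank=8)
        >>> value = critic(input_ids, attention_mask=mask)   # (B,)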
20 | """ 21 | 22 | def __init__(self, 23 | pretrained: str = None, 24 | config: Optional[BloomConfig] = None, 25 | checkpoint: bool = False, 26 | lora_rank: int = 0, 27 | lora_train_bias: str = 'none', 28 | **kwargs) -> None: 29 | if pretrained is not None: 30 | model = BloomModel.from_pretrained(pretrained) 31 | elif config is not None: 32 | model = BloomModel(config) 33 | else: 34 | model = BloomModel(BloomConfig()) 35 | if checkpoint: 36 | model.gradient_checkpointing_enable() 37 | value_head = nn.Linear(model.config.hidden_size, 1) 38 | super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs) 39 | -------------------------------------------------------------------------------- /code/chatgpt/models/bloom/bloom_rm.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch.nn as nn 4 | from transformers import BloomConfig, BloomForCausalLM, BloomModel 5 | 6 | from ..base import RewardModel 7 | 8 | 9 | class BLOOMRM(RewardModel): 10 | """ 11 | BLOOM Reward model. 12 | 13 | Args: 14 | pretrained (str): Pretrained model name or path. 15 | config (BloomConfig): Model config. 16 | checkpoint (bool): Enable gradient checkpointing. 17 | lora_rank (int): LoRA rank. 18 | lora_train_bias (str): LoRA bias training mode. 19 | """ 20 | 21 | def __init__(self, 22 | pretrained: str = None, 23 | config: Optional[BloomConfig] = None, 24 | checkpoint: bool = False, 25 | lora_rank: int = 0, 26 | lora_train_bias: str = 'none') -> None: 27 | if pretrained is not None: 28 | model = BloomModel.from_pretrained(pretrained) 29 | elif config is not None: 30 | model = BloomModel(config) 31 | else: 32 | model = BloomModel(BloomConfig()) 33 | if checkpoint: 34 | model.gradient_checkpointing_enable() 35 | value_head = nn.Linear(model.config.hidden_size, 1) 36 | super().__init__(model, value_head, lora_rank, lora_train_bias) 37 | -------------------------------------------------------------------------------- /code/chatgpt/models/generation.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, Optional 2 | 3 | import torch 4 | import torch.distributed as dist 5 | import torch.nn as nn 6 | 7 | try: 8 | from transformers.generation_logits_process import ( 9 | LogitsProcessorList, 10 | TemperatureLogitsWarper, 11 | TopKLogitsWarper, 12 | TopPLogitsWarper, 13 | ) 14 | except ImportError: 15 | from transformers.generation import LogitsProcessorList, TemperatureLogitsWarper, TopKLogitsWarper, TopPLogitsWarper 16 | 17 | 18 | def prepare_logits_processor(top_k: Optional[int] = None, 19 | top_p: Optional[float] = None, 20 | temperature: Optional[float] = None) -> LogitsProcessorList: 21 | processor_list = LogitsProcessorList() 22 | if temperature is not None and temperature != 1.0: 23 | processor_list.append(TemperatureLogitsWarper(temperature)) 24 | if top_k is not None and top_k != 0: 25 | processor_list.append(TopKLogitsWarper(top_k)) 26 | if top_p is not None and top_p < 1.0: 27 | processor_list.append(TopPLogitsWarper(top_p)) 28 | return processor_list 29 | 30 | 31 | def _is_sequence_finished(unfinished_sequences: torch.Tensor) -> bool: 32 | if dist.is_initialized() and dist.get_world_size() > 1: 33 | # consider DP 34 | unfinished_sequences = unfinished_sequences.clone() 35 | dist.all_reduce(unfinished_sequences) 36 | return unfinished_sequences.max() == 0 37 | 38 | 39 | def sample(model: nn.Module, 40 | input_ids: torch.Tensor, 41 | max_length: int, 42 | 
early_stopping: bool = False, 43 | eos_token_id: Optional[int] = None, 44 | pad_token_id: Optional[int] = None, 45 | top_k: Optional[int] = None, 46 | top_p: Optional[float] = None, 47 | temperature: Optional[float] = None, 48 | prepare_inputs_fn: Optional[Callable[[torch.Tensor, Any], dict]] = None, 49 | update_model_kwargs_fn: Optional[Callable[[dict, Any], dict]] = None, 50 | **model_kwargs) -> torch.Tensor: 51 | if input_ids.size(1) >= max_length: 52 | return input_ids 53 | 54 | logits_processor = prepare_logits_processor(top_k, top_p, temperature) 55 | unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1) 56 | 57 | for _ in range(input_ids.size(1), max_length): 58 | model_inputs = prepare_inputs_fn(input_ids, **model_kwargs) if prepare_inputs_fn is not None else { 59 | 'input_ids': input_ids 60 | } 61 | outputs = model(**model_inputs) 62 | 63 | next_token_logits = outputs['logits'][:, -1, :] 64 | # pre-process distribution 65 | next_token_logits = logits_processor(input_ids, next_token_logits) 66 | # sample 67 | probs = torch.softmax(next_token_logits, dim=-1, dtype=torch.float) 68 | next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1) 69 | 70 | # finished sentences should have their next token be a padding token 71 | if eos_token_id is not None: 72 | if pad_token_id is None: 73 | raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.") 74 | next_tokens = next_tokens * unfinished_sequences + pad_token_id * (1 - unfinished_sequences) 75 | 76 | # update generated ids, model inputs for next step 77 | input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) 78 | if update_model_kwargs_fn is not None: 79 | model_kwargs = update_model_kwargs_fn(outputs, **model_kwargs) 80 | 81 | # if eos_token was found in one sentence, set sentence to finished 82 | if eos_token_id is not None: 83 | unfinished_sequences = unfinished_sequences.mul((next_tokens != eos_token_id).long()) 84 | 85 | # stop when each sentence is finished if early_stopping=True 86 | if early_stopping and _is_sequence_finished(unfinished_sequences): 87 | break 88 | 89 | return input_ids 90 | 91 | 92 | def generate(model: nn.Module, 93 | input_ids: torch.Tensor, 94 | max_length: int, 95 | num_beams: int = 1, 96 | do_sample: bool = True, 97 | early_stopping: bool = False, 98 | eos_token_id: Optional[int] = None, 99 | pad_token_id: Optional[int] = None, 100 | top_k: Optional[int] = None, 101 | top_p: Optional[float] = None, 102 | temperature: Optional[float] = None, 103 | prepare_inputs_fn: Optional[Callable[[torch.Tensor, Any], dict]] = None, 104 | update_model_kwargs_fn: Optional[Callable[[dict, Any], dict]] = None, 105 | **model_kwargs) -> torch.Tensor: 106 | """Generate token sequence. The returned sequence is input_ids + generated_tokens. 107 | 108 | Args: 109 | model (nn.Module): model 110 | input_ids (torch.Tensor): input sequence 111 | max_length (int): max length of the returned sequence 112 | num_beams (int, optional): number of beams. Defaults to 1. 113 | do_sample (bool, optional): whether to do sample. Defaults to True. 114 | early_stopping (bool, optional): if True, the sequence length may be smaller than max_length due to finding eos. Defaults to False. 115 | eos_token_id (Optional[int], optional): end of sequence token id. Defaults to None. 116 | pad_token_id (Optional[int], optional): pad token id. Defaults to None. 117 | top_k (Optional[int], optional): the number of highest probability vocabulary tokens to keep for top-k-filtering. Defaults to None. 
118 | top_p (Optional[float], optional): If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. Defaults to None. 119 | temperature (Optional[float], optional): The value used to module the next token probabilities. Defaults to None. 120 | prepare_inputs_fn (Optional[Callable[[torch.Tensor, Any], dict]], optional): Function to preprocess model inputs. Arguments of this function should be input_ids and model_kwargs. Defaults to None. 121 | update_model_kwargs_fn (Optional[Callable[[dict, Any], dict]], optional): Function to update model_kwargs based on outputs. Arguments of this function should be outputs and model_kwargs. Defaults to None. 122 | """ 123 | is_greedy_gen_mode = ((num_beams == 1) and do_sample is False) 124 | is_sample_gen_mode = ((num_beams == 1) and do_sample is True) 125 | is_beam_gen_mode = ((num_beams > 1) and do_sample is False) 126 | if is_greedy_gen_mode: 127 | # run greedy search 128 | raise NotImplementedError 129 | elif is_sample_gen_mode: 130 | # run sample 131 | return sample(model, 132 | input_ids, 133 | max_length, 134 | early_stopping=early_stopping, 135 | eos_token_id=eos_token_id, 136 | pad_token_id=pad_token_id, 137 | top_k=top_k, 138 | top_p=top_p, 139 | temperature=temperature, 140 | prepare_inputs_fn=prepare_inputs_fn, 141 | update_model_kwargs_fn=update_model_kwargs_fn, 142 | **model_kwargs) 143 | elif is_beam_gen_mode: 144 | raise NotImplementedError 145 | else: 146 | raise ValueError("Unsupported generation mode") 147 | -------------------------------------------------------------------------------- /code/chatgpt/models/generation_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | 5 | 6 | def gpt_prepare_inputs_fn(input_ids: torch.Tensor, past: Optional[torch.Tensor] = None, **kwargs) -> dict: 7 | token_type_ids = kwargs.get("token_type_ids", None) 8 | # only last token for inputs_ids if past is defined in kwargs 9 | if past: 10 | input_ids = input_ids[:, -1].unsqueeze(-1) 11 | if token_type_ids is not None: 12 | token_type_ids = token_type_ids[:, -1].unsqueeze(-1) 13 | 14 | attention_mask = kwargs.get("attention_mask", None) 15 | position_ids = kwargs.get("position_ids", None) 16 | 17 | if attention_mask is not None and position_ids is None: 18 | # create position_ids on the fly for batch generation 19 | position_ids = attention_mask.long().cumsum(-1) - 1 20 | position_ids.masked_fill_(attention_mask == 0, 1) 21 | if past: 22 | position_ids = position_ids[:, -1].unsqueeze(-1) 23 | else: 24 | position_ids = None 25 | return { 26 | "input_ids": input_ids, 27 | "past_key_values": past, 28 | "use_cache": kwargs.get("use_cache"), 29 | "position_ids": position_ids, 30 | "attention_mask": attention_mask, 31 | "token_type_ids": token_type_ids, 32 | } 33 | 34 | 35 | def update_model_kwargs_fn(outputs: dict, **model_kwargs) -> dict: 36 | if "past_key_values" in outputs: 37 | model_kwargs["past"] = outputs["past_key_values"] 38 | else: 39 | model_kwargs["past"] = None 40 | 41 | # update token_type_ids with last value 42 | if "token_type_ids" in model_kwargs: 43 | token_type_ids = model_kwargs["token_type_ids"] 44 | model_kwargs["token_type_ids"] = torch.cat([token_type_ids, token_type_ids[:, -1].unsqueeze(-1)], dim=-1) 45 | 46 | # update attention mask 47 | if "attention_mask" in model_kwargs: 48 | attention_mask = model_kwargs["attention_mask"] 49 | model_kwargs["attention_mask"] 
= torch.cat( 50 | [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1) 51 | 52 | return model_kwargs 53 | 54 | 55 | def opt_prepare_inputs_fn(input_ids: torch.Tensor, 56 | past: Optional[torch.Tensor] = None, 57 | attention_mask: Optional[torch.Tensor] = None, 58 | use_cache: Optional[bool] = None, 59 | **kwargs) -> dict: 60 | # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly 61 | if attention_mask is None: 62 | attention_mask = input_ids.new_ones(input_ids.shape) 63 | 64 | if past: 65 | input_ids = input_ids[:, -1:] 66 | # first step, decoder_cached_states are empty 67 | return { 68 | "input_ids": input_ids, # encoder_outputs is defined. input_ids not needed 69 | "attention_mask": attention_mask, 70 | "past_key_values": past, 71 | "use_cache": use_cache, 72 | } 73 | 74 | 75 | def bloom_prepare_inputs_fn(input_ids: torch.Tensor, 76 | past: Optional[torch.Tensor] = None, 77 | attention_mask: Optional[torch.Tensor] = None, 78 | use_cache: Optional[bool] = None, 79 | **kwargs) -> dict: 80 | # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly 81 | if attention_mask is None: 82 | attention_mask = input_ids.new_ones(input_ids.shape) 83 | 84 | if past: 85 | input_ids = input_ids[:, -1:] 86 | # first step, decoder_cached_states are empty 87 | return { 88 | "input_ids": input_ids, # encoder_outputs is defined. input_ids not needed 89 | "attention_mask": attention_mask, 90 | "past_key_values": past, 91 | "use_cache": use_cache, 92 | } 93 | -------------------------------------------------------------------------------- /code/chatgpt/models/gpt/__init__.py: -------------------------------------------------------------------------------- 1 | from .gpt_actor import GPTActor 2 | from .gpt_critic import GPTCritic 3 | from .gpt_rm import GPTRM 4 | 5 | __all__ = ['GPTActor', 'GPTCritic', 'GPTRM'] 6 | -------------------------------------------------------------------------------- /code/chatgpt/models/gpt/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/gpt/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/gpt/__pycache__/gpt_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/gpt/__pycache__/gpt_actor.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/gpt/__pycache__/gpt_critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/gpt/__pycache__/gpt_critic.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/gpt/__pycache__/gpt_rm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/gpt/__pycache__/gpt_rm.cpython-310.pyc 
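# Illustrative sketch (not a file from this repository) showing how the prepare/update
# helpers above plug into generate() from code/chatgpt/models/generation.py. `model` and
# `tokenizer` are assumed to be a GPT-2 LM and its tokenizer with a pad token set; with
# use_cache=True, gpt_prepare_inputs_fn feeds only the last token once past_key_values exists.
from chatgpt.models.generation import generate
from chatgpt.models.generation_utils import gpt_prepare_inputs_fn, update_model_kwargs_fn

sequences = generate(model,
                     input_ids,
                     max_length=64,
                     do_sample=True,
                     early_stopping=True,
                     eos_token_id=tokenizer.eos_token_id,
                     pad_token_id=tokenizer.pad_token_id,
                     top_k=50,
                     top_p=0.95,
                     temperature=1.0,
                     prepare_inputs_fn=gpt_prepare_inputs_fn,
                     update_model_kwargs_fn=update_model_kwargs_fn,
                     use_cache=True)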
-------------------------------------------------------------------------------- /code/chatgpt/models/gpt/gpt_actor.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config 4 | from transformers.models.gpt2.modeling_gpt2 import GPT2LMHeadModel 5 | 6 | from ..base import Actor 7 | 8 | 9 | class GPTActor(Actor): 10 | """ 11 | GPT Actor model. 12 | 13 | Args: 14 | pretrained (str): Pretrained model name or path. 15 | config (GPT2Config): Model config. 16 | checkpoint (bool): Enable gradient checkpointing. 17 | lora_rank (int): Rank of the LoRA layer. 18 | lora_train_bias (str): Bias training strategy for the LoRA layer. 19 | """ 20 | 21 | def __init__(self, 22 | pretrained: Optional[str] = None, 23 | config: Optional[GPT2Config] = None, 24 | checkpoint: bool = False, 25 | lora_rank: int = 0, 26 | lora_train_bias: str = 'none') -> None: 27 | if pretrained is not None: 28 | model = GPT2LMHeadModel.from_pretrained(pretrained) 29 | elif config is not None: 30 | model = GPT2LMHeadModel(config) 31 | else: 32 | model = GPT2LMHeadModel(GPT2Config()) 33 | if checkpoint: 34 | model.gradient_checkpointing_enable() 35 | super().__init__(model, lora_rank, lora_train_bias) 36 | -------------------------------------------------------------------------------- /code/chatgpt/models/gpt/gpt_critic.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch.nn as nn 4 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config 5 | from transformers.models.gpt2.modeling_gpt2 import GPT2Model 6 | 7 | from ..base import Critic 8 | 9 | 10 | class GPTCritic(Critic): 11 | """ 12 | GPT Critic model. 13 | 14 | Args: 15 | pretrained (str): Pretrained model name or path. 16 | config (GPT2Config): Model config. 17 | checkpoint (bool): Enable gradient checkpointing. 18 | lora_rank (int): Rank of the LoRA decomposition. 19 | lora_train_bias (str): LoRA bias training mode. 20 | """ 21 | 22 | def __init__(self, 23 | pretrained: Optional[str] = None, 24 | config: Optional[GPT2Config] = None, 25 | checkpoint: bool = False, 26 | lora_rank: int = 0, 27 | lora_train_bias: str = 'none') -> None: 28 | if pretrained is not None: 29 | model = GPT2Model.from_pretrained(pretrained) 30 | elif config is not None: 31 | model = GPT2Model(config) 32 | else: 33 | model = GPT2Model(GPT2Config()) 34 | if checkpoint: 35 | model.gradient_checkpointing_enable() 36 | value_head = nn.Linear(model.config.n_embd, 1) 37 | super().__init__(model, value_head, lora_rank, lora_train_bias) 38 | -------------------------------------------------------------------------------- /code/chatgpt/models/gpt/gpt_rm.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch.nn as nn 4 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config 5 | from transformers.models.gpt2.modeling_gpt2 import GPT2Model 6 | 7 | from ..base import RewardModel 8 | 9 | 10 | class GPTRM(RewardModel): 11 | """ 12 | GPT Reward model. 13 | 14 | Args: 15 | pretrained (str): Pretrained model name or path. 16 | config (GPT2Config): Model config. 17 | checkpoint (bool): Enable gradient checkpointing. 18 | lora_rank (int): Rank of the low-rank approximation. 19 | lora_train_bias (str): LoRA bias training mode.
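    Example (illustrative sketch, not part of the original file; the tiny config
    and the `input_ids`/`mask` tensors are assumptions):

        >>> rm = GPTRM(config=GPT2Config(n_embd=64, n_layer=2, n_head=2))
        >>> reward = rm(input_ids, attention_mask=mask)   # (B,)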
20 | """ 21 | 22 | def __init__(self, 23 | pretrained: Optional[str] = None, 24 | config: Optional[GPT2Config] = None, 25 | checkpoint: bool = False, 26 | lora_rank: int = 0, 27 | lora_train_bias: str = 'none') -> None: 28 | 29 | if pretrained is not None: 30 | model = GPT2Model.from_pretrained(pretrained) 31 | elif config is not None: 32 | model = GPT2Model(config) 33 | else: 34 | model = GPT2Model(GPT2Config()) 35 | 36 | if checkpoint: 37 | model.gradient_checkpointing_enable() 38 | 39 | value_head = nn.Linear(model.config.n_embd, 1) 40 | super().__init__(model, value_head, lora_rank, lora_train_bias) 41 | -------------------------------------------------------------------------------- /code/chatgpt/models/lora.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Optional 3 | 4 | import loralib as lora 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class LoraLinear(lora.LoRALayer, nn.Module): 11 | """Replace in-place ops to out-of-place ops to fit gemini. Convert a torch.nn.Linear to LoraLinear. 12 | """ 13 | 14 | def __init__( 15 | self, 16 | weight: nn.Parameter, 17 | bias: Optional[nn.Parameter], 18 | r: int = 0, 19 | lora_alpha: int = 1, 20 | lora_dropout: float = 0., 21 | fan_in_fan_out: bool = False, # Set this to True if the layer to replace stores weight like (fan_in, fan_out) 22 | merge_weights: bool = True, 23 | ): 24 | nn.Module.__init__(self) 25 | lora.LoRALayer.__init__(self, 26 | r=r, 27 | lora_alpha=lora_alpha, 28 | lora_dropout=lora_dropout, 29 | merge_weights=merge_weights) 30 | self.weight = weight 31 | self.bias = bias 32 | 33 | out_features, in_features = weight.shape 34 | self.in_features = in_features 35 | self.out_features = out_features 36 | 37 | self.fan_in_fan_out = fan_in_fan_out 38 | # Actual trainable parameters 39 | if r > 0: 40 | self.lora_A = nn.Parameter(self.weight.new_zeros((r, in_features))) 41 | self.lora_B = nn.Parameter(self.weight.new_zeros((out_features, r))) 42 | self.scaling = self.lora_alpha / self.r 43 | # Freezing the pre-trained weight matrix 44 | self.weight.requires_grad = False 45 | self.reset_parameters() 46 | if fan_in_fan_out: 47 | self.weight.data = self.weight.data.T 48 | 49 | def reset_parameters(self): 50 | if hasattr(self, 'lora_A'): 51 | # initialize A the same way as the default for nn.Linear and B to zero 52 | nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5)) 53 | nn.init.zeros_(self.lora_B) 54 | 55 | def train(self, mode: bool = True): 56 | 57 | def T(w): 58 | return w.T if self.fan_in_fan_out else w 59 | 60 | nn.Module.train(self, mode) 61 | if self.merge_weights and self.merged: 62 | # Make sure that the weights are not merged 63 | if self.r > 0: 64 | self.weight.data -= T(self.lora_B @ self.lora_A) * self.scaling 65 | self.merged = False 66 | 67 | def eval(self): 68 | 69 | def T(w): 70 | return w.T if self.fan_in_fan_out else w 71 | 72 | nn.Module.eval(self) 73 | if self.merge_weights and not self.merged: 74 | # Merge the weights and mark it 75 | if self.r > 0: 76 | self.weight.data += T(self.lora_B @ self.lora_A) * self.scaling 77 | delattr(self, 'lora_A') 78 | delattr(self, 'lora_B') 79 | self.merged = True 80 | 81 | def forward(self, x: torch.Tensor): 82 | 83 | def T(w): 84 | return w.T if self.fan_in_fan_out else w 85 | 86 | if self.r > 0 and not self.merged: 87 | result = F.linear(x, T(self.weight), bias=self.bias) 88 | if self.r > 0: 89 | result = result + (self.lora_dropout(x) @ self.lora_A.t() @ 
self.lora_B.t()) * self.scaling 90 | return result 91 | else: 92 | return F.linear(x, T(self.weight), bias=self.bias) 93 | 94 | 95 | def lora_linear_wrapper(linear: nn.Linear, lora_rank: int) -> LoraLinear: 96 | assert lora_rank <= linear.in_features, f'LoRA rank ({lora_rank}) must be less than or equal to in features ({linear.in_features})' 97 | lora_linear = LoraLinear(linear.weight, linear.bias, r=lora_rank, merge_weights=False) 98 | return lora_linear 99 | 100 | 101 | def convert_to_lora_recursively(module: nn.Module, lora_rank: int) -> None: 102 | for name, child in module.named_children(): 103 | if isinstance(child, nn.Linear): 104 | setattr(module, name, lora_linear_wrapper(child, lora_rank)) 105 | else: 106 | convert_to_lora_recursively(child, lora_rank) 107 | 108 | 109 | class LoRAModule(nn.Module): 110 | """A LoRA module base class. All derived classes should call `convert_to_lora()` at the bottom of `__init__()`. 111 | This class will convert all torch.nn.Linear layers to LoraLinear layers. 112 | 113 | Args: 114 | lora_rank (int, optional): LoRA rank. 0 means LoRA is not applied. Defaults to 0. 115 | lora_train_bias (str, optional): Whether to train LoRA biases. 116 | 'none' means it doesn't train biases. 'all' means it trains all biases. 'lora_only' means it only trains biases of LoRA layers. 117 | Defaults to 'none'. 118 | """ 119 | 120 | def __init__(self, lora_rank: int = 0, lora_train_bias: str = 'none') -> None: 121 | super().__init__() 122 | self.lora_rank = lora_rank 123 | self.lora_train_bias = lora_train_bias 124 | 125 | def convert_to_lora(self) -> None: 126 | if self.lora_rank <= 0: 127 | return 128 | convert_to_lora_recursively(self, self.lora_rank) 129 | lora.mark_only_lora_as_trainable(self, self.lora_train_bias) 130 | 131 | -------------------------------------------------------------------------------- /code/chatgpt/models/loss.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from .utils import masked_mean 7 | 8 | 9 | class GPTLMLoss(nn.Module): 10 | """ 11 | GPT Language Model Loss 12 | """ 13 | 14 | def __init__(self): 15 | super().__init__() 16 | self.loss = nn.CrossEntropyLoss() 17 | 18 | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor: 19 | shift_logits = logits[..., :-1, :].contiguous() 20 | shift_labels = labels[..., 1:].contiguous() 21 | # Flatten the tokens 22 | return self.loss(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1)) 23 | 24 | 25 | class PolicyLoss(nn.Module): 26 | """ 27 | Policy Loss for PPO 28 | """ 29 | 30 | def __init__(self, clip_eps: float = 0.2) -> None: 31 | super().__init__() 32 | self.clip_eps = clip_eps 33 | 34 | def forward(self, 35 | log_probs: torch.Tensor, 36 | old_log_probs: torch.Tensor, 37 | advantages: torch.Tensor, 38 | action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: 39 | 40 | ratio = (log_probs - old_log_probs).exp() 41 | surr1 = ratio * advantages 42 | surr2 = ratio.clamp(1 - self.clip_eps, 1 + self.clip_eps) * advantages 43 | loss = -torch.min(surr1, surr2) 44 | if action_mask is not None: 45 | loss = masked_mean(loss, action_mask) 46 | loss = loss.mean() 47 | return loss 48 | 49 | 50 | class ValueLoss(nn.Module): 51 | """ 52 | Value Loss for PPO 53 | """ 54 | 55 | def __init__(self, clip_eps: float = 0.4) -> None: 56 | super().__init__() 57 | self.clip_eps = clip_eps 58 | 59 | def forward(self, 60 | values: torch.Tensor, 61 | old_values:
torch.Tensor, 62 | reward: torch.Tensor, 63 | action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: 64 | values_clipped = old_values + (values - old_values).clamp(-self.clip_eps, self.clip_eps) 65 | surr1 = (values_clipped - reward)**2 66 | surr2 = (values - reward)**2 67 | loss = torch.max(surr1, surr2) 68 | loss = loss.mean() 69 | return loss 70 | 71 | 72 | class PPOPtxActorLoss(nn.Module): 73 | """ 74 | PPO-ptx Actor Loss 75 | 76 | TODO 77 | """ 78 | 79 | def __init__(self, policy_clip_eps: float = 0.2, pretrain_coef: float = 0.0, pretrain_loss_fn=GPTLMLoss()) -> None: 80 | super().__init__() 81 | self.pretrain_coef = pretrain_coef 82 | self.policy_loss_fn = PolicyLoss(clip_eps=policy_clip_eps) 83 | self.pretrain_loss_fn = pretrain_loss_fn 84 | 85 | def forward(self, 86 | log_probs: torch.Tensor, 87 | old_log_probs: torch.Tensor, 88 | advantages: torch.Tensor, 89 | lm_logits: torch.Tensor, 90 | lm_input_ids: torch.Tensor, 91 | action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: 92 | policy_loss = self.policy_loss_fn(log_probs, old_log_probs, advantages, action_mask=action_mask) 93 | lm_loss = self.pretrain_loss_fn(lm_logits, lm_input_ids) 94 | return policy_loss + self.pretrain_coef * lm_loss 95 | 96 | 97 | class PairWiseLoss(nn.Module): 98 | """ 99 | Pairwise Loss for Reward Model 100 | """ 101 | 102 | def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Tensor) -> torch.Tensor: 103 | # use logsigmoid for numerical stability: log(sigmoid(x)) underflows for very negative x 104 | log_probs = F.logsigmoid(chosen_reward - reject_reward) 105 | loss = -log_probs.mean() 106 | return loss 107 | -------------------------------------------------------------------------------- /code/chatgpt/models/opt/__init__.py: -------------------------------------------------------------------------------- 1 | from .opt_actor import OPTActor 2 | from .opt_critic import OPTCritic 3 | from .opt_rm import OPTRM 4 | 5 | __all__ = ['OPTActor', 'OPTCritic', 'OPTRM'] 6 | -------------------------------------------------------------------------------- /code/chatgpt/models/opt/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/opt/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/opt/__pycache__/opt_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/opt/__pycache__/opt_actor.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/opt/__pycache__/opt_critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/opt/__pycache__/opt_critic.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/models/opt/__pycache__/opt_rm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/opt/__pycache__/opt_rm.cpython-310.pyc
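# Illustrative sketch (not a file from this repository) of the clipped surrogate computed
# by PolicyLoss above. With clip_eps=0.2 the probability ratio exp(log_probs - old_log_probs)
# is clamped to [0.8, 1.2], so a large advantage cannot push a single update arbitrarily
# far from the old policy. The numbers below are assumptions chosen to show the clamp.
import torch

from chatgpt.models.loss import PolicyLoss

log_probs = torch.tensor([[0.0]])
old_log_probs = torch.tensor([[-0.5]])   # ratio = exp(0.5) ~ 1.65, outside the clip range
advantages = torch.tensor([[1.0]])
loss = PolicyLoss(clip_eps=0.2)(log_probs, old_log_probs, advantages)
# surr1 ~ 1.65, surr2 = 1.2 * 1.0  ->  loss = -min(surr1, surr2) = -1.2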
-------------------------------------------------------------------------------- /code/chatgpt/models/opt/opt_actor.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from transformers.models.opt.configuration_opt import OPTConfig 4 | from transformers.models.opt.modeling_opt import OPTForCausalLM 5 | 6 | from ..base import Actor 7 | 8 | 9 | class OPTActor(Actor): 10 | """ 11 | OPT Actor model. 12 | 13 | Args: 14 | pretrained (str): Pretrained model name or path. 15 | config (OPTConfig): Model config. 16 | checkpoint (bool): Enable gradient checkpointing. 17 | lora_rank (int): Rank of the low-rank approximation. 18 | lora_train_bias (str): LoRA bias training mode. 19 | """ 20 | 21 | def __init__(self, 22 | pretrained: Optional[str] = None, 23 | config: Optional[OPTConfig] = None, 24 | checkpoint: bool = False, 25 | lora_rank: int = 0, 26 | lora_train_bias: str = 'none') -> None: 27 | if pretrained is not None: 28 | model = OPTForCausalLM.from_pretrained(pretrained) 29 | elif config is not None: 30 | model = OPTForCausalLM(config) 31 | else: 32 | model = OPTForCausalLM(OPTConfig()) 33 | if checkpoint: 34 | model.gradient_checkpointing_enable() 35 | super().__init__(model, lora_rank, lora_train_bias) 36 | -------------------------------------------------------------------------------- /code/chatgpt/models/opt/opt_critic.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch.nn as nn 4 | from transformers.models.opt.configuration_opt import OPTConfig 5 | from transformers.models.opt.modeling_opt import OPTModel 6 | 7 | from ..base import Critic 8 | 9 | 10 | class OPTCritic(Critic): 11 | """ 12 | OPT Critic model. 13 | 14 | Args: 15 | pretrained (str): Pretrained model name or path. 16 | config (OPTConfig): Model config. 17 | checkpoint (bool): Enable gradient checkpointing. 18 | lora_rank (int): Rank of the low-rank approximation. 19 | lora_train_bias (str): LoRA bias training mode. 20 | """ 21 | 22 | def __init__(self, 23 | pretrained: Optional[str] = None, 24 | config: Optional[OPTConfig] = None, 25 | checkpoint: bool = False, 26 | lora_rank: int = 0, 27 | lora_train_bias: str = 'none', 28 | **kwargs) -> None: 29 | if pretrained is not None: 30 | model = OPTModel.from_pretrained(pretrained) 31 | elif config is not None: 32 | model = OPTModel(config) 33 | else: 34 | model = OPTModel(OPTConfig()) 35 | if checkpoint: 36 | model.gradient_checkpointing_enable() 37 | value_head = nn.Linear(model.config.word_embed_proj_dim, 1) 38 | super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs) 39 | -------------------------------------------------------------------------------- /code/chatgpt/models/opt/opt_rm.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch.nn as nn 4 | from transformers import OPTConfig, OPTModel 5 | 6 | from ..base import RewardModel 7 | 8 | 9 | class OPTRM(RewardModel): 10 | """ 11 | OPT Reward model. 12 | 13 | Args: 14 | pretrained (str): Pretrained model name or path. 15 | config (OPTConfig): Model config. 16 | checkpoint (bool): Enable gradient checkpointing. 17 | lora_rank (int): Rank of the low-rank approximation. 18 | lora_train_bias (str): LoRA bias training mode. 
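    Example (illustrative sketch, not part of the original file; the tiny config
    values are assumptions):

        >>> config = OPTConfig(hidden_size=64, num_hidden_layers=2, num_attention_heads=2, ffn_dim=128)
        >>> rm = OPTRM(config=config)   # value head is sized by config.word_embed_proj_dim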
19 | """ 20 | 21 | def __init__(self, 22 | pretrained: Optional[str] = None, 23 | config: Optional[OPTConfig] = None, 24 | checkpoint: bool = False, 25 | lora_rank: int = 0, 26 | lora_train_bias: str = 'none') -> None: 27 | if pretrained is not None: 28 | model = OPTModel.from_pretrained(pretrained) 29 | elif config is not None: 30 | model = OPTModel(config) 31 | else: 32 | model = OPTModel(OPTConfig()) 33 | if checkpoint: 34 | model.gradient_checkpointing_enable() 35 | 36 | value_head = nn.Linear(model.config.word_embed_proj_dim, 1) 37 | super().__init__(model, value_head, lora_rank, lora_train_bias) 38 | -------------------------------------------------------------------------------- /code/chatgpt/models/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Union 2 | 3 | import loralib as lora 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | def compute_approx_kl(log_probs: torch.Tensor, 10 | log_probs_base: torch.Tensor, 11 | action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: 12 | """ 13 | Compute the approximate KL divergence between two distributions. 14 | Schulman blog: http://joschu.net/blog/kl-approx.html 15 | 16 | Args: 17 | log_probs: Log probabilities of the new distribution. 18 | log_probs_base: Log probabilities of the base distribution. 19 | action_mask: Mask for actions. 20 | """ 21 | 22 | log_ratio = log_probs - log_probs_base 23 | approx_kl = (log_ratio.exp() - 1) - log_ratio 24 | if action_mask is not None: 25 | approx_kl = masked_mean(approx_kl, action_mask, dim=1) 26 | return approx_kl 27 | approx_kl = approx_kl.mean(dim=1) 28 | return approx_kl 29 | 30 | 31 | def compute_reward(r: Union[torch.Tensor, float], 32 | kl_coef: float, 33 | log_probs: torch.Tensor, 34 | log_probs_base: torch.Tensor, 35 | action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: 36 | if kl_coef <= 0.0: 37 | return r 38 | kl = compute_approx_kl(log_probs, log_probs_base, action_mask=action_mask) 39 | reward = r - kl_coef * kl 40 | return reward 41 | 42 | 43 | def log_probs_from_logits(logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor: 44 | log_probs = F.log_softmax(logits, dim=-1) 45 | log_probs_labels = log_probs.gather(dim=-1, index=labels.unsqueeze(-1)) 46 | return log_probs_labels.squeeze(-1) 47 | 48 | 49 | def masked_mean(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) -> torch.Tensor: 50 | tensor = tensor * mask 51 | tensor = tensor.sum(dim=dim) 52 | mask_sum = mask.sum(dim=dim) 53 | mean = tensor / (mask_sum + 1e-8) 54 | return mean 55 | 56 | 57 | def masked_normalize(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1, eps: float = 1e-8) -> torch.Tensor: 58 | tensor = tensor * mask 59 | mean = masked_mean(tensor, mask, dim=dim) 60 | mean_centered = tensor - mean 61 | var = masked_mean(mean_centered**2, mask, dim=dim) 62 | return mean_centered * var.clamp(min=eps).rsqrt() 63 | 64 | 65 | def normalize(tensor: torch.Tensor, dim: int = 0, eps: float = 1e-8) -> torch.Tensor: 66 | mean = tensor.mean(dim) 67 | mean_centered = tensor - mean 68 | var = (mean_centered**2).mean(dim) 69 | norm = mean_centered * var.clamp(min=eps).rsqrt() 70 | return norm 71 | 72 | 73 | def convert_to_lora(model: nn.Module, 74 | input_size: int, 75 | output_size: int, 76 | lora_rank: int = 16, 77 | lora_alpha: int = 1, 78 | lora_dropout: float = 0., 79 | fan_in_fan_out: bool = False, 80 | merge_weights: bool = True): 81 | if lora_rank > min(input_size, output_size): 82 | 
raise ValueError(f"LoRA rank {lora_rank} must be less than or equal to {min(input_size, output_size)}") 83 | 84 | # walk parent modules so that each nn.Linear can actually be rebound; iterating 85 | # named_modules() and assigning on the Linear itself cannot replace it in its parent 86 | for module in model.modules(): 87 | for name, child in module.named_children(): 88 | if isinstance(child, nn.Linear): 89 | module._modules[name] = lora.Linear(input_size, 90 | output_size, 91 | r=lora_rank, 92 | lora_alpha=lora_alpha, 93 | lora_dropout=lora_dropout, 94 | fan_in_fan_out=fan_in_fan_out, 95 | merge_weights=merge_weights) 96 | -------------------------------------------------------------------------------- /code/chatgpt/replay_buffer/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import ReplayBuffer 2 | from .naive import NaiveReplayBuffer 3 | 4 | __all__ = ['ReplayBuffer', 'NaiveReplayBuffer'] 5 | -------------------------------------------------------------------------------- /code/chatgpt/replay_buffer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/replay_buffer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/replay_buffer/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/replay_buffer/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/replay_buffer/__pycache__/naive.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/replay_buffer/__pycache__/naive.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/replay_buffer/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/replay_buffer/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/replay_buffer/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any 3 | 4 | from chatgpt.experience_maker.base import Experience 5 | 6 | 7 | class ReplayBuffer(ABC): 8 | """Replay buffer base class. It stores experience. 9 | 10 | Args: 11 | sample_batch_size (int): Batch size when sampling. 12 | limit (int, optional): Limit of number of experience samples. A number <= 0 means unlimited. Defaults to 0.
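    Example (illustrative sketch, not part of the original file; uses the
    NaiveReplayBuffer subclass defined in naive.py below and a hypothetical
    `experience` produced by an ExperienceMaker):

        >>> buffer = NaiveReplayBuffer(sample_batch_size=8, limit=0, cpu_offload=False)
        >>> buffer.append(experience)   # splits the batch into per-sample BufferItems
        >>> batch = buffer.sample()     # re-collates 8 random items into one Experience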
13 | """ 14 | 15 | def __init__(self, sample_batch_size: int, limit: int = 0) -> None: 16 | super().__init__() 17 | self.sample_batch_size = sample_batch_size 18 | # limit <= 0 means unlimited 19 | self.limit = limit 20 | 21 | @abstractmethod 22 | def append(self, experience: Experience) -> None: 23 | pass 24 | 25 | @abstractmethod 26 | def clear(self) -> None: 27 | pass 28 | 29 | @abstractmethod 30 | def sample(self) -> Experience: 31 | pass 32 | 33 | @abstractmethod 34 | def __len__(self) -> int: 35 | pass 36 | 37 | @abstractmethod 38 | def __getitem__(self, idx: int) -> Any: 39 | pass 40 | 41 | @abstractmethod 42 | def collate_fn(self, batch: Any) -> Experience: 43 | pass 44 | -------------------------------------------------------------------------------- /code/chatgpt/replay_buffer/naive.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import List 3 | 4 | import torch 5 | from chatgpt.experience_maker.base import Experience 6 | 7 | from .base import ReplayBuffer 8 | from .utils import BufferItem, make_experience_batch, split_experience_batch 9 | 10 | 11 | class NaiveReplayBuffer(ReplayBuffer): 12 | """Naive replay buffer class. It stores experience. 13 | 14 | Args: 15 | sample_batch_size (int): Batch size when sampling. 16 | limit (int, optional): Limit of number of experience samples. A number <= 0 means unlimited. Defaults to 0. 17 | cpu_offload (bool, optional): Whether to offload experience to cpu when sampling. Defaults to True. 18 | """ 19 | 20 | def __init__(self, sample_batch_size: int, limit: int = 0, cpu_offload: bool = True) -> None: 21 | super().__init__(sample_batch_size, limit) 22 | self.cpu_offload = cpu_offload 23 | self.target_device = torch.device(f'cuda:{torch.cuda.current_device()}') 24 | # TODO(ver217): add prefetch 25 | self.items: List[BufferItem] = [] 26 | 27 | @torch.no_grad() 28 | def append(self, experience: Experience) -> None: 29 | if self.cpu_offload: 30 | experience.to_device(torch.device('cpu')) 31 | items = split_experience_batch(experience) 32 | self.items.extend(items) 33 | if self.limit > 0: 34 | samples_to_remove = len(self.items) - self.limit 35 | if samples_to_remove > 0: 36 | self.items = self.items[samples_to_remove:] 37 | 38 | def clear(self) -> None: 39 | self.items.clear() 40 | 41 | @torch.no_grad() 42 | def sample(self) -> Experience: 43 | items = random.sample(self.items, self.sample_batch_size) 44 | experience = make_experience_batch(items) 45 | if self.cpu_offload: 46 | experience.to_device(self.target_device) 47 | return experience 48 | 49 | def __len__(self) -> int: 50 | return len(self.items) 51 | 52 | def __getitem__(self, idx: int) -> BufferItem: 53 | return self.items[idx] 54 | 55 | def collate_fn(self, batch) -> Experience: 56 | experience = make_experience_batch(batch) 57 | return experience 58 | -------------------------------------------------------------------------------- /code/chatgpt/replay_buffer/utils.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Optional 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | from chatgpt.experience_maker.base import Experience 7 | 8 | 9 | @dataclass 10 | class BufferItem: 11 | """BufferItem is an item of experience data. 
12 | 13 | Shapes of each tensor: 14 | sequences: (S) 15 | action_log_probs: (A) 16 | values: (1) 17 | reward: (1) 18 | advantages: (1) 19 | attention_mask: (S) 20 | action_mask: (A) 21 | 22 | "A" is the number of actions. 23 | """ 24 | sequences: torch.Tensor 25 | action_log_probs: torch.Tensor 26 | values: torch.Tensor 27 | reward: torch.Tensor 28 | advantages: torch.Tensor 29 | attention_mask: Optional[torch.LongTensor] 30 | action_mask: Optional[torch.BoolTensor] 31 | 32 | 33 | def split_experience_batch(experience: Experience) -> List[BufferItem]: 34 | batch_size = experience.sequences.size(0) 35 | batch_kwargs = [{} for _ in range(batch_size)] 36 | keys = ('sequences', 'action_log_probs', 'values', 'reward', 'advantages', 'attention_mask', 'action_mask') 37 | for key in keys: 38 | value = getattr(experience, key) 39 | if isinstance(value, torch.Tensor): 40 | vals = torch.unbind(value) 41 | else: 42 | # None 43 | vals = [value for _ in range(batch_size)] 44 | assert batch_size == len(vals) 45 | for i, v in enumerate(vals): 46 | batch_kwargs[i][key] = v 47 | items = [BufferItem(**kwargs) for kwargs in batch_kwargs] 48 | return items 49 | 50 | 51 | def zero_pad_sequences(sequences: List[torch.Tensor], side: str = 'left') -> torch.Tensor: 52 | assert side in ('left', 'right') 53 | max_len = max(seq.size(0) for seq in sequences) 54 | padded_sequences = [] 55 | for seq in sequences: 56 | pad_len = max_len - seq.size(0) 57 | padding = (pad_len, 0) if side == 'left' else (0, pad_len) 58 | padded_sequences.append(F.pad(seq, padding)) 59 | return torch.stack(padded_sequences, dim=0) 60 | 61 | 62 | def make_experience_batch(items: List[BufferItem]) -> Experience: 63 | kwargs = {} 64 | to_pad_keys = set(('action_log_probs', 'action_mask')) 65 | keys = ('sequences', 'action_log_probs', 'values', 'reward', 'advantages', 'attention_mask', 'action_mask') 66 | for key in keys: 67 | vals = [getattr(item, key) for item in items] 68 | if key in to_pad_keys: 69 | batch_data = zero_pad_sequences(vals) 70 | else: 71 | batch_data = torch.stack(vals, dim=0) 72 | kwargs[key] = batch_data 73 | return Experience(**kwargs) 74 | -------------------------------------------------------------------------------- /code/chatgpt/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Trainer 2 | from .ppo import PPOTrainer 3 | from .rm import RewardModelTrainer 4 | 5 | __all__ = ['Trainer', 'PPOTrainer', 'RewardModelTrainer'] 6 | -------------------------------------------------------------------------------- /code/chatgpt/trainer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/trainer/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/trainer/__pycache__/ppo.cpython-310.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/__pycache__/ppo.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/trainer/__pycache__/rm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/__pycache__/rm.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/trainer/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/trainer/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any, Callable, Dict, List, Optional, Union 3 | 4 | import torch 5 | from chatgpt.experience_maker import Experience, ExperienceMaker 6 | from chatgpt.replay_buffer import ReplayBuffer 7 | from torch import Tensor 8 | from torch.utils.data import DistributedSampler 9 | from tqdm import tqdm 10 | 11 | from .callbacks import Callback 12 | from .strategies import Strategy 13 | from .utils import is_rank_0 14 | 15 | 16 | class Trainer(ABC): 17 | """ 18 | Base class for RLHF trainers. 19 | 20 | Args: 21 | strategy (Strategy): the strategy to use for training 22 | experience_maker (ExperienceMaker): the experience maker used to produce experience that fills the replay buffer 23 | replay_buffer (ReplayBuffer): the replay buffer to use for training 24 | experience_batch_size (int, defaults to 8): the batch size to use for experience generation 25 | max_epochs (int, defaults to 1): the number of epochs of training process 26 | tokenizer (Callable, optional): the tokenizer to use for tokenizing the input 27 | sample_replay_buffer (bool, defaults to False): whether to sample from replay buffer 28 | dataloader_pin_memory (bool, defaults to True): whether to pin memory for data loader 29 | callbacks (List[Callback], defaults to []): the callbacks to call during training process 30 | generate_kwargs (dict, optional): the kwargs to use while model generating 31 | """ 32 | 33 | def __init__(self, 34 | strategy: Strategy, 35 | experience_maker: ExperienceMaker, 36 | replay_buffer: ReplayBuffer, 37 | experience_batch_size: int = 8, 38 | max_epochs: int = 1, 39 | tokenizer: Optional[Callable[[Any], dict]] = None, 40 | sample_replay_buffer: bool = False, 41 | dataloader_pin_memory: bool = True, 42 | callbacks: List[Callback] = [], 43 | **generate_kwargs) -> None: 44 | super().__init__() 45 | self.strategy = strategy 46 | self.experience_maker = experience_maker 47 | self.replay_buffer = replay_buffer 48 | self.experience_batch_size = experience_batch_size 49 | self.max_epochs = max_epochs 50 | self.tokenizer = tokenizer 51 | self.generate_kwargs = generate_kwargs 52 | self.sample_replay_buffer = sample_replay_buffer 53 | self.dataloader_pin_memory = dataloader_pin_memory 54 | self.callbacks = callbacks 55 | 56 | @abstractmethod 57 | def training_step(self, experience: Experience) -> Dict[str, Any]: 58 | pass 59 | 60 | def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]])
-> Experience: 61 | if isinstance(inputs, Tensor): 62 | return self.experience_maker.make_experience(inputs, **self.generate_kwargs) 63 | elif isinstance(inputs, dict): 64 | return self.experience_maker.make_experience(**inputs, **self.generate_kwargs) 65 | else: 66 | raise ValueError(f'Unsupported input type "{type(inputs)}"') 67 | 68 | def _sample_prompts(self, prompts) -> list: 69 | indices = list(range(len(prompts))) 70 | sampled_indices = self.strategy.experience_sampler.choice(indices, self.experience_batch_size, replace=False) 71 | return [prompts[i] for i in sampled_indices] 72 | 73 | def _learn(self): 74 | # replay buffer may be empty at first, we should rebuild at each training 75 | if not self.sample_replay_buffer: 76 | dataloader = self.strategy.setup_dataloader(self.replay_buffer, self.dataloader_pin_memory) 77 | device = torch.cuda.current_device() 78 | if self.sample_replay_buffer: 79 | pbar = tqdm(range(self.max_epochs), desc='Train epoch', disable=not is_rank_0()) 80 | for _ in pbar: 81 | experience = self.replay_buffer.sample() 82 | metrics = self.training_step(experience) 83 | pbar.set_postfix(metrics) 84 | else: 85 | for epoch in range(self.max_epochs): 86 | self._on_learn_epoch_start(epoch) 87 | if isinstance(dataloader.sampler, DistributedSampler): 88 | dataloader.sampler.set_epoch(epoch) 89 | pbar = tqdm(dataloader, desc=f'Train epoch [{epoch+1}/{self.max_epochs}]', disable=not is_rank_0()) 90 | for experience in pbar: 91 | self._on_learn_batch_start() 92 | experience.to_device(device) 93 | metrics = self.training_step(experience) 94 | self._on_learn_batch_end(metrics, experience) 95 | pbar.set_postfix(metrics) 96 | self._on_learn_epoch_end(epoch) 97 | 98 | def fit(self, prompts, num_episodes: int = 50000, max_timesteps: int = 500, update_timesteps: int = 5000) -> None: 99 | time = 0 100 | sampler = self.strategy.setup_sampler(prompts) 101 | self._on_fit_start() 102 | for episode in range(num_episodes): 103 | self._on_episode_start(episode) 104 | for timestep in tqdm(range(max_timesteps), 105 | desc=f'Episode [{episode+1}/{num_episodes}]', 106 | disable=not is_rank_0()): 107 | time += 1 108 | rand_prompts = sampler.sample(self.experience_batch_size) 109 | if self.tokenizer is not None: 110 | inputs = self.tokenizer(rand_prompts) 111 | else: 112 | inputs = rand_prompts 113 | self._on_make_experience_start() 114 | experience = self._make_experience(inputs) 115 | self._on_make_experience_end(experience) 116 | self.replay_buffer.append(experience) 117 | if time % update_timesteps == 0: 118 | self._learn() 119 | self.replay_buffer.clear() 120 | self._on_episode_end(episode) 121 | self._on_fit_end() 122 | 123 | # TODO(ver217): maybe simplify these code using context 124 | def _on_fit_start(self) -> None: 125 | for callback in self.callbacks: 126 | callback.on_fit_start() 127 | 128 | def _on_fit_end(self) -> None: 129 | for callback in self.callbacks: 130 | callback.on_fit_end() 131 | 132 | def _on_episode_start(self, episode: int) -> None: 133 | for callback in self.callbacks: 134 | callback.on_episode_start(episode) 135 | 136 | def _on_episode_end(self, episode: int) -> None: 137 | for callback in self.callbacks: 138 | callback.on_episode_end(episode) 139 | 140 | def _on_make_experience_start(self) -> None: 141 | for callback in self.callbacks: 142 | callback.on_make_experience_start() 143 | 144 | def _on_make_experience_end(self, experience: Experience) -> None: 145 | for callback in self.callbacks: 146 | callback.on_make_experience_end(experience) 147 | 148 | def 
_on_learn_epoch_start(self, epoch: int) -> None: 149 | for callback in self.callbacks: 150 | callback.on_learn_epoch_start(epoch) 151 | 152 | def _on_learn_epoch_end(self, epoch: int) -> None: 153 | for callback in self.callbacks: 154 | callback.on_learn_epoch_end(epoch) 155 | 156 | def _on_learn_batch_start(self) -> None: 157 | for callback in self.callbacks: 158 | callback.on_learn_batch_start() 159 | 160 | def _on_learn_batch_end(self, metrics: dict, experience: Experience) -> None: 161 | for callback in self.callbacks: 162 | callback.on_learn_batch_end(metrics, experience) 163 | -------------------------------------------------------------------------------- /code/chatgpt/trainer/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Callback 2 | from .performance_evaluator import PerformanceEvaluator 3 | from .save_checkpoint import SaveCheckpoint 4 | 5 | __all__ = ['Callback', 'PerformanceEvaluator', 'SaveCheckpoint'] 6 | -------------------------------------------------------------------------------- /code/chatgpt/trainer/callbacks/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/callbacks/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/trainer/callbacks/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/callbacks/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/trainer/callbacks/__pycache__/performance_evaluator.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/callbacks/__pycache__/performance_evaluator.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/trainer/callbacks/__pycache__/save_checkpoint.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/callbacks/__pycache__/save_checkpoint.cpython-310.pyc -------------------------------------------------------------------------------- /code/chatgpt/trainer/callbacks/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | 3 | from chatgpt.experience_maker import Experience 4 | 5 | 6 | class Callback(ABC): 7 | """ 8 | Base callback class. It defines the interface for callbacks. 
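    Example (illustrative sketch, not part of the original file): a minimal subclass
    overrides only the hooks it cares about; every other hook defaults to a no-op.

        >>> class EpisodeLogger(Callback):
        ...     def on_episode_end(self, episode: int) -> None:
        ...         print(f'finished episode {episode}')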
9 | """ 10 | 11 | def on_fit_start(self) -> None: 12 | pass 13 | 14 | def on_fit_end(self) -> None: 15 | pass 16 | 17 | def on_episode_start(self, episode: int) -> None: 18 | pass 19 | 20 | def on_episode_end(self, episode: int) -> None: 21 | pass 22 | 23 | def on_make_experience_start(self) -> None: 24 | pass 25 | 26 | def on_make_experience_end(self, experience: Experience) -> None: 27 | pass 28 | 29 | def on_learn_epoch_start(self, epoch: int) -> None: 30 | pass 31 | 32 | def on_learn_epoch_end(self, epoch: int) -> None: 33 | pass 34 | 35 | def on_learn_batch_start(self) -> None: 36 | pass 37 | 38 | def on_learn_batch_end(self, metrics: dict, experience: Experience) -> None: 39 | pass 40 | -------------------------------------------------------------------------------- /code/chatgpt/trainer/callbacks/performance_evaluator.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | from typing import Optional 3 | 4 | import torch 5 | import torch.distributed as dist 6 | from chatgpt.experience_maker import Experience 7 | 8 | from .base import Callback 9 | 10 | 11 | def get_world_size() -> int: 12 | if dist.is_initialized(): 13 | return dist.get_world_size() 14 | return 1 15 | 16 | 17 | def print_rank_0(*args, **kwargs) -> None: 18 | if not dist.is_initialized() or dist.get_rank() == 0: 19 | print(*args, **kwargs) 20 | 21 | 22 | @torch.no_grad() 23 | def all_reduce_mean(x: float, world_size: int) -> float: 24 | if world_size == 1: 25 | return x 26 | tensor = torch.tensor([x], device=torch.cuda.current_device()) 27 | dist.all_reduce(tensor) 28 | tensor = tensor / world_size 29 | return tensor.item() 30 | 31 | 32 | class PerformanceEvaluator(Callback): 33 | """ 34 | Callback for valuate the performance of the model. 35 | Args: 36 | actor_num_params: The number of parameters of the actor model. 37 | critic_num_params: The number of parameters of the critic model. 38 | initial_model_num_params: The number of parameters of the initial model. 39 | reward_model_num_params: The number of parameters of the reward model. 40 | enable_grad_checkpoint: Whether to enable gradient checkpointing. 41 | ignore_episodes: The number of episodes to ignore when calculating the performance. 42 | """ 43 | 44 | def __init__(self, 45 | actor_num_params: int, 46 | critic_num_params: int, 47 | initial_model_num_params: int, 48 | reward_model_num_params: int, 49 | enable_grad_checkpoint: bool = False, 50 | ignore_episodes: int = 0) -> None: 51 | super().__init__() 52 | self.world_size = get_world_size() 53 | self.actor_num_params = actor_num_params 54 | self.critic_num_params = critic_num_params 55 | self.initial_model_num_params = initial_model_num_params 56 | self.reward_model_num_params = reward_model_num_params 57 | self.enable_grad_checkpoint = enable_grad_checkpoint 58 | self.ignore_episodes = ignore_episodes 59 | self.disable: bool = False 60 | 61 | self.make_experience_duration: float = 0. 62 | self.make_experience_start_time: Optional[float] = None 63 | self.make_experience_num_samples: int = 0 64 | self.make_experience_flop: int = 0 65 | self.learn_duration: float = 0. 
66 |         self.learn_start_time: Optional[float] = None
67 |         self.learn_num_samples: int = 0
68 |         self.learn_flop: int = 0
69 | 
70 |     def on_episode_start(self, episode: int) -> None:
71 |         self.disable = self.ignore_episodes > 0 and episode < self.ignore_episodes
72 | 
73 |     def on_make_experience_start(self) -> None:
74 |         if self.disable:
75 |             return
76 |         self.make_experience_start_time = time()
77 | 
78 |     def on_make_experience_end(self, experience: Experience) -> None:
79 |         if self.disable:
80 |             return
81 |         self.make_experience_duration += time() - self.make_experience_start_time
82 | 
83 |         batch_size, seq_len = experience.sequences.shape
84 | 
85 |         self.make_experience_num_samples += batch_size
86 | 
87 |         # actor generate
88 |         num_actions = experience.action_mask.size(1)
89 |         input_len = seq_len - num_actions
90 |         total_seq_len = (input_len + seq_len - 1) * num_actions / 2
91 |         self.make_experience_flop += self.actor_num_params * batch_size * total_seq_len * 2
92 |         # actor forward
93 |         self.make_experience_flop += self.actor_num_params * batch_size * seq_len * 2
94 |         # critic forward
95 |         self.make_experience_flop += self.critic_num_params * batch_size * seq_len * 2
96 |         # initial model forward
97 |         self.make_experience_flop += self.initial_model_num_params * batch_size * seq_len * 2
98 |         # reward model forward
99 |         self.make_experience_flop += self.reward_model_num_params * batch_size * seq_len * 2
100 | 
101 |     def on_learn_batch_start(self) -> None:
102 |         if self.disable:
103 |             return
104 |         self.learn_start_time = time()
105 | 
106 |     def on_learn_batch_end(self, metrics: dict, experience: Experience) -> None:
107 |         if self.disable:
108 |             return
109 |         self.learn_duration += time() - self.learn_start_time
110 | 
111 |         batch_size, seq_len = experience.sequences.shape
112 | 
113 |         self.learn_num_samples += batch_size
114 | 
115 |         # actor forward-backward, 3 means forward(1) + backward(2)
116 |         self.learn_flop += self.actor_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint))
117 |         # critic forward-backward
118 |         self.learn_flop += self.critic_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint))
119 | 
120 |     def on_fit_end(self) -> None:
121 |         avg_make_experience_duration = all_reduce_mean(self.make_experience_duration, self.world_size)
122 |         avg_learn_duration = all_reduce_mean(self.learn_duration, self.world_size)
123 | 
124 |         avg_make_experience_throughput = self.make_experience_num_samples / (avg_make_experience_duration + 1e-12)
125 |         avg_make_experience_tflops = self.make_experience_flop / 1e12 / (avg_make_experience_duration + 1e-12)
126 | 
127 |         avg_learn_throughput = self.learn_num_samples / (avg_learn_duration + 1e-12)
128 |         avg_learn_tflops = self.learn_flop / 1e12 / (avg_learn_duration + 1e-12)
129 | 
130 |         print_rank_0(
131 |             f'Making experience throughput: {avg_make_experience_throughput:.3f} samples/sec, TFLOPS: {avg_make_experience_tflops:.3f}'
132 |         )
133 |         print_rank_0(f'Learning throughput: {avg_learn_throughput:.3f} samples/sec, TFLOPS: {avg_learn_tflops:.3f}')
134 | 
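A minimal sketch of wiring `PerformanceEvaluator` into training. It assumes `actor`, `critic`, `initial_model` and `reward_model` have already been built elsewhere, and that counting parameters with `numel()` is the appropriate input for the FLOP estimates above (an assumption; the benchmark scripts that use this callback are not reproduced in this dump):

```python
from chatgpt.trainer.callbacks import PerformanceEvaluator


def num_params(model) -> int:
    # total parameter count, fed into the FLOP estimates above
    return sum(p.numel() for p in model.parameters())


evaluator = PerformanceEvaluator(num_params(actor),
                                 num_params(critic),
                                 num_params(initial_model),
                                 num_params(reward_model),
                                 enable_grad_checkpoint=False,
                                 ignore_episodes=1)
# later: PPOTrainer(..., callbacks=[evaluator]) prints throughput/TFLOPS in on_fit_end()
```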
--------------------------------------------------------------------------------
/code/chatgpt/trainer/callbacks/save_checkpoint.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import torch.distributed as dist
4 | from chatgpt.trainer.strategies import ColossalAIStrategy, Strategy
5 | from chatgpt.trainer.utils import is_rank_0
6 | from torch import nn
7 | from torch.optim import Optimizer
8 | 
9 | from .base import Callback
10 | 
11 | 
12 | class SaveCheckpoint(Callback):
13 |     """
14 |     The callback for saving checkpoints for chatgpt.
15 | 
16 |     Only saving the actor and critic models is supported.
17 |     A typical layout of a saved checkpoint is:
18 |     - checkpoint
19 |         - episode_x
20 |             - actor.pt
21 |             - actor-optim-rank-0.pt
22 |             - actor-optim-rank-1.pt
23 |             - critic.pt
24 |             - critic-optim-rank-0.pt
25 |             - critic-optim-rank-1.pt
26 |             - ...
27 | 
28 |     Args:
29 |         path(str): the base path to save checkpoints to; checkpoints are saved at `path/checkpoint`
30 |         interval(int): the episode interval at which to save a checkpoint
31 |         strategy(Strategy): the strategy used to train
32 |         actor(nn.Module): the actor model
33 |         critic(nn.Module): the critic model
34 |         actor_optim(Optimizer): the optimizer of the actor
35 |         critic_optim(Optimizer): the optimizer of the critic
36 | 
37 |     """
38 | 
39 |     def __init__(self,
40 |                  path: str,
41 |                  interval: int,
42 |                  strategy: Strategy,
43 |                  actor: nn.Module = None,
44 |                  critic: nn.Module = None,
45 |                  actor_optim: Optimizer = None,
46 |                  critic_optim: Optimizer = None) -> None:
47 |         super().__init__()
48 |         self.path = os.path.join(path, 'checkpoint')
49 |         self.interval = interval
50 |         self.strategy = strategy
51 |         self.model_dict = {'actor': [actor, actor_optim], 'critic': [critic, critic_optim]}
52 | 
53 |     def on_episode_end(self, episode: int) -> None:
54 |         if (episode + 1) % self.interval != 0:
55 |             return
56 |         base_path = os.path.join(self.path, f'episode_{episode}')
57 |         if not os.path.exists(base_path):
58 |             os.makedirs(base_path)
59 | 
60 |         for model in self.model_dict.keys():
61 | 
62 |             # save model
63 |             if self.model_dict[model][0] is None:
64 |                 # saving only optimizer states is meaningless, so it is skipped
65 |                 continue
66 |             model_path = os.path.join(base_path, f'{model}.pt')
67 |             self.strategy.save_model(model=self.model_dict[model][0], path=model_path, only_rank0=True)
68 | 
69 |             # save optimizer
70 |             if self.model_dict[model][1] is None:
71 |                 continue
72 |             only_rank0 = not isinstance(self.strategy, ColossalAIStrategy)
73 |             rank = 0 if is_rank_0() else dist.get_rank()
74 |             optim_path = os.path.join(base_path, f'{model}-optim-rank-{rank}.pt')
75 |             self.strategy.save_optimizer(optimizer=self.model_dict[model][1], path=optim_path, only_rank0=only_rank0)
76 | 
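For context, `examples/train_dummy.py` later in this dump builds this callback exactly as sketched below (a condensed excerpt; `strategy`, the models and the optimizers are assumed to come from the surrounding script):

```python
from chatgpt.trainer.callbacks import SaveCheckpoint

# saves actor/critic (and their optimizer states) every `interval` episodes,
# producing path/checkpoint/episode_x/ directories as described in the docstring above
ckpt_callback = SaveCheckpoint(path='./ckpts',
                               interval=1,
                               strategy=strategy,
                               actor=actor,
                               critic=critic,
                               actor_optim=actor_optim,
                               critic_optim=critic_optim)
callbacks = [ckpt_callback]  # pass to PPOTrainer(..., callbacks=callbacks)
```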
--------------------------------------------------------------------------------
/code/chatgpt/trainer/ppo.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Callable, Dict, List, Optional
2 | 
3 | import torch.nn as nn
4 | from chatgpt.experience_maker import Experience, NaiveExperienceMaker
5 | from chatgpt.models.base import Actor, Critic
6 | from chatgpt.models.generation_utils import update_model_kwargs_fn
7 | from chatgpt.models.loss import PolicyLoss, ValueLoss
8 | from chatgpt.replay_buffer import NaiveReplayBuffer
9 | from torch.optim import Optimizer
10 | 
11 | from .base import Trainer
12 | from .callbacks import Callback
13 | from .strategies import Strategy
14 | 
15 | 
16 | class PPOTrainer(Trainer):
17 |     """
18 |     Trainer for the PPO algorithm.
19 | 
20 |     Args:
21 |         strategy (Strategy): the strategy to use for training
22 |         actor (Actor): the actor model in the ppo algorithm
23 |         critic (Critic): the critic model in the ppo algorithm
24 |         reward_model (nn.Module): the reward model in the rlhf algorithm, which scores the reward of sentences
25 |         initial_model (Actor): the initial model in the rlhf algorithm, which generates reference logits to limit the update of the actor
26 |         actor_optim (Optimizer): the optimizer to use for the actor model
27 |         critic_optim (Optimizer): the optimizer to use for the critic model
28 |         kl_coef (float, defaults to 0.1): the coefficient of the kl divergence loss
29 |         train_batch_size (int, defaults to 8): the batch size to use for training
30 |         buffer_limit (int, defaults to 0): the max_size limitation of the replay buffer
31 |         buffer_cpu_offload (bool, defaults to True): whether to offload the replay buffer to cpu
32 |         eps_clip (float, defaults to 0.2): the clip coefficient of the policy loss
33 |         value_clip (float, defaults to 0.4): the clip coefficient of the value loss
34 |         experience_batch_size (int, defaults to 8): the batch size to use for experience generation
35 |         max_epochs (int, defaults to 1): the number of epochs of the training process
36 |         tokenizer (Callable, optional): the tokenizer to use for tokenizing the input
37 |         sample_replay_buffer (bool, defaults to False): whether to sample from the replay buffer
38 |         dataloader_pin_memory (bool, defaults to True): whether to pin memory for the data loader
39 |         callbacks (List[Callback], defaults to []): the callbacks to call during the training process
40 |         generate_kwargs (dict, optional): the kwargs to use while the model is generating
41 |     """
42 | 
43 |     def __init__(self,
44 |                  strategy: Strategy,
45 |                  actor: Actor,
46 |                  critic: Critic,
47 |                  reward_model: nn.Module,
48 |                  initial_model: Actor,
49 |                  actor_optim: Optimizer,
50 |                  critic_optim: Optimizer,
51 |                  kl_coef: float = 0.1,
52 |                  train_batch_size: int = 8,
53 |                  buffer_limit: int = 0,
54 |                  buffer_cpu_offload: bool = True,
55 |                  eps_clip: float = 0.2,
56 |                  value_clip: float = 0.4,
57 |                  experience_batch_size: int = 8,
58 |                  max_epochs: int = 1,
59 |                  tokenizer: Optional[Callable[[Any], dict]] = None,
60 |                  sample_replay_buffer: bool = False,
61 |                  dataloader_pin_memory: bool = True,
62 |                  callbacks: List[Callback] = [],
63 |                  **generate_kwargs) -> None:
64 |         experience_maker = NaiveExperienceMaker(actor, critic, reward_model, initial_model, kl_coef)
65 |         replay_buffer = NaiveReplayBuffer(train_batch_size, buffer_limit, buffer_cpu_offload)
66 |         generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, actor)
67 |         super().__init__(strategy, experience_maker, replay_buffer, experience_batch_size, max_epochs, tokenizer,
68 |                          sample_replay_buffer, dataloader_pin_memory, callbacks, **generate_kwargs)
69 |         self.actor = actor
70 |         self.critic = critic
71 | 
72 |         self.actor_loss_fn = PolicyLoss(eps_clip)
73 |         self.critic_loss_fn = ValueLoss(value_clip)
74 | 
75 |         self.actor_optim = actor_optim
76 |         self.critic_optim = critic_optim
77 | 
78 |     def training_step(self, experience: Experience) -> Dict[str, float]:
79 |         self.actor.train()
80 |         self.critic.train()
81 | 
82 |         num_actions = experience.action_mask.size(1)
83 |         action_log_probs = self.actor(experience.sequences, num_actions, attention_mask=experience.attention_mask)
84 |         actor_loss = self.actor_loss_fn(action_log_probs,
85 |                                         experience.action_log_probs,
86 |                                         experience.advantages,
87 |                                         action_mask=experience.action_mask)
88 |         self.strategy.backward(actor_loss, self.actor, self.actor_optim)
89 |         self.strategy.optimizer_step(self.actor_optim)
90 |         self.actor_optim.zero_grad()
91 | 
92 |         values = self.critic(experience.sequences,
93 |                              action_mask=experience.action_mask,
94 |                              attention_mask=experience.attention_mask)
95 |         critic_loss = self.critic_loss_fn(values,
96 |                                           experience.values,
97 |                                           experience.reward,
98 |                                           action_mask=experience.action_mask)
99 |         self.strategy.backward(critic_loss, self.critic, self.critic_optim)
100 |         self.strategy.optimizer_step(self.critic_optim)
101 |         self.critic_optim.zero_grad()
102 | 
103 |         return {'actor_loss': actor_loss.item(), 'critic_loss': critic_loss.item()}
104 | 
105 | 
106 | def _set_default_generate_kwargs(strategy: Strategy, generate_kwargs: dict, actor: Actor) -> dict:
107 |     origin_model = strategy._unwrap_actor(actor)
108 |     new_kwargs = {**generate_kwargs}
109 |     # use the huggingface model's methods directly
110 |     if 'prepare_inputs_fn' not in generate_kwargs and hasattr(origin_model, 'prepare_inputs_for_generation'):
111 |         new_kwargs['prepare_inputs_fn'] = origin_model.prepare_inputs_for_generation
112 | 
113 |     if 'update_model_kwargs_fn' not in generate_kwargs:
114 |         new_kwargs['update_model_kwargs_fn'] = update_model_kwargs_fn
115 | 
116 |     return new_kwargs
117 | 
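`PolicyLoss` and `ValueLoss` live in `chatgpt/models/loss.py`, which is not reproduced in this dump. The sketch below shows the standard clipped PPO objectives that `training_step` relies on, under the assumption that the repo follows the usual formulation (`eps_clip` and `value_clip` correspond to the constructor arguments above; masking over `action_mask` is omitted for brevity):

```python
import torch


def clipped_policy_loss(log_probs: torch.Tensor,
                        old_log_probs: torch.Tensor,
                        advantages: torch.Tensor,
                        eps_clip: float = 0.2) -> torch.Tensor:
    # standard PPO clipped surrogate objective
    ratio = (log_probs - old_log_probs).exp()
    surr1 = ratio * advantages
    surr2 = ratio.clamp(1 - eps_clip, 1 + eps_clip) * advantages
    return -torch.min(surr1, surr2).mean()


def clipped_value_loss(values: torch.Tensor,
                       old_values: torch.Tensor,
                       returns: torch.Tensor,
                       value_clip: float = 0.4) -> torch.Tensor:
    # clip the value update around the old value estimate
    values_clipped = old_values + (values - old_values).clamp(-value_clip, value_clip)
    return torch.max((values - returns) ** 2, (values_clipped - returns) ** 2).mean()
```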
--------------------------------------------------------------------------------
/code/chatgpt/trainer/rm.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | 
3 | import loralib as lora
4 | import torch
5 | from chatgpt.dataset import RewardDataset
6 | from chatgpt.models.loss import PairWiseLoss
7 | from torch.optim import Adam, Optimizer
8 | from torch.utils.data import DataLoader
9 | from tqdm import tqdm
10 | 
11 | from .strategies import Strategy
12 | from .utils import is_rank_0
13 | 
14 | 
15 | class RewardModelTrainer(ABC):
16 |     """
17 |     Trainer to use while training a reward model.
18 | 
19 |     Args:
20 |         model (torch.nn.Module): the model to train
21 |         strategy (Strategy): the strategy to use for training
22 |         optim (Optimizer): the optimizer to use for training
23 |         train_dataset (RewardDataset): the dataset to use for training
24 |         eval_dataset (RewardDataset): the dataset to use for evaluation
25 |         batch_size (int, defaults to 1): the batch size while training
26 |         max_epochs (int, defaults to 2): the number of epochs to train
27 |         optim_kwargs (dict, defaults to {'lr':1e-4}): the kwargs to use while initializing the optimizer
28 |     """
29 | 
30 |     def __init__(
31 |         self,
32 |         model,
33 |         strategy: Strategy,
34 |         optim: Optimizer,
35 |         train_dataset: RewardDataset,
36 |         eval_dataset: RewardDataset,
37 |         batch_size: int = 1,
38 |         max_epochs: int = 2,
39 |     ) -> None:
40 |         super().__init__()
41 |         self.strategy = strategy
42 |         self.epochs = max_epochs
43 |         self.train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
44 |         self.eval_dataloader = DataLoader(eval_dataset, batch_size=batch_size)
45 | 
46 |         self.model = strategy.setup_model(model)
47 |         if "DDP" in str(self.strategy):
48 |             self.model = self.model.module
49 |         self.loss_fn = PairWiseLoss()
50 |         self.optimizer = strategy.setup_optimizer(optim, self.model)
51 | 
52 |     def fit(self, use_lora):
53 |         epoch_bar = tqdm(range(self.epochs), desc='Train epoch', disable=not is_rank_0())
54 |         for epoch in range(self.epochs):
55 |             step_bar = tqdm(range(len(self.train_dataloader)),
56 |                             desc='Train step of epoch %d' % epoch,
57 |                             disable=not is_rank_0())
58 |             # train
59 |             self.model.train()
60 |             for chosen_ids, c_mask, reject_ids, r_mask in self.train_dataloader:
61 |                 chosen_ids = chosen_ids.squeeze(1).cuda()
62 |                 c_mask = c_mask.squeeze(1).cuda()
63 |                 reject_ids = reject_ids.squeeze(1).cuda()
64 |                 r_mask = r_mask.squeeze(1).cuda()
65 |                 chosen_reward = self.model(chosen_ids, attention_mask=c_mask)
66 |                 reject_reward = self.model(reject_ids, attention_mask=r_mask)
67 |                 loss = self.loss_fn(chosen_reward, reject_reward)
68 |                 self.strategy.backward(loss, self.model, self.optimizer)
69 |                 self.strategy.optimizer_step(self.optimizer)
70 |                 self.optimizer.zero_grad()
71 |                 step_bar.update()
72 |                 step_bar.set_postfix({'loss': loss.item()})
73 | 
74 |             # eval
75 |             self.model.eval()
76 |             with torch.no_grad():
77 |                 dist = 0
78 |                 loss_sum = 0
79 |                 for chosen_ids, c_mask, reject_ids, r_mask in self.eval_dataloader:
80 |                     chosen_ids = chosen_ids.squeeze(1).cuda()
81 |                     c_mask = c_mask.squeeze(1).cuda()
82 |                     reject_ids = reject_ids.squeeze(1).cuda()
83 |                     r_mask = r_mask.squeeze(1).cuda()
84 |                     chosen_reward = self.model(chosen_ids, attention_mask=c_mask)
85 |                     reject_reward = self.model(reject_ids, attention_mask=r_mask)
86 |                     dist += (chosen_reward - reject_reward).mean().item()
87 |                     loss = self.loss_fn(chosen_reward, reject_reward)
88 |                     loss_sum += loss.item()
89 |                 dist_mean = dist / len(self.eval_dataloader)
90 |                 loss_mean = loss_sum / len(self.eval_dataloader)
91 |             epoch_bar.update()
92 |             step_bar.set_postfix({'loss': loss_mean, 'dist_mean': dist_mean})
93 |             step_bar.close()
94 | 
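A hedged usage sketch for `RewardModelTrainer`. It borrows the pattern from `examples/train_reward_model.py`, whose source is not included in this dump, so the `GPTRM` class name, the trainer export, and the dataset variables are assumptions for illustration only:

```python
from torch.optim import Adam

from chatgpt.models.gpt import GPTRM              # assumed class name in chatgpt/models/gpt
from chatgpt.trainer import RewardModelTrainer    # assumed export from chatgpt/trainer
from chatgpt.trainer.strategies import NaiveStrategy

strategy = NaiveStrategy()
with strategy.model_init_context():
    model = GPTRM(pretrained='gpt2').cuda()       # 'gpt2' is an illustrative checkpoint

optim = Adam(model.parameters(), lr=1e-4)

# train_dataset / eval_dataset are RewardDataset instances built from (chosen, rejected)
# pairs; their construction lives in chatgpt/dataset/reward_dataset.py, not reproduced here
trainer = RewardModelTrainer(model, strategy, optim, train_dataset, eval_dataset,
                             batch_size=4, max_epochs=2)
trainer.fit(use_lora=0)
```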
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import Strategy
2 | from .colossalai import ColossalAIStrategy
3 | from .ddp import DDPStrategy
4 | from .naive import NaiveStrategy
5 | 
6 | __all__ = ['Strategy', 'NaiveStrategy', 'DDPStrategy', 'ColossalAIStrategy']
7 | 
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from contextlib import nullcontext
3 | from typing import Any, List, Tuple, Union
4 | 
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 | from chatgpt.models.base import Actor, Critic, RewardModel
9 | from chatgpt.replay_buffer import ReplayBuffer
10 | from torch.optim import Optimizer
11 | from torch.utils.data import DataLoader
12 | 
13 | from .sampler import DistributedSampler
14 | 
15 | ModelOptimPair = Tuple[nn.Module, Optimizer]
16 | ModelOrModelOptimPair = Union[nn.Module, ModelOptimPair]
17 | 
18 | 
19 | class Strategy(ABC):
20 |     """
21 |     Base class for training strategies.
22 | """ 23 | 24 | def __init__(self) -> None: 25 | super().__init__() 26 | self.setup_distributed() 27 | 28 | @abstractmethod 29 | def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: Optimizer, **kwargs) -> None: 30 | pass 31 | 32 | @abstractmethod 33 | def optimizer_step(self, optimizer: Optimizer, **kwargs) -> None: 34 | pass 35 | 36 | @abstractmethod 37 | def setup_distributed(self) -> None: 38 | pass 39 | 40 | @abstractmethod 41 | def setup_model(self, model: nn.Module) -> nn.Module: 42 | pass 43 | 44 | @abstractmethod 45 | def setup_optimizer(self, optimizer: Optimizer, model: nn.Module) -> Optimizer: 46 | pass 47 | 48 | @abstractmethod 49 | def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader: 50 | pass 51 | 52 | def model_init_context(self): 53 | return nullcontext() 54 | 55 | def prepare( 56 | self, *models_or_model_optim_pairs: ModelOrModelOptimPair 57 | ) -> Union[List[ModelOrModelOptimPair], ModelOrModelOptimPair]: 58 | """Prepare models or model-optimizer-pairs based on each strategy. 59 | 60 | Example:: 61 | >>> # when fine-tuning actor and critic 62 | >>> (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare((actor, actor_optim), (critic, critic_optim), reward_model, initial_model) 63 | >>> # or when training reward model 64 | >>> (reward_model, reward_model_optim) = strategy.prepare((reward_model, reward_model_optim)) 65 | >>> # or just inference 66 | >>> actor, critic = strategy.prepare(actor, critic) 67 | 68 | Returns: 69 | Union[List[ModelOrModelOptimPair], ModelOrModelOptimPair]: Models or model-optimizer-pairs in the original order. 70 | """ 71 | 72 | def prepare_model(model: nn.Module): 73 | if isinstance(model, Actor): 74 | return Actor(self.setup_model(self._unwrap_model(model))) 75 | return self.setup_model(self._unwrap_model(model)) 76 | 77 | rets = [] 78 | for arg in models_or_model_optim_pairs: 79 | if isinstance(arg, tuple): 80 | assert len(arg) == 2, f'Expect (model, optimizer) pair, got a tuple with size "{len(arg)}"' 81 | model, optimizer = arg 82 | model = prepare_model(model) 83 | optimizer = self.setup_optimizer(optimizer, self._unwrap_model(model)) 84 | rets.append((model, optimizer)) 85 | elif isinstance(arg, nn.Module): 86 | rets.append(prepare_model(arg)) 87 | else: 88 | raise RuntimeError(f'Expect model or (model, optimizer) pair, got {type(arg)}') 89 | 90 | if len(rets) == 1: 91 | return rets[0] 92 | return rets 93 | 94 | @staticmethod 95 | def _unwrap_model(model: nn.Module) -> nn.Module: 96 | """Useful for saving state dict. As actor is wrapped by Actor class again in `prepare()`, we should unwrap it before saving. 97 | 98 | Args: 99 | model (nn.Module): an actor or a critic 100 | """ 101 | if isinstance(model, Actor): 102 | return model.model 103 | return model 104 | 105 | @staticmethod 106 | def _unwrap_actor(actor: Actor) -> nn.Module: 107 | """Get `actor.model` from a wrapped (by `prepare()`) actor. Useful for getting original huggingface model. 
108 | 
109 |         Args:
110 |             actor (Actor): a wrapped actor
111 |         """
112 |         return Strategy._unwrap_model(actor)
113 | 
114 |     @abstractmethod
115 |     def save_model(self, model: nn.Module, path: str, only_rank0: bool = False) -> None:
116 |         pass
117 | 
118 |     @abstractmethod
119 |     def load_model(self, model: nn.Module, path: str, map_location: Any = None, strict: bool = True) -> None:
120 |         pass
121 | 
122 |     @abstractmethod
123 |     def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None:
124 |         pass
125 | 
126 |     @abstractmethod
127 |     def load_optimizer(self, optimizer: Optimizer, path: str, map_location: Any = None) -> None:
128 |         pass
129 | 
130 |     def setup_sampler(self, dataset) -> DistributedSampler:
131 |         return DistributedSampler(dataset, 1, 0)
132 | 
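Tying the `Strategy` interface together, here is a short sketch of the intended call pattern, mirroring the `prepare()` docstring above and the example scripts later in this dump (`build_models()` is an assumed helper; see `examples/train_dummy.py` for the real model construction):

```python
from torch.optim import Adam

from chatgpt.trainer.strategies import NaiveStrategy

strategy = NaiveStrategy()

# model_init_context() is a plain nullcontext here; the ColossalAI strategy below
# overrides it so that ZeRO-3 can shard/offload parameters at construction time
with strategy.model_init_context():
    actor, critic = build_models()

actor_optim = Adam(actor.parameters(), lr=5e-6)
critic_optim = Adam(critic.parameters(), lr=5e-6)

# wrap models and optimizers according to the strategy, preserving argument order
(actor, actor_optim), (critic, critic_optim) = strategy.prepare((actor, actor_optim),
                                                                (critic, critic_optim))
```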
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/colossalai.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from typing import Optional, Union
3 | 
4 | import torch
5 | import torch.distributed as dist
6 | import torch.nn as nn
7 | import torch.optim as optim
8 | from chatgpt.models.base import Actor
9 | from chatgpt.models.lora import LoraLinear
10 | from torch.optim import Optimizer
11 | 
12 | import colossalai
13 | from colossalai.nn.optimizer import CPUAdam, HybridAdam
14 | from colossalai.nn.parallel import ZeroDDP, zero_model_wrapper, zero_optim_wrapper
15 | from colossalai.nn.parallel.utils import get_static_torch_model
16 | from colossalai.tensor import ProcessGroup, ShardSpec
17 | from colossalai.utils import get_current_device
18 | from colossalai.utils.model.colo_init_context import ColoInitContext
19 | 
20 | from .base import Strategy
21 | from .ddp import DDPStrategy
22 | 
23 | 
24 | class ColossalAIStrategy(DDPStrategy):
25 |     """
26 |     The strategy for training with ColossalAI.
27 | 
28 |     Args:
29 |         stage(int): The stage to use in ZeRO. Choose from (1, 2, 3).
30 |         seed(int): The seed for the random number generator.
31 |         shard_init(bool): Whether to shard the model parameters during initialization. Only for ZeRO-3.
32 |             This is not compatible with `from_pretrained()`. We temporarily disable this and will support it in the future.
33 |         placement_policy(str): The placement policy for gemini. Choose from ('cpu', 'cuda').
34 |             If it is 'cpu', parameters, gradients and optimizer states will be offloaded to CPU.
35 |             If it is 'cuda', they will not be offloaded, which means max CUDA memory will be used. It is the fastest.
36 |         pin_memory(bool): Whether to pin the memory for the data loader. Only for ZeRO-3.
37 |         force_outputs_fp32(bool): Whether to force the outputs to be fp32. Only for ZeRO-3.
38 |         search_range_mb(int): The search range in MB for the chunk size. Only for ZeRO-3.
39 |         hidden_dim(optional, int): The hidden dimension for the gemini. Only for ZeRO-3.
40 |         min_chunk_size_mb(float): The minimum chunk size in MB. Only for ZeRO-3.
41 |         gpu_margin_mem_ratio(float): The margin memory ratio for the GPU. Only for ZeRO-3.
42 |         reduce_bucket_size(int): The reduce bucket size in bytes. Only for ZeRO-1 and ZeRO-2.
43 |         overlap_communication(bool): Whether to overlap communication and computation. Only for ZeRO-1 and ZeRO-2.
44 |         initial_scale(float): The initial scale for the optimizer.
45 |         growth_factor(float): The growth factor for the optimizer.
46 |         backoff_factor(float): The backoff factor for the optimizer.
47 |         growth_interval(int): The growth interval for the optimizer.
48 |         hysteresis(int): The hysteresis for the optimizer.
49 |         min_scale(float): The minimum scale for the optimizer.
50 |         max_scale(float): The maximum scale for the optimizer.
51 |         max_norm(float): The maximum norm for the optimizer.
52 |         norm_type(float): The norm type for the optimizer.
53 | 
54 |     """
55 | 
56 |     def __init__(
57 |             self,
58 |             stage: int = 3,
59 |             seed: int = 42,
60 |             shard_init: bool = False,    # only for stage 3
61 |             placement_policy: str = 'cuda',
62 |             pin_memory: bool = True,    # only for stage 3
63 |             force_outputs_fp32: bool = False,    # only for stage 3
64 |             search_range_mb: int = 32,    # only for stage 3
65 |             hidden_dim: Optional[int] = None,    # only for stage 3
66 |             min_chunk_size_mb: float = 32,    # only for stage 3
67 |             gpu_margin_mem_ratio: float = 0.0,    # only for stage 3
68 |             reduce_bucket_size: int = 12 * 1024**2,    # only for stage 1&2
69 |             overlap_communication: bool = True,    # only for stage 1&2
70 |             initial_scale: float = 2**16,
71 |             growth_factor: float = 2,
72 |             backoff_factor: float = 0.5,
73 |             growth_interval: int = 1000,
74 |             hysteresis: int = 2,
75 |             min_scale: float = 1,
76 |             max_scale: float = 2**32,
77 |             max_norm: float = 0.0,
78 |             norm_type: float = 2.0) -> None:
79 |         super().__init__(seed)
80 |         assert placement_policy in ('cpu', 'cuda'), f'Unsupported placement policy "{placement_policy}"'
81 |         self.stage = stage
82 |         # TODO(ver217): support shard_init when using from_pretrained()
83 |         if shard_init:
84 |             warnings.warn(
85 |                 'Shard init is not supported with model.from_pretrained() yet. Please load weights after strategy.prepare()'
86 |             )
87 |         self.shard_init = shard_init
88 |         self.gemini_config = dict(device=get_current_device(),
89 |                                   placement_policy=placement_policy,
90 |                                   pin_memory=pin_memory,
91 |                                   force_outputs_fp32=force_outputs_fp32,
92 |                                   strict_ddp_mode=shard_init,
93 |                                   search_range_mb=search_range_mb,
94 |                                   hidden_dim=hidden_dim,
95 |                                   min_chunk_size_mb=min_chunk_size_mb)
96 |         if stage == 3:
97 |             self.zero_optim_config = dict(gpu_margin_mem_ratio=gpu_margin_mem_ratio)
98 |         else:
99 |             self.zero_optim_config = dict(reduce_bucket_size=reduce_bucket_size,
100 |                                           overlap_communication=overlap_communication,
101 |                                           cpu_offload=(placement_policy == 'cpu'))
102 |         self.optim_kwargs = dict(initial_scale=initial_scale,
103 |                                  growth_factor=growth_factor,
104 |                                  backoff_factor=backoff_factor,
105 |                                  growth_interval=growth_interval,
106 |                                  hysteresis=hysteresis,
107 |                                  min_scale=min_scale,
108 |                                  max_scale=max_scale,
109 |                                  max_norm=max_norm,
110 |                                  norm_type=norm_type)
111 | 
112 |     def setup_distributed(self) -> None:
113 |         colossalai.launch_from_torch({}, seed=self.seed)
114 | 
115 |     def model_init_context(self):
116 |         if self.stage == 3:
117 |             world_size = dist.get_world_size()
118 |             shard_pg = ProcessGroup(tp_degree=world_size) if self.shard_init else None
119 |             default_dist_spec = ShardSpec([-1], [world_size]) if self.shard_init else None
120 |             return ColoInitContext(device=get_current_device(),
121 |                                    dtype=torch.half,
122 |                                    default_pg=shard_pg,
123 |                                    default_dist_spec=default_dist_spec)
124 |         return super().model_init_context()
125 | 
126 |     def setup_model(self, model: nn.Module) -> nn.Module:
127 |         return zero_model_wrapper(model, zero_stage=self.stage, gemini_config=self.gemini_config)
128 | 
129 |     def setup_optimizer(self, optimizer: optim.Optimizer, model: nn.Module) -> optim.Optimizer:
130 |         assert isinstance(optimizer, (CPUAdam, HybridAdam)), f'Unsupported optimizer {type(optimizer)}'
131 |         return zero_optim_wrapper(model, optimizer, optim_config=self.zero_optim_config, **self.optim_kwargs)
132 | 
133 |     def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: optim.Optimizer, **kwargs) -> None:
134 |         optimizer.backward(loss)
135 | 
136 |     def optimizer_step(self, optimizer: optim.Optimizer, **kwargs) -> None:
137 |         optimizer.step()
138 | 
139 |     @staticmethod
140 |     def _unwrap_actor(actor: Actor) -> nn.Module:
141 |         model: Union[nn.Module, ZeroDDP] = Strategy._unwrap_actor(actor)
142 |         if isinstance(model, ZeroDDP):
143 |             return model.module
144 |         return model
145 | 
146 |     def save_model(self, model: nn.Module, path: str, only_rank0: bool = False) -> None:
147 |         unwrapped_model = self._unwrap_model(model)
148 |         # TODO: find a better way to get a torch model from a gemini model
149 |         # for now, extract the torch model from the gemini model as follows
150 |         if isinstance(unwrapped_model, ZeroDDP):
151 |             state_dict = unwrapped_model.state_dict()
152 |             unwrapped_model = get_static_torch_model(unwrapped_model)
153 |             if only_rank0 and dist.get_rank() != 0:
154 |                 return
155 |             unwrapped_model.load_state_dict(state_dict)
156 |         # merge lora weights into the base weights
157 |         for module in unwrapped_model.modules():
158 |             if isinstance(module, LoraLinear):
159 |                 module.merge_weights = True
160 |                 module.eval()
161 |         # get the state_dict and save it
162 |         state_dict = unwrapped_model.state_dict()
163 |         if only_rank0 and dist.get_rank() != 0:
164 |             return
165 |         torch.save(state_dict, path)
166 | 
167 |     def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None:
168 |         if only_rank0:
169 |             raise RuntimeError(
170 |                 'Optimizer states are sharded when using ColossalAIStrategy, so saving only on rank 0 is not supported.')
171 |         torch.save(optimizer.state_dict(), path)
172 | 
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/ddp.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | 
4 | import numpy as np
5 | import torch
6 | import torch.distributed as dist
7 | import torch.nn as nn
8 | from chatgpt.models.base import Actor
9 | from chatgpt.models.lora import LoraLinear
10 | from chatgpt.replay_buffer import ReplayBuffer
11 | from torch.nn.parallel import DistributedDataParallel as DDP
12 | from torch.optim import Optimizer
13 | from torch.utils.data import DataLoader
14 | 
15 | from .base import Strategy
16 | from .naive import NaiveStrategy
17 | from .sampler import DistributedSampler
18 | 
19 | 
20 | class DDPStrategy(NaiveStrategy):
21 |     """
22 |     Strategy for distributed training using torch.distributed.
23 | """ 24 | 25 | def __init__(self, seed: int = 42) -> None: 26 | self.seed = seed 27 | super().__init__() 28 | 29 | def setup_distributed(self) -> None: 30 | try: 31 | rank = int(os.environ['RANK']) 32 | local_rank = int(os.environ['LOCAL_RANK']) 33 | world_size = int(os.environ['WORLD_SIZE']) 34 | host = os.environ['MASTER_ADDR'] 35 | port = int(os.environ['MASTER_PORT']) 36 | except KeyError as e: 37 | raise RuntimeError( 38 | f"Could not find {e} in the torch environment, visit https://www.colossalai.org/ for more information on launching with torch" 39 | ) 40 | dist.init_process_group('nccl', init_method=f'tcp://[{host}]:{port}', world_size=world_size, rank=rank) 41 | self.set_seed(self.seed) 42 | torch.cuda.set_device(local_rank) 43 | 44 | def set_seed(self, seed: int) -> None: 45 | random.seed(seed) 46 | np.random.seed(seed) 47 | torch.manual_seed(seed) 48 | 49 | def setup_model(self, model: nn.Module) -> nn.Module: 50 | device = torch.cuda.current_device() 51 | return DDP(model, device_ids=[device]) 52 | 53 | def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader: 54 | # DDP only mode, replay buffers on each rank are different. 55 | # sampler = DistributedSampler(replay_buffer, 56 | # num_replicas=dist.get_world_size(), 57 | # rank=dist.get_rank(), 58 | # shuffle=True, 59 | # seed=self.seed, 60 | # drop_last=True) 61 | return DataLoader( 62 | replay_buffer, 63 | batch_size=replay_buffer.sample_batch_size, 64 | # sampler=sampler, 65 | shuffle=True, 66 | drop_last=True, 67 | pin_memory=pin_memory, 68 | collate_fn=replay_buffer.collate_fn) 69 | 70 | @staticmethod 71 | def _unwrap_actor(actor: Actor) -> nn.Module: 72 | model: DDP = Strategy._unwrap_actor(actor) 73 | return model.module 74 | 75 | def save_model(self, model: nn.Module, path: str, only_rank0: bool = False) -> None: 76 | for module in model.modules(): 77 | if isinstance(module, LoraLinear): 78 | module.merge_weights=True 79 | module.eval() 80 | 81 | if only_rank0 and dist.get_rank() != 0: 82 | return 83 | model = model.model.module 84 | state_dict = model.state_dict() 85 | torch.save(state_dict, path) 86 | 87 | def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None: 88 | if only_rank0 and dist.get_rank() != 0: 89 | return 90 | super().save_optimizer(optimizer, path, only_rank0) 91 | 92 | def setup_sampler(self, dataset) -> DistributedSampler: 93 | return DistributedSampler(dataset, dist.get_world_size(), dist.get_rank()) 94 | -------------------------------------------------------------------------------- /code/chatgpt/trainer/strategies/naive.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from chatgpt.replay_buffer import ReplayBuffer 7 | from torch.optim import Optimizer 8 | from torch.utils.data import DataLoader 9 | 10 | from .base import Strategy 11 | 12 | 13 | class NaiveStrategy(Strategy): 14 | """ 15 | Strategy for single GPU. No parallelism is used. 
16 | """ 17 | 18 | def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: optim.Optimizer, **kwargs) -> None: 19 | loss.backward() 20 | 21 | def optimizer_step(self, optimizer: optim.Optimizer, **kwargs) -> None: 22 | optimizer.step() 23 | 24 | def setup_distributed(self) -> None: 25 | pass 26 | 27 | def setup_model(self, model: nn.Module) -> nn.Module: 28 | return model 29 | 30 | def setup_optimizer(self, optimizer: optim.Optimizer, model: nn.Module) -> optim.Optimizer: 31 | return optimizer 32 | 33 | def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader: 34 | return DataLoader(replay_buffer, 35 | batch_size=replay_buffer.sample_batch_size, 36 | shuffle=True, 37 | drop_last=True, 38 | pin_memory=pin_memory, 39 | collate_fn=replay_buffer.collate_fn) 40 | 41 | def save_model(self, model: nn.Module, path: str, only_rank0: bool = False) -> None: 42 | unwrapped_model = self._unwrap_model(model) 43 | torch.save(unwrapped_model.state_dict(), path) 44 | 45 | def load_model(self, model: nn.Module, path: str, map_location: Any = None, strict: bool = True) -> None: 46 | unwrapped_model = self._unwrap_model(model) 47 | state_dict = torch.load(path, map_location=map_location) 48 | unwrapped_model.load_state_dict(state_dict, strict=strict) 49 | 50 | def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None: 51 | torch.save(optimizer.state_dict(), path) 52 | 53 | def load_optimizer(self, optimizer: Optimizer, path: str, map_location: Any = None) -> None: 54 | state_dict = torch.load(path, map_location=map_location) 55 | optimizer.load_state_dict(state_dict) 56 | -------------------------------------------------------------------------------- /code/chatgpt/trainer/strategies/sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | 5 | 6 | class DistributedSampler: 7 | 8 | def __init__(self, dataset, num_replicas: int, rank: int) -> None: 9 | self.dataset = dataset 10 | self.num_replicas = num_replicas 11 | self.rank = rank 12 | 13 | if len(self.dataset) % self.num_replicas != 0: 14 | self.num_samples = math.ceil( 15 | (len(self.dataset) - self.num_replicas) / self.num_replicas # type: ignore[arg-type] 16 | ) 17 | else: 18 | self.num_samples = math.ceil(len(self.dataset) / self.num_replicas) 19 | 20 | self.total_size = self.num_samples * self.num_replicas 21 | 22 | indices = list(range(len(self.dataset))) 23 | indices = indices[:self.total_size] 24 | assert len(indices) == self.total_size 25 | # subsample 26 | indices = indices[self.rank:self.total_size:self.num_replicas] 27 | assert len(indices) == self.num_samples 28 | self.indices = indices 29 | 30 | def sample(self, batch_size: int) -> list: 31 | sampled_indices = np.random.choice(self.indices, batch_size, replace=False) 32 | return [self.dataset[idx] for idx in sampled_indices] 33 | -------------------------------------------------------------------------------- /code/chatgpt/trainer/utils.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | 3 | 4 | def is_rank_0() -> bool: 5 | return not dist.is_initialized() or dist.get_rank() == 0 6 | -------------------------------------------------------------------------------- /code/examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | ## Install requirements 4 | 5 | ```shell 6 | pip install -r 
requirements.txt
7 | ```
8 | 
9 | ## Train the reward model (Stage 2)
10 | We use [rm-static](https://huggingface.co/datasets/Dahoas/rm-static) as the dataset to train our reward model. It is a dataset of chosen and rejected responses to the same prompts.
11 | 
12 | The dataset is downloaded from Hugging Face automatically.
13 | 
14 | Use the following commands to train your reward model.
15 | 
16 | ```shell
17 | # Naive reward model training
18 | python train_reward_model.py --pretrain <pretrain> --model <model> --strategy naive
19 | # use colossalai_zero2
20 | torchrun --standalone --nproc_per_node=2 train_reward_model.py --pretrain <pretrain> --model <model> --strategy colossalai_zero2
21 | ```
22 | 
23 | ## Train with dummy prompt data (Stage 3)
24 | 
25 | This script supports 3 strategies:
26 | 
27 | - naive
28 | - ddp
29 | - colossalai
30 | 
31 | It uses randomly generated prompt data.
32 | 
33 | The naive strategy only supports single-GPU training:
34 | 
35 | ```shell
36 | python train_dummy.py --strategy naive
37 | # display cli help
38 | python train_dummy.py -h
39 | ```
40 | 
41 | The DDP and ColossalAI strategies support multi-GPU training:
42 | 
43 | ```shell
44 | # run DDP on 2 GPUs
45 | torchrun --standalone --nproc_per_node=2 train_dummy.py --strategy ddp
46 | # run ColossalAI on 2 GPUs
47 | torchrun --standalone --nproc_per_node=2 train_dummy.py --strategy colossalai_zero2
48 | ```
49 | 
50 | ## Train with real prompt data (Stage 3)
51 | 
52 | We use [awesome-chatgpt-prompts](https://huggingface.co/datasets/fka/awesome-chatgpt-prompts) as the example dataset. It is a small dataset with hundreds of prompts.
53 | 
54 | You should download `prompts.csv` first.
55 | 
56 | This script also supports the 3 strategies.
57 | 
58 | ```shell
59 | # display cli help
60 | python train_prompts.py -h
61 | # run naive on 1 GPU
62 | python train_prompts.py prompts.csv --strategy naive
63 | # run DDP on 2 GPUs
64 | torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy ddp
65 | # run ColossalAI on 2 GPUs
66 | torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2
67 | ```
68 | 
69 | ## Inference example (after Stage 3)
70 | We provide a naive inference demo after training; the Python sketch below shows the programmatic equivalent, and the CLI commands follow.
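The snippet below is a minimal Python sketch of what `inference.py` does under the hood; it assumes a GPT-2 actor checkpoint saved by `strategy.save_model()` (the paths are illustrative).

```python
import torch
from chatgpt.models.gpt import GPTActor
from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer

# build the actor and load the trained weights (checkpoint path is an example)
actor = GPTActor(pretrained='gpt2').to(torch.cuda.current_device())
actor.model.load_state_dict(torch.load('actor_checkpoint_prompts.pt'))
actor.eval()

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

input_ids = tokenizer.encode('Question: How are you ? Answer:',
                             return_tensors='pt').to(torch.cuda.current_device())
outputs = actor.generate(input_ids, max_length=100, do_sample=True,
                         top_k=50, top_p=0.95, num_return_sequences=1)
print(tokenizer.batch_decode(outputs[0], skip_special_tokens=True))
```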
71 | ```shell
72 | # inference, using pretrain path to configure model
73 | python inference.py --model_path <model_path> --model <model> --pretrain <pretrain>
74 | # example
75 | python inference.py --model_path ./actor_checkpoint_prompts.pt --pretrain bigscience/bloom-560m --model bloom
76 | ```
77 | 
78 | 
79 | #### Data
80 | - [x] [rm-static](https://huggingface.co/datasets/Dahoas/rm-static)
81 | - [x] [hh-rlhf](https://huggingface.co/datasets/Anthropic/hh-rlhf)
82 | - [ ] [openai/summarize_from_feedback](https://huggingface.co/datasets/openai/summarize_from_feedback)
83 | - [ ] [openai/webgpt_comparisons](https://huggingface.co/datasets/openai/webgpt_comparisons)
84 | - [ ] [Dahoas/instruct-synthetic-prompt-responses](https://huggingface.co/datasets/Dahoas/instruct-synthetic-prompt-responses)
85 | 
86 | ## Supported Models
87 | 
88 | ### GPT
89 | - [x] GPT2-S (s)
90 | - [x] GPT2-M (m)
91 | - [x] GPT2-L (l)
92 | - [ ] GPT2-XL (xl)
93 | - [x] GPT2-4B (4b)
94 | - [ ] GPT2-6B (6b)
95 | - [ ] GPT2-8B (8b)
96 | - [ ] GPT2-10B (10b)
97 | - [ ] GPT2-12B (12b)
98 | - [ ] GPT2-15B (15b)
99 | - [ ] GPT2-18B (18b)
100 | - [ ] GPT2-20B (20b)
101 | - [ ] GPT2-24B (24b)
102 | - [ ] GPT2-28B (28b)
103 | - [ ] GPT2-32B (32b)
104 | - [ ] GPT2-36B (36b)
105 | - [ ] GPT2-40B (40b)
106 | - [ ] GPT3 (175b)
107 | 
108 | ### BLOOM
109 | - [x] [BLOOM-560m](https://huggingface.co/bigscience/bloom-560m)
110 | - [x] [BLOOM-1b1](https://huggingface.co/bigscience/bloom-1b1)
111 | - [x] [BLOOM-3b](https://huggingface.co/bigscience/bloom-3b)
112 | - [x] [BLOOM-7b](https://huggingface.co/bigscience/bloom-7b1)
113 | - [ ] BLOOM-175b
114 | 
115 | ### OPT
116 | - [x] [OPT-125M](https://huggingface.co/facebook/opt-125m)
117 | - [x] [OPT-350M](https://huggingface.co/facebook/opt-350m)
118 | - [ ] [OPT-1.3B](https://huggingface.co/facebook/opt-1.3b)
119 | - [ ] [OPT-2.7B](https://huggingface.co/facebook/opt-2.7b)
120 | - [ ] [OPT-6.7B](https://huggingface.co/facebook/opt-6.7b)
121 | - [ ] [OPT-13B](https://huggingface.co/facebook/opt-13b)
122 | - [ ] [OPT-30B](https://huggingface.co/facebook/opt-30b)
123 | 
--------------------------------------------------------------------------------
/code/examples/inference.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | 
3 | import torch
4 | from chatgpt.models.bloom import BLOOMActor
5 | from chatgpt.models.gpt import GPTActor
6 | from chatgpt.models.opt import OPTActor
7 | from transformers import AutoTokenizer
8 | from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
9 | 
10 | 
11 | def eval(args):
12 |     # configure model
13 |     if args.model == 'gpt2':
14 |         actor = GPTActor(pretrained=args.pretrain).to(torch.cuda.current_device())
15 |     elif args.model == 'bloom':
16 |         actor = BLOOMActor(pretrained=args.pretrain).to(torch.cuda.current_device())
17 |     elif args.model == 'opt':
18 |         actor = OPTActor(pretrained=args.pretrain).to(torch.cuda.current_device())
19 |     else:
20 |         raise ValueError(f'Unsupported model "{args.model}"')
21 | 
22 |     state_dict = torch.load(args.model_path)
23 |     actor.model.load_state_dict(state_dict)
24 | 
25 |     # configure tokenizer
26 |     if args.model == 'gpt2':
27 |         tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
28 |         tokenizer.pad_token = tokenizer.eos_token
29 |     elif args.model == 'bloom':
30 |         tokenizer = AutoTokenizer.from_pretrained('bigscience/bloom-560m')
31 |         tokenizer.pad_token = tokenizer.eos_token
32 |     elif args.model == 'opt':
33 |         tokenizer = AutoTokenizer.from_pretrained('facebook/opt-350m')
34 |     else:
35 |         raise ValueError(f'Unsupported model "{args.model}"')
36 | 
37 |     actor.eval()
38 |     input = args.input
39 |     input_ids = tokenizer.encode(input, return_tensors='pt').to(torch.cuda.current_device())
40 |     outputs = actor.generate(input_ids,
41 |                              max_length=args.max_length,
42 |                              do_sample=True,
43 |                              top_k=50,
44 |                              top_p=0.95,
45 |                              num_return_sequences=1)
46 |     output = tokenizer.batch_decode(outputs[0], skip_special_tokens=True)
47 |     print(output)
48 | 
49 | 
50 | if __name__ == '__main__':
51 |     parser = argparse.ArgumentParser()
52 |     parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt'])
53 |     # We suggest using a pretrained model from HuggingFace; use --pretrain to configure the model
54 |     parser.add_argument('--pretrain', type=str, default=None)
55 |     parser.add_argument('--model_path', type=str, default=None)
56 |     parser.add_argument('--input', type=str, default='Question: How are you ? Answer:')
57 |     parser.add_argument('--max_length', type=int, default=100)
58 |     args = parser.parse_args()
59 |     eval(args)
60 | 
--------------------------------------------------------------------------------
/code/examples/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas>=1.4.1
2 | 
--------------------------------------------------------------------------------
/code/examples/test_ci.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -xue
4 | 
5 | if [ -z "$PROMPT_PATH" ]; then
6 |     echo "Please set \$PROMPT_PATH to the path to prompts csv."
7 |     exit 1
8 | fi
9 | 
10 | BASE=$(realpath $(dirname $0))
11 | 
12 | export OMP_NUM_THREADS=8
13 | 
14 | # install requirements
15 | pip install -r ${BASE}/requirements.txt
16 | 
17 | # train dummy
18 | python ${BASE}/train_dummy.py --strategy naive --num_episodes 1 \
19 |     --max_timesteps 2 --update_timesteps 2 \
20 |     --max_epochs 1 --train_batch_size 2 --lora_rank 4
21 | 
22 | torchrun --standalone --nproc_per_node=2 ${BASE}/train_dummy.py \
23 |     --strategy colossalai_gemini --num_episodes 1 --max_timesteps 2 \
24 |     --update_timesteps 2 --max_epochs 1 --train_batch_size 2\
25 |     --pretrain 'facebook/opt-350m' --model opt --lora_rank 4\
26 |     --save_path ${BASE}/actor_checkpoint_dummy.pt
27 | python ${BASE}/inference.py --model_path ${BASE}/actor_checkpoint_dummy.pt --pretrain 'facebook/opt-350m' --model opt
28 | 
29 | torchrun --standalone --nproc_per_node=2 ${BASE}/train_dummy.py \
30 |     --strategy ddp --num_episodes 1 --max_timesteps 2 \
31 |     --update_timesteps 2 --max_epochs 1 --train_batch_size 2\
32 |     --pretrain 'facebook/opt-350m' --model opt --lora_rank 4\
33 |     --save_path ${BASE}/actor_checkpoint_dummy.pt
34 | python ${BASE}/inference.py --model_path ${BASE}/actor_checkpoint_dummy.pt --pretrain 'facebook/opt-350m' --model opt
35 | 
36 | torchrun --standalone --nproc_per_node=2 ${BASE}/train_dummy.py \
37 |     --strategy colossalai_zero2 --num_episodes 1 --max_timesteps 2 \
38 |     --update_timesteps 2 --max_epochs 1 --train_batch_size 2\
39 |     --pretrain 'gpt2' --model gpt2 --lora_rank 4\
40 |     --save_path ${BASE}/actor_checkpoint_dummy.pt
41 | python ${BASE}/inference.py --model_path ${BASE}/actor_checkpoint_dummy.pt --pretrain 'gpt2' --model gpt2
42 | 
43 | rm -rf ${BASE}/actor_checkpoint_dummy.pt
44 | 
45 | # train prompts
46 | python ${BASE}/train_prompts.py $PROMPT_PATH --strategy naive --num_episodes 1 \
47 |     --max_timesteps 2 --update_timesteps 2 \
48 |     --max_epochs 1 --train_batch_size 2 --lora_rank 4
49 | 
50 | torchrun --standalone
--nproc_per_node=2 ${BASE}/train_prompts.py $PROMPT_PATH \ 51 | --strategy colossalai_zero2 --num_episodes 1 --max_timesteps 2 \ 52 | --update_timesteps 2 --max_epochs 1 --train_batch_size 2\ 53 | --pretrain 'facebook/opt-350m' --model opt --lora_rank 4\ 54 | --save_path ${BASE}/actor_checkpoint_prompts.pt 55 | python ${BASE}/inference.py --model_path ${BASE}/actor_checkpoint_prompts.pt --pretrain 'facebook/opt-350m' --model opt 56 | 57 | torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py $PROMPT_PATH \ 58 | --strategy ddp --num_episodes 1 --max_timesteps 2 \ 59 | --update_timesteps 2 --max_epochs 1 --train_batch_size 2\ 60 | --pretrain 'gpt2' --model gpt2 --lora_rank 4\ 61 | --save_path ${BASE}/actor_checkpoint_prompts.pt 62 | python ${BASE}/inference.py --model_path ${BASE}/actor_checkpoint_prompts.pt --pretrain 'gpt2' --model gpt2 63 | 64 | torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py $PROMPT_PATH \ 65 | --strategy colossalai_gemini --num_episodes 1 --max_timesteps 2 \ 66 | --update_timesteps 2 --max_epochs 1 --train_batch_size 2\ 67 | --pretrain 'gpt2' --model gpt2 --lora_rank 4\ 68 | --save_path ${BASE}/actor_checkpoint_prompts.pt 69 | python ${BASE}/inference.py --model_path ${BASE}/actor_checkpoint_prompts.pt --pretrain 'gpt2' --model gpt2 70 | 71 | rm -rf ${BASE}/actor_checkpoint_prompts.pt 72 | -------------------------------------------------------------------------------- /code/examples/train_dummy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from copy import deepcopy 3 | 4 | import torch 5 | from chatgpt.models.base import RewardModel 6 | from chatgpt.models.bloom import BLOOMActor, BLOOMCritic 7 | from chatgpt.models.gpt import GPTActor, GPTCritic 8 | from chatgpt.models.opt import OPTActor, OPTCritic 9 | from chatgpt.trainer import PPOTrainer 10 | from chatgpt.trainer.callbacks import SaveCheckpoint 11 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy 12 | from torch.optim import Adam 13 | from transformers import AutoTokenizer, BloomTokenizerFast 14 | from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer 15 | 16 | from colossalai.nn.optimizer import HybridAdam 17 | 18 | 19 | def preprocess_batch(samples): 20 | input_ids = torch.stack(samples) 21 | attention_mask = torch.ones_like(input_ids, dtype=torch.long) 22 | return {'input_ids': input_ids, 'attention_mask': attention_mask} 23 | 24 | 25 | def main(args): 26 | # configure strategy 27 | if args.strategy == 'naive': 28 | strategy = NaiveStrategy() 29 | elif args.strategy == 'ddp': 30 | strategy = DDPStrategy() 31 | elif args.strategy == 'colossalai_gemini': 32 | strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) 33 | elif args.strategy == 'colossalai_zero2': 34 | strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') 35 | else: 36 | raise ValueError(f'Unsupported strategy "{args.strategy}"') 37 | 38 | # configure model 39 | with strategy.model_init_context(): 40 | if args.model == 'gpt2': 41 | actor = GPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) 42 | critic = GPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) 43 | elif args.model == 'bloom': 44 | actor = BLOOMActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) 45 | critic = BLOOMCritic(pretrained=args.pretrain, 
lora_rank=args.lora_rank).to(torch.cuda.current_device()) 46 | elif args.model == 'opt': 47 | actor = OPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) 48 | critic = OPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) 49 | else: 50 | raise ValueError(f'Unsupported model "{args.model}"') 51 | 52 | initial_model = deepcopy(actor).to(torch.cuda.current_device()) 53 | reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).to(torch.cuda.current_device()) 54 | 55 | # configure optimizer 56 | if args.strategy.startswith('colossalai'): 57 | actor_optim = HybridAdam(actor.parameters(), lr=5e-6) 58 | critic_optim = HybridAdam(critic.parameters(), lr=5e-6) 59 | else: 60 | actor_optim = Adam(actor.parameters(), lr=5e-6) 61 | critic_optim = Adam(critic.parameters(), lr=5e-6) 62 | 63 | # configure tokenizer 64 | if args.model == 'gpt2': 65 | tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 66 | tokenizer.pad_token = tokenizer.eos_token 67 | elif args.model == 'bloom': 68 | tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain) 69 | tokenizer.pad_token = tokenizer.eos_token 70 | elif args.model == 'opt': 71 | tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") 72 | else: 73 | raise ValueError(f'Unsupported model "{args.model}"') 74 | 75 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare( 76 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model) 77 | 78 | callbacks = [] 79 | if args.save_ckpt_path: 80 | ckpt_callback = SaveCheckpoint( 81 | args.save_ckpt_path, 82 | args.save_ckpt_interval, 83 | strategy, 84 | actor, 85 | critic, 86 | actor_optim, 87 | critic_optim, 88 | ) 89 | callbacks.append(ckpt_callback) 90 | 91 | # configure trainer 92 | 93 | trainer = PPOTrainer(strategy, 94 | actor, 95 | critic, 96 | reward_model, 97 | initial_model, 98 | actor_optim, 99 | critic_optim, 100 | max_epochs=args.max_epochs, 101 | train_batch_size=args.train_batch_size, 102 | tokenizer=preprocess_batch, 103 | max_length=128, 104 | do_sample=True, 105 | temperature=1.0, 106 | top_k=50, 107 | pad_token_id=tokenizer.pad_token_id, 108 | eos_token_id=tokenizer.eos_token_id, 109 | callbacks=callbacks) 110 | 111 | random_prompts = torch.randint(tokenizer.vocab_size, (1000, 64), device=torch.cuda.current_device()) 112 | trainer.fit(random_prompts, 113 | num_episodes=args.num_episodes, 114 | max_timesteps=args.max_timesteps, 115 | update_timesteps=args.update_timesteps) 116 | 117 | # save model checkpoint after fitting 118 | strategy.save_model(actor, args.save_path, only_rank0=True) 119 | # save optimizer checkpoint on all ranks 120 | if args.need_optim_ckpt: 121 | strategy.save_optimizer(actor_optim, 122 | 'actor_optim_checkpoint_dummy_%d.pt' % (torch.cuda.current_device()), 123 | only_rank0=False) 124 | 125 | 126 | if __name__ == '__main__': 127 | parser = argparse.ArgumentParser() 128 | parser.add_argument('--strategy', 129 | choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], 130 | default='naive') 131 | parser.add_argument('--model', type=str, default='gpt2', choices=['gpt2', 'bloom', 'opt']) 132 | parser.add_argument('--pretrain', type=str, default=None) 133 | parser.add_argument('--save_path', type=str, default='actor_checkpoint_dummy.pt') 134 | parser.add_argument('--need_optim_ckpt', type=bool, default=False) 135 | parser.add_argument('--num_episodes', type=int, default=50) 136 | 
parser.add_argument('--max_timesteps', type=int, default=10) 137 | parser.add_argument('--update_timesteps', type=int, default=10) 138 | parser.add_argument('--max_epochs', type=int, default=5) 139 | parser.add_argument('--train_batch_size', type=int, default=8) 140 | parser.add_argument('--experience_batch_size', type=int, default=8) 141 | parser.add_argument('--lora_rank', type=int, default=0, help="rank of the low-rank adaptation (LoRA) matrices; 0 disables LoRA") 142 | parser.add_argument('--save_ckpt_path', 143 | type=str, 144 | default=None, 145 | help="path to save checkpoints; None disables checkpoint saving") 146 | parser.add_argument('--save_ckpt_interval', type=int, default=1, help="number of episodes between checkpoint saves") 147 | args = parser.parse_args() 148 | main(args) 149 | -------------------------------------------------------------------------------- /code/examples/train_dummy.sh: -------------------------------------------------------------------------------- 1 | set_n_least_used_CUDA_VISIBLE_DEVICES() { 2 | local n=${1:-"9999"} 3 | echo "GPU Memory Usage:" 4 | local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ 5 | | tail -n +2 \ 6 | | nl -v 0 \ 7 | | tee /dev/tty \ 8 | | sort -g -k 2 \ 9 | | awk '{print $1}' \ 10 | | head -n $n) 11 | export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') 12 | echo "Now CUDA_VISIBLE_DEVICES is set to:" 13 | echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" 14 | } 15 | 16 | set_n_least_used_CUDA_VISIBLE_DEVICES 2 17 | 18 | torchrun --standalone --nproc_per_node=2 train_dummy.py --strategy colossalai_zero2 19 | -------------------------------------------------------------------------------- /code/examples/train_prompts.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from copy import deepcopy 3 | 4 | import pandas as pd 5 | import torch 6 | from chatgpt.models.base import RewardModel 7 | from chatgpt.models.bloom import BLOOMActor, BLOOMCritic 8 | from chatgpt.models.gpt import GPTActor, GPTCritic 9 | from chatgpt.models.opt import OPTActor, OPTCritic 10 | from chatgpt.trainer import PPOTrainer 11 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy 12 | from torch.optim import Adam 13 | from transformers import AutoTokenizer, BloomTokenizerFast 14 | from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer 15 | 16 | from colossalai.nn.optimizer import HybridAdam 17 | 18 | 19 | def main(args): 20 | # configure strategy 21 | if args.strategy == 'naive': 22 | strategy = NaiveStrategy() 23 | elif args.strategy == 'ddp': 24 | strategy = DDPStrategy() 25 | elif args.strategy == 'colossalai_gemini': 26 | strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) 27 | elif args.strategy == 'colossalai_zero2': 28 | strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') 29 | else: 30 | raise ValueError(f'Unsupported strategy "{args.strategy}"') 31 | 32 | # configure model 33 | with strategy.model_init_context(): 34 | if args.model == 'gpt2': 35 | actor = GPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) 36 | critic = GPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) 37 | elif args.model == 'bloom': 38 | actor = BLOOMActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) 39 | critic = BLOOMCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) 40 | elif
args.model == 'opt': 41 | actor = OPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) 42 | critic = OPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) 43 | else: 44 | raise ValueError(f'Unsupported model "{args.model}"') 45 | 46 | initial_model = deepcopy(actor) 47 | reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).to(torch.cuda.current_device()) 48 | 49 | # configure optimizer 50 | if args.strategy.startswith('colossalai'): 51 | actor_optim = HybridAdam(actor.parameters(), lr=5e-6) 52 | critic_optim = HybridAdam(critic.parameters(), lr=5e-6) 53 | else: 54 | actor_optim = Adam(actor.parameters(), lr=5e-6) 55 | critic_optim = Adam(critic.parameters(), lr=5e-6) 56 | 57 | # configure tokenizer 58 | if args.model == 'gpt2': 59 | tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 60 | tokenizer.pad_token = tokenizer.eos_token 61 | elif args.model == 'bloom': 62 | tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain) 63 | tokenizer.pad_token = tokenizer.eos_token 64 | elif args.model == 'opt': 65 | tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") 66 | else: 67 | raise ValueError(f'Unsupported model "{args.model}"') 68 | 69 | dataset = pd.read_csv(args.prompt_path)['prompt'] 70 | 71 | def tokenize_fn(texts): 72 | # MUST pad to max length so that the inputs on all ranks have the same length 73 | # Different lengths may cause a hang when using gemini, since ranks would take different numbers of generation steps 74 | batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True) 75 | return {k: v.cuda() for k, v in batch.items()} 76 | 77 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare( 78 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model) 79 | 80 | # configure trainer 81 | trainer = PPOTrainer( 82 | strategy, 83 | actor, 84 | critic, 85 | reward_model, 86 | initial_model, 87 | actor_optim, 88 | critic_optim, 89 | max_epochs=args.max_epochs, 90 | train_batch_size=args.train_batch_size, 91 | experience_batch_size=args.experience_batch_size, 92 | tokenizer=tokenize_fn, 93 | max_length=128, 94 | do_sample=True, 95 | temperature=1.0, 96 | top_k=50, 97 | pad_token_id=tokenizer.pad_token_id, 98 | eos_token_id=tokenizer.eos_token_id, 99 | ) 100 | 101 | trainer.fit(dataset, 102 | num_episodes=args.num_episodes, 103 | max_timesteps=args.max_timesteps, 104 | update_timesteps=args.update_timesteps) 105 | # save model checkpoint after fitting 106 | strategy.save_model(actor, args.save_path, only_rank0=True) 107 | # save optimizer checkpoint on all ranks 108 | if args.need_optim_ckpt: 109 | strategy.save_optimizer(actor_optim, 110 | 'actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), 111 | only_rank0=False) 112 | 113 | 114 | if __name__ == '__main__': 115 | parser = argparse.ArgumentParser() 116 | parser.add_argument('prompt_path') 117 | parser.add_argument('--strategy', 118 | choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], 119 | default='naive') 120 | parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt']) 121 | parser.add_argument('--pretrain', type=str, default=None) 122 | parser.add_argument('--save_path', type=str, default='actor_checkpoint_prompts.pt') 123 | parser.add_argument('--need_optim_ckpt', action='store_true', default=False) 124 | parser.add_argument('--num_episodes', type=int, default=10) 125 | parser.add_argument('--max_timesteps', type=int, default=10) 126 | parser.add_argument('--update_timesteps', type=int, default=10) 127 | parser.add_argument('--max_epochs', type=int, default=5) 128 | parser.add_argument('--train_batch_size', type=int, default=8) 129 | parser.add_argument('--experience_batch_size', type=int, default=8) 130 | parser.add_argument('--lora_rank', type=int, default=0, help="rank of the low-rank adaptation (LoRA) matrices; 0 disables LoRA") 131 | args = parser.parse_args() 132 | main(args) 133 |
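Note that train_prompts.py only assumes the file passed as prompt_path is a CSV with a 'prompt' column (it reads pd.read_csv(args.prompt_path)['prompt']). As a hedged illustration — the two prompt strings below are made-up placeholders, not part of this repo's data — a compatible prompts.csv can be produced like this:

import pandas as pd

# one prompt per row under a 'prompt' header, matching pd.read_csv(...)['prompt'] above
pd.DataFrame({'prompt': [
    'Explain what reinforcement learning from human feedback is.',
    'Summarize the PPO algorithm in one paragraph.',
]}).to_csv('prompts.csv', index=False)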
-------------------------------------------------------------------------------- /code/examples/train_prompts.sh: -------------------------------------------------------------------------------- 1 | set_n_least_used_CUDA_VISIBLE_DEVICES() { 2 | local n=${1:-"9999"} 3 | echo "GPU Memory Usage:" 4 | local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ 5 | | tail -n +2 \ 6 | | nl -v 0 \ 7 | | tee /dev/tty \ 8 | | sort -g -k 2 \ 9 | | awk '{print $1}' \ 10 | | head -n $n) 11 | export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') 12 | echo "Now CUDA_VISIBLE_DEVICES is set to:" 13 | echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" 14 | } 15 | 16 | set_n_least_used_CUDA_VISIBLE_DEVICES 2 17 | 18 | torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2 19 | -------------------------------------------------------------------------------- /code/examples/train_reward_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import loralib as lora 4 | import torch 5 | from chatgpt.dataset import RewardDataset 6 | from chatgpt.models.base import RewardModel 7 | from chatgpt.models.bloom import BLOOMRM 8 | from chatgpt.models.gpt import GPTRM 9 | from chatgpt.models.opt import OPTRM 10 | from chatgpt.trainer import RewardModelTrainer 11 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy 12 | from datasets import load_dataset 13 | from torch.optim import Adam 14 | from transformers import AutoTokenizer, BloomTokenizerFast 15 | from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer 16 | 17 | from colossalai.nn.optimizer import HybridAdam 18 | 19 | 20 | def train(args): 21 | # configure strategy 22 | if args.strategy == 'naive': 23 | strategy = NaiveStrategy() 24 | elif args.strategy == 'ddp': 25 | strategy = DDPStrategy() 26 | elif args.strategy == 'colossalai_gemini': 27 | strategy = ColossalAIStrategy(stage=3, placement_policy='cuda') 28 | elif args.strategy == 'colossalai_zero2': 29 | strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') 30 | else: 31 | raise ValueError(f'Unsupported strategy "{args.strategy}"') 32 | 33 | # configure model 34 | with strategy.model_init_context(): 35 | if args.model == 'bloom': 36 | model = BLOOMRM(pretrained=args.pretrain, lora_rank=args.lora_rank).cuda() 37 | elif args.model == 'opt': 38 | model = OPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).cuda() 39 | elif args.model == 'gpt2': 40 | model = GPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).cuda() 41 | else: 42 | raise ValueError(f'Unsupported model "{args.model}"') 43 | 44 | # configure tokenizer 45 | if args.model == 'gpt2': 46 | tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 47 | tokenizer.pad_token = tokenizer.eos_token 48 | elif args.model == 'bloom': 49 | tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain) 50 | tokenizer.pad_token = tokenizer.eos_token 51 | elif args.model == 'opt': 52 | tokenizer =
AutoTokenizer.from_pretrained("facebook/opt-350m") 53 | else: 54 | raise ValueError(f'Unsupported model "{args.model}"') 55 | tokenizer.pad_token = tokenizer.eos_token 56 | 57 | max_len = 512 58 | 59 | # configure optimizer 60 | if args.strategy.startswith('colossalai'): 61 | optim = HybridAdam(model.parameters(), lr=5e-5) 62 | else: 63 | optim = Adam(model.parameters(), lr=5e-5) 64 | 65 | # prepare for data and dataset 66 | data = load_dataset(args.dataset) 67 | train_data = data["train"] 68 | eval_data = data['test'] 69 | train_dataset = RewardDataset(train_data, tokenizer, max_len) 70 | eval_dataset = RewardDataset(eval_data, tokenizer, max_len) 71 | 72 | trainer = RewardModelTrainer(model=model, 73 | strategy=strategy, 74 | optim=optim, 75 | train_dataset=train_dataset, 76 | eval_dataset=eval_dataset, 77 | batch_size=args.batch_size, 78 | max_epochs=args.max_epochs) 79 | 80 | trainer.fit(use_lora=args.lora_rank) 81 | 82 | # save model checkpoint after fitting, on rank0 only 83 | strategy.save_model(model, args.save_path, only_rank0=True) 84 | # save optimizer checkpoint on all ranks 85 | strategy.save_optimizer(optim, 'rm_optim_checkpoint_%d.pt' % (torch.cuda.current_device()), only_rank0=False) 86 | 87 | 88 | if __name__ == '__main__': 89 | parser = argparse.ArgumentParser() 90 | parser.add_argument('--strategy', 91 | choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], 92 | default='naive') 93 | parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt'], default='bloom') 94 | parser.add_argument('--pretrain', type=str, default=None) 95 | parser.add_argument('--dataset', type=str, default='Dahoas/rm-static') 96 | parser.add_argument('--save_path', type=str, default='rm_ckpt.pth') 97 | parser.add_argument('--max_epochs', type=int, default=1) 98 | parser.add_argument('--batch_size', type=int, default=4) 99 | parser.add_argument('--lora_rank', type=int, default=0, help="rank of the low-rank adaptation (LoRA) matrices; 0 disables LoRA") 100 | args = parser.parse_args() 101 | train(args) 102 | -------------------------------------------------------------------------------- /code/examples/train_rm.sh: -------------------------------------------------------------------------------- 1 | set_n_least_used_CUDA_VISIBLE_DEVICES() { 2 | local n=${1:-"9999"} 3 | echo "GPU Memory Usage:" 4 | local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ 5 | | tail -n +2 \ 6 | | nl -v 0 \ 7 | | tee /dev/tty \ 8 | | sort -g -k 2 \ 9 | | awk '{print $1}' \ 10 | | head -n $n) 11 | export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') 12 | echo "Now CUDA_VISIBLE_DEVICES is set to:" 13 | echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" 14 | } 15 | 16 | set_n_least_used_CUDA_VISIBLE_DEVICES 2 17 | 18 | # torchrun --standalone --nproc_per_node=2 train_reward_model.py --pretrain 'bigscience/bloomz-560m' --model 'bloom' --strategy colossalai_zero2 19 | torchrun --standalone --nproc_per_node=2 train_reward_model.py --model 'gpt2' --strategy colossalai_zero2 20 | # torchrun --standalone --nproc_per_node=2 train_reward_model.py --pretrain "facebook/opt-350m" --model 'opt' --strategy colossalai_zero2 21 |
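RewardModelTrainer above fits the reward model on (chosen, rejected) response pairs from RewardDataset. The loss it actually uses lives in chatgpt/models/loss.py and is not reproduced here; as a rough, generic sketch of the pairwise ranking loss such trainers typically optimize (the function name and the toy scores below are illustrative only):

import torch
import torch.nn.functional as F

def pairwise_ranking_loss(chosen_reward: torch.Tensor, rejected_reward: torch.Tensor) -> torch.Tensor:
    # Push the scalar reward of the chosen response above the rejected one:
    # loss = -log(sigmoid(r_chosen - r_rejected)), averaged over the batch.
    return -F.logsigmoid(chosen_reward - rejected_reward).mean()

# toy usage with random scores for a batch of 4 pairs
loss = pairwise_ranking_loss(torch.randn(4), torch.randn(4))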
-------------------------------------------------------------------------------- /code/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | cpu: tests which can run on CPU 4 | gpu: tests which require a single GPU 5 | dist: tests which are run in a multi-GPU or multi-machine environment 6 | experiment: tests for experimental features 7 | -------------------------------------------------------------------------------- /code/requirements-test.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | -------------------------------------------------------------------------------- /code/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.20.1 2 | tqdm 3 | datasets 4 | loralib 5 | colossalai>=0.2.4 6 | torch 7 | langchain 8 | -------------------------------------------------------------------------------- /code/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | 4 | def fetch_requirements(path): 5 | with open(path, 'r') as fd: 6 | return [r.strip() for r in fd.readlines()] 7 | 8 | 9 | def fetch_readme(): 10 | with open('README.md', encoding='utf-8') as f: 11 | return f.read() 12 | 13 | 14 | def fetch_version(): 15 | with open('version.txt', 'r') as f: 16 | return f.read().strip() 17 | 18 | 19 | setup( 20 | name='chatgpt', 21 | version=fetch_version(), 22 | packages=find_packages(exclude=( 23 | 'tests', 24 | 'benchmarks', 25 | '*.egg-info', 26 | )), 27 | description='An RLHF implementation (ChatGPT) powered by ColossalAI', 28 | long_description=fetch_readme(), 29 | long_description_content_type='text/markdown', 30 | license='Apache Software License 2.0', 31 | url='https://github.com/hpcaitech/ChatGPT', 32 | install_requires=fetch_requirements('requirements.txt'), 33 | python_requires='>=3.6', 34 | classifiers=[ 35 | 'Programming Language :: Python :: 3', 36 | 'License :: OSI Approved :: Apache Software License', 37 | 'Environment :: GPU :: NVIDIA CUDA', 38 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 39 | 'Topic :: System :: Distributed Computing', 40 | ], 41 | ) 42 | -------------------------------------------------------------------------------- /code/tests/__init__.py: -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /code/tests/test_checkpoint.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from contextlib import nullcontext 4 | from functools import partial 5 | 6 | import pytest 7 | import torch 8 | import torch.distributed as dist 9 | import torch.multiprocessing as mp 10 | from chatgpt.models.gpt import GPTActor 11 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy 12 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config 13 | 14 | from colossalai.nn.optimizer import HybridAdam 15 | from colossalai.testing import rerun_if_address_is_in_use 16 | from colossalai.utils import free_port 17 | 18 | GPT_CONFIG = GPT2Config(n_embd=128, n_layer=4, n_head=4) 19 | 20 | 21 | def get_data(batch_size: int, seq_len: int = 10) -> dict: 22 | input_ids = torch.randint(0, 50257, (batch_size, seq_len), device='cuda') 23 | attention_mask = torch.ones_like(input_ids) 24 | return dict(input_ids=input_ids, attention_mask=attention_mask) 25 | 26 | 27 | def run_test_checkpoint(strategy): 28 | BATCH_SIZE = 2 29 | 30 | if strategy == 'ddp': 31 | strategy = DDPStrategy() 32 | elif strategy == 'colossalai_gemini': 33 | strategy = ColossalAIStrategy(stage=3,
placement_policy='cuda', initial_scale=2**5) 34 | elif strategy == 'colossalai_zero2': 35 | strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') 36 | else: 37 | raise ValueError(f'Unsupported strategy "{strategy}"') 38 | 39 | with strategy.model_init_context(): 40 | actor = GPTActor(config=GPT_CONFIG).cuda() 41 | 42 | actor_optim = HybridAdam(actor.parameters()) 43 | 44 | actor, actor_optim = strategy.prepare((actor, actor_optim)) 45 | 46 | def run_step(): 47 | data = get_data(BATCH_SIZE) 48 | action_mask = torch.ones_like(data['attention_mask'], dtype=torch.bool) 49 | action_log_probs = actor(data['input_ids'], action_mask.size(1), data['attention_mask']) 50 | loss = action_log_probs.sum() 51 | strategy.backward(loss, actor, actor_optim) 52 | strategy.optimizer_step(actor_optim) 53 | 54 | run_step() 55 | 56 | ctx = tempfile.TemporaryDirectory() if dist.get_rank() == 0 else nullcontext() 57 | 58 | with ctx as dirname: 59 | rank0_dirname = [dirname] 60 | dist.broadcast_object_list(rank0_dirname) 61 | rank0_dirname = rank0_dirname[0] 62 | 63 | model_path = os.path.join(rank0_dirname, 'model.pt') 64 | optim_path = os.path.join(rank0_dirname, f'optim-r{dist.get_rank()}.pt') 65 | 66 | strategy.save_model(actor, model_path, only_rank0=True) 67 | strategy.save_optimizer(actor_optim, optim_path, only_rank0=False) 68 | 69 | dist.barrier() 70 | 71 | strategy.load_model(actor, model_path, strict=False) 72 | strategy.load_optimizer(actor_optim, optim_path) 73 | 74 | dist.barrier() 75 | 76 | run_step() 77 | 78 | 79 | def run_dist(rank, world_size, port, strategy): 80 | os.environ['RANK'] = str(rank) 81 | os.environ['LOCAL_RANK'] = str(rank) 82 | os.environ['WORLD_SIZE'] = str(world_size) 83 | os.environ['MASTER_ADDR'] = 'localhost' 84 | os.environ['MASTER_PORT'] = str(port) 85 | run_test_checkpoint(strategy) 86 | 87 | 88 | @pytest.mark.dist 89 | @pytest.mark.parametrize('world_size', [2]) 90 | @pytest.mark.parametrize('strategy', ['ddp', 'colossalai_zero2', 'colossalai_gemini']) 91 | @rerun_if_address_is_in_use() 92 | def test_checkpoint(world_size, strategy): 93 | run_func = partial(run_dist, world_size=world_size, port=free_port(), strategy=strategy) 94 | mp.spawn(run_func, nprocs=world_size) 95 | 96 | 97 | if __name__ == '__main__': 98 | test_checkpoint(2, 'colossalai_zero2') 99 | -------------------------------------------------------------------------------- /code/tests/test_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | from copy import deepcopy 3 | from functools import partial 4 | 5 | import pytest 6 | import torch 7 | import torch.distributed as dist 8 | import torch.multiprocessing as mp 9 | from chatgpt.experience_maker import NaiveExperienceMaker 10 | from chatgpt.models.base import RewardModel 11 | from chatgpt.models.gpt import GPTActor, GPTCritic 12 | from chatgpt.replay_buffer import NaiveReplayBuffer 13 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy 14 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config 15 | 16 | from colossalai.testing import rerun_if_address_is_in_use 17 | from colossalai.utils import free_port 18 | 19 | GPT_CONFIG = GPT2Config(n_embd=128, n_layer=4, n_head=4) 20 | 21 | 22 | def get_data(batch_size: int, seq_len: int = 10) -> dict: 23 | input_ids = torch.randint(0, 50257, (batch_size, seq_len), device='cuda') 24 | attention_mask = torch.ones_like(input_ids) 25 | return dict(input_ids=input_ids, attention_mask=attention_mask) 26 | 27 | 28 | def 
gather_and_equal(tensor: torch.Tensor) -> bool: 29 | world_size = dist.get_world_size() 30 | outputs = [torch.empty_like(tensor) for _ in range(world_size)] 31 | dist.all_gather(outputs, tensor.contiguous()) 32 | for t in outputs[1:]: 33 | if not torch.equal(outputs[0], t): 34 | return False 35 | return True 36 | 37 | 38 | def run_test_data(strategy): 39 | EXPERIENCE_BATCH_SIZE = 4 40 | SAMPLE_BATCH_SIZE = 2 41 | 42 | if strategy == 'ddp': 43 | strategy = DDPStrategy() 44 | elif strategy == 'colossalai': 45 | strategy = ColossalAIStrategy(placement_policy='cuda') 46 | else: 47 | raise ValueError(f'Unsupported strategy "{strategy}"') 48 | 49 | actor = GPTActor(config=GPT_CONFIG).cuda() 50 | critic = GPTCritic(config=GPT_CONFIG).cuda() 51 | 52 | initial_model = deepcopy(actor) 53 | reward_model = RewardModel(deepcopy(critic.model)).cuda() 54 | 55 | experience_maker = NaiveExperienceMaker(actor, critic, reward_model, initial_model) 56 | replay_buffer = NaiveReplayBuffer(SAMPLE_BATCH_SIZE, cpu_offload=False) 57 | 58 | # experience on all ranks should be the same 59 | for _ in range(2): 60 | data = get_data(EXPERIENCE_BATCH_SIZE) 61 | assert gather_and_equal(data['input_ids']) 62 | assert gather_and_equal(data['attention_mask']) 63 | experience = experience_maker.make_experience(**data, 64 | do_sample=True, 65 | max_length=16, 66 | eos_token_id=50256, 67 | pad_token_id=50256) 68 | assert gather_and_equal(experience.sequences) 69 | assert gather_and_equal(experience.action_log_probs) 70 | assert gather_and_equal(experience.values) 71 | assert gather_and_equal(experience.reward) 72 | assert gather_and_equal(experience.advantages) 73 | assert gather_and_equal(experience.action_mask) 74 | assert gather_and_equal(experience.attention_mask) 75 | replay_buffer.append(experience) 76 | 77 | # replay buffer's data should be the same 78 | buffer_size = torch.tensor([len(replay_buffer)], device='cuda') 79 | assert gather_and_equal(buffer_size) 80 | for item in replay_buffer.items: 81 | assert gather_and_equal(item.sequences) 82 | assert gather_and_equal(item.action_log_probs) 83 | assert gather_and_equal(item.values) 84 | assert gather_and_equal(item.reward) 85 | assert gather_and_equal(item.advantages) 86 | assert gather_and_equal(item.action_mask) 87 | assert gather_and_equal(item.attention_mask) 88 | 89 | # the dataloader on each rank should have the same size but different batches 90 | dataloader = strategy.setup_dataloader(replay_buffer) 91 | dataloader_size = torch.tensor([len(dataloader)], device='cuda') 92 | assert gather_and_equal(dataloader_size) 93 | for experience in dataloader: 94 | assert not gather_and_equal(experience.sequences) 95 | assert not gather_and_equal(experience.action_log_probs) 96 | assert not gather_and_equal(experience.values) 97 | assert not gather_and_equal(experience.reward) 98 | assert not gather_and_equal(experience.advantages) 99 | # action mask and attention mask may be the same 100 | 101 | 102 | def run_dist(rank, world_size, port, strategy): 103 | os.environ['RANK'] = str(rank) 104 | os.environ['LOCAL_RANK'] = str(rank) 105 | os.environ['WORLD_SIZE'] = str(world_size) 106 | os.environ['MASTER_ADDR'] = 'localhost' 107 | os.environ['MASTER_PORT'] = str(port) 108 | run_test_data(strategy) 109 | 110 | 111 | @pytest.mark.skip 112 | @pytest.mark.dist 113 | @pytest.mark.parametrize('world_size', [2]) 114 | @pytest.mark.parametrize('strategy', ['ddp', 'colossalai']) 115 | @rerun_if_address_is_in_use() 116 | def test_data(world_size, strategy): 117 | run_func = partial(run_dist,
world_size=world_size, port=free_port(), strategy=strategy) 118 | mp.spawn(run_func, nprocs=world_size) 119 | 120 | 121 | if __name__ == '__main__': 122 | test_data(2, 'colossalai') 123 | -------------------------------------------------------------------------------- /code/utils.py: -------------------------------------------------------------------------------- 1 | # import 2 | # import os 3 | # os.environ["CUDA_VISIBLE_DEVICES"] = "0" 4 | import torch 5 | import torch.nn as nn 6 | from torch.utils.data import Dataset 7 | from datasets import load_dataset 8 | import transformers 9 | from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline 10 | from transformers import Trainer, TrainingArguments, AutoModelWithLMHead 11 | from copy import deepcopy 12 | from torch.optim import Adam 13 | from transformers import AutoTokenizer, BloomTokenizerFast 14 | from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer 15 | import pandas as pd 16 | import argparse 17 | import copy 18 | import logging 19 | import json 20 | from dataclasses import dataclass, field 21 | 22 | PROMPT = "### system:사용자의 질문에 맞는 적절한 응답을 생성하세요.\n### 사용자:{instruction}\n### 응답:" 23 | # data config 24 | IGNORE_INDEX = -100 25 | 26 | def safe_save_model_for_hf_trainer(trainer: transformers.Trainer, output_dir: str): 27 | """Collect the state dict and dump it to disk.""" 28 | state_dict = trainer.model.state_dict() 29 | if trainer.args.should_save: 30 | cpu_state_dict = {key: value.cpu() for key, value in list(state_dict.items())} 31 | del state_dict 32 | trainer._save(output_dir, state_dict=cpu_state_dict) # noqa 33 | 34 | ## prepare data 35 | from typing import Optional, Dict, Sequence 36 | import jsonlines 37 | 38 | class SFT_dataset(Dataset): 39 | '''SFT dataset by wygo''' 40 | def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, verbose=True): 41 | super(SFT_dataset, self).__init__() 42 | logging.warning("Loading data...") 43 | 44 | # with open(data_path, "r", encoding='utf-8-sig') as json_file: 45 | # list_data_dict = json.load(json_file) 46 | # if verbose: 47 | # print('## data check ##') 48 | sources = [] 49 | targets = [] 50 | with jsonlines.open(data_path) as f: 51 | for example in f.iter(): 52 | tmp = PROMPT.format_map(example) 53 | sources.append(tmp) 54 | targets.append(f"{example['output']}{tokenizer.eos_token}") 55 | 56 | if verbose: 57 | idx = 0 58 | print((sources[idx])) 59 | print((targets[idx])) 60 | print("Tokenizing inputs... This may take some time...") 61 | 62 | ############################################################ 63 | examples = [s + t for s, t in zip(sources, targets)] 64 | 65 | # source data tokenized 66 | sources_tokenized = self._tokenize_fn(sources, tokenizer) # sources only 67 | examples_tokenized = self._tokenize_fn(examples, tokenizer) # source + target 68 | 69 | ## the model input is source+target; the loss is computed only on the target part 70 | input_ids = examples_tokenized["input_ids"] 71 | labels = copy.deepcopy(input_ids) 72 | for label, source_len in zip(labels, sources_tokenized["input_ids_lens"]): 73 | label[:source_len] = IGNORE_INDEX # fill the source part with -100 so it is ignored by the loss 74 | 75 | data_dict = dict(input_ids=input_ids, labels=labels) 76 | 77 | self.input_ids = data_dict["input_ids"] 78 | self.labels = data_dict["labels"] 79 | logging.warning("Loading data done!!: %d"%(len(self.labels))) 80 | 81 | def _tokenize_fn(self, strings: Sequence[str], tokenizer: transformers.PreTrainedTokenizer) -> Dict: 82 | """Tokenize a list of strings.""" 83 | tokenized_list = [ 84 | tokenizer( 85 | text, 86 | return_tensors="pt", 87 | padding="longest", 88 | max_length=tokenizer.model_max_length, 89 | truncation=True, 90 | ) 91 | for text in strings 92 | ] 93 | input_ids = labels = [tokenized.input_ids[0] for tokenized in tokenized_list] 94 | input_ids_lens = labels_lens = [ 95 | tokenized.input_ids.ne(tokenizer.pad_token_id).sum().item() for tokenized in tokenized_list 96 | ] 97 | 98 | return dict( 99 | input_ids=input_ids, 100 | labels=labels, 101 | input_ids_lens=input_ids_lens, 102 | labels_lens=labels_lens, 103 | ) 104 | 105 | 106 | def __len__(self): 107 | return len(self.input_ids) 108 | 109 | 110 | def __getitem__(self, i) -> Dict[str, torch.Tensor]: 111 | return dict(input_ids=self.input_ids[i], labels=self.labels[i]) 112 | 113 | 114 | @dataclass 115 | class DataCollatorForSupervisedDataset(object): 116 | """Collate examples for supervised fine-tuning.""" 117 | 118 | tokenizer: transformers.PreTrainedTokenizer 119 | 120 | def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]: 121 | input_ids, labels = tuple([instance[key] for instance in instances] for key in ("input_ids", "labels")) 122 | input_ids = torch.nn.utils.rnn.pad_sequence( 123 | input_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id 124 | ) 125 | labels = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX) 126 | return dict( 127 | input_ids=input_ids, 128 | labels=labels, 129 | attention_mask=input_ids.ne(self.tokenizer.pad_token_id), 130 | ) 131 | 132 | # check 133 | # print('input : %s'%train_dataset.input_ids[0]) 134 | # print('output: %s'%train_dataset.labels[0])
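To make the label masking in SFT_dataset concrete, here is a tiny self-contained sketch (the token ids are made up for illustration): the model input is source+target, and the labels copy input_ids except that the source prefix is overwritten with IGNORE_INDEX (-100), the value PyTorch's cross-entropy loss ignores.

import torch

IGNORE_INDEX = -100
source_len = 3                                      # length of the tokenized prompt (source)
input_ids = torch.tensor([10, 11, 12, 20, 21, 22])  # 3 source tokens followed by 3 target tokens
labels = input_ids.clone()
labels[:source_len] = IGNORE_INDEX
# labels is now tensor([-100, -100, -100, 20, 21, 22]):
# the loss is computed on the target tokens only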
-------------------------------------------------------------------------------- /code/version.txt: -------------------------------------------------------------------------------- 1 | 0.1.0 2 | -------------------------------------------------------------------------------- /data/stage1. domain_adaptive_pretraining/템플릿.csv: -------------------------------------------------------------------------------- 1 | 주제,내용,길이 2 | BTS의 개요,2013년 6월 13일에 데뷔한 대한민국의 7인조 보이그룹이다.,35 3 | -------------------------------------------------------------------------------- /data/stage1.
domain_instruction_tuning/ive_instruction_test.jsonl: -------------------------------------------------------------------------------- 1 | {"instruction": "아이브의 소속사는 어떤 엔터테인먼트 기업인가요?", "output": "아이브의 소속사는 스타쉽엔터테인먼트입니다."} 2 | {"instruction": "장원영의 생년월일은 언제인가요?", "output": "장원영의 생년월일은 2004년 8월 31일입니다."} 3 | {"instruction": "아이브와 (여자)아이들은 어떤 차이점이 있나요?", "output": "아이브는 보컬의 비중이 높은 음악을 내세우고 있지만, (여자)아이들은 콘셉트추얼함과 자체 제작으로 승부를 보며 음악에 어필합니다."} 4 | {"instruction": "어떤 멤버들이 IVE의 성장을 도왔나요?", "output": "안유진과 장원영 외에도 아이즈원 출신이 아닌 새로운 멤버들이 IVE의 성장을 도왔습니다."} 5 | {"instruction": "아이브의 실력 중에서 가장 특출난 점은 무엇인가요?", "output": "아이브의 표정 연기입니다."} 6 | {"instruction": "아이브의 모든 멤버들이 곡의 스토리와 가사에 맞는 표정 연기를 잘 하나요?", "output": "네, 아이브의 모든 멤버들이 곡의 스토리와 가사에 맞는 표정 연기를 능숙하게 사용합니다."} 7 | {"instruction": "아이브의 메인보컬과 서브보컬은 누구들인가요?", "output": "메인보컬은 안유진과 리즈이고, 서브보컬은 가을, 레이, 장원영, 이서입니다."} 8 | {"instruction": "아이브의 랩 파트와 고음역대 파트는 누가 맡고 있나요?", "output": "랩 파트는 가을, 레이가 맡고, 고음역대 파트는 리즈와 안유진이 맡고 있습니다."} 9 | {"instruction": "어떤 그룹이 처음부터 비주얼을 내세웠나요?", "output": "스타쉽엔터테인먼트 그룹인 아이브가 처음부터 비주얼을 내세웠습니다."} 10 | {"instruction": "이 아티스트는 어떤 시상식에서 3개의 시상식에서 신인상과 대상을 동시 수상한 최초의 아티스트가 되었다고 언급되었나요?", "output": "이 아티스트는 2022년 11월 26일 멜론 뮤직 어워드, 11월 30일 마마 어워즈, 12월 13일 아시아 아티스트 어워즈에서 신인상과 대상을 동시에 수상하였습니다."} 11 | {"instruction": "이 아티스트가 속한 그룹은 어떤 시기에 여자 아이돌 시장의 침체기를 가져온 걸그룹으로 꼽혔나요?", "output": "이 아티스트가 속한 그룹은 약간의 침체기가 있었던 여자 아이돌 시장의 부흥을 가져온 걸그룹으로 꼽혔습니다."} 12 | {"instruction": "《After LIKE》는 어떤 수치상 한국 걸그룹 중에서 어떤 위치에 올랐나요?", "output": "두 번째로 높은 위치, Spotify 글로벌 차트 23위"} 13 | {"instruction": "\"After LIKE\"는 한국 걸그룹 중 초동 판매량 2위를 달성했나요?", "output": "네, \"After LIKE\"는 한국 걸그룹 초동 판매량 2위에 등극했습니다."} 14 | {"instruction": "IVE의 그룹명은 어떻게 결정되었나요?", "output": "IVE의 그룹명은 회사 공모를 통해 결정되었다."} 15 | {"instruction": "아이브가 첫 곡으로 연습한 노래는 무엇인가요?", "output": "피프스 하모니의 That's my girl입니다."} 16 | {"instruction": "아이브 멤버들 중 가장 어리게 태어난 멤버는 누구인가요?", "output": "이서입니다."} 17 | {"instruction": "아이브 멤버들은 어떤 애니메이션을 모르거나, 틀린 이름을 부르기도 했나요?", "output": "원영과 이서는 슬라이드폰을 열 줄 모른다든가, 이서는 마시마로를 몰라서 마시멜로라고 하기도 했습니다."} 18 | {"instruction": "가을은 이서와의 세대 차이를 어떻게 느끼고 있었나요?", "output": "가을은 이서와 크게 세대 차이를 못 느낀다고 밝혔습니다."} 19 | {"instruction": "아이브 멤버 중에서 가장 연장자는 누구인가요?", "output": "최연장자가 아닌 둘째가 리더인 걸그룹이 되었습니다."} 20 | {"instruction": "아이브 멤버 중에서 지상파 음악 방송 MC를 맡고 있던 멤버는 몇 명인가요?", "output": "아이돌 그룹 내에 지상파 음악 방송 MC를 맡고 있던 멤버가 무려 2명이었습니다."} 21 | {"instruction": "아이브의 멤버들은 가로본능 핸드폰에 대해 어떤 반응을 보였나요?", "output": "멤버들 모두가 뭔지 모르는 듯한 모습을 보였습니다."} 22 | {"instruction": "멤버들 중 MBTI가 내향형인 멤버들은 누구인가요?", "output": "장원영과 이서를 제외한 나머지 멤버들"} 23 | {"instruction": "어떤 것들이 숙소 규칙으로 정해져 있는가?", "output": "빨래 제때 가져가기, 냉장고 유통기한 음식 본인 건 본인이 버리기 등이 있다."} 24 | {"instruction": "IVE의 데뷔월 기준 유행어는 무엇인가요?", "output": "\"너무나도\", \"꽤나\", \"오히려 좋아\" 등의 유행어가 있습니다."} 25 | {"instruction": "IVE는 어떤 걸그룹의 파생 걸그룹인가요?", "output": "IVE는 엠넷의 프로듀스 101 시리즈 데뷔조 파생 걸그룹으로, 프리스틴, 구구단, 다이아 등과 함께 대표되는 걸그룹입니다."} 26 | -------------------------------------------------------------------------------- /data/stage2. 
RM/ive_test_rm.jsonl: -------------------------------------------------------------------------------- 1 | {"prompt": "아이브 멤버 중에서 서울공연예술고등학교를 졸업한 멤버는 누구인가요?", "chosen": "레이, 장원영", "rejected": "누군지는 잘 모르겠어요."} 2 | {"prompt": "다른 그룹명도 고려됐던 건가요?", "chosen": "네, 멤버들은 어떤 이름이 될지 기대하며 기다리고 있었다.", "rejected": "그룹명을 정하기 위해 다양한 그룹명이 고려되었지만, 아이브가 선택되었습니다."} 3 | {"prompt": "아이브의 비주얼이 왜 유명한가요?", "chosen": "아이브는 자타공인 전원 센터급 비주얼로 유명합니다.", "rejected": "아이브의 비주얼은 논란이 있지만 다양한 스타일을 소화해내어 유명한 편입니다."} 4 | {"prompt": "아이브의 데뷔곡 《ELEVEN》은 몇 개의 음악 방송에서 1위를 차지했나요?", "chosen": "13관왕", "rejected": "한 개의 음악 방송에서 1위를 차지했어요."} 5 | {"prompt": "리즈는 어느 도시에서 태어났나요?", "chosen": "제주에서 태어났습니다.", "rejected": "상하이"} 6 | {"prompt": "멤버들의 공식색은 어떤 색들로 이루어져 있는가?", "chosen": "멤버들의 공식색은 빨간색, 주황색, 초록색, 파란색, 남색, 보라색으로 무지개 7색 중에서 노란색만 빠져 있어서 그 이유를 궁금해하는 팬들이 많다.", "rejected": "멤버들의 공식색은 하늘과 바닷물 색으로 이루어져 있다고 합니다."} 7 | {"prompt": "아이브의 랩 실력은 그룹 내에서 어떻게 평가되나요?", "chosen": "그룹 내에서 아이브의 랩 실력은 뛰어나다고 평가되고 있습니다.", "rejected": "아이브의 랩 실력은 그룹 내에서 평가가 다양하게 나뉘어요."} 8 | {"prompt": "2023년 4월 15일 방송된 아는 형님에서 이사 소식을 알렸는데, 멤버들은 새로운 숙소에서 3명씩 두 채로 나누어 살게 되었다고 하는데, 과거 숙소는 어디에 위치했나요?", "chosen": "성수동에 위치했던 것으로 보입니다.", "rejected": "전 세계 어딘가"} 9 | {"prompt": "멜론 20만 이상 하트를 보유한 아이브의 곡은 몇 개인가요?", "chosen": "멜론 20만 이상 하트를 보유한 아이브의 곡은 2곡입니다.", "rejected": "2곡"} 10 | {"prompt": "어떤 곡에서 아이브는 빠른 랩으로 호평을 받았나요?", "chosen": "에서 빠른 랩으로 호평을 받았습니다.", "rejected": "아이브는 \"11\"이라는 곡에서 빠른 랩으로 호평을 받았어요."} 11 | {"prompt": "어떻게 아이브의 표정 연기로 개개인이 더욱 돋보이게 되나요?", "chosen": "아이브의 표정 연기는 개개인을 더욱 돋보이게 만들어줍니다.", "rejected": "아이브 멤버들의 표정 연기로 인해 각자가 더 돋보이게 되는 이유는 자신들의 개성 때문이죠."} 12 | {"prompt": "아이브의 정규 1집 I've IVE로 컴백하면서 어떤 성장을 보였나요?", "chosen": "신속한 피드백 수용과 노력하는 모습으로 한 단계 성장한 라이브 실력을 보였습니다.", "rejected": "아이브의 정규 1집 I've IVE로 컴백하면서 주목할 만한 성장은 그림 그리기 실력이 향상되었다는 것입니다."} 13 | {"prompt": "아이브의 평균 비주얼은 어떤가요?", "chosen": "아이브의 평균 비주얼은 상당히 뛰어나다고 평가받고 있습니다.", "rejected": "아이브의 평균 비주얼은 평범한 수준으로 보여요."} 14 | {"prompt": "아이브 멤버 중에서 서울공연예술고등학교를 중퇴한 멤버는 누구인가요?", "chosen": "안유진", "rejected": "잘 모르겠습니다."} 15 | {"prompt": "아이브의 댄스 실력은 전문가들에게도 인정 받나요?", "chosen": "예, 아이브의 댄스 실력은 전문가들에게도 인정받고 있습니다.", "rejected": "아이브의 댄스 실력은 전문가들에게도 어느 정도 인정받고 있어요."} 16 | {"prompt": "2002년생 가을 제외하면 나머지 멤버들은 2000년대에 어떤 문화를 잘 모르는 경향이 있나요?", "chosen": "2000년대의 문화를 잘 모른다는 경향이 있습니다.", "rejected": "2002년생 가을을 제외한 나머지 멤버들은 2000년대에 IT 문화를 잘 모르는 경향이 있습니다."} 17 | {"prompt": "IZ*ONE 활동이 끝난 후 데뷔한 최초의 그룹은 어떤 그룹인가요?", "chosen": "IVE", "rejected": "그룹 A"} 18 | {"prompt": "가을이는 어떤 애완동물을 키우고 있나요?", "chosen": "가을이는 토끼를 키우고 있습니다.", "rejected": "가을이는 전 해초류를 기르고 있어요."} 19 | {"prompt": "SBS 인기가요에서 아이브는 몇 번 트리플 크라운을 수상했나요?", "chosen": "7월 3일 기준 SBS 인기가요에서도 한 번 트리플 크라운을 수상", "rejected": "5번 TMC를 수상했어요."} 20 | {"prompt": "아이브의 댄스 실력은 어떻게 평가되나요?", "chosen": "전체적으로 준수한 실력을 갖추고 있습니다.", "rejected": "아이브의 댄스 실력은 시각적으로 평가되는 경향이 있습니다."} 21 | -------------------------------------------------------------------------------- /data/stage3. 
PPO/ive_test_ppo.jsonl: -------------------------------------------------------------------------------- 1 | {"prompt": "표정 연기가 무대 전반의 분위기나 이야기 전달에 어떤 영향을 미치는지 알려주세요."} 2 | {"prompt": "아이브가 향후 어떤 방식으로 발전하고 성장해 나갈 것으로 예상되는가요?"} 3 | {"prompt": "IVE의 멤버 중에서 가장 먼저 지상파 음악 방송 1위를 기록한 멤버가 누구였나요?"} 4 | {"prompt": "가족관계가 팀 내에서 언급되는 이유가 음악적 활동에 미친 영향에 대해 어떤 생각을 가지십니까?"} 5 | {"prompt": "아이브의 랩이 앞으로 그녀의 음악적 진행을 어떻게 변화시킬지 예상해 보십시오."} 6 | {"prompt": "성인이 되면서 팀 내의 역할 분담이나 관계에 변화가 있었을까요?"} 7 | {"prompt": "아이브의 댄스 실력을 보완하거나 향상시킬 수 있는 방법에는 어떤 것들이 있을까요?"} 8 | {"prompt": "아이브의 랩이 음악이나 가사에 미칠 수 있는 강력한 영향은 무엇인가요?"} 9 | {"prompt": "아이브는 다른 프로듀스 파생 걸그룹들과 다르게 데뷔조에 속한 멤버들의 인지도가 고르게 높다고 합니다. 이러한 현상이 나타나는 이유는 무엇이라고 생각하십니까?"} 10 | {"prompt": "'소녀'와 '자기애'라는 콘셉트의 마케팅 전략은 어떤 방식으로 전개되었을까요?"} 11 | {"prompt": "멤버들이 강한 포스를 뿜어내는데에는 어떤 특징이 도움이 되었을까요?"} 12 | {"prompt": "걸 크러시 콘셉트와 소녀다운 이미지를 결합시키는 과정에서 어려움을 겪은 부분은 무엇이 있었을까요?"} 13 | {"prompt": "세대 간의 차이를 긍정적이고 다양한 가치로 받아들이는데 도움을 주는 활동이 있다면, 어떤 것들이 있을까요?"} 14 | {"prompt": "각 멤버들의 취향을 반영한 단체여행을 계획할 때, 가을이 가장 먼저 제안할 곳은 어디일까요? 다른 멤버들은 그 제안에 대해 어떤 반응을 보일까요?"} 15 | {"prompt": "아이브가 한중일 아이돌 시장 등 글로벌 시장에서의 입지를 고려할 때, 어떤 전략이 필요할 것으로 보이나요?"} 16 | {"prompt": "성인이 된 후에는 어떻게 변화가 있었을까요? 예를 들어, 리더인 유진이 성인이 된 후에 리더십이나 활동에 어떤 영향을 미쳤을까요?"} 17 | {"prompt": "각 멤버의 음색과 개성을 가장 잘 드러내는 곡은 무엇인가요? 이유도 함께 궁금합니다."} 18 | {"prompt": "IVE의 음악이 2000년대 후반 ~ 2010년대 초중반의 2세대 걸그룹 노래와 공통점이 있는 이유는 무엇일까요?"} 19 | {"prompt": "멤버들의 얼굴에 있는 각각의 점은 어떤 의미가 있을까요?"} 20 | {"prompt": "멤버들이 신장 차이가 큰 영향을 미친 곡이나 활동이 있다면 무엇인가요?"} 21 | -------------------------------------------------------------------------------- /data/stage3. PPO/kuksundo_test_ppo.jsonl: -------------------------------------------------------------------------------- 1 | {"prompt": "국선도는 어떤 사람들에게 추천되는 수련법인가요?"} 2 | {"prompt": "밝돌법이라는 이름의 유래는 무엇인가요?"} 3 | {"prompt": "국선도를 통해 건강한 몸을 유지하는 방법은 무엇인가요?"} 4 | {"prompt": "국선도를 수행하는 데 필요한 시간과 노력은 어느 정도인가요?"} 5 | {"prompt": "국선도의 목적은 무엇이고, 그것을 달성하기 위해 어떤 방법을 사용하나요?"} 6 | {"prompt": "국선도는 어떤 의지를 기르고 어떠한 미덕을 갖게 해주나요?"} 7 | {"prompt": "국선도는 어떻게 우주자연과 인간을 하나로 만드는 건가요?"} 8 | {"prompt": "국선도를 통해 어떻게 조화로운 세상을 만들 수 있는 건가요?"} 9 | {"prompt": "국선도와 국가 지도자의 도법은 어떤 관계를 가지고 있나요?"} 10 | {"prompt": "국선도의 수련 과정을 주야로 수련해야 할까요?"} 11 | {"prompt": "국선도 밝돌법을 꾸준히 수련함으로써 어떻게 체력이 증강되는지 설명해주세요."} 12 | {"prompt": "국선도를 통한 디톡스 수련법은 어떤 것이 있을까요?"} 13 | {"prompt": "스트레스관리를 위해 어떤 습관을 가질 수 있을까요?"} 14 | {"prompt": "불로장생은 어떤 원리를 기반으로 하는 것인가요?"} 15 | {"prompt": "도인도송에서 언급된 중기(中氣)의 음양(陰陽) 변화와 수화(水火)의 승강(昇降), 그리고 기혈(氣血)의 순환(循環)은 왜 중앙오십토(中央五十土)의 중기단합력(中氣團合力)에 의해 발생하는 것인지 설명해 주세요."} 16 | {"prompt": "앞서 말씀하신 것처럼, 중기(中氣)는 인체에서 하단전(下丹田)에서 발생한 기혈(氣血)이 간(肝)으로 나와 생신(生新)하는 것이라고 알고 있습니다. 이런 과정에서 중앙오십토(中央五十土)가 어떻게 작용하는지 설명해 주세요."} 17 | {"prompt": "도인도송에 따르면, 중기(中氣)단법은 우주적(宇宙的) 입장(立場)에서 오인(吾人)이 행공(行功)해야 하는 고행(苦行)이라고 합니다. 이런 고행(苦行)의 목적은 무엇인지 설명해 주세요."} 18 | {"prompt": "중기(中氣)단법을 수도(修道) 초공(初功)으로 선택하게 된 이유에 대해 설명해 주세요."} 19 | {"prompt": "선도주의가 개인적인 삶에 어떤 영향을 미칠 수 있을까요? 개인이 선도주의를 적용했을 때 어떤 변화를 기대할 수 있나요?"} 20 | {"prompt": "선도주의를 실천하기 위해서 필요한 가장 중요한 자질은 무엇인가요? 그 자질을 어떻게 갖출 수 있을까요?"} 21 | {"prompt": "국선도본원 세계본부에서 발급되는 자격증은 어떤 특징을 가지고 있나요?"} 22 | {"prompt": "국선도의 고유 권한 보존을 위해 강화된 관리 시스템이 구축되었나요? 
어떤 관리 시스템이 있는지 알려주세요."} 23 | {"prompt": "기혈순환유통유통법을 할 때 힘을 주는 정도는 어떻게 결정하나요?"} 24 | {"prompt": "기혈순환유통유통법을 할 때 상상력을 이용하는 방법이 있을까요?"} 25 | {"prompt": "단전호흡이 정신적인 측면에 어떤 영향을 미치나요?"} 26 | {"prompt": "호흡법을 익히는 과정에서 주의해야 할 점이 있나요?"} 27 | {"prompt": "이 호흡법은 왜 마음을 가라앉히는 것부터 시작할까요?"} 28 | {"prompt": "아랫배 3cm 아래에서 시작되는 기운을 왜 몸을 반시계 방향으로 돌린다는 생각을 하게 될까요?"} 29 | {"prompt": "이 호흡법은 왜 힘을 건강하게 유지할 수 있는 도움이 될까요?"} 30 | {"prompt": "\"마음을 가라앉힌다\"는 말이 추상적일 수 있습니다. 도장에서는 어떤 방법을 사용하여 마음을 고요하게 만드는지 알 수 있을까요?"} 31 | {"prompt": "마음이 가라앉지 않은 상태에서 아랫배를 내밀 때 일어나는 부작용에는 어떤 것들이 있나요?"} 32 | {"prompt": "배꼽 위의 상체 중 어디라도 긴장이나 힘이 있는 상태에서 아랫배를 내밀려고 하면 어떤 일이 일어날까요?"} 33 | {"prompt": "동작을 정확하게 하는 것과 호흡에 무리가 없도록 하는 것 중 더 중요한 것은 무엇인가요?"} 34 | {"prompt": "단전행공을 통해 얻을 수 있는 신체적인 이점은 무엇인가요?"} 35 | {"prompt": "국선도의 정각도 단계의 동작 수가 많은 이유는 무엇인가요?"} 36 | {"prompt": "국선도의 정각도 단계를 통해 얻을 수 있는 이점은 무엇인가요?"} 37 | {"prompt": "중기단법을 집에서 수련할 때와 도장이나 연수원에서 특수 프로그램을 받을 때의 차이점은 무엇인가요?"} 38 | {"prompt": "중기단법을 수행하면 호흡 뿐만 아니라 다른 측면에서도 어떤 변화가 생기나요?"} 39 | {"prompt": "중기단법의 임의성이 있는 이유는 무엇일까?"} 40 | {"prompt": "원기단법에서는 어떤 유통 과정을 자주 시도하나요?"} 41 | {"prompt": "원기단법에서 축기가 왜 중요한가요?"} 42 | {"prompt": "원기단법에서의 흡지와 축기는 왜 번갈아가며 이루어져야 하나요?"} 43 | {"prompt": "국선도의 통기법을 통해 내면의 평화를 얻을 수 있나요?"} 44 | {"prompt": "국선도의 통기법은 얼마나 오래 실천해야 효과가 나타날까요?"} 45 | {"prompt": "영체와 함께 수련하는 것은 어느 단계부터 가능한 건가요?"} 46 | {"prompt": "삼합단법을 통해 우리 몸은 어떻게 이산화탄소를 배출하나요?"} 47 | {"prompt": "조리단법은 어떤 방식으로 전파되고 널리 알려지게 되었나요?"} 48 | {"prompt": "국선도의 선도법이란 무엇인가요?"} 49 | {"prompt": "국선도의 선도법은 어떻게 구성되어 있나요?"} 50 | {"prompt": "하늘과 하나가 되는 단법이 국선도의 선도법에 어떤 영향을 미치나요?"} 51 | {"prompt": "국선도의 선도법을 세우기 위해서는 어떤 조건이 필요할까요?"} 52 | {"prompt": "국선도의 선도법이 지역 사회에 미치는 영향에는 어떤 요소가 있을까요?"} 53 | {"prompt": "국선도의 삼청단법이란 무엇인가요?"} 54 | {"prompt": "삼청단법을 통해 어떤 능력을 기를 수 있나요?"} 55 | {"prompt": "삼청단법의 목표는 무엇인가요?"} 56 | {"prompt": "삼청단법을 통해 어떤 실용적인 혜택을 얻을 수 있나요?"} 57 | {"prompt": "국선도의 무진단법의 수련 과정에서 어떤 장점이 있나요?"} 58 | {"prompt": "국선도의 무진단법을 통해 수련할 때 가장 어려운 점은 무엇인가요?"} 59 | {"prompt": "국선도의 무진단법을 통해 몸과 마음을 나누는 방법에는 어떤 종류가 있을까요?"} 60 | {"prompt": "국선도의 무진단법을 통해 나눠진 몸과 마음을 다시 하나로 합치는 시기는 얼마나 걸리나요?"} 61 | {"prompt": "국선도의 장부강화운동을 꾸준히 하면 몸에 어떤 변화가 생길까요?"} 62 | {"prompt": "국선도의 진공단법을 수행하면서 정리운동의 난도가 준비운동보다 높다는데, 그 이유는 무엇인가요?"} 63 | {"prompt": "정리운동을 수행하면서 어떻게 기혈순환을 유지할 수 있을까요?"} 64 | {"prompt": "정리운동을 수행하면서 얻을 수 있는 다른 이점들이 있을까요?"} 65 | {"prompt": "국선도를 함으로써 어떤 자세 개선이 이루어질 수 있을까요?"} 66 | {"prompt": "천화법과 내기전신행법의 차이점은 무엇인가요?"} 67 | {"prompt": "천화법을 사용하는 사람들은 어떤 이유로 이를 선택하는 건가요?"} 68 | {"prompt": "천화법의 성공적인 활용 사례를 알려주세요."} 69 | {"prompt": "천화법을 사용하면 단점은 없는 것인가요?"} 70 | {"prompt": "천화법을 활용한 정책 수립의 성공적인 예시를 알려주세요."} 71 | {"prompt": "천화법을 사용한 정책이 다른 국가에 영향을 미칠 수 있는가요?"} 72 | {"prompt": "천화법의 활용은 국가의 민주주의 발전에 어떤 역할을 하는 것인가요?"} 73 | {"prompt": "국선도를 연마하기 위해 필요한 신체적인 능력은 무엇이 있을까요?"} 74 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.13.1 2 | transformers 3 | accelerate 4 | colossalai==0.2.7 5 | openai 6 | langchain==0.0.113 7 | pandas>=1.4.1 8 | datasets 9 | jsonlines 10 | loralib --------------------------------------------------------------------------------
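The stage-3 PPO files above are JSON Lines with a single prompt field per object. As a hedged closing sketch (stdlib only; the path below is one of the files shipped in this repo's data directory), this is all it takes to turn such a file into the list of prompt strings a PPO fit loop consumes:

import json

# read one JSON object per line, skipping blank lines, and keep only the prompt text
with open('data/stage3. PPO/ive_test_ppo.jsonl', encoding='utf-8') as f:
    prompts = [json.loads(line)['prompt'] for line in f if line.strip()]

print(len(prompts), prompts[0])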