├── LICENSE
├── PPO_practice.ipynb
├── README.md
├── code
├── LICENSE
├── README.md
├── benchmarks
│ ├── README.md
│ ├── benchmark_gpt_dummy.py
│ ├── benchmark_gpt_dummy.sh
│ └── benchmark_opt_lora_dummy.py
├── chatgpt
│ ├── __init__.py
│ ├── __pycache__
│ │ └── __init__.cpython-310.pyc
│ ├── dataset
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── reward_dataset.cpython-310.pyc
│ │ │ └── utils.cpython-310.pyc
│ │ ├── reward_dataset.py
│ │ └── utils.py
│ ├── experience_maker
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── base.cpython-310.pyc
│ │ │ └── naive.cpython-310.pyc
│ │ ├── base.py
│ │ └── naive.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── generation.cpython-310.pyc
│ │ │ ├── generation_utils.cpython-310.pyc
│ │ │ ├── lora.cpython-310.pyc
│ │ │ ├── loss.cpython-310.pyc
│ │ │ └── utils.cpython-310.pyc
│ │ ├── base
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-310.pyc
│ │ │ │ ├── actor.cpython-310.pyc
│ │ │ │ ├── critic.cpython-310.pyc
│ │ │ │ └── reward_model.cpython-310.pyc
│ │ │ ├── actor.py
│ │ │ ├── critic.py
│ │ │ └── reward_model.py
│ │ ├── bloom
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-310.pyc
│ │ │ │ ├── bloom_actor.cpython-310.pyc
│ │ │ │ ├── bloom_critic.cpython-310.pyc
│ │ │ │ └── bloom_rm.cpython-310.pyc
│ │ │ ├── bloom_actor.py
│ │ │ ├── bloom_critic.py
│ │ │ └── bloom_rm.py
│ │ ├── generation.py
│ │ ├── generation_utils.py
│ │ ├── gpt
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-310.pyc
│ │ │ │ ├── gpt_actor.cpython-310.pyc
│ │ │ │ ├── gpt_critic.cpython-310.pyc
│ │ │ │ └── gpt_rm.cpython-310.pyc
│ │ │ ├── gpt_actor.py
│ │ │ ├── gpt_critic.py
│ │ │ └── gpt_rm.py
│ │ ├── lora.py
│ │ ├── loss.py
│ │ ├── opt
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-310.pyc
│ │ │ │ ├── opt_actor.cpython-310.pyc
│ │ │ │ ├── opt_critic.cpython-310.pyc
│ │ │ │ └── opt_rm.cpython-310.pyc
│ │ │ ├── opt_actor.py
│ │ │ ├── opt_critic.py
│ │ │ └── opt_rm.py
│ │ └── utils.py
│ ├── replay_buffer
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── base.cpython-310.pyc
│ │ │ ├── naive.cpython-310.pyc
│ │ │ └── utils.cpython-310.pyc
│ │ ├── base.py
│ │ ├── naive.py
│ │ └── utils.py
│ └── trainer
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── base.cpython-310.pyc
│ │ │ ├── ppo.cpython-310.pyc
│ │ │ ├── rm.cpython-310.pyc
│ │ │ └── utils.cpython-310.pyc
│ │ ├── base.py
│ │ ├── callbacks
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-310.pyc
│ │ │ │ ├── base.cpython-310.pyc
│ │ │ │ ├── performance_evaluator.cpython-310.pyc
│ │ │ │ └── save_checkpoint.cpython-310.pyc
│ │ │ ├── base.py
│ │ │ ├── performance_evaluator.py
│ │ │ └── save_checkpoint.py
│ │ ├── ppo.py
│ │ ├── rm.py
│ │ ├── strategies
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-310.pyc
│ │ │ │ ├── base.cpython-310.pyc
│ │ │ │ ├── colossalai.cpython-310.pyc
│ │ │ │ ├── ddp.cpython-310.pyc
│ │ │ │ ├── naive.cpython-310.pyc
│ │ │ │ └── sampler.cpython-310.pyc
│ │ │ ├── base.py
│ │ │ ├── colossalai.py
│ │ │ ├── ddp.py
│ │ │ ├── naive.py
│ │ │ └── sampler.py
│ │ └── utils.py
├── examples
│ ├── README.md
│ ├── inference.py
│ ├── requirements.txt
│ ├── test_ci.sh
│ ├── train_dummy.py
│ ├── train_dummy.sh
│ ├── train_prompts.py
│ ├── train_prompts.sh
│ ├── train_reward_model.py
│ └── train_rm.sh
├── pytest.ini
├── requirements-test.txt
├── requirements.txt
├── setup.py
├── tests
│ ├── __init__.py
│ ├── test_checkpoint.py
│ └── test_data.py
├── utils.py
└── version.txt
├── data
│ ├── stage1. domain_adaptive_pretraining
│ │ ├── BTS.csv
│ │ ├── domain_adaptive_kuksundo_pretrain.jsonl
│ │ ├── domain_adaptive_pretrain_ive.jsonl
│ │ ├── 국선도.csv
│ │ ├── 아이브.csv
│ │ └── 템플릿.csv
│ ├── stage1. domain_instruction_tuning
│ │ ├── ive_instruction_test.jsonl
│ │ ├── ive_instruction_train.jsonl
│ │ ├── kuksundo_instruction_test.jsonl
│ │ └── kuksundo_instruction_train.jsonl
│ ├── stage2. RM
│ │ ├── ive_test_rm.jsonl
│ │ ├── ive_train_rm.jsonl
│ │ ├── kuksundo_test_rm.jsonl
│ │ └── kuksundo_train_rm.jsonl
│ └── stage3. PPO
│ │ ├── ive_test_ppo.jsonl
│ │ ├── ive_train_ppo.jsonl
│ │ ├── kuksundo_test_ppo.jsonl
│ │ └── kuksundo_train_ppo.jsonl
├── mygpt_실습.ipynb
└── requirements.txt
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 oglee815
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Updates
2 | - 2024-06-13: Merged the data-generation and training notebooks into a single Colab notebook
3 | - 2023-10-24: Added line numbers and cleaned up the code
4 |
5 | # mygpt-lecture
6 | These are the materials for the lecture "Build Your Own MyGPT with Your Own Data" (나만의 데이터로 만드는 MyGPT 강의).
7 |
8 | They were created with reference to the following project:
9 | https://github.com/airobotlab/KoChatGPT
10 |
11 | # Table of Contents
12 |
13 |
14 | # Hands-on Workflow
15 |
16 |
17 | # Training Code
18 | [mygpt_실습.ipynb](https://colab.research.google.com/github/oglee815/mygpt-lecture/blob/main/mygpt_실습.ipynb)
19 |
20 | # Data Generation Code
21 | [mygpt_실습.ipynb](https://colab.research.google.com/github/oglee815/mygpt-lecture/blob/main/mygpt_실습.ipynb)
22 | - Data is generated automatically through the ChatGPT API (a minimal sketch follows below)
23 |
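A minimal sketch of such an API call is shown below. It assumes the `openai` Python package and an `OPENAI_API_KEY` in the environment; the model name and prompt are placeholders, and the actual notebook may structure the prompts and post-processing differently.

```python
# Hedged sketch: generate one instruction/response pair with the ChatGPT API.
# The model name and prompt are illustrative, not the notebook's exact values.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
resp = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user",
               "content": "Write one question-and-answer pair about the K-pop group IVE."}],
)
print(resp.choices[0].message.content)
```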
24 |
25 | # PPO Reinforcement Learning Practice Code (LunarLander-v2)
26 | - [PPO_practice.ipynb](https://colab.research.google.com/github/oglee815/mygpt-lecture/blob/main/PPO_practice.ipynb)
27 |
28 |
29 | # Example Training Results
30 | - ChatGPT-style Stages 1, 2, and 3 were trained on SKT-KoGPT2 with data from the Namuwiki '아이브' (IVE) category, and the results were compared with the Stage 1 SFT model
31 |
32 | - Perhaps due to the KL penalty, the RLHF model surprisingly often produces exactly the same output as the SFT model
33 |
34 | # Contact
35 | - 이현제, oglee815@gmail.com
36 | - h8.lee@samsung.com
37 |
--------------------------------------------------------------------------------
/code/README.md:
--------------------------------------------------------------------------------
1 | # RLHF - Colossal-AI
2 |
3 | ## Table of Contents
4 |
5 | - [What is RLHF - Colossal-AI?](#intro)
6 | - [How to Install?](#install)
7 | - [The Plan](#the-plan)
8 | - [How can you participate in open source?](#invitation-to-open-source-contribution)
9 | ---
10 | ## Intro
11 | Implementation of RLHF (Reinforcement Learning from Human Feedback) powered by Colossal-AI. It supports distributed training and offloading, so it can fit extremely large models. More details can be found in the [blog](https://www.hpc-ai.tech/blog/colossal-ai-chatgpt).
12 |
13 |
14 |
15 |
16 |
17 | ## Training process (step 3)
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 | ## Install
27 | ```shell
28 | pip install .
29 | ```
30 |
31 | ## Usage
32 |
33 | The main entry point is `Trainer`. Only the PPO trainer is supported for now. Several training strategies are supported (a short selection sketch follows this list):
34 |
35 | - NaiveStrategy: the simplest strategy; trains on a single GPU.
36 | - DDPStrategy: uses `torch.nn.parallel.DistributedDataParallel`; trains on multiple GPUs.
37 | - ColossalAIStrategy: uses ColossalAI's Gemini and ZeRO. It eliminates model duplication across GPUs and supports offloading, which is very useful when training large models on multiple GPUs.
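A minimal selection sketch, assuming `NaiveStrategy` is exported from `chatgpt.trainer.strategies` alongside the other two (a `naive.py` module exists there) and using the `ColossalAIStrategy` arguments seen in the benchmark scripts:

```python
# Sketch: picking one of the strategies exported by chatgpt.trainer.strategies.
from chatgpt.trainer.strategies import NaiveStrategy, DDPStrategy, ColossalAIStrategy

strategy = NaiveStrategy()                                          # single GPU
# strategy = DDPStrategy()                                          # multiple GPUs, plain DDP
# strategy = ColossalAIStrategy(stage=2, placement_policy='cuda')   # ZeRO-2 via ColossalAI
```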
38 |
39 | Simplest usage:
40 |
41 | ```python
42 | from chatgpt.trainer import PPOTrainer
43 | from chatgpt.trainer.strategies import ColossalAIStrategy
44 | from chatgpt.models.gpt import GPTActor, GPTCritic
45 | from chatgpt.models.base import RewardModel
46 | from copy import deepcopy
47 | from colossalai.nn.optimizer import HybridAdam
48 |
49 | strategy = ColossalAIStrategy()
50 |
51 | with strategy.model_init_context():
52 | # init your model here
53 | # load pretrained gpt2
54 | actor = GPTActor(pretrained='gpt2')
55 | critic = GPTCritic()
56 | initial_model = deepcopy(actor).cuda()
57 | reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).cuda()
58 |
59 | actor_optim = HybridAdam(actor.parameters(), lr=5e-6)
60 | critic_optim = HybridAdam(critic.parameters(), lr=5e-6)
61 |
62 | # prepare models and optimizers
63 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare(
64 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model)
65 |
66 | # load saved model checkpoint after preparing
67 | strategy.load_model(actor, 'actor_checkpoint.pt', strict=False)
68 | # load saved optimizer checkpoint after preparing
69 | strategy.load_optimizer(actor_optim, 'actor_optim_checkpoint.pt')
70 |
71 | trainer = PPOTrainer(strategy,
72 | actor,
73 | critic,
74 | reward_model,
75 | initial_model,
76 | actor_optim,
77 | critic_optim,
78 | ...)
79 |
80 | trainer.fit(dataset, ...)
81 |
82 | # save model checkpoint after fitting on only rank0
83 | strategy.save_model(actor, 'actor_checkpoint.pt', only_rank0=True)
84 | # save optimizer checkpoint on all ranks
85 | strategy.save_optimizer(actor_optim, 'actor_optim_checkpoint.pt', only_rank0=False)
86 | ```
87 |
88 | For more details, see `examples/`.
89 |
90 | We also support training a reward model with real-world data; see `examples/train_reward_model.py`.
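A minimal sketch of the pairwise training step behind that script is shown below. It is illustrative only: `RewardDataset` and `PairWiseLoss` come from this package, but the dataloader, optimizer, and the `.squeeze(1)` handling of the tokenized tensors are assumptions; `examples/train_reward_model.py` is the authoritative version.

```python
# Illustrative pairwise reward-model training step (not the full example script).
# RewardDataset items are (chosen_ids, chosen_mask, reject_ids, reject_mask);
# each tensor is assumed to carry a leading dim of 1 from the tokenizer, hence squeeze(1).
from chatgpt.models import PairWiseLoss

loss_fn = PairWiseLoss()
for chosen_ids, chosen_mask, reject_ids, reject_mask in dataloader:
    chosen_reward = reward_model(chosen_ids.squeeze(1).cuda(),
                                 attention_mask=chosen_mask.squeeze(1).cuda())
    reject_reward = reward_model(reject_ids.squeeze(1).cuda(),
                                 attention_mask=reject_mask.squeeze(1).cuda())
    loss = loss_fn(chosen_reward, reject_reward)   # encourages chosen > rejected
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
```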
91 |
92 | ## FAQ
93 |
94 | ### How to save/load checkpoint
95 |
96 | To load a pretrained model, you can simply use Hugging Face pretrained weights:
97 |
98 | ```python
99 | # load OPT-350m pretrained model
100 | actor = OPTActor(pretrained='facebook/opt-350m')
101 | ```
102 |
103 | To save model checkpoint:
104 |
105 | ```python
106 | # save model checkpoint on only rank0
107 | strategy.save_model(actor, 'actor_checkpoint.pt', only_rank0=True)
108 | ```
109 |
110 | This function must be called after `strategy.prepare()`.
111 |
112 | With the DDP strategy, model weights are replicated on all ranks. With the ColossalAI strategy, model weights may be sharded, but an all-gather is applied before the state dict is returned. You can set `only_rank0=True` for both, which saves the checkpoint only on rank 0 and reduces disk usage. The checkpoint is saved in float32.
113 |
114 | To save optimizer checkpoint:
115 |
116 | ```python
117 | # save optimizer checkpoint on all ranks
118 | strategy.save_optimizer(actor_optim, 'actor_optim_checkpoint.pt', only_rank0=False)
119 | ```
120 |
121 | With the DDP strategy, optimizer states are replicated on all ranks, so you can set `only_rank0=True`. With the ColossalAI strategy, optimizer states are sharded across all ranks and no all-gather is applied, so you can only set `only_rank0=False`: each rank saves its own checkpoint, and when loading, each rank should load the corresponding part.
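A sketch of what this can look like in practice is below. Whether each rank actually needs a distinct file name depends on the strategy's own naming; the explicit rank suffix here is a hypothetical convention for illustration, not documented library behavior.

```python
# Sketch: sharded optimizer checkpointing with the ColossalAI strategy.
# Every rank saves and later reloads its own shard (only_rank0 must be False);
# the rank-suffixed file name is an assumed convention, not the library's API.
import torch.distributed as dist

rank = dist.get_rank()
strategy.save_optimizer(actor_optim, f'actor_optim_checkpoint_{rank}.pt', only_rank0=False)
# ... later, with the same world size, each rank loads its corresponding part:
strategy.load_optimizer(actor_optim, f'actor_optim_checkpoint_{rank}.pt')
```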
122 |
123 | Note that different strategies may produce optimizer checkpoints of different shapes.
124 |
125 | To load model checkpoint:
126 |
127 | ```python
128 | # load saved model checkpoint after preparing
129 | strategy.load_model(actor, 'actor_checkpoint.pt', strict=False)
130 | ```
131 |
132 | To load optimizer checkpoint:
133 |
134 | ```python
135 | # load saved optimizer checkpoint after preparing
136 | strategy.load_optimizer(actor_optim, 'actor_optim_checkpoint.pt')
137 | ```
138 |
139 | ## The Plan
140 |
141 | - [x] implement PPO fine-tuning
142 | - [x] implement training reward model
143 | - [x] support LoRA
144 | - [x] support inference
145 | - [ ] open source the reward model weight
146 | - [ ] support llama from [facebook](https://github.com/facebookresearch/llama)
147 | - [ ] support BoN (best-of-N sampling)
148 | - [ ] implement PPO-ptx fine-tuning
149 | - [ ] integrate with Ray
150 | - [ ] support more RL paradigms, such as Implicit Language Q-Learning (ILQL)
151 | - [ ] support chain of thought via [langchain](https://github.com/hwchase17/langchain)
152 |
153 | ### Real-time progress
154 | You can follow our progress on the GitHub project board:
155 |
156 | [Open ChatGPT](https://github.com/orgs/hpcaitech/projects/17/views/1)
157 |
158 | ## Invitation to open-source contribution
159 | Following the successful examples of [BLOOM](https://bigscience.huggingface.co/) and [Stable Diffusion](https://en.wikipedia.org/wiki/Stable_Diffusion), any developers and partners with computing power, datasets, or models are welcome to join and build the Colossal-AI community, working towards the era of large AI models starting from the replication of ChatGPT!
160 |
161 | You may contact us or participate in the following ways:
162 | 1. [Leaving a Star ⭐](https://github.com/hpcaitech/ColossalAI/stargazers) to show your appreciation and support. Thanks!
163 | 2. Posting an [issue](https://github.com/hpcaitech/ColossalAI/issues/new/choose) or submitting a PR on GitHub, following the guidelines in [Contributing](https://github.com/hpcaitech/ColossalAI/blob/main/CONTRIBUTING.md).
164 | 3. Joining the Colossal-AI community on
165 | [Slack](https://join.slack.com/t/colossalaiworkspace/shared_invite/zt-z7b26eeb-CBp7jouvu~r0~lcFzX832w)
166 | and [WeChat(微信)](https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png "qrcode") to share your ideas.
167 | 4. Sending your official proposal to contact@hpcaitech.com
168 |
169 | Thanks so much to all of our amazing contributors!
170 |
171 | ## Quick Preview
172 |
173 |
174 |
175 |
176 | - Up to 7.73 times faster for single server training and 1.42 times faster for single-GPU inference
177 |
178 |
179 |
180 |
181 |
182 | - Up to 10.3x growth in model capacity on one GPU
183 | - A mini demo training process requires only 1.62GB of GPU memory (any consumer-grade GPU)
184 |
185 |
186 |
187 |
188 |
189 | - Up to 3.7x larger fine-tunable model capacity on a single GPU
190 | - while keeping the running speed sufficiently high
191 |
192 | ## Citations
193 |
194 | ```bibtex
195 | @article{Hu2021LoRALA,
196 | title = {LoRA: Low-Rank Adaptation of Large Language Models},
197 | author = {Edward J. Hu and Yelong Shen and Phillip Wallis and Zeyuan Allen-Zhu and Yuanzhi Li and Shean Wang and Weizhu Chen},
198 | journal = {ArXiv},
199 | year = {2021},
200 | volume = {abs/2106.09685}
201 | }
202 |
203 | @article{ouyang2022training,
204 | title={Training language models to follow instructions with human feedback},
205 | author={Ouyang, Long and Wu, Jeff and Jiang, Xu and Almeida, Diogo and Wainwright, Carroll L and Mishkin, Pamela and Zhang, Chong and Agarwal, Sandhini and Slama, Katarina and Ray, Alex and others},
206 | journal={arXiv preprint arXiv:2203.02155},
207 | year={2022}
208 | }
209 | ```
210 |
--------------------------------------------------------------------------------
/code/benchmarks/README.md:
--------------------------------------------------------------------------------
1 | # Benchmarks
2 |
3 | ## Benchmark GPT on dummy prompt data
4 |
5 | We provide various GPT models (string in parentheses is the corresponding model name used in this script):
6 |
7 | - GPT2-S (s)
8 | - GPT2-M (m)
9 | - GPT2-L (l)
10 | - GPT2-XL (xl)
11 | - GPT2-4B (4b)
12 | - GPT2-6B (6b)
13 | - GPT2-8B (8b)
14 | - GPT2-10B (10b)
15 | - GPT2-12B (12b)
16 | - GPT2-15B (15b)
17 | - GPT2-18B (18b)
18 | - GPT2-20B (20b)
19 | - GPT2-24B (24b)
20 | - GPT2-28B (28b)
21 | - GPT2-32B (32b)
22 | - GPT2-36B (36b)
23 | - GPT2-40B (40b)
24 | - GPT3 (175b)
25 |
26 | We also provide various training strategies:
27 |
28 | - ddp: torch DDP
29 | - colossalai_gemini: ColossalAI GeminiDDP with `placement_policy="cuda"`, like zero3
30 | - colossalai_gemini_cpu: ColossalAI GeminiDDP with `placement_policy="cpu"`, like zero3-offload
31 | - colossalai_zero2: ColossalAI zero2
32 | - colossalai_zero2_cpu: ColossalAI zero2-offload
33 | - colossalai_zero1: ColossalAI zero1
34 | - colossalai_zero1_cpu: ColossalAI zero1-offload
35 |
36 | Only launching with `torchrun` is supported for now, e.g.
37 |
38 | ```shell
39 | # run GPT2-S on single-node single-GPU with min batch size
40 | torchrun --standalone --nproc_per_node 1 benchmark_gpt_dummy.py --model s --strategy ddp --experience_batch_size 1 --train_batch_size 1
41 | # run GPT2-XL on single-node 4-GPU
42 | torchrun --standalone --nproc_per_node 4 benchmark_gpt_dummy.py --model xl --strategy colossalai_zero2
43 | # run GPT3 on 8-node 8-GPU
44 | torchrun --nnodes 8 --nproc_per_node 8 \
45 | --rdzv_id=$JOB_ID --rdzv_backend=c10d --rdzv_endpoint=$HOST_NODE_ADDR \
46 | benchmark_gpt_dummy.py --model 175b --strategy colossalai_gemini
47 | ```
48 |
49 | > ⚠ Batch sizes in the CLI args and the reported throughput/TFLOPS are all per-GPU values.
50 |
51 | In this benchmark, we assume the model architectures/sizes of actor and critic are the same for simplicity. But in practice, to reduce training cost, we may use a smaller critic.
52 |
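For example, a sketch of pairing a large actor with a smaller critic (the model sizes here are arbitrary examples; `get_gpt_config` is the helper defined in `benchmark_gpt_dummy.py`):

```python
# Sketch: a critic smaller than the actor to reduce training cost.
from chatgpt.models.gpt import GPTActor, GPTCritic
from benchmark_gpt_dummy import get_gpt_config

actor = GPTActor(config=get_gpt_config('xl')).cuda()
critic = GPTCritic(config=get_gpt_config('m')).cuda()   # smaller than the actor
```
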
53 | We also provide a simple shell script to run a set of benchmarks. It only supports benchmarking on a single node, but it is easy to run on multiple nodes by modifying the launch command in the script.
54 |
55 | Usage:
56 |
57 | ```shell
58 | # run for GPUS=(1 2 4 8) x strategy=("ddp" "colossalai_zero2" "colossalai_gemini" "colossalai_zero2_cpu" "colossalai_gemini_cpu") x model=("s" "m" "l" "xl" "2b" "4b" "6b" "8b" "10b") x batch_size=(1 2 4 8 16 32 64 128 256)
59 | ./benchmark_gpt_dummy.sh
60 | # run for GPUS=2 x strategy=("ddp" "colossalai_zero2" "colossalai_gemini" "colossalai_zero2_cpu" "colossalai_gemini_cpu") x model=("s" "m" "l" "xl" "2b" "4b" "6b" "8b" "10b") x batch_size=(1 2 4 8 16 32 64 128 256)
61 | ./benchmark_gpt_dummy.sh 2
62 | # run for GPUS=2 x strategy=ddp x model=("s" "m" "l" "xl" "2b" "4b" "6b" "8b" "10b") x batch_size=(1 2 4 8 16 32 64 128 256)
63 | ./benchmark_gpt_dummy.sh 2 ddp
64 | # run for GPUS=2 x strategy=ddp x model=l x batch_size=(1 2 4 8 16 32 64 128 256)
65 | ./benchmark_gpt_dummy.sh 2 ddp l
66 | ```
67 |
68 | ## Benchmark OPT with LoRA on dummy prompt data
69 |
70 | We provide various OPT models (string in parentheses is the corresponding model name used in this script):
71 |
72 | - OPT-125M (125m)
73 | - OPT-350M (350m)
74 | - OPT-700M (700m)
75 | - OPT-1.3B (1.3b)
76 | - OPT-2.7B (2.7b)
77 | - OPT-3.5B (3.5b)
78 | - OPT-5.5B (5.5b)
79 | - OPT-6.7B (6.7b)
80 | - OPT-10B (10b)
81 | - OPT-13B (13b)
82 |
83 | Only launching with `torchrun` is supported for now, e.g.
84 |
85 | ```shell
86 | # run OPT-125M with no lora (lora_rank=0) on single-node single-GPU with min batch size
87 | torchrun --standalone --nproc_per_node 1 benchmark_opt_lora_dummy.py --model 125m --strategy ddp --experience_batch_size 1 --train_batch_size 1 --lora_rank 0
88 | # run OPT-350M with lora_rank=4 on single-node 4-GPU
89 | torchrun --standalone --nproc_per_node 4 benchmark_opt_lora_dummy.py --model 350m --strategy colossalai_zero2 --lora_rank 4
90 | ```
91 |
92 | > ⚠ Batch sizes in the CLI args and the reported throughput/TFLOPS are all per-GPU values.
93 |
94 | In this benchmark, we assume the model architectures/sizes of actor and critic are the same for simplicity. But in practice, to reduce training cost, we may use a smaller critic.
95 |
--------------------------------------------------------------------------------
/code/benchmarks/benchmark_gpt_dummy.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from copy import deepcopy
3 |
4 | import torch
5 | import torch.distributed as dist
6 | import torch.nn as nn
7 | from chatgpt.models.base import RewardModel
8 | from chatgpt.models.gpt import GPTActor, GPTCritic
9 | from chatgpt.trainer import PPOTrainer
10 | from chatgpt.trainer.callbacks import PerformanceEvaluator
11 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, Strategy
12 | from torch.optim import Adam
13 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config
14 | from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
15 |
16 | from colossalai.nn.optimizer import HybridAdam
17 |
18 |
19 | def get_model_numel(model: nn.Module, strategy: Strategy) -> int:
20 | numel = sum(p.numel() for p in model.parameters())
21 | if isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3 and strategy.shard_init:
22 | numel *= dist.get_world_size()
23 | return numel
24 |
25 |
26 | def preprocess_batch(samples) -> dict:
27 | input_ids = torch.stack(samples)
28 | attention_mask = torch.ones_like(input_ids, dtype=torch.long)
29 | return {'input_ids': input_ids, 'attention_mask': attention_mask}
30 |
31 |
32 | def print_rank_0(*args, **kwargs) -> None:
33 | if dist.get_rank() == 0:
34 | print(*args, **kwargs)
35 |
36 |
37 | def print_model_numel(model_dict: dict) -> None:
38 | B = 1024**3
39 | M = 1024**2
40 | K = 1024
41 | outputs = ''
42 | for name, numel in model_dict.items():
43 | outputs += f'{name}: '
44 | if numel >= B:
45 | outputs += f'{numel / B:.2f} B\n'
46 | elif numel >= M:
47 | outputs += f'{numel / M:.2f} M\n'
48 | elif numel >= K:
49 | outputs += f'{numel / K:.2f} K\n'
50 | else:
51 | outputs += f'{numel}\n'
52 | print_rank_0(outputs)
53 |
54 |
55 | def get_gpt_config(model_name: str) -> GPT2Config:
56 | model_map = {
57 | 's': GPT2Config(),
58 | 'm': GPT2Config(n_embd=1024, n_layer=24, n_head=16),
59 | 'l': GPT2Config(n_embd=1280, n_layer=36, n_head=20),
60 | 'xl': GPT2Config(n_embd=1600, n_layer=48, n_head=25),
61 | '2b': GPT2Config(n_embd=2048, n_layer=40, n_head=16),
62 | '4b': GPT2Config(n_embd=2304, n_layer=64, n_head=16),
63 | '6b': GPT2Config(n_embd=4096, n_layer=30, n_head=16),
64 | '8b': GPT2Config(n_embd=4096, n_layer=40, n_head=16),
65 | '10b': GPT2Config(n_embd=4096, n_layer=50, n_head=16),
66 | '12b': GPT2Config(n_embd=4096, n_layer=60, n_head=16),
67 | '15b': GPT2Config(n_embd=4096, n_layer=78, n_head=16),
68 | '18b': GPT2Config(n_embd=4096, n_layer=90, n_head=16),
69 | '20b': GPT2Config(n_embd=8192, n_layer=25, n_head=16),
70 | '24b': GPT2Config(n_embd=8192, n_layer=30, n_head=16),
71 | '28b': GPT2Config(n_embd=8192, n_layer=35, n_head=16),
72 | '32b': GPT2Config(n_embd=8192, n_layer=40, n_head=16),
73 | '36b': GPT2Config(n_embd=8192, n_layer=45, n_head=16),
74 | '40b': GPT2Config(n_embd=8192, n_layer=50, n_head=16),
75 | '175b': GPT2Config(n_positions=2048, n_embd=12288, n_layer=96, n_head=96),
76 | }
77 | try:
78 | return model_map[model_name]
79 | except KeyError:
80 | raise ValueError(f'Unknown model "{model_name}"')
81 |
82 |
83 | def main(args):
84 | if args.strategy == 'ddp':
85 | strategy = DDPStrategy()
86 | elif args.strategy == 'colossalai_gemini':
87 | strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5)
88 | elif args.strategy == 'colossalai_gemini_cpu':
89 | strategy = ColossalAIStrategy(stage=3, placement_policy='cpu', initial_scale=2**5)
90 | elif args.strategy == 'colossalai_zero2':
91 | strategy = ColossalAIStrategy(stage=2, placement_policy='cuda')
92 | elif args.strategy == 'colossalai_zero2_cpu':
93 | strategy = ColossalAIStrategy(stage=2, placement_policy='cpu')
94 | elif args.strategy == 'colossalai_zero1':
95 | strategy = ColossalAIStrategy(stage=1, placement_policy='cuda')
96 | elif args.strategy == 'colossalai_zero1_cpu':
97 | strategy = ColossalAIStrategy(stage=1, placement_policy='cpu')
98 | else:
99 | raise ValueError(f'Unsupported strategy "{args.strategy}"')
100 |
101 | model_config = get_gpt_config(args.model)
102 |
103 | with strategy.model_init_context():
104 | actor = GPTActor(config=model_config).cuda()
105 | critic = GPTCritic(config=model_config).cuda()
106 |
107 | initial_model = deepcopy(actor).cuda()
108 | reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).cuda()
109 |
110 | actor_numel = get_model_numel(actor, strategy)
111 | critic_numel = get_model_numel(critic, strategy)
112 | initial_model_numel = get_model_numel(initial_model, strategy)
113 | reward_model_numel = get_model_numel(reward_model, strategy)
114 | print_model_numel({
115 | 'Actor': actor_numel,
116 | 'Critic': critic_numel,
117 | 'Initial model': initial_model_numel,
118 | 'Reward model': reward_model_numel
119 | })
120 | performance_evaluator = PerformanceEvaluator(actor_numel,
121 | critic_numel,
122 | initial_model_numel,
123 | reward_model_numel,
124 | enable_grad_checkpoint=False,
125 | ignore_episodes=1)
126 |
127 | if args.strategy.startswith('colossalai'):
128 | actor_optim = HybridAdam(actor.parameters(), lr=5e-6)
129 | critic_optim = HybridAdam(critic.parameters(), lr=5e-6)
130 | else:
131 | actor_optim = Adam(actor.parameters(), lr=5e-6)
132 | critic_optim = Adam(critic.parameters(), lr=5e-6)
133 |
134 | tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
135 | tokenizer.pad_token = tokenizer.eos_token
136 |
137 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare(
138 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model)
139 |
140 | trainer = PPOTrainer(strategy,
141 | actor,
142 | critic,
143 | reward_model,
144 | initial_model,
145 | actor_optim,
146 | critic_optim,
147 | max_epochs=args.max_epochs,
148 | train_batch_size=args.train_batch_size,
149 | experience_batch_size=args.experience_batch_size,
150 | tokenizer=preprocess_batch,
151 | max_length=512,
152 | do_sample=True,
153 | temperature=1.0,
154 | top_k=50,
155 | pad_token_id=tokenizer.pad_token_id,
156 | eos_token_id=tokenizer.eos_token_id,
157 | callbacks=[performance_evaluator])
158 |
159 | random_prompts = torch.randint(tokenizer.vocab_size, (1000, 400), device=torch.cuda.current_device())
160 | trainer.fit(random_prompts,
161 | num_episodes=args.num_episodes,
162 | max_timesteps=args.max_timesteps,
163 | update_timesteps=args.update_timesteps)
164 |
165 | print_rank_0(f'Peak CUDA mem: {torch.cuda.max_memory_allocated()/1024**3:.2f} GB')
166 |
167 |
168 | if __name__ == '__main__':
169 | parser = argparse.ArgumentParser()
170 | parser.add_argument('--model', default='s')
171 | parser.add_argument('--strategy',
172 | choices=[
173 | 'ddp', 'colossalai_gemini', 'colossalai_gemini_cpu', 'colossalai_zero2',
174 | 'colossalai_zero2_cpu', 'colossalai_zero1', 'colossalai_zero1_cpu'
175 | ],
176 | default='ddp')
177 | parser.add_argument('--num_episodes', type=int, default=3)
178 | parser.add_argument('--max_timesteps', type=int, default=8)
179 | parser.add_argument('--update_timesteps', type=int, default=8)
180 | parser.add_argument('--max_epochs', type=int, default=3)
181 | parser.add_argument('--train_batch_size', type=int, default=8)
182 | parser.add_argument('--experience_batch_size', type=int, default=8)
183 | args = parser.parse_args()
184 | main(args)
185 |
--------------------------------------------------------------------------------
/code/benchmarks/benchmark_gpt_dummy.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Usage: $0
3 | set -xu
4 |
5 | BASE=$(realpath $(dirname $0))
6 |
7 |
8 | PY_SCRIPT=${BASE}/benchmark_gpt_dummy.py
9 | export OMP_NUM_THREADS=8
10 |
11 | function tune_batch_size() {
12 | # we found when experience batch size is equal to train batch size
13 | # peak CUDA memory usage of making experience phase is less than or equal to that of training phase
14 | # thus, experience batch size can be larger than or equal to train batch size
15 | for bs in 1 2 4 8 16 32 64 128 256; do
16 | torchrun --standalone --nproc_per_node $1 $PY_SCRIPT --model $2 --strategy $3 --experience_batch_size $bs --train_batch_size $bs || return 1
17 | done
18 | }
19 |
20 | if [ $# -eq 0 ]; then
21 | num_gpus=(1 2 4 8)
22 | else
23 | num_gpus=($1)
24 | fi
25 |
26 | if [ $# -le 1 ]; then
27 | strategies=("ddp" "colossalai_zero2" "colossalai_gemini" "colossalai_zero2_cpu" "colossalai_gemini_cpu")
28 | else
29 | strategies=($2)
30 | fi
31 |
32 | if [ $# -le 2 ]; then
33 | models=("s" "m" "l" "xl" "2b" "4b" "6b" "8b" "10b")
34 | else
35 | models=($3)
36 | fi
37 |
38 |
39 | for num_gpu in ${num_gpus[@]}; do
40 | for strategy in ${strategies[@]}; do
41 | for model in ${models[@]}; do
42 | tune_batch_size $num_gpu $model $strategy || break
43 | done
44 | done
45 | done
46 |
--------------------------------------------------------------------------------
/code/benchmarks/benchmark_opt_lora_dummy.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from copy import deepcopy
3 |
4 | import torch
5 | import torch.distributed as dist
6 | import torch.nn as nn
7 | from chatgpt.models.base import RewardModel
8 | from chatgpt.models.opt import OPTActor, OPTCritic
9 | from chatgpt.trainer import PPOTrainer
10 | from chatgpt.trainer.callbacks import PerformanceEvaluator
11 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, Strategy
12 | from torch.optim import Adam
13 | from transformers import AutoTokenizer
14 | from transformers.models.opt.configuration_opt import OPTConfig
15 |
16 | from colossalai.nn.optimizer import HybridAdam
17 |
18 |
19 | def get_model_numel(model: nn.Module, strategy: Strategy) -> int:
20 | numel = sum(p.numel() for p in model.parameters())
21 | if isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3 and strategy.shard_init:
22 | numel *= dist.get_world_size()
23 | return numel
24 |
25 |
26 | def preprocess_batch(samples) -> dict:
27 | input_ids = torch.stack(samples)
28 | attention_mask = torch.ones_like(input_ids, dtype=torch.long)
29 | return {'input_ids': input_ids, 'attention_mask': attention_mask}
30 |
31 |
32 | def print_rank_0(*args, **kwargs) -> None:
33 | if dist.get_rank() == 0:
34 | print(*args, **kwargs)
35 |
36 |
37 | def print_model_numel(model_dict: dict) -> None:
38 | B = 1024**3
39 | M = 1024**2
40 | K = 1024
41 | outputs = ''
42 | for name, numel in model_dict.items():
43 | outputs += f'{name}: '
44 | if numel >= B:
45 | outputs += f'{numel / B:.2f} B\n'
46 | elif numel >= M:
47 | outputs += f'{numel / M:.2f} M\n'
48 | elif numel >= K:
49 | outputs += f'{numel / K:.2f} K\n'
50 | else:
51 | outputs += f'{numel}\n'
52 | print_rank_0(outputs)
53 |
54 |
55 | def get_gpt_config(model_name: str) -> OPTConfig:
56 | model_map = {
57 | '125m': OPTConfig.from_pretrained('facebook/opt-125m'),
58 | '350m': OPTConfig(hidden_size=1024, ffn_dim=4096, num_hidden_layers=24, num_attention_heads=16),
59 | '700m': OPTConfig(hidden_size=1280, ffn_dim=5120, num_hidden_layers=36, num_attention_heads=20),
60 | '1.3b': OPTConfig.from_pretrained('facebook/opt-1.3b'),
61 | '2.7b': OPTConfig.from_pretrained('facebook/opt-2.7b'),
62 | '3.5b': OPTConfig(hidden_size=3072, ffn_dim=12288, num_hidden_layers=32, num_attention_heads=32),
63 | '5.5b': OPTConfig(hidden_size=3840, ffn_dim=15360, num_hidden_layers=32, num_attention_heads=32),
64 | '6.7b': OPTConfig.from_pretrained('facebook/opt-6.7b'),
65 | '10b': OPTConfig(hidden_size=5120, ffn_dim=20480, num_hidden_layers=32, num_attention_heads=32),
66 | '13b': OPTConfig.from_pretrained('facebook/opt-13b'),
67 | }
68 | try:
69 | return model_map[model_name]
70 | except KeyError:
71 | raise ValueError(f'Unknown model "{model_name}"')
72 |
73 |
74 | def main(args):
75 | if args.strategy == 'ddp':
76 | strategy = DDPStrategy()
77 | elif args.strategy == 'colossalai_gemini':
78 | strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5)
79 | elif args.strategy == 'colossalai_gemini_cpu':
80 | strategy = ColossalAIStrategy(stage=3, placement_policy='cpu', initial_scale=2**5)
81 | elif args.strategy == 'colossalai_zero2':
82 | strategy = ColossalAIStrategy(stage=2, placement_policy='cuda')
83 | elif args.strategy == 'colossalai_zero2_cpu':
84 | strategy = ColossalAIStrategy(stage=2, placement_policy='cpu')
85 | elif args.strategy == 'colossalai_zero1':
86 | strategy = ColossalAIStrategy(stage=1, placement_policy='cuda')
87 | elif args.strategy == 'colossalai_zero1_cpu':
88 | strategy = ColossalAIStrategy(stage=1, placement_policy='cpu')
89 | else:
90 | raise ValueError(f'Unsupported strategy "{args.strategy}"')
91 |
92 | torch.cuda.set_per_process_memory_fraction(args.cuda_mem_frac)
93 |
94 | model_config = get_gpt_config(args.model)
95 |
96 | with strategy.model_init_context():
97 | actor = OPTActor(config=model_config, lora_rank=args.lora_rank).cuda()
98 | critic = OPTCritic(config=model_config, lora_rank=args.lora_rank).cuda()
99 |
100 | initial_model = deepcopy(actor).cuda()
101 | reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).cuda()
102 |
103 | actor_numel = get_model_numel(actor, strategy)
104 | critic_numel = get_model_numel(critic, strategy)
105 | initial_model_numel = get_model_numel(initial_model, strategy)
106 | reward_model_numel = get_model_numel(reward_model, strategy)
107 | print_model_numel({
108 | 'Actor': actor_numel,
109 | 'Critic': critic_numel,
110 | 'Initial model': initial_model_numel,
111 | 'Reward model': reward_model_numel
112 | })
113 | performance_evaluator = PerformanceEvaluator(actor_numel,
114 | critic_numel,
115 | initial_model_numel,
116 | reward_model_numel,
117 | enable_grad_checkpoint=False,
118 | ignore_episodes=1)
119 |
120 | if args.strategy.startswith('colossalai'):
121 | actor_optim = HybridAdam(actor.parameters(), lr=5e-6)
122 | critic_optim = HybridAdam(critic.parameters(), lr=5e-6)
123 | else:
124 | actor_optim = Adam(actor.parameters(), lr=5e-6)
125 | critic_optim = Adam(critic.parameters(), lr=5e-6)
126 |
127 | tokenizer = AutoTokenizer.from_pretrained('facebook/opt-350m')
128 | tokenizer.pad_token = tokenizer.eos_token
129 |
130 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare(
131 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model)
132 |
133 | trainer = PPOTrainer(strategy,
134 | actor,
135 | critic,
136 | reward_model,
137 | initial_model,
138 | actor_optim,
139 | critic_optim,
140 | max_epochs=args.max_epochs,
141 | train_batch_size=args.train_batch_size,
142 | experience_batch_size=args.experience_batch_size,
143 | tokenizer=preprocess_batch,
144 | max_length=512,
145 | do_sample=True,
146 | temperature=1.0,
147 | top_k=50,
148 | pad_token_id=tokenizer.pad_token_id,
149 | eos_token_id=tokenizer.eos_token_id,
150 | callbacks=[performance_evaluator])
151 |
152 | random_prompts = torch.randint(tokenizer.vocab_size, (1000, 400), device=torch.cuda.current_device())
153 | trainer.fit(random_prompts,
154 | num_episodes=args.num_episodes,
155 | max_timesteps=args.max_timesteps,
156 | update_timesteps=args.update_timesteps)
157 |
158 | print_rank_0(f'Peak CUDA mem: {torch.cuda.max_memory_allocated()/1024**3:.2f} GB')
159 |
160 |
161 | if __name__ == '__main__':
162 | parser = argparse.ArgumentParser()
163 | parser.add_argument('--model', default='125m')
164 | parser.add_argument('--strategy',
165 | choices=[
166 | 'ddp', 'colossalai_gemini', 'colossalai_gemini_cpu', 'colossalai_zero2',
167 | 'colossalai_zero2_cpu', 'colossalai_zero1', 'colossalai_zero1_cpu'
168 | ],
169 | default='ddp')
170 | parser.add_argument('--num_episodes', type=int, default=3)
171 | parser.add_argument('--max_timesteps', type=int, default=8)
172 | parser.add_argument('--update_timesteps', type=int, default=8)
173 | parser.add_argument('--max_epochs', type=int, default=3)
174 | parser.add_argument('--train_batch_size', type=int, default=8)
175 | parser.add_argument('--experience_batch_size', type=int, default=8)
176 | parser.add_argument('--lora_rank', type=int, default=4)
177 | parser.add_argument('--cuda_mem_frac', type=float, default=1.0)
178 | args = parser.parse_args()
179 | main(args)
180 |
--------------------------------------------------------------------------------
/code/chatgpt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/__init__.py
--------------------------------------------------------------------------------
/code/chatgpt/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | from .reward_dataset import RewardDataset
2 | from .utils import is_rank_0
3 |
4 | __all__ = ['RewardDataset', 'is_rank_0']
5 |
--------------------------------------------------------------------------------
/code/chatgpt/dataset/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/dataset/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/dataset/__pycache__/reward_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/dataset/__pycache__/reward_dataset.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/dataset/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/dataset/__pycache__/utils.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/dataset/reward_dataset.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | from torch.utils.data import Dataset
4 | from tqdm import tqdm
5 |
6 | from .utils import is_rank_0
7 |
8 |
9 | class RewardDataset(Dataset):
10 | """
11 | Dataset for reward model
12 |
13 | Args:
14 | dataset: dataset for reward model
15 | tokenizer: tokenizer for reward model
16 | max_length: max length of input
17 | """
18 |
19 | def __init__(self, dataset, tokenizer: Callable, max_length: int) -> None:
20 | super().__init__()
21 | self.chosen = []
22 | self.reject = []
23 | for data in tqdm(dataset, disable=not is_rank_0()):
24 | prompt = data['prompt']
25 |
26 | chosen = prompt + data['chosen'] + tokenizer.eos_token #"<|endoftext|>"
27 | chosen_token = tokenizer(chosen,
28 | max_length=max_length,
29 | padding="max_length",
30 | truncation=True,
31 | return_tensors="pt")
32 | self.chosen.append({
33 | "input_ids": chosen_token['input_ids'],
34 | "attention_mask": chosen_token['attention_mask']
35 | })
36 |
37 | reject = prompt + data['rejected'] + tokenizer.eos_token
38 | reject_token = tokenizer(reject,
39 | max_length=max_length,
40 | padding="max_length",
41 | truncation=True,
42 | return_tensors="pt")
43 | self.reject.append({
44 | "input_ids": reject_token['input_ids'],
45 | "attention_mask": reject_token['attention_mask']
46 | })
47 |
48 | def __len__(self):
49 | length = len(self.chosen)
50 | return length
51 |
52 | def __getitem__(self, idx):
53 | return self.chosen[idx]["input_ids"], self.chosen[idx]["attention_mask"], self.reject[idx]["input_ids"], self.reject[idx]["attention_mask"]
54 |
--------------------------------------------------------------------------------
/code/chatgpt/dataset/utils.py:
--------------------------------------------------------------------------------
1 | import torch.distributed as dist
2 |
3 |
4 | def is_rank_0() -> bool:
5 | return not dist.is_initialized() or dist.get_rank() == 0
6 |
--------------------------------------------------------------------------------
/code/chatgpt/experience_maker/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import Experience, ExperienceMaker
2 | from .naive import NaiveExperienceMaker
3 |
4 | __all__ = ['Experience', 'ExperienceMaker', 'NaiveExperienceMaker']
5 |
--------------------------------------------------------------------------------
/code/chatgpt/experience_maker/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/experience_maker/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/experience_maker/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/experience_maker/__pycache__/base.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/experience_maker/__pycache__/naive.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/experience_maker/__pycache__/naive.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/experience_maker/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from dataclasses import dataclass
3 | from typing import Optional
4 |
5 | import torch
6 | import torch.nn as nn
7 | from chatgpt.models.base import Actor
8 |
9 |
10 | @dataclass
11 | class Experience:
12 | """Experience is a batch of data.
13 | These data should have the same sequence length and number of actions.
14 | Left padding for sequences is applied.
15 |
16 | Shapes of each tensor:
17 | sequences: (B, S)
18 | action_log_probs: (B, A)
19 | values: (B)
20 | reward: (B)
21 | advantages: (B)
22 | attention_mask: (B, S)
23 | action_mask: (B, A)
24 |
25 | "A" is the number of actions.
26 | """
27 | sequences: torch.Tensor
28 | action_log_probs: torch.Tensor
29 | values: torch.Tensor
30 | reward: torch.Tensor
31 | advantages: torch.Tensor
32 | attention_mask: Optional[torch.LongTensor]
33 | action_mask: Optional[torch.BoolTensor]
34 |
35 | @torch.no_grad()
36 | def to_device(self, device: torch.device) -> None:
37 | self.sequences = self.sequences.to(device)
38 | self.action_log_probs = self.action_log_probs.to(device)
39 | self.values = self.values.to(device)
40 | self.reward = self.reward.to(device)
41 | self.advantages = self.advantages.to(device)
42 | if self.attention_mask is not None:
43 | self.attention_mask = self.attention_mask.to(device)
44 | if self.action_mask is not None:
45 | self.action_mask = self.action_mask.to(device)
46 |
47 | def pin_memory(self):
48 | self.sequences = self.sequences.pin_memory()
49 | self.action_log_probs = self.action_log_probs.pin_memory()
50 | self.values = self.values.pin_memory()
51 | self.reward = self.reward.pin_memory()
52 | self.advantages = self.advantages.pin_memory()
53 | if self.attention_mask is not None:
54 | self.attention_mask = self.attention_mask.pin_memory()
55 | if self.action_mask is not None:
56 | self.action_mask = self.action_mask.pin_memory()
57 | return self
58 |
59 |
60 | class ExperienceMaker(ABC):
61 |
62 | def __init__(self,
63 | actor: Actor,
64 | critic: nn.Module,
65 | reward_model: nn.Module,
66 | initial_model: Actor,
67 | kl_coef: float = 0.1) -> None:
68 | super().__init__()
69 | self.actor = actor
70 | self.critic = critic
71 | self.reward_model = reward_model
72 | self.initial_model = initial_model
73 | self.kl_coef = kl_coef
74 |
75 | @abstractmethod
76 | def make_experience(self, input_ids: torch.Tensor, **generate_kwargs) -> Experience:
77 | pass
78 |
--------------------------------------------------------------------------------
/code/chatgpt/experience_maker/naive.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from chatgpt.models.utils import compute_reward, normalize
3 |
4 | from .base import Experience, ExperienceMaker
5 |
6 |
7 | class NaiveExperienceMaker(ExperienceMaker):
8 | """
9 | Naive experience maker.
10 | """
11 |
12 | @torch.no_grad()
13 | def make_experience(self, input_ids: torch.Tensor, **generate_kwargs) -> Experience:
14 | self.actor.eval()
15 | self.critic.eval()
16 | self.initial_model.eval()
17 | self.reward_model.eval()
18 |
19 | sequences, attention_mask, action_mask = self.actor.generate(input_ids,
20 | return_action_mask=True,
21 | **generate_kwargs)
22 | num_actions = action_mask.size(1)
23 |
24 | action_log_probs = self.actor(sequences, num_actions, attention_mask)
25 | base_action_log_probs = self.initial_model(sequences, num_actions, attention_mask)
26 | value = self.critic(sequences, action_mask, attention_mask)
27 | r = self.reward_model(sequences, attention_mask)
28 |
29 | reward = compute_reward(r, self.kl_coef, action_log_probs, base_action_log_probs, action_mask=action_mask)
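# Annotation (not part of the original file): compute_reward applies the usual
# RLHF KL-shaped reward, roughly reward = r - kl_coef * KL(actor || initial_model),
# where the KL term is estimated from (action_log_probs - base_action_log_probs)
# over the generated tokens selected by action_mask.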
30 |
31 | advantage = reward - value
32 | # TODO(ver217): maybe normalize adv
33 | if advantage.ndim == 1:
34 | advantage = advantage.unsqueeze(-1)
35 |
36 | return Experience(sequences, action_log_probs, value, reward, advantage, attention_mask, action_mask)
37 |
--------------------------------------------------------------------------------
/code/chatgpt/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import Actor, Critic, RewardModel
2 | from .loss import PairWiseLoss, PolicyLoss, PPOPtxActorLoss, ValueLoss
3 |
4 | __all__ = ['Actor', 'Critic', 'RewardModel', 'PolicyLoss', 'ValueLoss', 'PPOPtxActorLoss', 'PairWiseLoss']
5 |
--------------------------------------------------------------------------------
/code/chatgpt/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/__pycache__/generation.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/__pycache__/generation.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/__pycache__/generation_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/__pycache__/generation_utils.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/__pycache__/lora.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/__pycache__/lora.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/__pycache__/loss.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/__pycache__/loss.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/__pycache__/utils.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/base/__init__.py:
--------------------------------------------------------------------------------
1 | from .actor import Actor
2 | from .critic import Critic
3 | from .reward_model import RewardModel
4 |
5 | __all__ = ['Actor', 'Critic', 'RewardModel']
6 |
--------------------------------------------------------------------------------
/code/chatgpt/models/base/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/base/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/base/__pycache__/actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/base/__pycache__/actor.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/base/__pycache__/critic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/base/__pycache__/critic.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/base/__pycache__/reward_model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/base/__pycache__/reward_model.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/base/actor.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Tuple, Union
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 | from ..generation import generate
8 | from ..lora import LoRAModule
9 | from ..utils import log_probs_from_logits
10 |
11 |
12 | class Actor(LoRAModule):
13 | """
14 | Actor model base class.
15 |
16 | Args:
17 | model (nn.Module): Actor Model.
18 | lora_rank (int): LoRA rank.
19 | lora_train_bias (str): LoRA bias training mode.
20 | """
21 |
22 | def __init__(self, model: nn.Module, lora_rank: int = 0, lora_train_bias: str = 'none') -> None:
23 | super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
24 | self.model = model
25 | self.convert_to_lora()
26 |
27 | @torch.no_grad()
28 | def generate(
29 | self,
30 | input_ids: torch.Tensor,
31 | return_action_mask: bool = True,
32 | **kwargs
33 | ) -> Union[Tuple[torch.LongTensor, torch.LongTensor], Tuple[torch.LongTensor, torch.LongTensor, torch.BoolTensor]]:
34 | sequences = generate(self.model, input_ids, **kwargs)
35 | attention_mask = None
36 | pad_token_id = kwargs.get('pad_token_id', None)
37 | if pad_token_id is not None:
38 | attention_mask = sequences.not_equal(pad_token_id).to(dtype=torch.long, device=sequences.device)
39 | if not return_action_mask:
40 | return sequences, attention_mask, None
41 | input_len = input_ids.size(1)
42 | eos_token_id = kwargs.get('eos_token_id', None)
43 | if eos_token_id is None:
44 | action_mask = torch.ones_like(sequences, dtype=torch.bool)
45 | else:
46 | # left padding may be applied, only mask action
47 | action_mask = (sequences[:, input_len:] == eos_token_id).cumsum(dim=-1) == 0
48 | action_mask = F.pad(action_mask, (1 + input_len, -1), value=True) # include eos token and input
49 | action_mask[:, :input_len] = False
50 | action_mask = action_mask[:, 1:]
51 | return sequences, attention_mask, action_mask[:, -(sequences.size(1) - input_len):]
52 |
53 | def forward(self,
54 | sequences: torch.LongTensor,
55 | num_actions: int,
56 | attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
57 | """Returns action log probs
58 | """
59 | output = self.model(sequences, attention_mask=attention_mask)
60 | logits = output['logits']
61 | log_probs = log_probs_from_logits(logits[:, :-1, :], sequences[:, 1:])
62 | return log_probs[:, -num_actions:]
63 |
--------------------------------------------------------------------------------
/code/chatgpt/models/base/critic.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 | from ..lora import LoRAModule
7 | from ..utils import masked_mean
8 |
9 |
10 | class Critic(LoRAModule):
11 | """
12 | Critic model base class.
13 |
14 | Args:
15 | model (nn.Module): Critic model.
16 | value_head (nn.Module): Value head to get value.
17 | lora_rank (int): LoRA rank.
18 | lora_train_bias (str): LoRA bias training mode.
19 | """
20 |
21 | def __init__(
22 | self,
23 | model: nn.Module,
24 | value_head: nn.Module,
25 | lora_rank: int = 0,
26 | lora_train_bias: str = 'none',
27 | use_action_mask: bool = False,
28 | ) -> None:
29 |
30 | super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
31 | self.model = model
32 | self.value_head = value_head
33 | self.use_action_mask = use_action_mask
34 | self.convert_to_lora()
35 |
36 | def forward(self,
37 | sequences: torch.LongTensor,
38 | action_mask: Optional[torch.Tensor] = None,
39 | attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
40 | outputs = self.model(sequences, attention_mask=attention_mask)
41 | last_hidden_states = outputs['last_hidden_state']
42 |
43 | values = self.value_head(last_hidden_states).squeeze(-1)
44 |
45 | if action_mask is not None and self.use_action_mask:
46 | num_actions = action_mask.size(1)
47 | prompt_mask = attention_mask[:, :-num_actions]
48 | values = values[:, :-num_actions]
49 | value = masked_mean(values, prompt_mask, dim=1)
50 | return value
51 |
52 | values = values[:, :-1]
53 | value = values.mean(dim=1)
54 | return value
55 |
--------------------------------------------------------------------------------
/code/chatgpt/models/base/reward_model.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 | from ..lora import LoRAModule
7 |
8 |
9 | class RewardModel(LoRAModule):
10 | """
11 | Reward model base class.
12 |
13 | Args:
14 | model (nn.Module): Reward model.
15 | value_head (nn.Module): Value head to get reward score.
16 | lora_rank (int): LoRA rank.
17 | lora_train_bias (str): LoRA bias training mode.
18 | """
19 |
20 | def __init__(self,
21 | model: nn.Module,
22 | value_head: Optional[nn.Module] = None,
23 | lora_rank: int = 0,
24 | lora_train_bias: str = 'none') -> None:
25 | super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
26 | self.model = model
27 | self.convert_to_lora()
28 |
29 | if value_head is not None:
30 | if value_head.out_features != 1:
31 | raise ValueError("The value head of reward model's output dim should be 1!")
32 | self.value_head = value_head
33 | else:
34 | self.value_head = nn.Linear(model.config.n_embd, 1)
35 |
36 | def forward(self, sequences: torch.LongTensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
37 | outputs = self.model(sequences, attention_mask=attention_mask)
38 | last_hidden_states = outputs['last_hidden_state']
39 | values = self.value_head(last_hidden_states)[:, :-1]
40 | value = values.mean(dim=1).squeeze(1) # ensure shape is (B)
41 | return value
42 |
--------------------------------------------------------------------------------
/code/chatgpt/models/bloom/__init__.py:
--------------------------------------------------------------------------------
1 | from .bloom_actor import BLOOMActor
2 | from .bloom_critic import BLOOMCritic
3 | from .bloom_rm import BLOOMRM
4 |
5 | __all__ = ['BLOOMActor', 'BLOOMCritic', 'BLOOMRM']
6 |
--------------------------------------------------------------------------------
/code/chatgpt/models/bloom/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/bloom/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/bloom/__pycache__/bloom_actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/bloom/__pycache__/bloom_actor.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/bloom/__pycache__/bloom_critic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/bloom/__pycache__/bloom_critic.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/bloom/__pycache__/bloom_rm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/bloom/__pycache__/bloom_rm.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/bloom/bloom_actor.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch
4 | from transformers import BloomConfig, BloomForCausalLM, BloomModel
5 |
6 | from ..base import Actor
7 |
8 |
9 | class BLOOMActor(Actor):
10 | """
11 | BLOOM Actor model.
12 |
13 | Args:
14 | pretrained (str): Pretrained model name or path.
15 | config (BloomConfig): Model config.
16 | checkpoint (bool): Enable gradient checkpointing.
17 | lora_rank (int): LoRA rank.
18 | lora_train_bias (str): LoRA bias training mode.
19 | """
20 |
21 | def __init__(self,
22 | pretrained: str = None,
23 | config: Optional[BloomConfig] = None,
24 | checkpoint: bool = False,
25 | lora_rank: int = 0,
26 | lora_train_bias: str = 'none') -> None:
27 | if pretrained is not None:
28 | model = BloomForCausalLM.from_pretrained(pretrained)
29 | elif config is not None:
30 | model = BloomForCausalLM(config)
31 | else:
32 | model = BloomForCausalLM(BloomConfig())
33 | if checkpoint:
34 | model.gradient_checkpointing_enable()
35 | super().__init__(model, lora_rank, lora_train_bias)
36 |
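The pretrained/config/default fallback seen here is repeated by every wrapper below (BLOOM/GPT/OPT actor, critic and reward model). A construction sketch, assuming the package import path and an arbitrary tiny config:

    from transformers import BloomConfig

    from chatgpt.models.bloom import BLOOMActor    # assumed export path

    tiny_config = BloomConfig(hidden_size=64, n_layer=2, n_head=2)
    actor = BLOOMActor(config=tiny_config, lora_rank=4)          # random init, LoRA rank 4
    # actor = BLOOMActor(pretrained='bigscience/bloom-560m')     # or load published weights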
--------------------------------------------------------------------------------
/code/chatgpt/models/bloom/bloom_critic.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch
4 | import torch.nn as nn
5 | from transformers import BloomConfig, BloomForCausalLM, BloomModel
6 |
7 | from ..base import Critic
8 |
9 |
10 | class BLOOMCritic(Critic):
11 | """
12 | BLOOM Critic model.
13 |
14 | Args:
15 | pretrained (str): Pretrained model name or path.
16 | config (BloomConfig): Model config.
17 | checkpoint (bool): Enable gradient checkpointing.
18 | lora_rank (int): LoRA rank.
19 | lora_train_bias (str): LoRA bias training mode.
20 | """
21 |
22 | def __init__(self,
23 | pretrained: str = None,
24 | config: Optional[BloomConfig] = None,
25 | checkpoint: bool = False,
26 | lora_rank: int = 0,
27 | lora_train_bias: str = 'none',
28 | **kwargs) -> None:
29 | if pretrained is not None:
30 | model = BloomModel.from_pretrained(pretrained)
31 | elif config is not None:
32 | model = BloomModel(config)
33 | else:
34 | model = BloomModel(BloomConfig())
35 | if checkpoint:
36 | model.gradient_checkpointing_enable()
37 | value_head = nn.Linear(model.config.hidden_size, 1)
38 | super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs)
39 |
--------------------------------------------------------------------------------
/code/chatgpt/models/bloom/bloom_rm.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch.nn as nn
4 | from transformers import BloomConfig, BloomForCausalLM, BloomModel
5 |
6 | from ..base import RewardModel
7 |
8 |
9 | class BLOOMRM(RewardModel):
10 | """
11 | BLOOM Reward model.
12 |
13 | Args:
14 | pretrained (str): Pretrained model name or path.
15 | config (BloomConfig): Model config.
16 | checkpoint (bool): Enable gradient checkpointing.
17 | lora_rank (int): LoRA rank.
18 | lora_train_bias (str): LoRA bias training mode.
19 | """
20 |
21 | def __init__(self,
22 | pretrained: str = None,
23 | config: Optional[BloomConfig] = None,
24 | checkpoint: bool = False,
25 | lora_rank: int = 0,
26 | lora_train_bias: str = 'none') -> None:
27 | if pretrained is not None:
28 | model = BloomModel.from_pretrained(pretrained)
29 | elif config is not None:
30 | model = BloomModel(config)
31 | else:
32 | model = BloomModel(BloomConfig())
33 | if checkpoint:
34 | model.gradient_checkpointing_enable()
35 | value_head = nn.Linear(model.config.hidden_size, 1)
36 | super().__init__(model, value_head, lora_rank, lora_train_bias)
37 |
--------------------------------------------------------------------------------
/code/chatgpt/models/generation.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Callable, Optional
2 |
3 | import torch
4 | import torch.distributed as dist
5 | import torch.nn as nn
6 |
7 | try:
8 | from transformers.generation_logits_process import (
9 | LogitsProcessorList,
10 | TemperatureLogitsWarper,
11 | TopKLogitsWarper,
12 | TopPLogitsWarper,
13 | )
14 | except ImportError:
15 | from transformers.generation import LogitsProcessorList, TemperatureLogitsWarper, TopKLogitsWarper, TopPLogitsWarper
16 |
17 |
18 | def prepare_logits_processor(top_k: Optional[int] = None,
19 | top_p: Optional[float] = None,
20 | temperature: Optional[float] = None) -> LogitsProcessorList:
21 | processor_list = LogitsProcessorList()
22 | if temperature is not None and temperature != 1.0:
23 | processor_list.append(TemperatureLogitsWarper(temperature))
24 | if top_k is not None and top_k != 0:
25 | processor_list.append(TopKLogitsWarper(top_k))
26 | if top_p is not None and top_p < 1.0:
27 | processor_list.append(TopPLogitsWarper(top_p))
28 | return processor_list
29 |
30 |
31 | def _is_sequence_finished(unfinished_sequences: torch.Tensor) -> bool:
32 | if dist.is_initialized() and dist.get_world_size() > 1:
33 | # consider DP
34 | unfinished_sequences = unfinished_sequences.clone()
35 | dist.all_reduce(unfinished_sequences)
36 | return unfinished_sequences.max() == 0
37 |
38 |
39 | def sample(model: nn.Module,
40 | input_ids: torch.Tensor,
41 | max_length: int,
42 | early_stopping: bool = False,
43 | eos_token_id: Optional[int] = None,
44 | pad_token_id: Optional[int] = None,
45 | top_k: Optional[int] = None,
46 | top_p: Optional[float] = None,
47 | temperature: Optional[float] = None,
48 | prepare_inputs_fn: Optional[Callable[[torch.Tensor, Any], dict]] = None,
49 | update_model_kwargs_fn: Optional[Callable[[dict, Any], dict]] = None,
50 | **model_kwargs) -> torch.Tensor:
51 | if input_ids.size(1) >= max_length:
52 | return input_ids
53 |
54 | logits_processor = prepare_logits_processor(top_k, top_p, temperature)
55 | unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
56 |
57 | for _ in range(input_ids.size(1), max_length):
58 | model_inputs = prepare_inputs_fn(input_ids, **model_kwargs) if prepare_inputs_fn is not None else {
59 | 'input_ids': input_ids
60 | }
61 | outputs = model(**model_inputs)
62 |
63 | next_token_logits = outputs['logits'][:, -1, :]
64 | # pre-process distribution
65 | next_token_logits = logits_processor(input_ids, next_token_logits)
66 | # sample
67 | probs = torch.softmax(next_token_logits, dim=-1, dtype=torch.float)
68 | next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
69 |
70 | # finished sentences should have their next token be a padding token
71 | if eos_token_id is not None:
72 | if pad_token_id is None:
73 | raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.")
74 | next_tokens = next_tokens * unfinished_sequences + pad_token_id * (1 - unfinished_sequences)
75 |
76 | # update generated ids, model inputs for next step
77 | input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
78 | if update_model_kwargs_fn is not None:
79 | model_kwargs = update_model_kwargs_fn(outputs, **model_kwargs)
80 |
81 | # if eos_token was found in one sentence, set sentence to finished
82 | if eos_token_id is not None:
83 | unfinished_sequences = unfinished_sequences.mul((next_tokens != eos_token_id).long())
84 |
85 | # stop when each sentence is finished if early_stopping=True
86 | if early_stopping and _is_sequence_finished(unfinished_sequences):
87 | break
88 |
89 | return input_ids
90 |
91 |
92 | def generate(model: nn.Module,
93 | input_ids: torch.Tensor,
94 | max_length: int,
95 | num_beams: int = 1,
96 | do_sample: bool = True,
97 | early_stopping: bool = False,
98 | eos_token_id: Optional[int] = None,
99 | pad_token_id: Optional[int] = None,
100 | top_k: Optional[int] = None,
101 | top_p: Optional[float] = None,
102 | temperature: Optional[float] = None,
103 | prepare_inputs_fn: Optional[Callable[[torch.Tensor, Any], dict]] = None,
104 | update_model_kwargs_fn: Optional[Callable[[dict, Any], dict]] = None,
105 | **model_kwargs) -> torch.Tensor:
106 | """Generate token sequence. The returned sequence is input_ids + generated_tokens.
107 |
108 | Args:
109 | model (nn.Module): model
110 | input_ids (torch.Tensor): input sequence
111 | max_length (int): max length of the returned sequence
112 | num_beams (int, optional): number of beams. Defaults to 1.
113 | do_sample (bool, optional): whether to do sample. Defaults to True.
114 | early_stopping (bool, optional): if True, the sequence length may be smaller than max_length due to finding eos. Defaults to False.
115 | eos_token_id (Optional[int], optional): end of sequence token id. Defaults to None.
116 | pad_token_id (Optional[int], optional): pad token id. Defaults to None.
117 | top_k (Optional[int], optional): the number of highest probability vocabulary tokens to keep for top-k-filtering. Defaults to None.
118 | top_p (Optional[float], optional): If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. Defaults to None.
119 | temperature (Optional[float], optional): The value used to module the next token probabilities. Defaults to None.
120 | prepare_inputs_fn (Optional[Callable[[torch.Tensor, Any], dict]], optional): Function to preprocess model inputs. Arguments of this function should be input_ids and model_kwargs. Defaults to None.
121 | update_model_kwargs_fn (Optional[Callable[[dict, Any], dict]], optional): Function to update model_kwargs based on outputs. Arguments of this function should be outputs and model_kwargs. Defaults to None.
122 | """
123 | is_greedy_gen_mode = ((num_beams == 1) and do_sample is False)
124 | is_sample_gen_mode = ((num_beams == 1) and do_sample is True)
125 | is_beam_gen_mode = ((num_beams > 1) and do_sample is False)
126 | if is_greedy_gen_mode:
127 | # run greedy search
128 | raise NotImplementedError
129 | elif is_sample_gen_mode:
130 | # run sample
131 | return sample(model,
132 | input_ids,
133 | max_length,
134 | early_stopping=early_stopping,
135 | eos_token_id=eos_token_id,
136 | pad_token_id=pad_token_id,
137 | top_k=top_k,
138 | top_p=top_p,
139 | temperature=temperature,
140 | prepare_inputs_fn=prepare_inputs_fn,
141 | update_model_kwargs_fn=update_model_kwargs_fn,
142 | **model_kwargs)
143 | elif is_beam_gen_mode:
144 | raise NotImplementedError
145 | else:
146 | raise ValueError("Unsupported generation mode")
147 |
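A sampling sketch, assuming the import path chatgpt.models.generation and a publicly available GPT-2 checkpoint; since pad_token_id is required whenever eos_token_id is set, the eos token is reused as the pad token here.

    import torch
    from transformers import GPT2LMHeadModel, GPT2Tokenizer

    from chatgpt.models.generation import generate   # assumed import path

    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel.from_pretrained('gpt2').eval()

    input_ids = tokenizer('The quick brown fox', return_tensors='pt')['input_ids']
    with torch.no_grad():
        sequences = generate(model,
                             input_ids,
                             max_length=32,
                             do_sample=True,
                             early_stopping=True,
                             eos_token_id=tokenizer.eos_token_id,
                             pad_token_id=tokenizer.eos_token_id,   # GPT-2 has no pad token
                             top_k=50,
                             top_p=0.95,
                             temperature=0.7)
    print(tokenizer.decode(sequences[0]))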
--------------------------------------------------------------------------------
/code/chatgpt/models/generation_utils.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch
4 |
5 |
6 | def gpt_prepare_inputs_fn(input_ids: torch.Tensor, past: Optional[torch.Tensor] = None, **kwargs) -> dict:
7 | token_type_ids = kwargs.get("token_type_ids", None)
8 |     # only use the last token of input_ids if past is defined in kwargs
9 | if past:
10 | input_ids = input_ids[:, -1].unsqueeze(-1)
11 | if token_type_ids is not None:
12 | token_type_ids = token_type_ids[:, -1].unsqueeze(-1)
13 |
14 | attention_mask = kwargs.get("attention_mask", None)
15 | position_ids = kwargs.get("position_ids", None)
16 |
17 | if attention_mask is not None and position_ids is None:
18 | # create position_ids on the fly for batch generation
19 | position_ids = attention_mask.long().cumsum(-1) - 1
20 | position_ids.masked_fill_(attention_mask == 0, 1)
21 | if past:
22 | position_ids = position_ids[:, -1].unsqueeze(-1)
23 | else:
24 | position_ids = None
25 | return {
26 | "input_ids": input_ids,
27 | "past_key_values": past,
28 | "use_cache": kwargs.get("use_cache"),
29 | "position_ids": position_ids,
30 | "attention_mask": attention_mask,
31 | "token_type_ids": token_type_ids,
32 | }
33 |
34 |
35 | def update_model_kwargs_fn(outputs: dict, **model_kwargs) -> dict:
36 | if "past_key_values" in outputs:
37 | model_kwargs["past"] = outputs["past_key_values"]
38 | else:
39 | model_kwargs["past"] = None
40 |
41 | # update token_type_ids with last value
42 | if "token_type_ids" in model_kwargs:
43 | token_type_ids = model_kwargs["token_type_ids"]
44 | model_kwargs["token_type_ids"] = torch.cat([token_type_ids, token_type_ids[:, -1].unsqueeze(-1)], dim=-1)
45 |
46 | # update attention mask
47 | if "attention_mask" in model_kwargs:
48 | attention_mask = model_kwargs["attention_mask"]
49 | model_kwargs["attention_mask"] = torch.cat(
50 | [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1)
51 |
52 | return model_kwargs
53 |
54 |
55 | def opt_prepare_inputs_fn(input_ids: torch.Tensor,
56 | past: Optional[torch.Tensor] = None,
57 | attention_mask: Optional[torch.Tensor] = None,
58 | use_cache: Optional[bool] = None,
59 | **kwargs) -> dict:
60 | # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly
61 | if attention_mask is None:
62 | attention_mask = input_ids.new_ones(input_ids.shape)
63 |
64 | if past:
65 | input_ids = input_ids[:, -1:]
66 | # first step, decoder_cached_states are empty
67 | return {
68 | "input_ids": input_ids, # encoder_outputs is defined. input_ids not needed
69 | "attention_mask": attention_mask,
70 | "past_key_values": past,
71 | "use_cache": use_cache,
72 | }
73 |
74 |
75 | def bloom_prepare_inputs_fn(input_ids: torch.Tensor,
76 | past: Optional[torch.Tensor] = None,
77 | attention_mask: Optional[torch.Tensor] = None,
78 | use_cache: Optional[bool] = None,
79 | **kwargs) -> dict:
80 | # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly
81 | if attention_mask is None:
82 | attention_mask = input_ids.new_ones(input_ids.shape)
83 |
84 | if past:
85 | input_ids = input_ids[:, -1:]
86 | # first step, decoder_cached_states are empty
87 | return {
88 | "input_ids": input_ids, # encoder_outputs is defined. input_ids not needed
89 | "attention_mask": attention_mask,
90 | "past_key_values": past,
91 | "use_cache": use_cache,
92 | }
93 |
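These helpers plug into generate() above so that decoding reuses the KV cache. A sketch under the same assumptions as the previous example (gpt2 checkpoint, assumed import paths):

    import torch
    from transformers import GPT2LMHeadModel, GPT2Tokenizer

    from chatgpt.models.generation import generate                       # assumed import paths
    from chatgpt.models.generation_utils import gpt_prepare_inputs_fn, update_model_kwargs_fn

    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel.from_pretrained('gpt2').eval()
    input_ids = tokenizer('Hello', return_tensors='pt')['input_ids']

    with torch.no_grad():
        sequences = generate(model,
                             input_ids,
                             max_length=24,
                             do_sample=True,
                             eos_token_id=tokenizer.eos_token_id,
                             pad_token_id=tokenizer.eos_token_id,
                             prepare_inputs_fn=gpt_prepare_inputs_fn,
                             update_model_kwargs_fn=update_model_kwargs_fn,
                             use_cache=True)                              # forwarded via **model_kwargs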
--------------------------------------------------------------------------------
/code/chatgpt/models/gpt/__init__.py:
--------------------------------------------------------------------------------
1 | from .gpt_actor import GPTActor
2 | from .gpt_critic import GPTCritic
3 | from .gpt_rm import GPTRM
4 |
5 | __all__ = ['GPTActor', 'GPTCritic', 'GPTRM']
6 |
--------------------------------------------------------------------------------
/code/chatgpt/models/gpt/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/gpt/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/gpt/__pycache__/gpt_actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/gpt/__pycache__/gpt_actor.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/gpt/__pycache__/gpt_critic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/gpt/__pycache__/gpt_critic.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/gpt/__pycache__/gpt_rm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/gpt/__pycache__/gpt_rm.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/gpt/gpt_actor.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config
4 | from transformers.models.gpt2.modeling_gpt2 import GPT2LMHeadModel
5 |
6 | from ..base import Actor
7 |
8 |
9 | class GPTActor(Actor):
10 | """
11 | GPT Actor model.
12 |
13 | Args:
14 | pretrained (str): Pretrained model name or path.
15 | config (GPT2Config): Model config.
16 | checkpoint (bool): Enable gradient checkpointing.
17 |         lora_rank (int): Rank of the LoRA layer.
18 |         lora_train_bias (str): Bias training strategy for the LoRA layer.
19 | """
20 |
21 | def __init__(self,
22 | pretrained: Optional[str] = None,
23 | config: Optional[GPT2Config] = None,
24 | checkpoint: bool = False,
25 | lora_rank: int = 0,
26 | lora_train_bias: str = 'none') -> None:
27 | if pretrained is not None:
28 | model = GPT2LMHeadModel.from_pretrained(pretrained)
29 | elif config is not None:
30 | model = GPT2LMHeadModel(config)
31 | else:
32 | model = GPT2LMHeadModel(GPT2Config())
33 | if checkpoint:
34 | model.gradient_checkpointing_enable()
35 | super().__init__(model, lora_rank, lora_train_bias)
36 |
--------------------------------------------------------------------------------
/code/chatgpt/models/gpt/gpt_critic.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch.nn as nn
4 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config
5 | from transformers.models.gpt2.modeling_gpt2 import GPT2Model
6 |
7 | from ..base import Critic
8 |
9 |
10 | class GPTCritic(Critic):
11 | """
12 | GPT Critic model.
13 |
14 | Args:
15 | pretrained (str): Pretrained model name or path.
16 | config (GPT2Config): Model config.
17 | checkpoint (bool): Enable gradient checkpointing.
18 |         lora_rank (int): Rank of the LoRA decomposition.
19 | lora_train_bias (str): LoRA bias training mode.
20 | """
21 |
22 | def __init__(self,
23 | pretrained: Optional[str] = None,
24 | config: Optional[GPT2Config] = None,
25 | checkpoint: bool = False,
26 | lora_rank: int = 0,
27 | lora_train_bias: str = 'none') -> None:
28 | if pretrained is not None:
29 | model = GPT2Model.from_pretrained(pretrained)
30 | elif config is not None:
31 | model = GPT2Model(config)
32 | else:
33 | model = GPT2Model(GPT2Config())
34 | if checkpoint:
35 | model.gradient_checkpointing_enable()
36 | value_head = nn.Linear(model.config.n_embd, 1)
37 | super().__init__(model, value_head, lora_rank, lora_train_bias)
38 |
--------------------------------------------------------------------------------
/code/chatgpt/models/gpt/gpt_rm.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch.nn as nn
4 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config
5 | from transformers.models.gpt2.modeling_gpt2 import GPT2Model
6 |
7 | from ..base import RewardModel
8 |
9 |
10 | class GPTRM(RewardModel):
11 | """
12 | GPT Reward model.
13 |
14 | Args:
15 | pretrained (str): Pretrained model name or path.
16 | config (GPT2Config): Model config.
17 | checkpoint (bool): Enable gradient checkpointing.
18 | lora_rank (int): Rank of the low-rank approximation.
19 | lora_train_bias (str): LoRA bias training mode.
20 | """
21 |
22 | def __init__(self,
23 | pretrained: Optional[str] = None,
24 | config: Optional[GPT2Config] = None,
25 | checkpoint: bool = False,
26 | lora_rank: int = 0,
27 | lora_train_bias: str = 'none') -> None:
28 |
29 | if pretrained is not None:
30 | model = GPT2Model.from_pretrained(pretrained)
31 | elif config is not None:
32 | model = GPT2Model(config)
33 | else:
34 | model = GPT2Model(GPT2Config())
35 |
36 | if checkpoint:
37 | model.gradient_checkpointing_enable()
38 |
39 | value_head = nn.Linear(model.config.n_embd, 1)
40 | super().__init__(model, value_head, lora_rank, lora_train_bias)
41 |
--------------------------------------------------------------------------------
/code/chatgpt/models/lora.py:
--------------------------------------------------------------------------------
1 | import math
2 | from typing import Optional
3 |
4 | import loralib as lora
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 |
10 | class LoraLinear(lora.LoRALayer, nn.Module):
11 |     """Replaces in-place ops with out-of-place ops to fit Gemini. Converts a torch.nn.Linear to a LoraLinear.
12 | """
13 |
14 | def __init__(
15 | self,
16 | weight: nn.Parameter,
17 | bias: Optional[nn.Parameter],
18 | r: int = 0,
19 | lora_alpha: int = 1,
20 | lora_dropout: float = 0.,
21 | fan_in_fan_out: bool = False, # Set this to True if the layer to replace stores weight like (fan_in, fan_out)
22 | merge_weights: bool = True,
23 | ):
24 | nn.Module.__init__(self)
25 | lora.LoRALayer.__init__(self,
26 | r=r,
27 | lora_alpha=lora_alpha,
28 | lora_dropout=lora_dropout,
29 | merge_weights=merge_weights)
30 | self.weight = weight
31 | self.bias = bias
32 |
33 | out_features, in_features = weight.shape
34 | self.in_features = in_features
35 | self.out_features = out_features
36 |
37 | self.fan_in_fan_out = fan_in_fan_out
38 | # Actual trainable parameters
39 | if r > 0:
40 | self.lora_A = nn.Parameter(self.weight.new_zeros((r, in_features)))
41 | self.lora_B = nn.Parameter(self.weight.new_zeros((out_features, r)))
42 | self.scaling = self.lora_alpha / self.r
43 | # Freezing the pre-trained weight matrix
44 | self.weight.requires_grad = False
45 | self.reset_parameters()
46 | if fan_in_fan_out:
47 | self.weight.data = self.weight.data.T
48 |
49 | def reset_parameters(self):
50 | if hasattr(self, 'lora_A'):
51 | # initialize A the same way as the default for nn.Linear and B to zero
52 | nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))
53 | nn.init.zeros_(self.lora_B)
54 |
55 | def train(self, mode: bool = True):
56 |
57 | def T(w):
58 | return w.T if self.fan_in_fan_out else w
59 |
60 | nn.Module.train(self, mode)
61 | if self.merge_weights and self.merged:
62 | # Make sure that the weights are not merged
63 | if self.r > 0:
64 | self.weight.data -= T(self.lora_B @ self.lora_A) * self.scaling
65 | self.merged = False
66 |
67 | def eval(self):
68 |
69 | def T(w):
70 | return w.T if self.fan_in_fan_out else w
71 |
72 | nn.Module.eval(self)
73 | if self.merge_weights and not self.merged:
74 | # Merge the weights and mark it
75 | if self.r > 0:
76 | self.weight.data += T(self.lora_B @ self.lora_A) * self.scaling
77 | delattr(self, 'lora_A')
78 | delattr(self, 'lora_B')
79 | self.merged = True
80 |
81 | def forward(self, x: torch.Tensor):
82 |
83 | def T(w):
84 | return w.T if self.fan_in_fan_out else w
85 |
86 | if self.r > 0 and not self.merged:
87 | result = F.linear(x, T(self.weight), bias=self.bias)
88 | if self.r > 0:
89 | result = result + (self.lora_dropout(x) @ self.lora_A.t() @ self.lora_B.t()) * self.scaling
90 | return result
91 | else:
92 | return F.linear(x, T(self.weight), bias=self.bias)
93 |
94 |
95 | def lora_linear_wrapper(linear: nn.Linear, lora_rank: int) -> LoraLinear:
96 | assert lora_rank <= linear.in_features, f'LoRA rank ({lora_rank}) must be less than or equal to in features ({linear.in_features})'
97 | lora_linear = LoraLinear(linear.weight, linear.bias, r=lora_rank, merge_weights=False)
98 | return lora_linear
99 |
100 |
101 | def convert_to_lora_recursively(module: nn.Module, lora_rank: int) -> None:
102 | for name, child in module.named_children():
103 | if isinstance(child, nn.Linear):
104 | setattr(module, name, lora_linear_wrapper(child, lora_rank))
105 | else:
106 | convert_to_lora_recursively(child, lora_rank)
107 |
108 |
109 | class LoRAModule(nn.Module):
110 | """A LoRA module base class. All derived classes should call `convert_to_lora()` at the bottom of `__init__()`.
111 |     This class will convert all torch.nn.Linear layers to LoraLinear layers.
112 |
113 | Args:
114 | lora_rank (int, optional): LoRA rank. 0 means LoRA is not applied. Defaults to 0.
115 |         lora_train_bias (str, optional): Whether LoRA trains biases.
116 | 'none' means it doesn't train biases. 'all' means it trains all biases. 'lora_only' means it only trains biases of LoRA layers.
117 | Defaults to 'none'.
118 | """
119 |
120 | def __init__(self, lora_rank: int = 0, lora_train_bias: str = 'none') -> None:
121 | super().__init__()
122 | self.lora_rank = lora_rank
123 | self.lora_train_bias = lora_train_bias
124 |
125 | def convert_to_lora(self) -> None:
126 | if self.lora_rank <= 0:
127 | return
128 | convert_to_lora_recursively(self, self.lora_rank)
129 | lora.mark_only_lora_as_trainable(self, self.lora_train_bias)
130 |
131 |
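A minimal sketch of the intended subclassing pattern (the module name here is made up, and the import path is assumed): every nn.Linear is swapped for a LoraLinear, and only the LoRA factors stay trainable.

    import torch.nn as nn

    from chatgpt.models.lora import LoRAModule      # assumed import path

    class TinyLoRAMLP(LoRAModule):                  # hypothetical example module
        def __init__(self, lora_rank: int = 4):
            super().__init__(lora_rank=lora_rank, lora_train_bias='none')
            self.net = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 1))
            self.convert_to_lora()                  # call last, as the docstring requires

    mlp = TinyLoRAMLP()
    print([name for name, p in mlp.named_parameters() if p.requires_grad])
    # only the injected lora_A / lora_B parameters remain trainable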
--------------------------------------------------------------------------------
/code/chatgpt/models/loss.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 | from .utils import masked_mean
7 |
8 |
9 | class GPTLMLoss(nn.Module):
10 | """
11 | GPT Language Model Loss
12 | """
13 |
14 | def __init__(self):
15 | super().__init__()
16 | self.loss = nn.CrossEntropyLoss()
17 |
18 | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
19 | shift_logits = logits[..., :-1, :].contiguous()
20 | shift_labels = labels[..., 1:].contiguous()
21 | # Flatten the tokens
22 | return self.loss(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
23 |
24 |
25 | class PolicyLoss(nn.Module):
26 | """
27 | Policy Loss for PPO
28 | """
29 |
30 | def __init__(self, clip_eps: float = 0.2) -> None:
31 | super().__init__()
32 | self.clip_eps = clip_eps
33 |
34 | def forward(self,
35 | log_probs: torch.Tensor,
36 | old_log_probs: torch.Tensor,
37 | advantages: torch.Tensor,
38 | action_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
39 |
40 | ratio = (log_probs - old_log_probs).exp()
41 | surr1 = ratio * advantages
42 | surr2 = ratio.clamp(1 - self.clip_eps, 1 + self.clip_eps) * advantages
43 | loss = -torch.min(surr1, surr2)
44 | if action_mask is not None:
45 | loss = masked_mean(loss, action_mask)
46 | loss = loss.mean()
47 | return loss
48 |
49 |
50 | class ValueLoss(nn.Module):
51 | """
52 | Value Loss for PPO
53 | """
54 |
55 | def __init__(self, clip_eps: float = 0.4) -> None:
56 | super().__init__()
57 | self.clip_eps = clip_eps
58 |
59 | def forward(self,
60 | values: torch.Tensor,
61 | old_values: torch.Tensor,
62 | reward: torch.Tensor,
63 | action_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
64 | values_clipped = old_values + (values - old_values).clamp(-self.clip_eps, self.clip_eps)
65 | surr1 = (values_clipped - reward)**2
66 | surr2 = (values - reward)**2
67 | loss = torch.max(surr1, surr2)
68 | loss = loss.mean()
69 | return loss
70 |
71 |
72 | class PPOPtxActorLoss(nn.Module):
73 | """
74 |     PPO-ptx Actor Loss: the clipped policy loss plus a weighted pretraining LM loss.
75 | 
76 |     TODO: work in progress.
77 | """
78 |
79 | def __init__(self, policy_clip_eps: float = 0.2, pretrain_coef: float = 0.0, pretrain_loss_fn=GPTLMLoss()) -> None:
80 | super().__init__()
81 | self.pretrain_coef = pretrain_coef
82 | self.policy_loss_fn = PolicyLoss(clip_eps=policy_clip_eps)
83 | self.pretrain_loss_fn = pretrain_loss_fn
84 |
85 | def forward(self,
86 | log_probs: torch.Tensor,
87 | old_log_probs: torch.Tensor,
88 | advantages: torch.Tensor,
89 | lm_logits: torch.Tensor,
90 | lm_input_ids: torch.Tensor,
91 | action_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
92 | policy_loss = self.policy_loss_fn(log_probs, old_log_probs, advantages, action_mask=action_mask)
93 | lm_loss = self.pretrain_loss_fn(lm_logits, lm_input_ids)
94 | return policy_loss + self.pretrain_coef * lm_loss
95 |
96 |
97 | class PairWiseLoss(nn.Module):
98 | """
99 | Pairwise Loss for Reward Model
100 | """
101 |
102 | def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Tensor) -> torch.Tensor:
103 | probs = torch.sigmoid(chosen_reward - reject_reward)
104 | log_probs = torch.log(probs)
105 | loss = -log_probs.mean()
106 | return loss
107 |
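A toy check of PairWiseLoss (the numbers are arbitrary and the import path is assumed): the loss is -log(sigmoid(r_chosen - r_rejected)) averaged over the batch, so it shrinks as chosen rewards pull ahead of rejected ones.

    import torch

    from chatgpt.models.loss import PairWiseLoss   # assumed import path

    loss_fn = PairWiseLoss()
    chosen_reward = torch.tensor([2.0, 1.0])
    reject_reward = torch.tensor([0.5, 1.5])
    print(loss_fn(chosen_reward, reject_reward))   # mean(-log(sigmoid([1.5, -0.5]))) ~= 0.59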
--------------------------------------------------------------------------------
/code/chatgpt/models/opt/__init__.py:
--------------------------------------------------------------------------------
1 | from .opt_actor import OPTActor
2 | from .opt_critic import OPTCritic
3 | from .opt_rm import OPTRM
4 |
5 | __all__ = ['OPTActor', 'OPTCritic', 'OPTRM']
6 |
--------------------------------------------------------------------------------
/code/chatgpt/models/opt/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/opt/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/opt/__pycache__/opt_actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/opt/__pycache__/opt_actor.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/opt/__pycache__/opt_critic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/opt/__pycache__/opt_critic.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/opt/__pycache__/opt_rm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/models/opt/__pycache__/opt_rm.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/models/opt/opt_actor.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from transformers.models.opt.configuration_opt import OPTConfig
4 | from transformers.models.opt.modeling_opt import OPTForCausalLM
5 |
6 | from ..base import Actor
7 |
8 |
9 | class OPTActor(Actor):
10 | """
11 | OPT Actor model.
12 |
13 | Args:
14 | pretrained (str): Pretrained model name or path.
15 | config (OPTConfig): Model config.
16 | checkpoint (bool): Enable gradient checkpointing.
17 | lora_rank (int): Rank of the low-rank approximation.
18 | lora_train_bias (str): LoRA bias training mode.
19 | """
20 |
21 | def __init__(self,
22 | pretrained: Optional[str] = None,
23 | config: Optional[OPTConfig] = None,
24 | checkpoint: bool = False,
25 | lora_rank: int = 0,
26 | lora_train_bias: str = 'none') -> None:
27 | if pretrained is not None:
28 | model = OPTForCausalLM.from_pretrained(pretrained)
29 | elif config is not None:
30 | model = OPTForCausalLM(config)
31 | else:
32 | model = OPTForCausalLM(OPTConfig())
33 | if checkpoint:
34 | model.gradient_checkpointing_enable()
35 | super().__init__(model, lora_rank, lora_train_bias)
36 |
--------------------------------------------------------------------------------
/code/chatgpt/models/opt/opt_critic.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch.nn as nn
4 | from transformers.models.opt.configuration_opt import OPTConfig
5 | from transformers.models.opt.modeling_opt import OPTModel
6 |
7 | from ..base import Critic
8 |
9 |
10 | class OPTCritic(Critic):
11 | """
12 | OPT Critic model.
13 |
14 | Args:
15 | pretrained (str): Pretrained model name or path.
16 | config (OPTConfig): Model config.
17 | checkpoint (bool): Enable gradient checkpointing.
18 | lora_rank (int): Rank of the low-rank approximation.
19 | lora_train_bias (str): LoRA bias training mode.
20 | """
21 |
22 | def __init__(self,
23 | pretrained: Optional[str] = None,
24 | config: Optional[OPTConfig] = None,
25 | checkpoint: bool = False,
26 | lora_rank: int = 0,
27 | lora_train_bias: str = 'none',
28 | **kwargs) -> None:
29 | if pretrained is not None:
30 | model = OPTModel.from_pretrained(pretrained)
31 | elif config is not None:
32 | model = OPTModel(config)
33 | else:
34 | model = OPTModel(OPTConfig())
35 | if checkpoint:
36 | model.gradient_checkpointing_enable()
37 | value_head = nn.Linear(model.config.word_embed_proj_dim, 1)
38 | super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs)
39 |
--------------------------------------------------------------------------------
/code/chatgpt/models/opt/opt_rm.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch.nn as nn
4 | from transformers import OPTConfig, OPTModel
5 |
6 | from ..base import RewardModel
7 |
8 |
9 | class OPTRM(RewardModel):
10 | """
11 | OPT Reward model.
12 |
13 | Args:
14 | pretrained (str): Pretrained model name or path.
15 | config (OPTConfig): Model config.
16 | checkpoint (bool): Enable gradient checkpointing.
17 | lora_rank (int): Rank of the low-rank approximation.
18 | lora_train_bias (str): LoRA bias training mode.
19 | """
20 |
21 | def __init__(self,
22 | pretrained: Optional[str] = None,
23 | config: Optional[OPTConfig] = None,
24 | checkpoint: bool = False,
25 | lora_rank: int = 0,
26 | lora_train_bias: str = 'none') -> None:
27 | if pretrained is not None:
28 | model = OPTModel.from_pretrained(pretrained)
29 | elif config is not None:
30 | model = OPTModel(config)
31 | else:
32 | model = OPTModel(OPTConfig())
33 | if checkpoint:
34 | model.gradient_checkpointing_enable()
35 |
36 | value_head = nn.Linear(model.config.word_embed_proj_dim, 1)
37 | super().__init__(model, value_head, lora_rank, lora_train_bias)
38 |
--------------------------------------------------------------------------------
/code/chatgpt/models/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Union
2 |
3 | import loralib as lora
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 |
9 | def compute_approx_kl(log_probs: torch.Tensor,
10 | log_probs_base: torch.Tensor,
11 | action_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
12 | """
13 | Compute the approximate KL divergence between two distributions.
14 | Schulman blog: http://joschu.net/blog/kl-approx.html
15 |
16 | Args:
17 | log_probs: Log probabilities of the new distribution.
18 | log_probs_base: Log probabilities of the base distribution.
19 | action_mask: Mask for actions.
20 | """
21 |
22 | log_ratio = log_probs - log_probs_base
23 | approx_kl = (log_ratio.exp() - 1) - log_ratio
24 | if action_mask is not None:
25 | approx_kl = masked_mean(approx_kl, action_mask, dim=1)
26 | return approx_kl
27 | approx_kl = approx_kl.mean(dim=1)
28 | return approx_kl
29 |
30 |
31 | def compute_reward(r: Union[torch.Tensor, float],
32 | kl_coef: float,
33 | log_probs: torch.Tensor,
34 | log_probs_base: torch.Tensor,
35 | action_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
36 | if kl_coef <= 0.0:
37 | return r
38 | kl = compute_approx_kl(log_probs, log_probs_base, action_mask=action_mask)
39 | reward = r - kl_coef * kl
40 | return reward
41 |
42 |
43 | def log_probs_from_logits(logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
44 | log_probs = F.log_softmax(logits, dim=-1)
45 | log_probs_labels = log_probs.gather(dim=-1, index=labels.unsqueeze(-1))
46 | return log_probs_labels.squeeze(-1)
47 |
48 |
49 | def masked_mean(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) -> torch.Tensor:
50 | tensor = tensor * mask
51 | tensor = tensor.sum(dim=dim)
52 | mask_sum = mask.sum(dim=dim)
53 | mean = tensor / (mask_sum + 1e-8)
54 | return mean
55 |
56 |
57 | def masked_normalize(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1, eps: float = 1e-8) -> torch.Tensor:
58 | tensor = tensor * mask
59 | mean = masked_mean(tensor, mask, dim=dim)
60 | mean_centered = tensor - mean
61 | var = masked_mean(mean_centered**2, mask, dim=dim)
62 | return mean_centered * var.clamp(min=eps).rsqrt()
63 |
64 |
65 | def normalize(tensor: torch.Tensor, dim: int = 0, eps: float = 1e-8) -> torch.Tensor:
66 | mean = tensor.mean(dim)
67 | mean_centered = tensor - mean
68 | var = (mean_centered**2).mean(dim)
69 | norm = mean_centered * var.clamp(min=eps).rsqrt()
70 | return norm
71 |
72 |
73 | def convert_to_lora(model: nn.Module,
74 | input_size: int,
75 | output_size: int,
76 | lora_rank: int = 16,
77 | lora_alpha: int = 1,
78 | lora_dropout: float = 0.,
79 | fan_in_fan_out: bool = False,
80 | merge_weights: bool = True):
81 | if lora_rank > min(input_size, output_size):
82 |         raise ValueError(f"LoRA rank {lora_rank} must be less than or equal to {min(input_size, output_size)}")
83 |
84 | for name, module in model.named_modules():
85 | if isinstance(module, nn.Linear):
86 | module._modules[name] = lora.Linear(input_size,
87 | output_size,
88 | r=lora_rank,
89 | lora_alpha=lora_alpha,
90 | lora_dropout=lora_dropout,
91 | fan_in_fan_out=fan_in_fan_out,
92 | merge_weights=merge_weights)
93 |
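A toy sketch of the KL-shaped reward (all numbers are assumed, as is the import path): per sequence, kl_coef times the masked approximate KL between the actor and the initial model is subtracted from the reward-model score.

    import torch

    from chatgpt.models.utils import compute_reward   # assumed import path

    log_probs = torch.log(torch.tensor([[0.5, 0.4], [0.3, 0.2]]))   # actor log-probs per action token
    log_probs_base = torch.log(torch.full((2, 2), 0.5))             # initial-model log-probs
    action_mask = torch.ones(2, 2)

    r = torch.tensor([1.0, 0.5])                                    # reward-model score per sequence
    shaped = compute_reward(r, kl_coef=0.1, log_probs=log_probs,
                            log_probs_base=log_probs_base, action_mask=action_mask)
    print(shaped)   # slightly below r wherever the actor drifted from the base distribution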
--------------------------------------------------------------------------------
/code/chatgpt/replay_buffer/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import ReplayBuffer
2 | from .naive import NaiveReplayBuffer
3 |
4 | __all__ = ['ReplayBuffer', 'NaiveReplayBuffer']
5 |
--------------------------------------------------------------------------------
/code/chatgpt/replay_buffer/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/replay_buffer/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/replay_buffer/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/replay_buffer/__pycache__/base.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/replay_buffer/__pycache__/naive.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/replay_buffer/__pycache__/naive.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/replay_buffer/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/replay_buffer/__pycache__/utils.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/replay_buffer/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import Any
3 |
4 | from chatgpt.experience_maker.base import Experience
5 |
6 |
7 | class ReplayBuffer(ABC):
8 | """Replay buffer base class. It stores experience.
9 |
10 | Args:
11 | sample_batch_size (int): Batch size when sampling.
12 |         limit (int, optional): Limit on the number of experience samples. A number <= 0 means unlimited. Defaults to 0.
13 | """
14 |
15 | def __init__(self, sample_batch_size: int, limit: int = 0) -> None:
16 | super().__init__()
17 | self.sample_batch_size = sample_batch_size
18 | # limit <= 0 means unlimited
19 | self.limit = limit
20 |
21 | @abstractmethod
22 | def append(self, experience: Experience) -> None:
23 | pass
24 |
25 | @abstractmethod
26 | def clear(self) -> None:
27 | pass
28 |
29 | @abstractmethod
30 | def sample(self) -> Experience:
31 | pass
32 |
33 | @abstractmethod
34 | def __len__(self) -> int:
35 | pass
36 |
37 | @abstractmethod
38 | def __getitem__(self, idx: int) -> Any:
39 | pass
40 |
41 | @abstractmethod
42 | def collate_fn(self, batch: Any) -> Experience:
43 | pass
44 |
--------------------------------------------------------------------------------
/code/chatgpt/replay_buffer/naive.py:
--------------------------------------------------------------------------------
1 | import random
2 | from typing import List
3 |
4 | import torch
5 | from chatgpt.experience_maker.base import Experience
6 |
7 | from .base import ReplayBuffer
8 | from .utils import BufferItem, make_experience_batch, split_experience_batch
9 |
10 |
11 | class NaiveReplayBuffer(ReplayBuffer):
12 | """Naive replay buffer class. It stores experience.
13 |
14 | Args:
15 | sample_batch_size (int): Batch size when sampling.
16 |         limit (int, optional): Limit on the number of experience samples. A number <= 0 means unlimited. Defaults to 0.
17 |         cpu_offload (bool, optional): Whether to offload experience to CPU when appending and move it back to the GPU when sampling. Defaults to True.
18 | """
19 |
20 | def __init__(self, sample_batch_size: int, limit: int = 0, cpu_offload: bool = True) -> None:
21 | super().__init__(sample_batch_size, limit)
22 | self.cpu_offload = cpu_offload
23 | self.target_device = torch.device(f'cuda:{torch.cuda.current_device()}')
24 | # TODO(ver217): add prefetch
25 | self.items: List[BufferItem] = []
26 |
27 | @torch.no_grad()
28 | def append(self, experience: Experience) -> None:
29 | if self.cpu_offload:
30 | experience.to_device(torch.device('cpu'))
31 | items = split_experience_batch(experience)
32 | self.items.extend(items)
33 | if self.limit > 0:
34 | samples_to_remove = len(self.items) - self.limit
35 | if samples_to_remove > 0:
36 | self.items = self.items[samples_to_remove:]
37 |
38 | def clear(self) -> None:
39 | self.items.clear()
40 |
41 | @torch.no_grad()
42 | def sample(self) -> Experience:
43 | items = random.sample(self.items, self.sample_batch_size)
44 | experience = make_experience_batch(items)
45 | if self.cpu_offload:
46 | experience.to_device(self.target_device)
47 | return experience
48 |
49 | def __len__(self) -> int:
50 | return len(self.items)
51 |
52 | def __getitem__(self, idx: int) -> BufferItem:
53 | return self.items[idx]
54 |
55 | def collate_fn(self, batch) -> Experience:
56 | experience = make_experience_batch(batch)
57 | return experience
58 |
--------------------------------------------------------------------------------
/code/chatgpt/replay_buffer/utils.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import List, Optional
3 |
4 | import torch
5 | import torch.nn.functional as F
6 | from chatgpt.experience_maker.base import Experience
7 |
8 |
9 | @dataclass
10 | class BufferItem:
11 | """BufferItem is an item of experience data.
12 |
13 | Shapes of each tensor:
14 | sequences: (S)
15 | action_log_probs: (A)
16 | values: (1)
17 | reward: (1)
18 |         advantages: (1)
19 | attention_mask: (S)
20 | action_mask: (A)
21 |
22 |     "S" is the sequence length and "A" is the number of actions.
23 | """
24 | sequences: torch.Tensor
25 | action_log_probs: torch.Tensor
26 | values: torch.Tensor
27 | reward: torch.Tensor
28 | advantages: torch.Tensor
29 | attention_mask: Optional[torch.LongTensor]
30 | action_mask: Optional[torch.BoolTensor]
31 |
32 |
33 | def split_experience_batch(experience: Experience) -> List[BufferItem]:
34 | batch_size = experience.sequences.size(0)
35 | batch_kwargs = [{} for _ in range(batch_size)]
36 | keys = ('sequences', 'action_log_probs', 'values', 'reward', 'advantages', 'attention_mask', 'action_mask')
37 | for key in keys:
38 | value = getattr(experience, key)
39 | if isinstance(value, torch.Tensor):
40 | vals = torch.unbind(value)
41 | else:
42 | # None
43 | vals = [value for _ in range(batch_size)]
44 | assert batch_size == len(vals)
45 | for i, v in enumerate(vals):
46 | batch_kwargs[i][key] = v
47 | items = [BufferItem(**kwargs) for kwargs in batch_kwargs]
48 | return items
49 |
50 |
51 | def zero_pad_sequences(sequences: List[torch.Tensor], side: str = 'left') -> torch.Tensor:
52 | assert side in ('left', 'right')
53 | max_len = max(seq.size(0) for seq in sequences)
54 | padded_sequences = []
55 | for seq in sequences:
56 | pad_len = max_len - seq.size(0)
57 | padding = (pad_len, 0) if side == 'left' else (0, pad_len)
58 | padded_sequences.append(F.pad(seq, padding))
59 | return torch.stack(padded_sequences, dim=0)
60 |
61 |
62 | def make_experience_batch(items: List[BufferItem]) -> Experience:
63 | kwargs = {}
64 | to_pad_keys = set(('action_log_probs', 'action_mask'))
65 | keys = ('sequences', 'action_log_probs', 'values', 'reward', 'advantages', 'attention_mask', 'action_mask')
66 | for key in keys:
67 | vals = [getattr(item, key) for item in items]
68 | if key in to_pad_keys:
69 | batch_data = zero_pad_sequences(vals)
70 | else:
71 | batch_data = torch.stack(vals, dim=0)
72 | kwargs[key] = batch_data
73 | return Experience(**kwargs)
74 |
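A quick sketch of zero_pad_sequences with toy tensors (import path assumed): variable-length action tensors are left-padded to the batch maximum before stacking.

    import torch

    from chatgpt.replay_buffer.utils import zero_pad_sequences   # assumed import path

    batch = [torch.tensor([1, 2, 3]), torch.tensor([4, 5])]
    print(zero_pad_sequences(batch, side='left'))
    # tensor([[1, 2, 3],
    #         [0, 4, 5]])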
--------------------------------------------------------------------------------
/code/chatgpt/trainer/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import Trainer
2 | from .ppo import PPOTrainer
3 | from .rm import RewardModelTrainer
4 |
5 | __all__ = ['Trainer', 'PPOTrainer', 'RewardModelTrainer']
6 |
--------------------------------------------------------------------------------
/code/chatgpt/trainer/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/__pycache__/base.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/__pycache__/ppo.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/__pycache__/ppo.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/__pycache__/rm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/__pycache__/rm.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/__pycache__/utils.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import Any, Callable, Dict, List, Optional, Union
3 |
4 | import torch
5 | from chatgpt.experience_maker import Experience, ExperienceMaker
6 | from chatgpt.replay_buffer import ReplayBuffer
7 | from torch import Tensor
8 | from torch.utils.data import DistributedSampler
9 | from tqdm import tqdm
10 |
11 | from .callbacks import Callback
12 | from .strategies import Strategy
13 | from .utils import is_rank_0
14 |
15 |
16 | class Trainer(ABC):
17 | """
18 |     Base class for RLHF trainers.
19 |
20 | Args:
21 |         strategy (Strategy): the strategy to use for training
22 |         experience_maker (ExperienceMaker): the experience maker used to produce experience that fills the replay buffer
23 |         replay_buffer (ReplayBuffer): the replay buffer to use for training
24 |         experience_batch_size (int, defaults to 8): the batch size to use for experience generation
25 |         max_epochs (int, defaults to 1): the number of epochs in the training process
26 |         tokenizer (Callable, optional): the tokenizer to use for tokenizing the input
27 |         sample_replay_buffer (bool, defaults to False): whether to sample from the replay buffer
28 |         dataloader_pin_memory (bool, defaults to True): whether to pin memory for the data loader
29 |         callbacks (List[Callback], defaults to []): the callbacks to call during the training process
30 |         generate_kwargs (dict, optional): the kwargs to use when the model generates
31 | """
32 |
33 | def __init__(self,
34 | strategy: Strategy,
35 | experience_maker: ExperienceMaker,
36 | replay_buffer: ReplayBuffer,
37 | experience_batch_size: int = 8,
38 | max_epochs: int = 1,
39 | tokenizer: Optional[Callable[[Any], dict]] = None,
40 | sample_replay_buffer: bool = False,
41 | dataloader_pin_memory: bool = True,
42 | callbacks: List[Callback] = [],
43 | **generate_kwargs) -> None:
44 | super().__init__()
45 | self.strategy = strategy
46 | self.experience_maker = experience_maker
47 | self.replay_buffer = replay_buffer
48 | self.experience_batch_size = experience_batch_size
49 | self.max_epochs = max_epochs
50 | self.tokenizer = tokenizer
51 | self.generate_kwargs = generate_kwargs
52 | self.sample_replay_buffer = sample_replay_buffer
53 | self.dataloader_pin_memory = dataloader_pin_memory
54 | self.callbacks = callbacks
55 |
56 | @abstractmethod
57 | def training_step(self, experience: Experience) -> Dict[str, Any]:
58 | pass
59 |
60 | def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]]) -> Experience:
61 | if isinstance(inputs, Tensor):
62 | return self.experience_maker.make_experience(inputs, **self.generate_kwargs)
63 | elif isinstance(inputs, dict):
64 | return self.experience_maker.make_experience(**inputs, **self.generate_kwargs)
65 | else:
66 | raise ValueError(f'Unsupported input type "{type(inputs)}"')
67 |
68 | def _sample_prompts(self, prompts) -> list:
69 | indices = list(range(len(prompts)))
70 | sampled_indices = self.strategy.experience_sampler.choice(indices, self.experience_batch_size, replace=False)
71 | return [prompts[i] for i in sampled_indices]
72 |
73 | def _learn(self):
74 |         # the replay buffer may be empty at first, so rebuild the dataloader for each training round
75 | if not self.sample_replay_buffer:
76 | dataloader = self.strategy.setup_dataloader(self.replay_buffer, self.dataloader_pin_memory)
77 | device = torch.cuda.current_device()
78 | if self.sample_replay_buffer:
79 | pbar = tqdm(range(self.max_epochs), desc='Train epoch', disable=not is_rank_0())
80 | for _ in pbar:
81 | experience = self.replay_buffer.sample()
82 | metrics = self.training_step(experience)
83 | pbar.set_postfix(metrics)
84 | else:
85 | for epoch in range(self.max_epochs):
86 | self._on_learn_epoch_start(epoch)
87 | if isinstance(dataloader.sampler, DistributedSampler):
88 | dataloader.sampler.set_epoch(epoch)
89 | pbar = tqdm(dataloader, desc=f'Train epoch [{epoch+1}/{self.max_epochs}]', disable=not is_rank_0())
90 | for experience in pbar:
91 | self._on_learn_batch_start()
92 | experience.to_device(device)
93 | metrics = self.training_step(experience)
94 | self._on_learn_batch_end(metrics, experience)
95 | pbar.set_postfix(metrics)
96 | self._on_learn_epoch_end(epoch)
97 |
98 | def fit(self, prompts, num_episodes: int = 50000, max_timesteps: int = 500, update_timesteps: int = 5000) -> None:
99 | time = 0
100 | sampler = self.strategy.setup_sampler(prompts)
101 | self._on_fit_start()
102 | for episode in range(num_episodes):
103 | self._on_episode_start(episode)
104 | for timestep in tqdm(range(max_timesteps),
105 | desc=f'Episode [{episode+1}/{num_episodes}]',
106 | disable=not is_rank_0()):
107 | time += 1
108 | rand_prompts = sampler.sample(self.experience_batch_size)
109 | if self.tokenizer is not None:
110 | inputs = self.tokenizer(rand_prompts)
111 | else:
112 | inputs = rand_prompts
113 | self._on_make_experience_start()
114 | experience = self._make_experience(inputs)
115 | self._on_make_experience_end(experience)
116 | self.replay_buffer.append(experience)
117 | if time % update_timesteps == 0:
118 | self._learn()
119 | self.replay_buffer.clear()
120 | self._on_episode_end(episode)
121 | self._on_fit_end()
122 |
123 | # TODO(ver217): maybe simplify these code using context
124 | def _on_fit_start(self) -> None:
125 | for callback in self.callbacks:
126 | callback.on_fit_start()
127 |
128 | def _on_fit_end(self) -> None:
129 | for callback in self.callbacks:
130 | callback.on_fit_end()
131 |
132 | def _on_episode_start(self, episode: int) -> None:
133 | for callback in self.callbacks:
134 | callback.on_episode_start(episode)
135 |
136 | def _on_episode_end(self, episode: int) -> None:
137 | for callback in self.callbacks:
138 | callback.on_episode_end(episode)
139 |
140 | def _on_make_experience_start(self) -> None:
141 | for callback in self.callbacks:
142 | callback.on_make_experience_start()
143 |
144 | def _on_make_experience_end(self, experience: Experience) -> None:
145 | for callback in self.callbacks:
146 | callback.on_make_experience_end(experience)
147 |
148 | def _on_learn_epoch_start(self, epoch: int) -> None:
149 | for callback in self.callbacks:
150 | callback.on_learn_epoch_start(epoch)
151 |
152 | def _on_learn_epoch_end(self, epoch: int) -> None:
153 | for callback in self.callbacks:
154 | callback.on_learn_epoch_end(epoch)
155 |
156 | def _on_learn_batch_start(self) -> None:
157 | for callback in self.callbacks:
158 | callback.on_learn_batch_start()
159 |
160 | def _on_learn_batch_end(self, metrics: dict, experience: Experience) -> None:
161 | for callback in self.callbacks:
162 | callback.on_learn_batch_end(metrics, experience)
163 |
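A skeleton sketch of the subclassing contract (names are hypothetical, import paths assumed): a concrete trainer only has to implement training_step(); fit() then alternates experience collection with _learn() every update_timesteps steps.

    from typing import Any, Dict

    from chatgpt.experience_maker import Experience   # assumed import paths
    from chatgpt.trainer import Trainer

    class MinimalTrainer(Trainer):                     # hypothetical example subclass
        def training_step(self, experience: Experience) -> Dict[str, Any]:
            # compute the losses from experience.sequences / action_log_probs / advantages
            # and run the backward/optimizer step through self.strategy here
            return {'loss': 0.0}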
--------------------------------------------------------------------------------
/code/chatgpt/trainer/callbacks/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import Callback
2 | from .performance_evaluator import PerformanceEvaluator
3 | from .save_checkpoint import SaveCheckpoint
4 |
5 | __all__ = ['Callback', 'PerformanceEvaluator', 'SaveCheckpoint']
6 |
--------------------------------------------------------------------------------
/code/chatgpt/trainer/callbacks/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/callbacks/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/callbacks/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/callbacks/__pycache__/base.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/callbacks/__pycache__/performance_evaluator.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/callbacks/__pycache__/performance_evaluator.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/callbacks/__pycache__/save_checkpoint.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/callbacks/__pycache__/save_checkpoint.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/callbacks/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 |
3 | from chatgpt.experience_maker import Experience
4 |
5 |
6 | class Callback(ABC):
7 | """
8 | Base callback class. It defines the interface for callbacks.
9 | """
10 |
11 | def on_fit_start(self) -> None:
12 | pass
13 |
14 | def on_fit_end(self) -> None:
15 | pass
16 |
17 | def on_episode_start(self, episode: int) -> None:
18 | pass
19 |
20 | def on_episode_end(self, episode: int) -> None:
21 | pass
22 |
23 | def on_make_experience_start(self) -> None:
24 | pass
25 |
26 | def on_make_experience_end(self, experience: Experience) -> None:
27 | pass
28 |
29 | def on_learn_epoch_start(self, epoch: int) -> None:
30 | pass
31 |
32 | def on_learn_epoch_end(self, epoch: int) -> None:
33 | pass
34 |
35 | def on_learn_batch_start(self) -> None:
36 | pass
37 |
38 | def on_learn_batch_end(self, metrics: dict, experience: Experience) -> None:
39 | pass
40 |
--------------------------------------------------------------------------------
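All hooks in `Callback` are no-ops, so a subclass only overrides the events it cares about and is passed to the trainer via the `callbacks` argument. A minimal sketch of a custom callback (the `EpisodeTimer` name is hypothetical, not part of the repo):

```python
from time import time

from chatgpt.trainer.callbacks import Callback


class EpisodeTimer(Callback):
    """Hypothetical callback that reports how long each episode takes."""

    def on_episode_start(self, episode: int) -> None:
        self._start = time()

    def on_episode_end(self, episode: int) -> None:
        print(f'Episode {episode} took {time() - self._start:.1f}s')
```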
/code/chatgpt/trainer/callbacks/performance_evaluator.py:
--------------------------------------------------------------------------------
1 | from time import time
2 | from typing import Optional
3 |
4 | import torch
5 | import torch.distributed as dist
6 | from chatgpt.experience_maker import Experience
7 |
8 | from .base import Callback
9 |
10 |
11 | def get_world_size() -> int:
12 | if dist.is_initialized():
13 | return dist.get_world_size()
14 | return 1
15 |
16 |
17 | def print_rank_0(*args, **kwargs) -> None:
18 | if not dist.is_initialized() or dist.get_rank() == 0:
19 | print(*args, **kwargs)
20 |
21 |
22 | @torch.no_grad()
23 | def all_reduce_mean(x: float, world_size: int) -> float:
24 | if world_size == 1:
25 | return x
26 | tensor = torch.tensor([x], device=torch.cuda.current_device())
27 | dist.all_reduce(tensor)
28 | tensor = tensor / world_size
29 | return tensor.item()
30 |
31 |
32 | class PerformanceEvaluator(Callback):
33 | """
34 | Callback to evaluate the performance of the model.
35 | Args:
36 | actor_num_params: The number of parameters of the actor model.
37 | critic_num_params: The number of parameters of the critic model.
38 | initial_model_num_params: The number of parameters of the initial model.
39 | reward_model_num_params: The number of parameters of the reward model.
40 | enable_grad_checkpoint: Whether to enable gradient checkpointing.
41 | ignore_episodes: The number of episodes to ignore when calculating the performance.
42 | """
43 |
44 | def __init__(self,
45 | actor_num_params: int,
46 | critic_num_params: int,
47 | initial_model_num_params: int,
48 | reward_model_num_params: int,
49 | enable_grad_checkpoint: bool = False,
50 | ignore_episodes: int = 0) -> None:
51 | super().__init__()
52 | self.world_size = get_world_size()
53 | self.actor_num_params = actor_num_params
54 | self.critic_num_params = critic_num_params
55 | self.initial_model_num_params = initial_model_num_params
56 | self.reward_model_num_params = reward_model_num_params
57 | self.enable_grad_checkpoint = enable_grad_checkpoint
58 | self.ignore_episodes = ignore_episodes
59 | self.disable: bool = False
60 |
61 | self.make_experience_duration: float = 0.
62 | self.make_experience_start_time: Optional[float] = None
63 | self.make_experience_num_samples: int = 0
64 | self.make_experience_flop: int = 0
65 | self.learn_duration: float = 0.
66 | self.learn_start_time: Optional[float] = None
67 | self.learn_num_samples: int = 0
68 | self.learn_flop: int = 0
69 |
70 | def on_episode_start(self, episode: int) -> None:
71 | self.disable = self.ignore_episodes > 0 and episode < self.ignore_episodes
72 |
73 | def on_make_experience_start(self) -> None:
74 | if self.disable:
75 | return
76 | self.make_experience_start_time = time()
77 |
78 | def on_make_experience_end(self, experience: Experience) -> None:
79 | if self.disable:
80 | return
81 | self.make_experience_duration += time() - self.make_experience_start_time
82 |
83 | batch_size, seq_len = experience.sequences.shape
84 |
85 | self.make_experience_num_samples += batch_size
86 |
87 | # actor generate
88 | num_actions = experience.action_mask.size(1)
89 | input_len = seq_len - num_actions
90 | total_seq_len = (input_len + seq_len - 1) * num_actions / 2
91 | self.make_experience_flop += self.actor_num_params * batch_size * total_seq_len * 2
92 | # actor forward
93 | self.make_experience_flop += self.actor_num_params * batch_size * seq_len * 2
94 | # critic forward
95 | self.make_experience_flop += self.critic_num_params * batch_size * seq_len * 2
96 | # initial model forward
97 | self.make_experience_flop += self.initial_model_num_params * batch_size * seq_len * 2
98 | # reward model forward
99 | self.make_experience_flop += self.reward_model_num_params * batch_size * seq_len * 2
100 |
101 | def on_learn_batch_start(self) -> None:
102 | if self.disable:
103 | return
104 | self.learn_start_time = time()
105 |
106 | def on_learn_batch_end(self, metrics: dict, experience: Experience) -> None:
107 | if self.disable:
108 | return
109 | self.learn_duration += time() - self.learn_start_time
110 |
111 | batch_size, seq_len = experience.sequences.shape
112 |
113 | self.learn_num_samples += batch_size
114 |
115 | # actor forward-backward, 3 means forward(1) + backward(2)
116 | self.learn_flop += self.actor_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint))
117 | # critic forward-backward
118 | self.learn_flop += self.critic_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint))
119 |
120 | def on_fit_end(self) -> None:
121 | avg_make_experience_duration = all_reduce_mean(self.make_experience_duration, self.world_size)
122 | avg_learn_duration = all_reduce_mean(self.learn_duration, self.world_size)
123 |
124 | avg_make_experience_throughput = self.make_experience_num_samples / (avg_make_experience_duration + 1e-12)
125 | avg_make_experience_tflops = self.make_experience_flop / 1e12 / (avg_make_experience_duration + 1e-12)
126 |
127 | avg_learn_throughput = self.learn_num_samples / (avg_learn_duration + 1e-12)
128 | avg_learn_tflops = self.learn_flop / 1e12 / (avg_learn_duration + 1e-12)
129 |
130 | print_rank_0(
131 | f'Making experience throughput: {avg_make_experience_throughput:.3f} samples/sec, TFLOPS: {avg_make_experience_tflops:.3f}'
132 | )
133 | print_rank_0(f'Learning throughput: {avg_learn_throughput:.3f} samples/sec, TFLOPS: {avg_learn_tflops:.3f}')
134 |
--------------------------------------------------------------------------------
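The FLOP bookkeeping above relies on the common "2 x params x tokens" estimate for a transformer forward pass (hence the trailing `* 2` factors); the learning phase multiplies by 3 for forward plus backward, or 4 when gradient checkpointing recomputes the forward pass. The generation term is an arithmetic series: decoding `num_actions` tokens one at a time processes contexts of length `input_len`, `input_len`+1, ..., `seq_len`-1, so per sample

$$\text{total\_seq\_len}=\sum_{k=0}^{\text{num\_actions}-1}(\text{input\_len}+k)=\frac{(\text{input\_len}+\text{seq\_len}-1)\cdot\text{num\_actions}}{2}.$$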
/code/chatgpt/trainer/callbacks/save_checkpoint.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch.distributed as dist
4 | from chatgpt.trainer.strategies import ColossalAIStrategy, Strategy
5 | from chatgpt.trainer.utils import is_rank_0
6 | from torch import nn
7 | from torch.optim import Optimizer
8 |
9 | from .base import Callback
10 |
11 |
12 | class SaveCheckpoint(Callback):
13 | """
14 | The callback for saving checkpoint for chatgpt.
15 |
16 | Only supports saving the actor and critic models.
17 | A typical architecture of the saved checkpoint would be:
18 | - checkpoint
19 | - episode_x
20 | - actor.pt
21 | - actor-optim-rank-0.pt
22 | - actor-optim-rank-1.pt
23 | - critic.pt
24 | - critic-optim-rank-0.pt
25 | - critic-optim-rank-1.pt
26 | - ...
27 |
28 | Args:
29 | path(str): the base path to save checkpoints; checkpoints will be saved at `path/checkpoint`
30 | interval(int): the episode interval between checkpoint saves
31 | strategy(Strategy): the strategy used to train
32 | actor(nn.Module): the actor model
33 | critic(nn.Module): the critic model
34 | actor_optim(Optimizer): the optimizer of actor
35 | critic_optim(Optimizer): the optimizer of critic
36 |
37 | """
38 |
39 | def __init__(self,
40 | path: str,
41 | interval: int,
42 | strategy: Strategy,
43 | actor: nn.Module = None,
44 | critic: nn.Module = None,
45 | actor_optim: Optimizer = None,
46 | critic_optim: Optimizer = None) -> None:
47 | super().__init__()
48 | self.path = os.path.join(path, 'checkpoint')
49 | self.interval = interval
50 | self.strategy = strategy
51 | self.model_dict = {'actor': [actor, actor_optim], 'critic': [critic, critic_optim]}
52 |
53 | def on_episode_end(self, episode: int) -> None:
54 | if (episode + 1) % self.interval != 0:
55 | return
56 | base_path = os.path.join(self.path, f'episode_{episode}')
57 | if not os.path.exists(base_path):
58 | os.makedirs(base_path)
59 |
60 | for model in self.model_dict.keys():
61 |
62 | # save model
63 | if self.model_dict[model][0] is None:
64 | # saving only optimizer states is meaningless, so it is skipped
65 | continue
66 | model_path = os.path.join(base_path, f'{model}.pt')
67 | self.strategy.save_model(model=self.model_dict[model][0], path=model_path, only_rank0=True)
68 |
69 | # save optimizer
70 | if self.model_dict[model][1] is None:
71 | continue
72 | only_rank0 = not isinstance(self.strategy, ColossalAIStrategy)
73 | rank = 0 if is_rank_0() else dist.get_rank()
74 | optim_path = os.path.join(base_path, f'{model}-optim-rank-{rank}.pt')
75 | self.strategy.save_optimizer(optimizer=self.model_dict[model][1], path=optim_path, only_rank0=only_rank0)
76 |
--------------------------------------------------------------------------------
/code/chatgpt/trainer/ppo.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Callable, Dict, List, Optional
2 |
3 | import torch.nn as nn
4 | from chatgpt.experience_maker import Experience, NaiveExperienceMaker
5 | from chatgpt.models.base import Actor, Critic
6 | from chatgpt.models.generation_utils import update_model_kwargs_fn
7 | from chatgpt.models.loss import PolicyLoss, ValueLoss
8 | from chatgpt.replay_buffer import NaiveReplayBuffer
9 | from torch.optim import Optimizer
10 |
11 | from .base import Trainer
12 | from .callbacks import Callback
13 | from .strategies import Strategy
14 |
15 |
16 | class PPOTrainer(Trainer):
17 | """
18 | Trainer for PPO algorithm.
19 |
20 | Args:
21 | strategy (Strategy): the strategy to use for training
22 | actor (Actor): the actor model in ppo algorithm
23 | critic (Critic): the critic model in ppo algorithm
24 | reward_model (nn.Module): the reward model in the RLHF algorithm, used to compute rewards for generated sentences
25 | initial_model (Actor): the initial model in the RLHF algorithm, used to generate reference logits that constrain the actor's updates
26 | actor_optim (Optimizer): the optimizer to use for actor model
27 | critic_optim (Optimizer): the optimizer to use for critic model
28 | kl_coef (float, defaults to 0.1): the coefficient of kl divergence loss
29 | train_batch_size (int, defaults to 8): the batch size to use for training
30 | buffer_limit (int, defaults to 0): the max_size limitation of the replay buffer
31 | buffer_cpu_offload (bool, defaults to True): whether to offload replay buffer to cpu
32 | eps_clip (float, defaults to 0.2): the clip coefficient of policy loss
33 | value_clip (float, defaults to 0.4): the clip coefficient of value loss
34 | experience_batch_size (int, defaults to 8): the batch size to use for experience generation
35 | max_epochs (int, defaults to 1): the number of epochs of training process
36 | tokenizer (Callable, optional): the tokenizer to use for tokenizing the input
37 | sample_replay_buffer (bool, defaults to False): whether to sample from replay buffer
38 | dataloader_pin_memory (bool, defaults to True): whether to pin memory for data loader
39 | callbacks (List[Callback], defaults to []): the callbacks to call during training process
40 | generate_kwargs (dict, optional): the kwargs to use while model generating
41 | """
42 |
43 | def __init__(self,
44 | strategy: Strategy,
45 | actor: Actor,
46 | critic: Critic,
47 | reward_model: nn.Module,
48 | initial_model: Actor,
49 | actor_optim: Optimizer,
50 | critic_optim: Optimizer,
51 | kl_coef: float = 0.1,
52 | train_batch_size: int = 8,
53 | buffer_limit: int = 0,
54 | buffer_cpu_offload: bool = True,
55 | eps_clip: float = 0.2,
56 | value_clip: float = 0.4,
57 | experience_batch_size: int = 8,
58 | max_epochs: int = 1,
59 | tokenizer: Optional[Callable[[Any], dict]] = None,
60 | sample_replay_buffer: bool = False,
61 | dataloader_pin_memory: bool = True,
62 | callbacks: List[Callback] = [],
63 | **generate_kwargs) -> None:
64 | experience_maker = NaiveExperienceMaker(actor, critic, reward_model, initial_model, kl_coef)
65 | replay_buffer = NaiveReplayBuffer(train_batch_size, buffer_limit, buffer_cpu_offload)
66 | generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, actor)
67 | super().__init__(strategy, experience_maker, replay_buffer, experience_batch_size, max_epochs, tokenizer,
68 | sample_replay_buffer, dataloader_pin_memory, callbacks, **generate_kwargs)
69 | self.actor = actor
70 | self.critic = critic
71 |
72 | self.actor_loss_fn = PolicyLoss(eps_clip)
73 | self.critic_loss_fn = ValueLoss(value_clip)
74 |
75 | self.actor_optim = actor_optim
76 | self.critic_optim = critic_optim
77 |
78 | def training_step(self, experience: Experience) -> Dict[str, float]:
79 | self.actor.train()
80 | self.critic.train()
81 |
82 | num_actions = experience.action_mask.size(1)
83 | action_log_probs = self.actor(experience.sequences, num_actions, attention_mask=experience.attention_mask)
84 | actor_loss = self.actor_loss_fn(action_log_probs,
85 | experience.action_log_probs,
86 | experience.advantages,
87 | action_mask=experience.action_mask)
88 | self.strategy.backward(actor_loss, self.actor, self.actor_optim)
89 | self.strategy.optimizer_step(self.actor_optim)
90 | self.actor_optim.zero_grad()
91 |
92 | values = self.critic(experience.sequences,
93 | action_mask=experience.action_mask,
94 | attention_mask=experience.attention_mask)
95 | critic_loss = self.critic_loss_fn(values,
96 | experience.values,
97 | experience.reward,
98 | action_mask=experience.action_mask)
99 | self.strategy.backward(critic_loss, self.critic, self.critic_optim)
100 | self.strategy.optimizer_step(self.critic_optim)
101 | self.critic_optim.zero_grad()
102 |
103 | return {'actor_loss': actor_loss.item(), 'critic_loss': critic_loss.item()}
104 |
105 |
106 | def _set_default_generate_kwargs(strategy: Strategy, generate_kwargs: dict, actor: Actor) -> dict:
107 | origin_model = strategy._unwrap_actor(actor)
108 | new_kwargs = {**generate_kwargs}
109 | # use huggingface models method directly
110 | if 'prepare_inputs_fn' not in generate_kwargs and hasattr(origin_model, 'prepare_inputs_for_generation'):
111 | new_kwargs['prepare_inputs_fn'] = origin_model.prepare_inputs_for_generation
112 |
113 | if 'update_model_kwargs_fn' not in generate_kwargs:
114 | new_kwargs['update_model_kwargs_fn'] = update_model_kwargs_fn
115 |
116 | return new_kwargs
117 |
--------------------------------------------------------------------------------
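`PolicyLoss` and `ValueLoss` are defined in `chatgpt/models/loss.py` (not shown in this section); they are expected to implement the standard clipped PPO objectives. With probability ratio $r_t=\exp\big(\log\pi_\theta(a_t\mid s_t)-\log\pi_{\text{old}}(a_t\mid s_t)\big)$, advantage $\hat{A}_t$ from the experience maker, $\varepsilon=$ `eps_clip` and $\varepsilon_v=$ `value_clip`:

$$L_{\text{actor}} = -\mathbb{E}_t\Big[\min\big(r_t\hat{A}_t,\ \mathrm{clip}(r_t,\,1-\varepsilon,\,1+\varepsilon)\,\hat{A}_t\big)\Big]$$

$$L_{\text{critic}} = \mathbb{E}_t\Big[\max\big((V_\phi(s_t)-R_t)^2,\ \big(\mathrm{clip}(V_\phi(s_t),\,V_{\text{old}}(s_t)-\varepsilon_v,\,V_{\text{old}}(s_t)+\varepsilon_v)-R_t\big)^2\big)\Big]$$

where $R_t$ is the (KL-penalized) reward stored in the experience and $V_{\text{old}}$ is the value recorded when the experience was made.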
/code/chatgpt/trainer/rm.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 |
3 | import loralib as lora
4 | import torch
5 | from chatgpt.dataset import RewardDataset
6 | from chatgpt.models.loss import PairWiseLoss
7 | from torch.optim import Adam, Optimizer
8 | from torch.utils.data import DataLoader
9 | from tqdm import tqdm
10 |
11 | from .strategies import Strategy
12 | from .utils import is_rank_0
13 |
14 |
15 | class RewardModelTrainer(ABC):
16 | """
17 | Trainer to use while training reward model.
18 |
19 | Args:
20 | model (torch.nn.Module): the model to train
21 | strategy (Strategy): the strategy to use for training
22 | optim(Optimizer): the optimizer to use for training
23 | train_dataset (RewardDataset): the dataset to use for training
24 | eval_dataset (RewardDataset): the dataset to use for evaluation
25 | batch_size (int, defaults to 1): the batch size while training
26 | max_epochs (int, defaults to 2): the number of epochs to train
27 | optim_kwargs (dict, defaults to {'lr':1e-4}): the kwargs to use while initializing optimizer
28 | """
29 |
30 | def __init__(
31 | self,
32 | model,
33 | strategy: Strategy,
34 | optim: Optimizer,
35 | train_dataset: RewardDataset,
36 | eval_dataset: RewardDataset,
37 | batch_size: int = 1,
38 | max_epochs: int = 2,
39 | ) -> None:
40 | super().__init__()
41 | self.strategy = strategy
42 | self.epochs = max_epochs
43 | self.train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
44 | self.eval_dataloader = DataLoader(eval_dataset, batch_size=batch_size)
45 |
46 | self.model = strategy.setup_model(model)
47 | if "DDP" in str(self.strategy):
48 | self.model = self.model.module
49 | self.loss_fn = PairWiseLoss()
50 | self.optimizer = strategy.setup_optimizer(optim, self.model)
51 |
52 | def fit(self, use_lora):
53 | epoch_bar = tqdm(range(self.epochs), desc='Train epoch', disable=not is_rank_0())
54 | for epoch in range(self.epochs):
55 | step_bar = tqdm(range(len(self.train_dataloader)),
56 | desc='Train step of epoch %d' % epoch,
57 | disable=not is_rank_0())
58 | # train
59 | self.model.train()
60 | for chosen_ids, c_mask, reject_ids, r_mask in self.train_dataloader:
61 | chosen_ids = chosen_ids.squeeze(1).cuda()
62 | c_mask = c_mask.squeeze(1).cuda()
63 | reject_ids = reject_ids.squeeze(1).cuda()
64 | r_mask = r_mask.squeeze(1).cuda()
65 | chosen_reward = self.model(chosen_ids, attention_mask=c_mask)
66 | reject_reward = self.model(reject_ids, attention_mask=r_mask)
67 | loss = self.loss_fn(chosen_reward, reject_reward)
68 | self.strategy.backward(loss, self.model, self.optimizer)
69 | self.strategy.optimizer_step(self.optimizer)
70 | self.optimizer.zero_grad()
71 | step_bar.update()
72 | step_bar.set_postfix({'loss': loss.item()})
73 |
74 | # eval
75 | self.model.eval()
76 | with torch.no_grad():
77 | dist = 0
78 | loss_sum = 0
79 | for chosen_ids, c_mask, reject_ids, r_mask in self.eval_dataloader:
80 | chosen_ids = chosen_ids.squeeze(1).cuda()
81 | c_mask = c_mask.squeeze(1).cuda()
82 | reject_ids = reject_ids.squeeze(1).cuda()
83 | r_mask = r_mask.squeeze(1).cuda()
84 | chosen_reward = self.model(chosen_ids, attention_mask=c_mask)
85 | reject_reward = self.model(reject_ids, attention_mask=r_mask)
86 | dist += (chosen_reward - reject_reward).mean().item()
87 | loss = self.loss_fn(chosen_reward, reject_reward)
88 | loss_sum += loss.item()
89 | dist_mean = dist / len(self.eval_dataloader)
90 | loss_mean = loss_sum / len(self.eval_dataloader)
91 | epoch_bar.update()
92 | step_bar.set_postfix({'loss': loss_mean, 'dist_mean': dist_mean})
93 | step_bar.close()
94 |
--------------------------------------------------------------------------------
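`PairWiseLoss` (also defined in `chatgpt/models/loss.py`, not shown here) is expected to be the usual pairwise ranking loss for reward models,

$$L = -\log\sigma\big(r_\theta(x, y_{\text{chosen}}) - r_\theta(x, y_{\text{rejected}})\big),$$

which pushes the chosen reward above the rejected one; the `dist` metric logged during evaluation above is the mean of exactly this reward gap.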
/code/chatgpt/trainer/strategies/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import Strategy
2 | from .colossalai import ColossalAIStrategy
3 | from .ddp import DDPStrategy
4 | from .naive import NaiveStrategy
5 |
6 | __all__ = ['Strategy', 'NaiveStrategy', 'DDPStrategy', 'ColossalAIStrategy']
7 |
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/strategies/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/strategies/__pycache__/base.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/__pycache__/colossalai.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/strategies/__pycache__/colossalai.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/__pycache__/ddp.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/strategies/__pycache__/ddp.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/__pycache__/naive.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/strategies/__pycache__/naive.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/__pycache__/sampler.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/chatgpt/trainer/strategies/__pycache__/sampler.cpython-310.pyc
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from contextlib import nullcontext
3 | from typing import Any, List, Tuple, Union
4 |
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 | from chatgpt.models.base import Actor, Critic, RewardModel
9 | from chatgpt.replay_buffer import ReplayBuffer
10 | from torch.optim import Optimizer
11 | from torch.utils.data import DataLoader
12 |
13 | from .sampler import DistributedSampler
14 |
15 | ModelOptimPair = Tuple[nn.Module, Optimizer]
16 | ModelOrModelOptimPair = Union[nn.Module, ModelOptimPair]
17 |
18 |
19 | class Strategy(ABC):
20 | """
21 | Base class for training strategies.
22 | """
23 |
24 | def __init__(self) -> None:
25 | super().__init__()
26 | self.setup_distributed()
27 |
28 | @abstractmethod
29 | def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: Optimizer, **kwargs) -> None:
30 | pass
31 |
32 | @abstractmethod
33 | def optimizer_step(self, optimizer: Optimizer, **kwargs) -> None:
34 | pass
35 |
36 | @abstractmethod
37 | def setup_distributed(self) -> None:
38 | pass
39 |
40 | @abstractmethod
41 | def setup_model(self, model: nn.Module) -> nn.Module:
42 | pass
43 |
44 | @abstractmethod
45 | def setup_optimizer(self, optimizer: Optimizer, model: nn.Module) -> Optimizer:
46 | pass
47 |
48 | @abstractmethod
49 | def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader:
50 | pass
51 |
52 | def model_init_context(self):
53 | return nullcontext()
54 |
55 | def prepare(
56 | self, *models_or_model_optim_pairs: ModelOrModelOptimPair
57 | ) -> Union[List[ModelOrModelOptimPair], ModelOrModelOptimPair]:
58 | """Prepare models or model-optimizer-pairs based on each strategy.
59 |
60 | Example::
61 | >>> # when fine-tuning actor and critic
62 | >>> (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare((actor, actor_optim), (critic, critic_optim), reward_model, initial_model)
63 | >>> # or when training reward model
64 | >>> (reward_model, reward_model_optim) = strategy.prepare((reward_model, reward_model_optim))
65 | >>> # or just inference
66 | >>> actor, critic = strategy.prepare(actor, critic)
67 |
68 | Returns:
69 | Union[List[ModelOrModelOptimPair], ModelOrModelOptimPair]: Models or model-optimizer-pairs in the original order.
70 | """
71 |
72 | def prepare_model(model: nn.Module):
73 | if isinstance(model, Actor):
74 | return Actor(self.setup_model(self._unwrap_model(model)))
75 | return self.setup_model(self._unwrap_model(model))
76 |
77 | rets = []
78 | for arg in models_or_model_optim_pairs:
79 | if isinstance(arg, tuple):
80 | assert len(arg) == 2, f'Expect (model, optimizer) pair, got a tuple with size "{len(arg)}"'
81 | model, optimizer = arg
82 | model = prepare_model(model)
83 | optimizer = self.setup_optimizer(optimizer, self._unwrap_model(model))
84 | rets.append((model, optimizer))
85 | elif isinstance(arg, nn.Module):
86 | rets.append(prepare_model(arg))
87 | else:
88 | raise RuntimeError(f'Expect model or (model, optimizer) pair, got {type(arg)}')
89 |
90 | if len(rets) == 1:
91 | return rets[0]
92 | return rets
93 |
94 | @staticmethod
95 | def _unwrap_model(model: nn.Module) -> nn.Module:
96 | """Useful for saving state dict. As actor is wrapped by Actor class again in `prepare()`, we should unwrap it before saving.
97 |
98 | Args:
99 | model (nn.Module): an actor or a critic
100 | """
101 | if isinstance(model, Actor):
102 | return model.model
103 | return model
104 |
105 | @staticmethod
106 | def _unwrap_actor(actor: Actor) -> nn.Module:
107 | """Get `actor.model` from a wrapped (by `prepare()`) actor. Useful for getting original huggingface model.
108 |
109 | Args:
110 | actor (Actor): a wrapped actor
111 | """
112 | return Strategy._unwrap_model(actor)
113 |
114 | @abstractmethod
115 | def save_model(self, model: nn.Module, path: str, only_rank0: bool = False) -> None:
116 | pass
117 |
118 | @abstractmethod
119 | def load_model(self, model: nn.Module, path: str, map_location: Any = None, strict: bool = True) -> None:
120 | pass
121 |
122 | @abstractmethod
123 | def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None:
124 | pass
125 |
126 | @abstractmethod
127 | def load_optimizer(self, optimizer: Optimizer, path: str, map_location: Any = None) -> None:
128 | pass
129 |
130 | def setup_sampler(self, dataset) -> DistributedSampler:
131 | return DistributedSampler(dataset, 1, 0)
132 |
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/colossalai.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from typing import Optional, Union
3 |
4 | import torch
5 | import torch.distributed as dist
6 | import torch.nn as nn
7 | import torch.optim as optim
8 | from chatgpt.models.base import Actor
9 | from chatgpt.models.lora import LoraLinear
10 | from torch.optim import Optimizer
11 |
12 | import colossalai
13 | from colossalai.nn.optimizer import CPUAdam, HybridAdam
14 | from colossalai.nn.parallel import ZeroDDP, zero_model_wrapper, zero_optim_wrapper
15 | from colossalai.nn.parallel.utils import get_static_torch_model
16 | from colossalai.tensor import ProcessGroup, ShardSpec
17 | from colossalai.utils import get_current_device
18 | from colossalai.utils.model.colo_init_context import ColoInitContext
19 |
20 | from .base import Strategy
21 | from .ddp import DDPStrategy
22 |
23 |
24 | class ColossalAIStrategy(DDPStrategy):
25 | """
26 | The strategy for training with ColossalAI.
27 |
28 | Args:
29 | stage(int): The ZeRO stage to use. Choose from (1, 2, 3)
30 | seed(int): The seed for the random number generator.
31 | shard_init(bool): Whether to shard the model parameters during initialization. Only for ZeRO-3.
32 | This is not compatible with `from_pretrained()`. We temporarily disable this and will support it in the future.
33 | placement_policy(str): The placement policy for gemini. Choose from ('cpu', 'cuda').
34 | If it is 'cpu', parameters, gradients and optimizer states will be offloaded to CPU;
35 | if it is 'cuda', they will not be offloaded, which means max CUDA memory will be used. It is the fastest.
36 | pin_memory(bool): Whether to pin the memory for the data loader. Only for ZeRO-3.
37 | force_outputs_fp32(bool): Whether to force the outputs to be fp32. Only for ZeRO-3.
38 | search_range_mb(int): The search range in MB for the chunk size. Only for ZeRO-3.
39 | hidden_dim(optional, int): The hidden dimension for the gemini. Only for ZeRO-3.
40 | min_chunk_size_mb(float): The minimum chunk size in MB. Only for ZeRO-3.
41 | gpu_margin_mem_ratio(float): The margin memory ratio for the GPU. Only for ZeRO-3.
42 | reduce_bucket_size(int): The reduce bucket size in bytes. Only for ZeRO-1 and ZeRO-2.
43 | overlap_communication(bool): Whether to overlap communication and computation. Only for ZeRO-1 and ZeRO-2.
44 | initial_scale(float): The initial scale for the optimizer.
45 | growth_factor(float): The growth factor for the optimizer.
46 | backoff_factor(float): The backoff factor for the optimizer.
47 | growth_interval(int): The growth interval for the optimizer.
48 | hysteresis(int): The hysteresis for the optimizer.
49 | min_scale(float): The minimum scale for the optimizer.
50 | max_scale(float): The maximum scale for the optimizer.
51 | max_norm(float): The maximum norm for the optimizer.
52 | norm_type(float): The norm type for the optimizer.
53 |
54 | """
55 |
56 | def __init__(
57 | self,
58 | stage: int = 3,
59 | seed: int = 42,
60 | shard_init: bool = False, # only for stage 3
61 | placement_policy: str = 'cuda',
62 | pin_memory: bool = True, # only for stage 3
63 | force_outputs_fp32: bool = False, # only for stage 3
64 | search_range_mb: int = 32, # only for stage 3
65 | hidden_dim: Optional[int] = None, # only for stage 3
66 | min_chunk_size_mb: float = 32, # only for stage 3
67 | gpu_margin_mem_ratio: float = 0.0, # only for stage 3
68 | reduce_bucket_size: int = 12 * 1024**2, # only for stage 1&2
69 | overlap_communication: bool = True, # only for stage 1&2
70 | initial_scale: float = 2**16,
71 | growth_factor: float = 2,
72 | backoff_factor: float = 0.5,
73 | growth_interval: int = 1000,
74 | hysteresis: int = 2,
75 | min_scale: float = 1,
76 | max_scale: float = 2**32,
77 | max_norm: float = 0.0,
78 | norm_type: float = 2.0) -> None:
79 | super().__init__(seed)
80 | assert placement_policy in ('cpu', 'cuda'), f'Unsupported placement policy "{placement_policy}"'
81 | self.stage = stage
82 | # TODO(ver217): support shard_init when using from_pretrained()
83 | if shard_init:
84 | warnings.warn(
85 | 'Shard init is not supported with model.from_pretrained() yet. Please load weights after strategy.prepare()'
86 | )
87 | self.shard_init = shard_init
88 | self.gemini_config = dict(device=get_current_device(),
89 | placement_policy=placement_policy,
90 | pin_memory=pin_memory,
91 | force_outputs_fp32=force_outputs_fp32,
92 | strict_ddp_mode=shard_init,
93 | search_range_mb=search_range_mb,
94 | hidden_dim=hidden_dim,
95 | min_chunk_size_mb=min_chunk_size_mb)
96 | if stage == 3:
97 | self.zero_optim_config = dict(gpu_margin_mem_ratio=gpu_margin_mem_ratio)
98 | else:
99 | self.zero_optim_config = dict(reduce_bucket_size=reduce_bucket_size,
100 | overlap_communication=overlap_communication,
101 | cpu_offload=(placement_policy == 'cpu'))
102 | self.optim_kwargs = dict(initial_scale=initial_scale,
103 | growth_factor=growth_factor,
104 | backoff_factor=backoff_factor,
105 | growth_interval=growth_interval,
106 | hysteresis=hysteresis,
107 | min_scale=min_scale,
108 | max_scale=max_scale,
109 | max_norm=max_norm,
110 | norm_type=norm_type)
111 |
112 | def setup_distributed(self) -> None:
113 | colossalai.launch_from_torch({}, seed=self.seed)
114 |
115 | def model_init_context(self):
116 | if self.stage == 3:
117 | world_size = dist.get_world_size()
118 | shard_pg = ProcessGroup(tp_degree=world_size) if self.shard_init else None
119 | default_dist_spec = ShardSpec([-1], [world_size]) if self.shard_init else None
120 | return ColoInitContext(device=get_current_device(),
121 | dtype=torch.half,
122 | default_pg=shard_pg,
123 | default_dist_spec=default_dist_spec)
124 | return super().model_init_context()
125 |
126 | def setup_model(self, model: nn.Module) -> nn.Module:
127 | return zero_model_wrapper(model, zero_stage=self.stage, gemini_config=self.gemini_config)
128 |
129 | def setup_optimizer(self, optimizer: optim.Optimizer, model: nn.Module) -> optim.Optimizer:
130 | assert isinstance(optimizer, (CPUAdam, HybridAdam)), f'Unsupported optimizer {type(optimizer)}'
131 | return zero_optim_wrapper(model, optimizer, optim_config=self.zero_optim_config, **self.optim_kwargs)
132 |
133 | def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: optim.Optimizer, **kwargs) -> None:
134 | optimizer.backward(loss)
135 |
136 | def optimizer_step(self, optimizer: optim.Optimizer, **kwargs) -> None:
137 | optimizer.step()
138 |
139 | @staticmethod
140 | def _unwrap_actor(actor: Actor) -> nn.Module:
141 | model: Union[nn.Module, ZeroDDP] = Strategy._unwrap_actor(actor)
142 | if isinstance(model, ZeroDDP):
143 | return model.module
144 | return model
145 |
146 | def save_model(self, model: nn.Module, path: str, only_rank0: bool = False) -> None:
147 | unwrapped_model = self._unwrap_model(model)
148 | # TODO : better way to get torch model from gemini model
149 | # to get torch model from gemini model
150 | if isinstance(unwrapped_model, ZeroDDP):
151 | state_dict = unwrapped_model.state_dict()
152 | unwrapped_model = get_static_torch_model(unwrapped_model)
153 | if only_rank0 and dist.get_rank() != 0:
154 | return
155 | unwrapped_model.load_state_dict(state_dict)
156 | # merge lora_weights into weights
157 | for module in unwrapped_model.modules():
158 | if isinstance(module, LoraLinear):
159 | module.merge_weights=True
160 | module.eval()
161 | # get state_dict and save
162 | state_dict = unwrapped_model.state_dict()
163 | if only_rank0 and dist.get_rank() != 0:
164 | return
165 | torch.save(state_dict, path)
166 |
167 | def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None:
168 | if only_rank0:
169 | raise RuntimeError(
170 | 'Optimizer states are sharded when using ColossalAIStrategy; saving only on rank 0 is not supported.')
171 | torch.save(optimizer.state_dict(), path)
172 |
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/ddp.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 |
4 | import numpy as np
5 | import torch
6 | import torch.distributed as dist
7 | import torch.nn as nn
8 | from chatgpt.models.base import Actor
9 | from chatgpt.models.lora import LoraLinear
10 | from chatgpt.replay_buffer import ReplayBuffer
11 | from torch.nn.parallel import DistributedDataParallel as DDP
12 | from torch.optim import Optimizer
13 | from torch.utils.data import DataLoader
14 |
15 | from .base import Strategy
16 | from .naive import NaiveStrategy
17 | from .sampler import DistributedSampler
18 |
19 |
20 | class DDPStrategy(NaiveStrategy):
21 | """
22 | Strategy for distributed training using torch.distributed.
23 | """
24 |
25 | def __init__(self, seed: int = 42) -> None:
26 | self.seed = seed
27 | super().__init__()
28 |
29 | def setup_distributed(self) -> None:
30 | try:
31 | rank = int(os.environ['RANK'])
32 | local_rank = int(os.environ['LOCAL_RANK'])
33 | world_size = int(os.environ['WORLD_SIZE'])
34 | host = os.environ['MASTER_ADDR']
35 | port = int(os.environ['MASTER_PORT'])
36 | except KeyError as e:
37 | raise RuntimeError(
38 | f"Could not find {e} in the torch environment, visit https://www.colossalai.org/ for more information on launching with torch"
39 | )
40 | dist.init_process_group('nccl', init_method=f'tcp://[{host}]:{port}', world_size=world_size, rank=rank)
41 | self.set_seed(self.seed)
42 | torch.cuda.set_device(local_rank)
43 |
44 | def set_seed(self, seed: int) -> None:
45 | random.seed(seed)
46 | np.random.seed(seed)
47 | torch.manual_seed(seed)
48 |
49 | def setup_model(self, model: nn.Module) -> nn.Module:
50 | device = torch.cuda.current_device()
51 | return DDP(model, device_ids=[device])
52 |
53 | def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader:
54 | # In DDP-only mode the replay buffer on each rank is different, so no distributed sampler is needed.
55 | # sampler = DistributedSampler(replay_buffer,
56 | # num_replicas=dist.get_world_size(),
57 | # rank=dist.get_rank(),
58 | # shuffle=True,
59 | # seed=self.seed,
60 | # drop_last=True)
61 | return DataLoader(
62 | replay_buffer,
63 | batch_size=replay_buffer.sample_batch_size,
64 | # sampler=sampler,
65 | shuffle=True,
66 | drop_last=True,
67 | pin_memory=pin_memory,
68 | collate_fn=replay_buffer.collate_fn)
69 |
70 | @staticmethod
71 | def _unwrap_actor(actor: Actor) -> nn.Module:
72 | model: DDP = Strategy._unwrap_actor(actor)
73 | return model.module
74 |
75 | def save_model(self, model: nn.Module, path: str, only_rank0: bool = False) -> None:
76 | for module in model.modules():
77 | if isinstance(module, LoraLinear):
78 | module.merge_weights=True
79 | module.eval()
80 |
81 | if only_rank0 and dist.get_rank() != 0:
82 | return
83 | model = model.model.module
84 | state_dict = model.state_dict()
85 | torch.save(state_dict, path)
86 |
87 | def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None:
88 | if only_rank0 and dist.get_rank() != 0:
89 | return
90 | super().save_optimizer(optimizer, path, only_rank0)
91 |
92 | def setup_sampler(self, dataset) -> DistributedSampler:
93 | return DistributedSampler(dataset, dist.get_world_size(), dist.get_rank())
94 |
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/naive.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.optim as optim
6 | from chatgpt.replay_buffer import ReplayBuffer
7 | from torch.optim import Optimizer
8 | from torch.utils.data import DataLoader
9 |
10 | from .base import Strategy
11 |
12 |
13 | class NaiveStrategy(Strategy):
14 | """
15 | Strategy for single GPU. No parallelism is used.
16 | """
17 |
18 | def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: optim.Optimizer, **kwargs) -> None:
19 | loss.backward()
20 |
21 | def optimizer_step(self, optimizer: optim.Optimizer, **kwargs) -> None:
22 | optimizer.step()
23 |
24 | def setup_distributed(self) -> None:
25 | pass
26 |
27 | def setup_model(self, model: nn.Module) -> nn.Module:
28 | return model
29 |
30 | def setup_optimizer(self, optimizer: optim.Optimizer, model: nn.Module) -> optim.Optimizer:
31 | return optimizer
32 |
33 | def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader:
34 | return DataLoader(replay_buffer,
35 | batch_size=replay_buffer.sample_batch_size,
36 | shuffle=True,
37 | drop_last=True,
38 | pin_memory=pin_memory,
39 | collate_fn=replay_buffer.collate_fn)
40 |
41 | def save_model(self, model: nn.Module, path: str, only_rank0: bool = False) -> None:
42 | unwrapped_model = self._unwrap_model(model)
43 | torch.save(unwrapped_model.state_dict(), path)
44 |
45 | def load_model(self, model: nn.Module, path: str, map_location: Any = None, strict: bool = True) -> None:
46 | unwrapped_model = self._unwrap_model(model)
47 | state_dict = torch.load(path, map_location=map_location)
48 | unwrapped_model.load_state_dict(state_dict, strict=strict)
49 |
50 | def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None:
51 | torch.save(optimizer.state_dict(), path)
52 |
53 | def load_optimizer(self, optimizer: Optimizer, path: str, map_location: Any = None) -> None:
54 | state_dict = torch.load(path, map_location=map_location)
55 | optimizer.load_state_dict(state_dict)
56 |
--------------------------------------------------------------------------------
/code/chatgpt/trainer/strategies/sampler.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import numpy as np
4 |
5 |
6 | class DistributedSampler:
7 |
8 | def __init__(self, dataset, num_replicas: int, rank: int) -> None:
9 | self.dataset = dataset
10 | self.num_replicas = num_replicas
11 | self.rank = rank
12 |
13 | if len(self.dataset) % self.num_replicas != 0:
14 | self.num_samples = math.ceil(
15 | (len(self.dataset) - self.num_replicas) / self.num_replicas # type: ignore[arg-type]
16 | )
17 | else:
18 | self.num_samples = math.ceil(len(self.dataset) / self.num_replicas)
19 |
20 | self.total_size = self.num_samples * self.num_replicas
21 |
22 | indices = list(range(len(self.dataset)))
23 | indices = indices[:self.total_size]
24 | assert len(indices) == self.total_size
25 | # subsample
26 | indices = indices[self.rank:self.total_size:self.num_replicas]
27 | assert len(indices) == self.num_samples
28 | self.indices = indices
29 |
30 | def sample(self, batch_size: int) -> list:
31 | sampled_indices = np.random.choice(self.indices, batch_size, replace=False)
32 | return [self.dataset[idx] for idx in sampled_indices]
33 |
--------------------------------------------------------------------------------
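A quick worked example of the rank-strided split computed above: rank r keeps indices r, r+num_replicas, r+2*num_replicas, ..., and when the dataset size is not divisible the tail is dropped so every rank keeps the same number of prompts. A small sketch (not part of the repo):

```python
# 11 prompts split across 2 ranks: index 10 is dropped so both ranks stay balanced.
from chatgpt.trainer.strategies.sampler import DistributedSampler

prompts = [f'prompt {i}' for i in range(11)]
rank0 = DistributedSampler(prompts, num_replicas=2, rank=0)
rank1 = DistributedSampler(prompts, num_replicas=2, rank=1)
print(rank0.indices)  # [0, 2, 4, 6, 8]
print(rank1.indices)  # [1, 3, 5, 7, 9]
print(rank0.sample(batch_size=4))  # 4 prompts drawn from rank 0's shard without replacement
```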
/code/chatgpt/trainer/utils.py:
--------------------------------------------------------------------------------
1 | import torch.distributed as dist
2 |
3 |
4 | def is_rank_0() -> bool:
5 | return not dist.is_initialized() or dist.get_rank() == 0
6 |
--------------------------------------------------------------------------------
/code/examples/README.md:
--------------------------------------------------------------------------------
1 | # Examples
2 |
3 | ## Install requirements
4 |
5 | ```shell
6 | pip install -r requirements.txt
7 | ```
8 |
9 | ## Train the reward model (Stage 2)
10 | We use [rm-static](https://huggingface.co/datasets/Dahoas/rm-static) as the dataset to train our reward model. It is a dataset of chosen and rejected responses to the same prompt.
11 |
12 | The dataset is downloaded from Hugging Face automatically.
13 |
14 | Use the following commands to train your reward model.
15 |
16 | ```shell
17 | # Naive reward model training
18 | python train_reward_model.py --pretrain --model --strategy naive
19 | # use colossalai_zero2
20 | torchrun --standalone --nproc_per_node=2 train_reward_model.py --pretrain --model --strategy colossalai_zero2
21 | ```
22 |
23 | ## Train with dummy prompt data (Stage 3)
24 |
25 | This script supports 3 strategies:
26 |
27 | - naive
28 | - ddp
29 | - colossalai
30 |
31 | It uses randomly generated prompt data.
32 |
33 | The naive strategy only supports single-GPU training:
34 |
35 | ```shell
36 | python train_dummy.py --strategy naive
37 | # display cli help
38 | python train_dummy.py -h
39 | ```
40 |
41 | The DDP and ColossalAI strategies support multi-GPU training:
42 |
43 | ```shell
44 | # run DDP on 2 GPUs
45 | torchrun --standalone --nproc_per_node=2 train_dummy.py --strategy ddp
46 | # run ColossalAI on 2 GPUs
47 | torchrun --standalone --nproc_per_node=2 train_dummy.py --strategy colossalai_zero2
48 | ```
49 |
50 | ## Train with real prompt data (Stage 3)
51 |
52 | We use [awesome-chatgpt-prompts](https://huggingface.co/datasets/fka/awesome-chatgpt-prompts) as example dataset. It is a small dataset with hundreds of prompts.
53 |
54 | You should download `prompts.csv` first.
55 |
56 | This script also supports 3 strategies.
57 |
58 | ```shell
59 | # display cli help
60 | python train_dummy.py -h
61 | # run naive on 1 GPU
62 | python train_prompts.py prompts.csv --strategy naive
63 | # run DDP on 2 GPUs
64 | torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy ddp
65 | # run ColossalAI on 2 GPUs
66 | torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2
67 | ```
68 |
69 | ## Inference example (after Stage 3)
70 | We provide a naive inference demo after training.
71 | ```shell
72 | # inference, using pretrain path to configure model
73 | python inference.py --model_path --model --pretrain
74 | # example
75 | python inference.py --model_path ./actor_checkpoint_prompts.pt --pretrain bigscience/bloom-560m --model bloom
76 | ```
77 |
78 |
79 | #### Data
80 | - [x] [rm-static](https://huggingface.co/datasets/Dahoas/rm-static)
81 | - [x] [hh-rlhf](https://huggingface.co/datasets/Anthropic/hh-rlhf)
82 | - [ ] [openai/summarize_from_feedback](https://huggingface.co/datasets/openai/summarize_from_feedback)
83 | - [ ] [openai/webgpt_comparisons](https://huggingface.co/datasets/openai/webgpt_comparisons)
84 | - [ ] [Dahoas/instruct-synthetic-prompt-responses](https://huggingface.co/datasets/Dahoas/instruct-synthetic-prompt-responses)
85 |
86 | ## Supported Models
87 |
88 | ### GPT
89 | - [x] GPT2-S (s)
90 | - [x] GPT2-M (m)
91 | - [x] GPT2-L (l)
92 | - [ ] GPT2-XL (xl)
93 | - [x] GPT2-4B (4b)
94 | - [ ] GPT2-6B (6b)
95 | - [ ] GPT2-8B (8b)
96 | - [ ] GPT2-10B (10b)
97 | - [ ] GPT2-12B (12b)
98 | - [ ] GPT2-15B (15b)
99 | - [ ] GPT2-18B (18b)
100 | - [ ] GPT2-20B (20b)
101 | - [ ] GPT2-24B (24b)
102 | - [ ] GPT2-28B (28b)
103 | - [ ] GPT2-32B (32b)
104 | - [ ] GPT2-36B (36b)
105 | - [ ] GPT2-40B (40b)
106 | - [ ] GPT3 (175b)
107 |
108 | ### BLOOM
109 | - [x] [BLOOM-560m](https://huggingface.co/bigscience/bloom-560m)
110 | - [x] [BLOOM-1b1](https://huggingface.co/bigscience/bloom-1b1)
111 | - [x] [BLOOM-3b](https://huggingface.co/bigscience/bloom-3b)
112 | - [x] [BLOOM-7b](https://huggingface.co/bigscience/bloom-7b1)
113 | - [ ] BLOOM-175b
114 |
115 | ### OPT
116 | - [x] [OPT-125M](https://huggingface.co/facebook/opt-125m)
117 | - [x] [OPT-350M](https://huggingface.co/facebook/opt-350m)
118 | - [ ] [OPT-1.3B](https://huggingface.co/facebook/opt-1.3b)
119 | - [ ] [OPT-2.7B](https://huggingface.co/facebook/opt-2.7b)
120 | - [ ] [OPT-6.7B](https://huggingface.co/facebook/opt-6.7b)
121 | - [ ] [OPT-13B](https://huggingface.co/facebook/opt-13b)
122 | - [ ] [OPT-30B](https://huggingface.co/facebook/opt-30b)
123 |
--------------------------------------------------------------------------------
/code/examples/inference.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import torch
4 | from chatgpt.models.bloom import BLOOMActor
5 | from chatgpt.models.gpt import GPTActor
6 | from chatgpt.models.opt import OPTActor
7 | from transformers import AutoTokenizer
8 | from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
9 |
10 |
11 | def eval(args):
12 | # configure model
13 | if args.model == 'gpt2':
14 | actor = GPTActor(pretrained=args.pretrain).to(torch.cuda.current_device())
15 | elif args.model == 'bloom':
16 | actor = BLOOMActor(pretrained=args.pretrain).to(torch.cuda.current_device())
17 | elif args.model == 'opt':
18 | actor = OPTActor(pretrained=args.pretrain).to(torch.cuda.current_device())
19 | else:
20 | raise ValueError(f'Unsupported model "{args.model}"')
21 |
22 | state_dict = torch.load(args.model_path)
23 | actor.model.load_state_dict(state_dict)
24 |
25 | # configure tokenizer
26 | if args.model == 'gpt2':
27 | tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
28 | tokenizer.pad_token = tokenizer.eos_token
29 | elif args.model == 'bloom':
30 | tokenizer = AutoTokenizer.from_pretrained('bigscience/bloom-560m')
31 | tokenizer.pad_token = tokenizer.eos_token
32 | elif args.model == 'opt':
33 | tokenizer = AutoTokenizer.from_pretrained('facebook/opt-350m')
34 | else:
35 | raise ValueError(f'Unsupported model "{args.model}"')
36 |
37 | actor.eval()
38 | input = args.input
39 | input_ids = tokenizer.encode(input, return_tensors='pt').to(torch.cuda.current_device())
40 | outputs = actor.generate(input_ids,
41 | max_length=args.max_length,
42 | do_sample=True,
43 | top_k=50,
44 | top_p=0.95,
45 | num_return_sequences=1)
46 | output = tokenizer.batch_decode(outputs[0], skip_special_tokens=True)
47 | print(output)
48 |
49 |
50 | if __name__ == '__main__':
51 | parser = argparse.ArgumentParser()
52 | parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt'])
53 | # We suggest using a pretrained model from HuggingFace; use --pretrain to configure the model
54 | parser.add_argument('--pretrain', type=str, default=None)
55 | parser.add_argument('--model_path', type=str, default=None)
56 | parser.add_argument('--input', type=str, default='Question: How are you ? Answer:')
57 | parser.add_argument('--max_length', type=int, default=100)
58 | args = parser.parse_args()
59 | eval(args)
60 |
--------------------------------------------------------------------------------
/code/examples/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas>=1.4.1
2 |
--------------------------------------------------------------------------------
/code/examples/test_ci.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -xue
4 |
5 | if [ -z "$PROMPT_PATH" ]; then
6 | echo "Please set \$PROMPT_PATH to the path to prompts csv."
7 | exit 1
8 | fi
9 |
10 | BASE=$(realpath $(dirname $0))
11 |
12 | export OMP_NUM_THREADS=8
13 |
14 | # install requirements
15 | pip install -r ${BASE}/requirements.txt
16 |
17 | # train dummy
18 | python ${BASE}/train_dummy.py --strategy naive --num_episodes 1 \
19 | --max_timesteps 2 --update_timesteps 2 \
20 | --max_epochs 1 --train_batch_size 2 --lora_rank 4
21 |
22 | torchrun --standalone --nproc_per_node=2 ${BASE}/train_dummy.py \
23 | --strategy colossalai_gemini --num_episodes 1 --max_timesteps 2 \
24 | --update_timesteps 2 --max_epochs 1 --train_batch_size 2\
25 | --pretrain 'facebook/opt-350m' --model opt --lora_rank 4\
26 | --save_path ${BASE}/actor_checkpoint_dummy.pt
27 | python ${BASE}/inference.py --model_path ${BASE}/actor_checkpoint_dummy.pt --pretrain 'facebook/opt-350m' --model opt
28 |
29 | torchrun --standalone --nproc_per_node=2 ${BASE}/train_dummy.py \
30 | --strategy ddp --num_episodes 1 --max_timesteps 2 \
31 | --update_timesteps 2 --max_epochs 1 --train_batch_size 2\
32 | --pretrain 'facebook/opt-350m' --model opt --lora_rank 4\
33 | --save_path ${BASE}/actor_checkpoint_dummy.pt
34 | python ${BASE}/inference.py --model_path ${BASE}/actor_checkpoint_dummy.pt --pretrain 'facebook/opt-350m' --model opt
35 |
36 | torchrun --standalone --nproc_per_node=2 ${BASE}/train_dummy.py \
37 | --strategy colossalai_zero2 --num_episodes 1 --max_timesteps 2 \
38 | --update_timesteps 2 --max_epochs 1 --train_batch_size 2\
39 | --pretrain 'gpt2' --model gpt2 --lora_rank 4\
40 | --save_path ${BASE}/actor_checkpoint_dummy.pt
41 | python ${BASE}/inference.py --model_path ${BASE}/actor_checkpoint_dummy.pt --pretrain 'gpt2' --model gpt2
42 |
43 | rm -rf ${BASE}/actor_checkpoint_dummy.pt
44 |
45 | # train prompts
46 | python ${BASE}/train_prompts.py $PROMPT_PATH --strategy naive --num_episodes 1 \
47 | --max_timesteps 2 --update_timesteps 2 \
48 | --max_epochs 1 --train_batch_size 2 --lora_rank 4
49 |
50 | torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py $PROMPT_PATH \
51 | --strategy colossalai_zero2 --num_episodes 1 --max_timesteps 2 \
52 | --update_timesteps 2 --max_epochs 1 --train_batch_size 2\
53 | --pretrain 'facebook/opt-350m' --model opt --lora_rank 4\
54 | --save_path ${BASE}/actor_checkpoint_prompts.pt
55 | python ${BASE}/inference.py --model_path ${BASE}/actor_checkpoint_prompts.pt --pretrain 'facebook/opt-350m' --model opt
56 |
57 | torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py $PROMPT_PATH \
58 | --strategy ddp --num_episodes 1 --max_timesteps 2 \
59 | --update_timesteps 2 --max_epochs 1 --train_batch_size 2\
60 | --pretrain 'gpt2' --model gpt2 --lora_rank 4\
61 | --save_path ${BASE}/actor_checkpoint_prompts.pt
62 | python ${BASE}/inference.py --model_path ${BASE}/actor_checkpoint_prompts.pt --pretrain 'gpt2' --model gpt2
63 |
64 | torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py $PROMPT_PATH \
65 | --strategy colossalai_gemini --num_episodes 1 --max_timesteps 2 \
66 | --update_timesteps 2 --max_epochs 1 --train_batch_size 2\
67 | --pretrain 'gpt2' --model gpt2 --lora_rank 4\
68 | --save_path ${BASE}/actor_checkpoint_prompts.pt
69 | python ${BASE}/inference.py --model_path ${BASE}/actor_checkpoint_prompts.pt --pretrain 'gpt2' --model gpt2
70 |
71 | rm -rf ${BASE}/actor_checkpoint_prompts.pt
72 |
--------------------------------------------------------------------------------
/code/examples/train_dummy.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from copy import deepcopy
3 |
4 | import torch
5 | from chatgpt.models.base import RewardModel
6 | from chatgpt.models.bloom import BLOOMActor, BLOOMCritic
7 | from chatgpt.models.gpt import GPTActor, GPTCritic
8 | from chatgpt.models.opt import OPTActor, OPTCritic
9 | from chatgpt.trainer import PPOTrainer
10 | from chatgpt.trainer.callbacks import SaveCheckpoint
11 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
12 | from torch.optim import Adam
13 | from transformers import AutoTokenizer, BloomTokenizerFast
14 | from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
15 |
16 | from colossalai.nn.optimizer import HybridAdam
17 |
18 |
19 | def preprocess_batch(samples):
20 | input_ids = torch.stack(samples)
21 | attention_mask = torch.ones_like(input_ids, dtype=torch.long)
22 | return {'input_ids': input_ids, 'attention_mask': attention_mask}
23 |
24 |
25 | def main(args):
26 | # configure strategy
27 | if args.strategy == 'naive':
28 | strategy = NaiveStrategy()
29 | elif args.strategy == 'ddp':
30 | strategy = DDPStrategy()
31 | elif args.strategy == 'colossalai_gemini':
32 | strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5)
33 | elif args.strategy == 'colossalai_zero2':
34 | strategy = ColossalAIStrategy(stage=2, placement_policy='cuda')
35 | else:
36 | raise ValueError(f'Unsupported strategy "{args.strategy}"')
37 |
38 | # configure model
39 | with strategy.model_init_context():
40 | if args.model == 'gpt2':
41 | actor = GPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
42 | critic = GPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
43 | elif args.model == 'bloom':
44 | actor = BLOOMActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
45 | critic = BLOOMCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
46 | elif args.model == 'opt':
47 | actor = OPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
48 | critic = OPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
49 | else:
50 | raise ValueError(f'Unsupported model "{args.model}"')
51 |
52 | initial_model = deepcopy(actor).to(torch.cuda.current_device())
53 | reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).to(torch.cuda.current_device())
54 |
55 | # configure optimizer
56 | if args.strategy.startswith('colossalai'):
57 | actor_optim = HybridAdam(actor.parameters(), lr=5e-6)
58 | critic_optim = HybridAdam(critic.parameters(), lr=5e-6)
59 | else:
60 | actor_optim = Adam(actor.parameters(), lr=5e-6)
61 | critic_optim = Adam(critic.parameters(), lr=5e-6)
62 |
63 | # configure tokenizer
64 | if args.model == 'gpt2':
65 | tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
66 | tokenizer.pad_token = tokenizer.eos_token
67 | elif args.model == 'bloom':
68 | tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
69 | tokenizer.pad_token = tokenizer.eos_token
70 | elif args.model == 'opt':
71 | tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
72 | else:
73 | raise ValueError(f'Unsupported model "{args.model}"')
74 |
75 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare(
76 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model)
77 |
78 | callbacks = []
79 | if args.save_ckpt_path:
80 | ckpt_callback = SaveCheckpoint(
81 | args.save_ckpt_path,
82 | args.save_ckpt_interval,
83 | strategy,
84 | actor,
85 | critic,
86 | actor_optim,
87 | critic_optim,
88 | )
89 | callbacks.append(ckpt_callback)
90 |
91 | # configure trainer
92 |
93 | trainer = PPOTrainer(strategy,
94 | actor,
95 | critic,
96 | reward_model,
97 | initial_model,
98 | actor_optim,
99 | critic_optim,
100 | max_epochs=args.max_epochs,
101 | train_batch_size=args.train_batch_size,
102 | tokenizer=preprocess_batch,
103 | max_length=128,
104 | do_sample=True,
105 | temperature=1.0,
106 | top_k=50,
107 | pad_token_id=tokenizer.pad_token_id,
108 | eos_token_id=tokenizer.eos_token_id,
109 | callbacks=callbacks)
110 |
111 | random_prompts = torch.randint(tokenizer.vocab_size, (1000, 64), device=torch.cuda.current_device())
112 | trainer.fit(random_prompts,
113 | num_episodes=args.num_episodes,
114 | max_timesteps=args.max_timesteps,
115 | update_timesteps=args.update_timesteps)
116 |
117 | # save model checkpoint after fitting
118 | strategy.save_model(actor, args.save_path, only_rank0=True)
119 | # save optimizer checkpoint on all ranks
120 | if args.need_optim_ckpt:
121 | strategy.save_optimizer(actor_optim,
122 | 'actor_optim_checkpoint_dummy_%d.pt' % (torch.cuda.current_device()),
123 | only_rank0=False)
124 |
125 |
126 | if __name__ == '__main__':
127 | parser = argparse.ArgumentParser()
128 | parser.add_argument('--strategy',
129 | choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
130 | default='naive')
131 | parser.add_argument('--model', type=str, default='gpt2', choices=['gpt2', 'bloom', 'opt'])
132 | parser.add_argument('--pretrain', type=str, default=None)
133 | parser.add_argument('--save_path', type=str, default='actor_checkpoint_dummy.pt')
134 | parser.add_argument('--need_optim_ckpt', type=bool, default=False)
135 | parser.add_argument('--num_episodes', type=int, default=50)
136 | parser.add_argument('--max_timesteps', type=int, default=10)
137 | parser.add_argument('--update_timesteps', type=int, default=10)
138 | parser.add_argument('--max_epochs', type=int, default=5)
139 | parser.add_argument('--train_batch_size', type=int, default=8)
140 | parser.add_argument('--experience_batch_size', type=int, default=8)
141 | parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank")
142 | parser.add_argument('--save_ckpt_path',
143 | type=str,
144 | default=None,
145 | help="path to save checkpoint, None means not to save")
146 | parser.add_argument('--save_ckpt_interval', type=int, default=1, help="the interval of episode to save checkpoint")
147 | args = parser.parse_args()
148 | main(args)
149 |
--------------------------------------------------------------------------------
/code/examples/train_dummy.sh:
--------------------------------------------------------------------------------
1 | set_n_least_used_CUDA_VISIBLE_DEVICES() {
2 | local n=${1:-"9999"}
3 | echo "GPU Memory Usage:"
4 | local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \
5 | | tail -n +2 \
6 | | nl -v 0 \
7 | | tee /dev/tty \
8 | | sort -g -k 2 \
9 | | awk '{print $1}' \
10 | | head -n $n)
11 | export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g')
12 | echo "Now CUDA_VISIBLE_DEVICES is set to:"
13 | echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
14 | }
15 |
16 | set_n_least_used_CUDA_VISIBLE_DEVICES 2
17 |
18 | torchrun --standalone --nproc_per_node=2 train_dummy.py --strategy colossalai_zero2
19 |
--------------------------------------------------------------------------------
/code/examples/train_prompts.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from copy import deepcopy
3 |
4 | import pandas as pd
5 | import torch
6 | from chatgpt.models.base import RewardModel
7 | from chatgpt.models.bloom import BLOOMActor, BLOOMCritic
8 | from chatgpt.models.gpt import GPTActor, GPTCritic
9 | from chatgpt.models.opt import OPTActor, OPTCritic
10 | from chatgpt.trainer import PPOTrainer
11 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
12 | from torch.optim import Adam
13 | from transformers import AutoTokenizer, BloomTokenizerFast
14 | from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
15 |
16 | from colossalai.nn.optimizer import HybridAdam
17 |
18 |
19 | def main(args):
20 | # configure strategy
21 | if args.strategy == 'naive':
22 | strategy = NaiveStrategy()
23 | elif args.strategy == 'ddp':
24 | strategy = DDPStrategy()
25 | elif args.strategy == 'colossalai_gemini':
26 | strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5)
27 | elif args.strategy == 'colossalai_zero2':
28 | strategy = ColossalAIStrategy(stage=2, placement_policy='cuda')
29 | else:
30 | raise ValueError(f'Unsupported strategy "{args.strategy}"')
31 |
32 | # configure model
33 | with strategy.model_init_context():
34 | if args.model == 'gpt2':
35 | actor = GPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
36 | critic = GPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
37 | elif args.model == 'bloom':
38 | actor = BLOOMActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
39 | critic = BLOOMCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
40 | elif args.model == 'opt':
41 | actor = OPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
42 | critic = OPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
43 | else:
44 | raise ValueError(f'Unsupported model "{args.model}"')
45 |
46 | initial_model = deepcopy(actor)
47 | reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).to(torch.cuda.current_device())
48 |
49 | # configure optimizer
50 | if args.strategy.startswith('colossalai'):
51 | actor_optim = HybridAdam(actor.parameters(), lr=5e-6)
52 | critic_optim = HybridAdam(critic.parameters(), lr=5e-6)
53 | else:
54 | actor_optim = Adam(actor.parameters(), lr=5e-6)
55 | critic_optim = Adam(critic.parameters(), lr=5e-6)
56 |
57 | # configure tokenizer
58 | if args.model == 'gpt2':
59 | tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
60 | tokenizer.pad_token = tokenizer.eos_token
61 | elif args.model == 'bloom':
62 | tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
63 | tokenizer.pad_token = tokenizer.eos_token
64 | elif args.model == 'opt':
65 | tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
66 | else:
67 | raise ValueError(f'Unsupported model "{args.model}"')
68 |
69 | dataset = pd.read_csv(args.prompt_path)['prompt']
70 |
71 | def tokenize_fn(texts):
72 |         # MUST pad to max length so that inputs on all ranks have the same length.
73 |         # Inputs of different lengths can hang Gemini, since ranks would run different numbers of generation steps.
74 | batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True)
75 | return {k: v.cuda() for k, v in batch.items()}
76 |
77 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare(
78 | (actor, actor_optim), (critic, critic_optim), reward_model, initial_model)
79 |
80 | # configure trainer
81 | trainer = PPOTrainer(
82 | strategy,
83 | actor,
84 | critic,
85 | reward_model,
86 | initial_model,
87 | actor_optim,
88 | critic_optim,
89 | max_epochs=args.max_epochs,
90 | train_batch_size=args.train_batch_size,
91 | experience_batch_size=args.experience_batch_size,
92 | tokenizer=tokenize_fn,
93 | max_length=128,
94 | do_sample=True,
95 | temperature=1.0,
96 | top_k=50,
97 | pad_token_id=tokenizer.pad_token_id,
98 | eos_token_id=tokenizer.eos_token_id,
99 | )
100 |
101 | trainer.fit(dataset,
102 | num_episodes=args.num_episodes,
103 | max_timesteps=args.max_timesteps,
104 | update_timesteps=args.update_timesteps)
105 | # save model checkpoint after fitting
106 | strategy.save_model(actor, args.save_path, only_rank0=True)
107 | # save optimizer checkpoint on all ranks
108 | if args.need_optim_ckpt:
109 | strategy.save_optimizer(actor_optim,
110 | 'actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()),
111 | only_rank0=False)
112 |
113 |
114 | if __name__ == '__main__':
115 | parser = argparse.ArgumentParser()
116 | parser.add_argument('prompt_path')
117 | parser.add_argument('--strategy',
118 | choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
119 | default='naive')
120 | parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt'])
121 | parser.add_argument('--pretrain', type=str, default=None)
122 | parser.add_argument('--save_path', type=str, default='actor_checkpoint_prompts.pt')
123 | parser.add_argument('--need_optim_ckpt', type=bool, default=False)
124 | parser.add_argument('--num_episodes', type=int, default=10)
125 | parser.add_argument('--max_timesteps', type=int, default=10)
126 | parser.add_argument('--update_timesteps', type=int, default=10)
127 | parser.add_argument('--max_epochs', type=int, default=5)
128 | parser.add_argument('--train_batch_size', type=int, default=8)
129 | parser.add_argument('--experience_batch_size', type=int, default=8)
130 | parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank")
131 | args = parser.parse_args()
132 | main(args)
133 |
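
Note: train_prompts.py reads its positional prompt_path argument with pd.read_csv(args.prompt_path)['prompt'], so the CSV must contain a 'prompt' column. Below is a minimal sketch of building such a file from one of the stage-3 prompt sets in this repo; the helper script name and output path are illustrative, not part of the repo.

# make_prompts_csv.py -- illustrative helper, not part of this repo
import jsonlines
import pandas as pd

# Assumption: each line of the jsonl has a "prompt" field, as in
# data/stage3. PPO/ive_test_ppo.jsonl shown later in this listing.
prompts = []
with jsonlines.open('data/stage3. PPO/ive_test_ppo.jsonl') as reader:
    for example in reader:
        prompts.append(example['prompt'])

# train_prompts.py only looks at the 'prompt' column.
pd.DataFrame({'prompt': prompts}).to_csv('prompts.csv', index=False)

The resulting prompts.csv matches what train_prompts.sh passes to the script.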
--------------------------------------------------------------------------------
/code/examples/train_prompts.sh:
--------------------------------------------------------------------------------
1 | set_n_least_used_CUDA_VISIBLE_DEVICES() {
2 | local n=${1:-"9999"}
3 | echo "GPU Memory Usage:"
4 | local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \
5 | | tail -n +2 \
6 | | nl -v 0 \
7 | | tee /dev/tty \
8 | | sort -g -k 2 \
9 | | awk '{print $1}' \
10 | | head -n $n)
11 | export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g')
12 | echo "Now CUDA_VISIBLE_DEVICES is set to:"
13 | echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
14 | }
15 |
16 | set_n_least_used_CUDA_VISIBLE_DEVICES 2
17 |
18 | torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2
19 |
--------------------------------------------------------------------------------
/code/examples/train_reward_model.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import loralib as lora
4 | import torch
5 | from chatgpt.dataset import RewardDataset
6 | from chatgpt.models.base import RewardModel
7 | from chatgpt.models.bloom import BLOOMRM
8 | from chatgpt.models.gpt import GPTRM
9 | from chatgpt.models.opt import OPTRM
10 | from chatgpt.trainer import RewardModelTrainer
11 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
12 | from datasets import load_dataset
13 | from torch.optim import Adam
14 | from transformers import AutoTokenizer, BloomTokenizerFast
15 | from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
16 |
17 | from colossalai.nn.optimizer import HybridAdam
18 |
19 |
20 | def train(args):
21 | # configure strategy
22 | if args.strategy == 'naive':
23 | strategy = NaiveStrategy()
24 | elif args.strategy == 'ddp':
25 | strategy = DDPStrategy()
26 | elif args.strategy == 'colossalai_gemini':
27 | strategy = ColossalAIStrategy(stage=3, placement_policy='cuda')
28 | elif args.strategy == 'colossalai_zero2':
29 | strategy = ColossalAIStrategy(stage=2, placement_policy='cuda')
30 | else:
31 | raise ValueError(f'Unsupported strategy "{args.strategy}"')
32 |
33 | # configure model
34 | with strategy.model_init_context():
35 | if args.model == 'bloom':
36 | model = BLOOMRM(pretrained=args.pretrain, lora_rank=args.lora_rank).cuda()
37 | elif args.model == 'opt':
38 | model = OPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).cuda()
39 | elif args.model == 'gpt2':
40 | model = GPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).cuda()
41 | else:
42 | raise ValueError(f'Unsupported model "{args.model}"')
43 |
44 | # configure tokenizer
45 | if args.model == 'gpt2':
46 | tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
47 | tokenizer.pad_token = tokenizer.eos_token
48 | elif args.model == 'bloom':
49 | tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
50 | tokenizer.pad_token = tokenizer.eos_token
51 | elif args.model == 'opt':
52 | tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
53 | else:
54 | raise ValueError(f'Unsupported model "{args.model}"')
55 | tokenizer.pad_token = tokenizer.eos_token
56 |
57 | max_len = 512
58 |
59 | # configure optimizer
60 | if args.strategy.startswith('colossalai'):
61 | optim = HybridAdam(model.parameters(), lr=5e-5)
62 | else:
63 | optim = Adam(model.parameters(), lr=5e-5)
64 |
65 | # prepare for data and dataset
66 | data = load_dataset(args.dataset)
67 | train_data = data["train"]
68 | eval_data = data['test']
69 | train_dataset = RewardDataset(train_data, tokenizer, max_len)
70 | eval_dataset = RewardDataset(eval_data, tokenizer, max_len)
71 |
72 | trainer = RewardModelTrainer(model=model,
73 | strategy=strategy,
74 | optim=optim,
75 | train_dataset=train_dataset,
76 | eval_dataset=eval_dataset,
77 | batch_size=args.batch_size,
78 | max_epochs=args.max_epochs)
79 |
80 | trainer.fit(use_lora=args.lora_rank)
81 |
82 | # save model checkpoint after fitting on only rank0
83 |     strategy.save_model(model, args.save_path, only_rank0=True)
84 | # save optimizer checkpoint on all ranks
85 | strategy.save_optimizer(optim, 'rm_optim_checkpoint_%d.pt' % (torch.cuda.current_device()), only_rank0=False)
86 |
87 |
88 | if __name__ == '__main__':
89 | parser = argparse.ArgumentParser()
90 | parser.add_argument('--strategy',
91 | choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
92 | default='naive')
93 | parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt'], default='bloom')
94 | parser.add_argument('--pretrain', type=str, default=None)
95 | parser.add_argument('--dataset', type=str, default='Dahoas/rm-static')
96 |     parser.add_argument('--save_path', type=str, default='rm_checkpoint.pt')
97 | parser.add_argument('--max_epochs', type=int, default=1)
98 | parser.add_argument('--batch_size', type=int, default=4)
99 | parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank")
100 | args = parser.parse_args()
101 | train(args)
102 |
--------------------------------------------------------------------------------
/code/examples/train_rm.sh:
--------------------------------------------------------------------------------
1 | set_n_least_used_CUDA_VISIBLE_DEVICES() {
2 | local n=${1:-"9999"}
3 | echo "GPU Memory Usage:"
4 | local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \
5 | | tail -n +2 \
6 | | nl -v 0 \
7 | | tee /dev/tty \
8 | | sort -g -k 2 \
9 | | awk '{print $1}' \
10 | | head -n $n)
11 | export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g')
12 | echo "Now CUDA_VISIBLE_DEVICES is set to:"
13 | echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
14 | }
15 |
16 | set_n_least_used_CUDA_VISIBLE_DEVICES 2
17 |
18 | # torchrun --standalone --nproc_per_node=2 train_reward_model.py --pretrain 'bigscience/bloomz-560m' --model 'bloom' --strategy colossalai_zero2
19 | torchrun --standalone --nproc_per_node=2 train_reward_model.py --model 'gpt2' --strategy colossalai_zero2
20 | # torchrun --standalone --nproc_per_node=2 train_reward_model.py --pretrain "facebook/opt-350m" --model 'opt' --strategy colossalai_zero2
21 |
--------------------------------------------------------------------------------
/code/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | markers =
3 | cpu: tests which can run on CPU
4 |     gpu: tests which require a single GPU
5 | dist: tests which are run in a multi-GPU or multi-machine environment
6 | experiment: tests for experimental features
7 |
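
These markers let you run a subset of the test suite. Below is a minimal sketch of selecting marked tests programmatically, equivalent to running `pytest -m cpu` on the command line; the script name is illustrative.

# run_marked_tests.py -- illustrative only
import sys
import pytest

# Run only tests tagged @pytest.mark.cpu; use '-m "not dist"' instead to
# skip the multi-GPU tests declared in pytest.ini above.
sys.exit(pytest.main(['-m', 'cpu', 'tests']))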
--------------------------------------------------------------------------------
/code/requirements-test.txt:
--------------------------------------------------------------------------------
1 | pytest
2 |
--------------------------------------------------------------------------------
/code/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers>=4.20.1
2 | tqdm
3 | datasets
4 | loralib
5 | colossalai>=0.2.4
6 | torch
7 | langchain
8 |
--------------------------------------------------------------------------------
/code/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 |
3 |
4 | def fetch_requirements(path):
5 | with open(path, 'r') as fd:
6 | return [r.strip() for r in fd.readlines()]
7 |
8 |
9 | def fetch_readme():
10 | with open('README.md', encoding='utf-8') as f:
11 | return f.read()
12 |
13 |
14 | def fetch_version():
15 | with open('version.txt', 'r') as f:
16 | return f.read().strip()
17 |
18 |
19 | setup(
20 | name='chatgpt',
21 | version=fetch_version(),
22 | packages=find_packages(exclude=(
23 | 'tests',
24 | 'benchmarks',
25 | '*.egg-info',
26 | )),
27 |     description='An RLHF implementation (ChatGPT) powered by ColossalAI',
28 | long_description=fetch_readme(),
29 | long_description_content_type='text/markdown',
30 | license='Apache Software License 2.0',
31 | url='https://github.com/hpcaitech/ChatGPT',
32 | install_requires=fetch_requirements('requirements.txt'),
33 | python_requires='>=3.6',
34 | classifiers=[
35 | 'Programming Language :: Python :: 3',
36 | 'License :: OSI Approved :: Apache Software License',
37 | 'Environment :: GPU :: NVIDIA CUDA',
38 | 'Topic :: Scientific/Engineering :: Artificial Intelligence',
39 | 'Topic :: System :: Distributed Computing',
40 | ],
41 | )
42 |
--------------------------------------------------------------------------------
/code/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oglee815/mygpt-lecture/ed6a506cd9605f10b5fec52e840f16f3cb46ab98/code/tests/__init__.py
--------------------------------------------------------------------------------
/code/tests/test_checkpoint.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tempfile
3 | from contextlib import nullcontext
4 | from functools import partial
5 |
6 | import pytest
7 | import torch
8 | import torch.distributed as dist
9 | import torch.multiprocessing as mp
10 | from chatgpt.models.gpt import GPTActor
11 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy
12 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config
13 |
14 | from colossalai.nn.optimizer import HybridAdam
15 | from colossalai.testing import rerun_if_address_is_in_use
16 | from colossalai.utils import free_port
17 |
18 | GPT_CONFIG = GPT2Config(n_embd=128, n_layer=4, n_head=4)
19 |
20 |
21 | def get_data(batch_size: int, seq_len: int = 10) -> dict:
22 | input_ids = torch.randint(0, 50257, (batch_size, seq_len), device='cuda')
23 | attention_mask = torch.ones_like(input_ids)
24 | return dict(input_ids=input_ids, attention_mask=attention_mask)
25 |
26 |
27 | def run_test_checkpoint(strategy):
28 | BATCH_SIZE = 2
29 |
30 | if strategy == 'ddp':
31 | strategy = DDPStrategy()
32 | elif strategy == 'colossalai_gemini':
33 | strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5)
34 | elif strategy == 'colossalai_zero2':
35 | strategy = ColossalAIStrategy(stage=2, placement_policy='cuda')
36 | else:
37 | raise ValueError(f'Unsupported strategy "{strategy}"')
38 |
39 | with strategy.model_init_context():
40 | actor = GPTActor(config=GPT_CONFIG).cuda()
41 |
42 | actor_optim = HybridAdam(actor.parameters())
43 |
44 | actor, actor_optim = strategy.prepare((actor, actor_optim))
45 |
46 | def run_step():
47 | data = get_data(BATCH_SIZE)
48 | action_mask = torch.ones_like(data['attention_mask'], dtype=torch.bool)
49 | action_log_probs = actor(data['input_ids'], action_mask.size(1), data['attention_mask'])
50 | loss = action_log_probs.sum()
51 | strategy.backward(loss, actor, actor_optim)
52 | strategy.optimizer_step(actor_optim)
53 |
54 | run_step()
55 |
56 | ctx = tempfile.TemporaryDirectory() if dist.get_rank() == 0 else nullcontext()
57 |
58 | with ctx as dirname:
59 | rank0_dirname = [dirname]
60 | dist.broadcast_object_list(rank0_dirname)
61 | rank0_dirname = rank0_dirname[0]
62 |
63 | model_path = os.path.join(rank0_dirname, 'model.pt')
64 | optim_path = os.path.join(rank0_dirname, f'optim-r{dist.get_rank()}.pt')
65 |
66 | strategy.save_model(actor, model_path, only_rank0=True)
67 | strategy.save_optimizer(actor_optim, optim_path, only_rank0=False)
68 |
69 | dist.barrier()
70 |
71 | strategy.load_model(actor, model_path, strict=False)
72 | strategy.load_optimizer(actor_optim, optim_path)
73 |
74 | dist.barrier()
75 |
76 | run_step()
77 |
78 |
79 | def run_dist(rank, world_size, port, strategy):
80 | os.environ['RANK'] = str(rank)
81 | os.environ['LOCAL_RANK'] = str(rank)
82 | os.environ['WORLD_SIZE'] = str(world_size)
83 | os.environ['MASTER_ADDR'] = 'localhost'
84 | os.environ['MASTER_PORT'] = str(port)
85 | run_test_checkpoint(strategy)
86 |
87 |
88 | @pytest.mark.dist
89 | @pytest.mark.parametrize('world_size', [2])
90 | @pytest.mark.parametrize('strategy', ['ddp', 'colossalai_zero2', 'colossalai_gemini'])
91 | @rerun_if_address_is_in_use()
92 | def test_checkpoint(world_size, strategy):
93 | run_func = partial(run_dist, world_size=world_size, port=free_port(), strategy=strategy)
94 | mp.spawn(run_func, nprocs=world_size)
95 |
96 |
97 | if __name__ == '__main__':
98 | test_checkpoint(2, 'colossalai_zero2')
99 |
--------------------------------------------------------------------------------
/code/tests/test_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | from copy import deepcopy
3 | from functools import partial
4 |
5 | import pytest
6 | import torch
7 | import torch.distributed as dist
8 | import torch.multiprocessing as mp
9 | from chatgpt.experience_maker import NaiveExperienceMaker
10 | from chatgpt.models.base import RewardModel
11 | from chatgpt.models.gpt import GPTActor, GPTCritic
12 | from chatgpt.replay_buffer import NaiveReplayBuffer
13 | from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy
14 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config
15 |
16 | from colossalai.testing import rerun_if_address_is_in_use
17 | from colossalai.utils import free_port
18 |
19 | GPT_CONFIG = GPT2Config(n_embd=128, n_layer=4, n_head=4)
20 |
21 |
22 | def get_data(batch_size: int, seq_len: int = 10) -> dict:
23 | input_ids = torch.randint(0, 50257, (batch_size, seq_len), device='cuda')
24 | attention_mask = torch.ones_like(input_ids)
25 | return dict(input_ids=input_ids, attention_mask=attention_mask)
26 |
27 |
28 | def gather_and_equal(tensor: torch.Tensor) -> bool:
29 | world_size = dist.get_world_size()
30 | outputs = [torch.empty_like(tensor) for _ in range(world_size)]
31 | dist.all_gather(outputs, tensor.contiguous())
32 | for t in outputs[1:]:
33 | if not torch.equal(outputs[0], t):
34 | return False
35 | return True
36 |
37 |
38 | def run_test_data(strategy):
39 |     EXPERIENCE_BATCH_SIZE = 4
40 | SAMPLE_BATCH_SIZE = 2
41 |
42 | if strategy == 'ddp':
43 | strategy = DDPStrategy()
44 | elif strategy == 'colossalai':
45 | strategy = ColossalAIStrategy(placement_policy='cuda')
46 | else:
47 | raise ValueError(f'Unsupported strategy "{strategy}"')
48 |
49 | actor = GPTActor(config=GPT_CONFIG).cuda()
50 | critic = GPTCritic(config=GPT_CONFIG).cuda()
51 |
52 | initial_model = deepcopy(actor)
53 | reward_model = RewardModel(deepcopy(critic.model)).cuda()
54 |
55 | experience_maker = NaiveExperienceMaker(actor, critic, reward_model, initial_model)
56 | replay_buffer = NaiveReplayBuffer(SAMPLE_BATCH_SIZE, cpu_offload=False)
57 |
58 | # experience of all ranks should be the same
59 | for _ in range(2):
60 |         data = get_data(EXPERIENCE_BATCH_SIZE)
61 | assert gather_and_equal(data['input_ids'])
62 | assert gather_and_equal(data['attention_mask'])
63 | experience = experience_maker.make_experience(**data,
64 | do_sample=True,
65 | max_length=16,
66 | eos_token_id=50256,
67 | pad_token_id=50256)
68 | assert gather_and_equal(experience.sequences)
69 | assert gather_and_equal(experience.action_log_probs)
70 | assert gather_and_equal(experience.values)
71 | assert gather_and_equal(experience.reward)
72 | assert gather_and_equal(experience.advantages)
73 | assert gather_and_equal(experience.action_mask)
74 | assert gather_and_equal(experience.attention_mask)
75 | replay_buffer.append(experience)
76 |
77 | # replay buffer's data should be the same
78 | buffer_size = torch.tensor([len(replay_buffer)], device='cuda')
79 | assert gather_and_equal(buffer_size)
80 | for item in replay_buffer.items:
81 | assert gather_and_equal(item.sequences)
82 | assert gather_and_equal(item.action_log_probs)
83 | assert gather_and_equal(item.values)
84 | assert gather_and_equal(item.reward)
85 | assert gather_and_equal(item.advantages)
86 | assert gather_and_equal(item.action_mask)
87 | assert gather_and_equal(item.attention_mask)
88 |
89 | # dataloader of each rank should have the same size and different batch
90 | dataloader = strategy.setup_dataloader(replay_buffer)
91 | dataloader_size = torch.tensor([len(dataloader)], device='cuda')
92 | assert gather_and_equal(dataloader_size)
93 | for experience in dataloader:
94 | assert not gather_and_equal(experience.sequences)
95 | assert not gather_and_equal(experience.action_log_probs)
96 | assert not gather_and_equal(experience.values)
97 | assert not gather_and_equal(experience.reward)
98 | assert not gather_and_equal(experience.advantages)
99 | # action mask and attention mask may be same
100 |
101 |
102 | def run_dist(rank, world_size, port, strategy):
103 | os.environ['RANK'] = str(rank)
104 | os.environ['LOCAL_RANK'] = str(rank)
105 | os.environ['WORLD_SIZE'] = str(world_size)
106 | os.environ['MASTER_ADDR'] = 'localhost'
107 | os.environ['MASTER_PORT'] = str(port)
108 | run_test_data(strategy)
109 |
110 |
111 | @pytest.mark.skip
112 | @pytest.mark.dist
113 | @pytest.mark.parametrize('world_size', [2])
114 | @pytest.mark.parametrize('strategy', ['ddp', 'colossalai'])
115 | @rerun_if_address_is_in_use()
116 | def test_data(world_size, strategy):
117 | run_func = partial(run_dist, world_size=world_size, port=free_port(), strategy=strategy)
118 | mp.spawn(run_func, nprocs=world_size)
119 |
120 |
121 | if __name__ == '__main__':
122 | test_data(2, 'colossalai')
123 |
--------------------------------------------------------------------------------
/code/utils.py:
--------------------------------------------------------------------------------
1 | # import
2 | # import os
3 | # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
4 | import torch
5 | import torch.nn as nn
6 | from torch.utils.data import Dataset
7 | from datasets import load_dataset
8 | import transformers
9 | from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
10 | from transformers import Trainer, TrainingArguments, AutoModelWithLMHead
11 | from copy import deepcopy
12 | from torch.optim import Adam
13 | from transformers import AutoTokenizer, BloomTokenizerFast
14 | from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
15 | import pandas as pd
16 | import argparse
17 | import copy
18 | import logging
19 | import json
20 | from dataclasses import dataclass, field
21 |
22 | PROMPT= "### system:사용자의 질문에 맞는 적절한 응답을 생성하세요.\n### 사용자:{instruction}\n### 응답:"
23 | # data config
24 | IGNORE_INDEX = -100
25 |
26 | def safe_save_model_for_hf_trainer(trainer: transformers.Trainer, output_dir: str):
27 | """Collects the state dict and dump to disk."""
28 | state_dict = trainer.model.state_dict()
29 | if trainer.args.should_save:
30 | cpu_state_dict = {key: value.cpu() for key, value in list(state_dict.items())}
31 | del state_dict
32 | trainer._save(output_dir, state_dict=cpu_state_dict) # noqa
33 |
34 | ## prepare data
35 | from typing import Optional, Dict, Sequence
36 | import jsonlines
37 |
38 | class SFT_dataset(Dataset):
39 | '''SFT dataset by wygo'''
40 | def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, verbose=True):
41 | super(SFT_dataset, self).__init__()
42 | logging.warning("Loading data...")
43 |
44 | # with open(data_path, "r", encoding='utf-8-sig') as json_file:
45 | # list_data_dict = json.load(json_file)
46 | # if verbose:
47 | # print('## data check ##')
48 | sources = []
49 | targets = []
50 | with jsonlines.open(data_path) as f:
51 | for example in f.iter():
52 | tmp = PROMPT.format_map(example)
53 | sources.append(tmp)
54 | targets.append(f"{example['output']}{tokenizer.eos_token}")
55 |
56 | if verbose:
57 | idx = 0
58 | print((sources[idx]))
59 | print((targets[idx]))
60 | print("Tokenizing inputs... This may take some time...")
61 |
62 | ############################################################
63 | examples = [s + t for s, t in zip(sources, targets)]
64 |
65 | # source data tokenized
66 |         sources_tokenized = self._tokenize_fn(sources, tokenizer) # sources only
67 | examples_tokenized = self._tokenize_fn(examples, tokenizer) # source + target
68 |
69 |         ## The input is the source and the output is source+target, but only the target part is trained on
70 | input_ids = examples_tokenized["input_ids"]
71 | labels = copy.deepcopy(input_ids)
72 | for label, source_len in zip(labels, sources_tokenized["input_ids_lens"]):
73 |             label[:source_len] = IGNORE_INDEX # fill the source part with -100
74 |
75 | data_dict = dict(input_ids=input_ids, labels=labels)
76 |
77 | self.input_ids = data_dict["input_ids"]
78 | self.labels = data_dict["labels"]
79 | logging.warning("Loading data done!!: %d"%(len(self.labels)))
80 |
81 | def _tokenize_fn(self, strings: Sequence[str], tokenizer: transformers.PreTrainedTokenizer) -> Dict:
82 | """Tokenize a list of strings."""
83 | tokenized_list = [
84 | tokenizer(
85 | text,
86 | return_tensors="pt",
87 | padding="longest",
88 | max_length=tokenizer.model_max_length,
89 | truncation=True,
90 | )
91 | for text in strings
92 | ]
93 | input_ids = labels = [tokenized.input_ids[0] for tokenized in tokenized_list]
94 | input_ids_lens = labels_lens = [
95 | tokenized.input_ids.ne(tokenizer.pad_token_id).sum().item() for tokenized in tokenized_list
96 | ]
97 |
98 | return dict(
99 | input_ids=input_ids,
100 | labels=labels,
101 | input_ids_lens=input_ids_lens,
102 | labels_lens=labels_lens,
103 | )
104 |
105 |
106 | def __len__(self):
107 | return len(self.input_ids)
108 |
109 |
110 | def __getitem__(self, i) -> Dict[str, torch.Tensor]:
111 | return dict(input_ids=self.input_ids[i], labels=self.labels[i])
112 |
113 |
114 | @dataclass
115 | class DataCollatorForSupervisedDataset(object):
116 | """Collate examples for supervised fine-tuning."""
117 |
118 | tokenizer: transformers.PreTrainedTokenizer
119 |
120 | def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
121 | input_ids, labels = tuple([instance[key] for instance in instances] for key in ("input_ids", "labels"))
122 | input_ids = torch.nn.utils.rnn.pad_sequence(
123 | input_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id
124 | )
125 | labels = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX)
126 | return dict(
127 | input_ids=input_ids,
128 | labels=labels,
129 | attention_mask=input_ids.ne(self.tokenizer.pad_token_id),
130 | )
131 |
132 | # check
133 | # print('input : %s'%train_dataset.input_ids[0])
134 | # print('output: %s'%train_dataset.labels[0])
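
Below is a minimal sketch of wiring SFT_dataset and DataCollatorForSupervisedDataset into a Hugging Face Trainer for the supervised fine-tuning stage. The checkpoint name, output directory, and hyperparameters are assumptions for illustration, not values taken from this repo, and the script is assumed to run from the code/ directory so that utils.py is importable.

# sft_sketch.py -- illustrative only
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from utils import SFT_dataset, DataCollatorForSupervisedDataset, safe_save_model_for_hf_trainer

model_name = 'gpt2'    # assumption: any causal LM checkpoint with a matching tokenizer
data_path = '../data/stage1. domain_instruction_tuning/ive_instruction_test.jsonl'

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token    # _tokenize_fn pads, so a pad token is required
model = AutoModelForCausalLM.from_pretrained(model_name)

train_dataset = SFT_dataset(data_path=data_path, tokenizer=tokenizer)
collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)

args = TrainingArguments(output_dir='./sft_out', num_train_epochs=1,
                         per_device_train_batch_size=2)
trainer = Trainer(model=model, args=args, train_dataset=train_dataset, data_collator=collator)
trainer.train()
safe_save_model_for_hf_trainer(trainer, './sft_out')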
--------------------------------------------------------------------------------
/code/version.txt:
--------------------------------------------------------------------------------
1 | 0.1.0
2 |
--------------------------------------------------------------------------------
/data/stage1. domain_adaptive_pretraining/템플릿.csv:
--------------------------------------------------------------------------------
1 | 주제,내용,길이
2 | BTS의 개요,2013년 6월 13일에 데뷔한 대한민국의 7인조 보이그룹이다.,35
3 |
--------------------------------------------------------------------------------
/data/stage1. domain_instruction_tuning/ive_instruction_test.jsonl:
--------------------------------------------------------------------------------
1 | {"instruction": "아이브의 소속사는 어떤 엔터테인먼트 기업인가요?", "output": "아이브의 소속사는 스타쉽엔터테인먼트입니다."}
2 | {"instruction": "장원영의 생년월일은 언제인가요?", "output": "장원영의 생년월일은 2004년 8월 31일입니다."}
3 | {"instruction": "아이브와 (여자)아이들은 어떤 차이점이 있나요?", "output": "아이브는 보컬의 비중이 높은 음악을 내세우고 있지만, (여자)아이들은 콘셉트추얼함과 자체 제작으로 승부를 보며 음악에 어필합니다."}
4 | {"instruction": "어떤 멤버들이 IVE의 성장을 도왔나요?", "output": "안유진과 장원영 외에도 아이즈원 출신이 아닌 새로운 멤버들이 IVE의 성장을 도왔습니다."}
5 | {"instruction": "아이브의 실력 중에서 가장 특출난 점은 무엇인가요?", "output": "아이브의 표정 연기입니다."}
6 | {"instruction": "아이브의 모든 멤버들이 곡의 스토리와 가사에 맞는 표정 연기를 잘 하나요?", "output": "네, 아이브의 모든 멤버들이 곡의 스토리와 가사에 맞는 표정 연기를 능숙하게 사용합니다."}
7 | {"instruction": "아이브의 메인보컬과 서브보컬은 누구들인가요?", "output": "메인보컬은 안유진과 리즈이고, 서브보컬은 가을, 레이, 장원영, 이서입니다."}
8 | {"instruction": "아이브의 랩 파트와 고음역대 파트는 누가 맡고 있나요?", "output": "랩 파트는 가을, 레이가 맡고, 고음역대 파트는 리즈와 안유진이 맡고 있습니다."}
9 | {"instruction": "어떤 그룹이 처음부터 비주얼을 내세웠나요?", "output": "스타쉽엔터테인먼트 그룹인 아이브가 처음부터 비주얼을 내세웠습니다."}
10 | {"instruction": "이 아티스트는 어떤 시상식에서 3개의 시상식에서 신인상과 대상을 동시 수상한 최초의 아티스트가 되었다고 언급되었나요?", "output": "이 아티스트는 2022년 11월 26일 멜론 뮤직 어워드, 11월 30일 마마 어워즈, 12월 13일 아시아 아티스트 어워즈에서 신인상과 대상을 동시에 수상하였습니다."}
11 | {"instruction": "이 아티스트가 속한 그룹은 어떤 시기에 여자 아이돌 시장의 침체기를 가져온 걸그룹으로 꼽혔나요?", "output": "이 아티스트가 속한 그룹은 약간의 침체기가 있었던 여자 아이돌 시장의 부흥을 가져온 걸그룹으로 꼽혔습니다."}
12 | {"instruction": "《After LIKE》는 어떤 수치상 한국 걸그룹 중에서 어떤 위치에 올랐나요?", "output": "두 번째로 높은 위치, Spotify 글로벌 차트 23위"}
13 | {"instruction": "\"After LIKE\"는 한국 걸그룹 중 초동 판매량 2위를 달성했나요?", "output": "네, \"After LIKE\"는 한국 걸그룹 초동 판매량 2위에 등극했습니다."}
14 | {"instruction": "IVE의 그룹명은 어떻게 결정되었나요?", "output": "IVE의 그룹명은 회사 공모를 통해 결정되었다."}
15 | {"instruction": "아이브가 첫 곡으로 연습한 노래는 무엇인가요?", "output": "피프스 하모니의 That's my girl입니다."}
16 | {"instruction": "아이브 멤버들 중 가장 어리게 태어난 멤버는 누구인가요?", "output": "이서입니다."}
17 | {"instruction": "아이브 멤버들은 어떤 애니메이션을 모르거나, 틀린 이름을 부르기도 했나요?", "output": "원영과 이서는 슬라이드폰을 열 줄 모른다든가, 이서는 마시마로를 몰라서 마시멜로라고 하기도 했습니다."}
18 | {"instruction": "가을은 이서와의 세대 차이를 어떻게 느끼고 있었나요?", "output": "가을은 이서와 크게 세대 차이를 못 느낀다고 밝혔습니다."}
19 | {"instruction": "아이브 멤버 중에서 가장 연장자는 누구인가요?", "output": "최연장자가 아닌 둘째가 리더인 걸그룹이 되었습니다."}
20 | {"instruction": "아이브 멤버 중에서 지상파 음악 방송 MC를 맡고 있던 멤버는 몇 명인가요?", "output": "아이돌 그룹 내에 지상파 음악 방송 MC를 맡고 있던 멤버가 무려 2명이었습니다."}
21 | {"instruction": "아이브의 멤버들은 가로본능 핸드폰에 대해 어떤 반응을 보였나요?", "output": "멤버들 모두가 뭔지 모르는 듯한 모습을 보였습니다."}
22 | {"instruction": "멤버들 중 MBTI가 내향형인 멤버들은 누구인가요?", "output": "장원영과 이서를 제외한 나머지 멤버들"}
23 | {"instruction": "어떤 것들이 숙소 규칙으로 정해져 있는가?", "output": "빨래 제때 가져가기, 냉장고 유통기한 음식 본인 건 본인이 버리기 등이 있다."}
24 | {"instruction": "IVE의 데뷔월 기준 유행어는 무엇인가요?", "output": "\"너무나도\", \"꽤나\", \"오히려 좋아\" 등의 유행어가 있습니다."}
25 | {"instruction": "IVE는 어떤 걸그룹의 파생 걸그룹인가요?", "output": "IVE는 엠넷의 프로듀스 101 시리즈 데뷔조 파생 걸그룹으로, 프리스틴, 구구단, 다이아 등과 함께 대표되는 걸그룹입니다."}
26 |
--------------------------------------------------------------------------------
/data/stage2. RM/ive_test_rm.jsonl:
--------------------------------------------------------------------------------
1 | {"prompt": "아이브 멤버 중에서 서울공연예술고등학교를 졸업한 멤버는 누구인가요?", "chosen": "레이, 장원영", "rejected": "누군지는 잘 모르겠어요."}
2 | {"prompt": "다른 그룹명도 고려됐던 건가요?", "chosen": "네, 멤버들은 어떤 이름이 될지 기대하며 기다리고 있었다.", "rejected": "그룹명을 정하기 위해 다양한 그룹명이 고려되었지만, 아이브가 선택되었습니다."}
3 | {"prompt": "아이브의 비주얼이 왜 유명한가요?", "chosen": "아이브는 자타공인 전원 센터급 비주얼로 유명합니다.", "rejected": "아이브의 비주얼은 논란이 있지만 다양한 스타일을 소화해내어 유명한 편입니다."}
4 | {"prompt": "아이브의 데뷔곡 《ELEVEN》은 몇 개의 음악 방송에서 1위를 차지했나요?", "chosen": "13관왕", "rejected": "한 개의 음악 방송에서 1위를 차지했어요."}
5 | {"prompt": "리즈는 어느 도시에서 태어났나요?", "chosen": "제주에서 태어났습니다.", "rejected": "상하이"}
6 | {"prompt": "멤버들의 공식색은 어떤 색들로 이루어져 있는가?", "chosen": "멤버들의 공식색은 빨간색, 주황색, 초록색, 파란색, 남색, 보라색으로 무지개 7색 중에서 노란색만 빠져 있어서 그 이유를 궁금해하는 팬들이 많다.", "rejected": "멤버들의 공식색은 하늘과 바닷물 색으로 이루어져 있다고 합니다."}
7 | {"prompt": "아이브의 랩 실력은 그룹 내에서 어떻게 평가되나요?", "chosen": "그룹 내에서 아이브의 랩 실력은 뛰어나다고 평가되고 있습니다.", "rejected": "아이브의 랩 실력은 그룹 내에서 평가가 다양하게 나뉘어요."}
8 | {"prompt": "2023년 4월 15일 방송된 아는 형님에서 이사 소식을 알렸는데, 멤버들은 새로운 숙소에서 3명씩 두 채로 나누어 살게 되었다고 하는데, 과거 숙소는 어디에 위치했나요?", "chosen": "성수동에 위치했던 것으로 보입니다.", "rejected": "전 세계 어딘가"}
9 | {"prompt": "멜론 20만 이상 하트를 보유한 아이브의 곡은 몇 개인가요?", "chosen": "멜론 20만 이상 하트를 보유한 아이브의 곡은 2곡입니다.", "rejected": "2곡"}
10 | {"prompt": "어떤 곡에서 아이브는 빠른 랩으로 호평을 받았나요?", "chosen": "에서 빠른 랩으로 호평을 받았습니다.", "rejected": "아이브는 \"11\"이라는 곡에서 빠른 랩으로 호평을 받았어요."}
11 | {"prompt": "어떻게 아이브의 표정 연기로 개개인이 더욱 돋보이게 되나요?", "chosen": "아이브의 표정 연기는 개개인을 더욱 돋보이게 만들어줍니다.", "rejected": "아이브 멤버들의 표정 연기로 인해 각자가 더 돋보이게 되는 이유는 자신들의 개성 때문이죠."}
12 | {"prompt": "아이브의 정규 1집 I've IVE로 컴백하면서 어떤 성장을 보였나요?", "chosen": "신속한 피드백 수용과 노력하는 모습으로 한 단계 성장한 라이브 실력을 보였습니다.", "rejected": "아이브의 정규 1집 I've IVE로 컴백하면서 주목할 만한 성장은 그림 그리기 실력이 향상되었다는 것입니다."}
13 | {"prompt": "아이브의 평균 비주얼은 어떤가요?", "chosen": "아이브의 평균 비주얼은 상당히 뛰어나다고 평가받고 있습니다.", "rejected": "아이브의 평균 비주얼은 평범한 수준으로 보여요."}
14 | {"prompt": "아이브 멤버 중에서 서울공연예술고등학교를 중퇴한 멤버는 누구인가요?", "chosen": "안유진", "rejected": "잘 모르겠습니다."}
15 | {"prompt": "아이브의 댄스 실력은 전문가들에게도 인정 받나요?", "chosen": "예, 아이브의 댄스 실력은 전문가들에게도 인정받고 있습니다.", "rejected": "아이브의 댄스 실력은 전문가들에게도 어느 정도 인정받고 있어요."}
16 | {"prompt": "2002년생 가을 제외하면 나머지 멤버들은 2000년대에 어떤 문화를 잘 모르는 경향이 있나요?", "chosen": "2000년대의 문화를 잘 모른다는 경향이 있습니다.", "rejected": "2002년생 가을을 제외한 나머지 멤버들은 2000년대에 IT 문화를 잘 모르는 경향이 있습니다."}
17 | {"prompt": "IZ*ONE 활동이 끝난 후 데뷔한 최초의 그룹은 어떤 그룹인가요?", "chosen": "IVE", "rejected": "그룹 A"}
18 | {"prompt": "가을이는 어떤 애완동물을 키우고 있나요?", "chosen": "가을이는 토끼를 키우고 있습니다.", "rejected": "가을이는 전 해초류를 기르고 있어요."}
19 | {"prompt": "SBS 인기가요에서 아이브는 몇 번 트리플 크라운을 수상했나요?", "chosen": "7월 3일 기준 SBS 인기가요에서도 한 번 트리플 크라운을 수상", "rejected": "5번 TMC를 수상했어요."}
20 | {"prompt": "아이브의 댄스 실력은 어떻게 평가되나요?", "chosen": "전체적으로 준수한 실력을 갖추고 있습니다.", "rejected": "아이브의 댄스 실력은 시각적으로 평가되는 경향이 있습니다."}
21 |
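
train_reward_model.py loads its data with datasets.load_dataset and, with the default Dahoas/rm-static dataset, relies on prompt/chosen/rejected fields like the ones in this file. Below is a minimal sketch of pointing RewardDataset at this local jsonl instead, assuming RewardDataset only needs those fields and that the chatgpt package from code/ is installed; the eval split reuses the same file purely for illustration.

# load_local_rm_data.py -- illustrative only
from datasets import load_dataset
from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
from chatgpt.dataset import RewardDataset

data = load_dataset('json', data_files={
    'train': 'data/stage2. RM/ive_test_rm.jsonl',
    'test': 'data/stage2. RM/ive_test_rm.jsonl',    # reused for eval, illustration only
})

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

train_dataset = RewardDataset(data['train'], tokenizer, 512)
eval_dataset = RewardDataset(data['test'], tokenizer, 512)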
--------------------------------------------------------------------------------
/data/stage3. PPO/ive_test_ppo.jsonl:
--------------------------------------------------------------------------------
1 | {"prompt": "표정 연기가 무대 전반의 분위기나 이야기 전달에 어떤 영향을 미치는지 알려주세요."}
2 | {"prompt": "아이브가 향후 어떤 방식으로 발전하고 성장해 나갈 것으로 예상되는가요?"}
3 | {"prompt": "IVE의 멤버 중에서 가장 먼저 지상파 음악 방송 1위를 기록한 멤버가 누구였나요?"}
4 | {"prompt": "가족관계가 팀 내에서 언급되는 이유가 음악적 활동에 미친 영향에 대해 어떤 생각을 가지십니까?"}
5 | {"prompt": "아이브의 랩이 앞으로 그녀의 음악적 진행을 어떻게 변화시킬지 예상해 보십시오."}
6 | {"prompt": "성인이 되면서 팀 내의 역할 분담이나 관계에 변화가 있었을까요?"}
7 | {"prompt": "아이브의 댄스 실력을 보완하거나 향상시킬 수 있는 방법에는 어떤 것들이 있을까요?"}
8 | {"prompt": "아이브의 랩이 음악이나 가사에 미칠 수 있는 강력한 영향은 무엇인가요?"}
9 | {"prompt": "아이브는 다른 프로듀스 파생 걸그룹들과 다르게 데뷔조에 속한 멤버들의 인지도가 고르게 높다고 합니다. 이러한 현상이 나타나는 이유는 무엇이라고 생각하십니까?"}
10 | {"prompt": "'소녀'와 '자기애'라는 콘셉트의 마케팅 전략은 어떤 방식으로 전개되었을까요?"}
11 | {"prompt": "멤버들이 강한 포스를 뿜어내는데에는 어떤 특징이 도움이 되었을까요?"}
12 | {"prompt": "걸 크러시 콘셉트와 소녀다운 이미지를 결합시키는 과정에서 어려움을 겪은 부분은 무엇이 있었을까요?"}
13 | {"prompt": "세대 간의 차이를 긍정적이고 다양한 가치로 받아들이는데 도움을 주는 활동이 있다면, 어떤 것들이 있을까요?"}
14 | {"prompt": "각 멤버들의 취향을 반영한 단체여행을 계획할 때, 가을이 가장 먼저 제안할 곳은 어디일까요? 다른 멤버들은 그 제안에 대해 어떤 반응을 보일까요?"}
15 | {"prompt": "아이브가 한중일 아이돌 시장 등 글로벌 시장에서의 입지를 고려할 때, 어떤 전략이 필요할 것으로 보이나요?"}
16 | {"prompt": "성인이 된 후에는 어떻게 변화가 있었을까요? 예를 들어, 리더인 유진이 성인이 된 후에 리더십이나 활동에 어떤 영향을 미쳤을까요?"}
17 | {"prompt": "각 멤버의 음색과 개성을 가장 잘 드러내는 곡은 무엇인가요? 이유도 함께 궁금합니다."}
18 | {"prompt": "IVE의 음악이 2000년대 후반 ~ 2010년대 초중반의 2세대 걸그룹 노래와 공통점이 있는 이유는 무엇일까요?"}
19 | {"prompt": "멤버들의 얼굴에 있는 각각의 점은 어떤 의미가 있을까요?"}
20 | {"prompt": "멤버들이 신장 차이가 큰 영향을 미친 곡이나 활동이 있다면 무엇인가요?"}
21 |
--------------------------------------------------------------------------------
/data/stage3. PPO/kuksundo_test_ppo.jsonl:
--------------------------------------------------------------------------------
1 | {"prompt": "국선도는 어떤 사람들에게 추천되는 수련법인가요?"}
2 | {"prompt": "밝돌법이라는 이름의 유래는 무엇인가요?"}
3 | {"prompt": "국선도를 통해 건강한 몸을 유지하는 방법은 무엇인가요?"}
4 | {"prompt": "국선도를 수행하는 데 필요한 시간과 노력은 어느 정도인가요?"}
5 | {"prompt": "국선도의 목적은 무엇이고, 그것을 달성하기 위해 어떤 방법을 사용하나요?"}
6 | {"prompt": "국선도는 어떤 의지를 기르고 어떠한 미덕을 갖게 해주나요?"}
7 | {"prompt": "국선도는 어떻게 우주자연과 인간을 하나로 만드는 건가요?"}
8 | {"prompt": "국선도를 통해 어떻게 조화로운 세상을 만들 수 있는 건가요?"}
9 | {"prompt": "국선도와 국가 지도자의 도법은 어떤 관계를 가지고 있나요?"}
10 | {"prompt": "국선도의 수련 과정을 주야로 수련해야 할까요?"}
11 | {"prompt": "국선도 밝돌법을 꾸준히 수련함으로써 어떻게 체력이 증강되는지 설명해주세요."}
12 | {"prompt": "국선도를 통한 디톡스 수련법은 어떤 것이 있을까요?"}
13 | {"prompt": "스트레스관리를 위해 어떤 습관을 가질 수 있을까요?"}
14 | {"prompt": "불로장생은 어떤 원리를 기반으로 하는 것인가요?"}
15 | {"prompt": "도인도송에서 언급된 중기(中氣)의 음양(陰陽) 변화와 수화(水火)의 승강(昇降), 그리고 기혈(氣血)의 순환(循環)은 왜 중앙오십토(中央五十土)의 중기단합력(中氣團合力)에 의해 발생하는 것인지 설명해 주세요."}
16 | {"prompt": "앞서 말씀하신 것처럼, 중기(中氣)는 인체에서 하단전(下丹田)에서 발생한 기혈(氣血)이 간(肝)으로 나와 생신(生新)하는 것이라고 알고 있습니다. 이런 과정에서 중앙오십토(中央五十土)가 어떻게 작용하는지 설명해 주세요."}
17 | {"prompt": "도인도송에 따르면, 중기(中氣)단법은 우주적(宇宙的) 입장(立場)에서 오인(吾人)이 행공(行功)해야 하는 고행(苦行)이라고 합니다. 이런 고행(苦行)의 목적은 무엇인지 설명해 주세요."}
18 | {"prompt": "중기(中氣)단법을 수도(修道) 초공(初功)으로 선택하게 된 이유에 대해 설명해 주세요."}
19 | {"prompt": "선도주의가 개인적인 삶에 어떤 영향을 미칠 수 있을까요? 개인이 선도주의를 적용했을 때 어떤 변화를 기대할 수 있나요?"}
20 | {"prompt": "선도주의를 실천하기 위해서 필요한 가장 중요한 자질은 무엇인가요? 그 자질을 어떻게 갖출 수 있을까요?"}
21 | {"prompt": "국선도본원 세계본부에서 발급되는 자격증은 어떤 특징을 가지고 있나요?"}
22 | {"prompt": "국선도의 고유 권한 보존을 위해 강화된 관리 시스템이 구축되었나요? 어떤 관리 시스템이 있는지 알려주세요."}
23 | {"prompt": "기혈순환유통유통법을 할 때 힘을 주는 정도는 어떻게 결정하나요?"}
24 | {"prompt": "기혈순환유통유통법을 할 때 상상력을 이용하는 방법이 있을까요?"}
25 | {"prompt": "단전호흡이 정신적인 측면에 어떤 영향을 미치나요?"}
26 | {"prompt": "호흡법을 익히는 과정에서 주의해야 할 점이 있나요?"}
27 | {"prompt": "이 호흡법은 왜 마음을 가라앉히는 것부터 시작할까요?"}
28 | {"prompt": "아랫배 3cm 아래에서 시작되는 기운을 왜 몸을 반시계 방향으로 돌린다는 생각을 하게 될까요?"}
29 | {"prompt": "이 호흡법은 왜 힘을 건강하게 유지할 수 있는 도움이 될까요?"}
30 | {"prompt": "\"마음을 가라앉힌다\"는 말이 추상적일 수 있습니다. 도장에서는 어떤 방법을 사용하여 마음을 고요하게 만드는지 알 수 있을까요?"}
31 | {"prompt": "마음이 가라앉지 않은 상태에서 아랫배를 내밀 때 일어나는 부작용에는 어떤 것들이 있나요?"}
32 | {"prompt": "배꼽 위의 상체 중 어디라도 긴장이나 힘이 있는 상태에서 아랫배를 내밀려고 하면 어떤 일이 일어날까요?"}
33 | {"prompt": "동작을 정확하게 하는 것과 호흡에 무리가 없도록 하는 것 중 더 중요한 것은 무엇인가요?"}
34 | {"prompt": "단전행공을 통해 얻을 수 있는 신체적인 이점은 무엇인가요?"}
35 | {"prompt": "국선도의 정각도 단계의 동작 수가 많은 이유는 무엇인가요?"}
36 | {"prompt": "국선도의 정각도 단계를 통해 얻을 수 있는 이점은 무엇인가요?"}
37 | {"prompt": "중기단법을 집에서 수련할 때와 도장이나 연수원에서 특수 프로그램을 받을 때의 차이점은 무엇인가요?"}
38 | {"prompt": "중기단법을 수행하면 호흡 뿐만 아니라 다른 측면에서도 어떤 변화가 생기나요?"}
39 | {"prompt": "중기단법의 임의성이 있는 이유는 무엇일까?"}
40 | {"prompt": "원기단법에서는 어떤 유통 과정을 자주 시도하나요?"}
41 | {"prompt": "원기단법에서 축기가 왜 중요한가요?"}
42 | {"prompt": "원기단법에서의 흡지와 축기는 왜 번갈아가며 이루어져야 하나요?"}
43 | {"prompt": "국선도의 통기법을 통해 내면의 평화를 얻을 수 있나요?"}
44 | {"prompt": "국선도의 통기법은 얼마나 오래 실천해야 효과가 나타날까요?"}
45 | {"prompt": "영체와 함께 수련하는 것은 어느 단계부터 가능한 건가요?"}
46 | {"prompt": "삼합단법을 통해 우리 몸은 어떻게 이산화탄소를 배출하나요?"}
47 | {"prompt": "조리단법은 어떤 방식으로 전파되고 널리 알려지게 되었나요?"}
48 | {"prompt": "국선도의 선도법이란 무엇인가요?"}
49 | {"prompt": "국선도의 선도법은 어떻게 구성되어 있나요?"}
50 | {"prompt": "하늘과 하나가 되는 단법이 국선도의 선도법에 어떤 영향을 미치나요?"}
51 | {"prompt": "국선도의 선도법을 세우기 위해서는 어떤 조건이 필요할까요?"}
52 | {"prompt": "국선도의 선도법이 지역 사회에 미치는 영향에는 어떤 요소가 있을까요?"}
53 | {"prompt": "국선도의 삼청단법이란 무엇인가요?"}
54 | {"prompt": "삼청단법을 통해 어떤 능력을 기를 수 있나요?"}
55 | {"prompt": "삼청단법의 목표는 무엇인가요?"}
56 | {"prompt": "삼청단법을 통해 어떤 실용적인 혜택을 얻을 수 있나요?"}
57 | {"prompt": "국선도의 무진단법의 수련 과정에서 어떤 장점이 있나요?"}
58 | {"prompt": "국선도의 무진단법을 통해 수련할 때 가장 어려운 점은 무엇인가요?"}
59 | {"prompt": "국선도의 무진단법을 통해 몸과 마음을 나누는 방법에는 어떤 종류가 있을까요?"}
60 | {"prompt": "국선도의 무진단법을 통해 나눠진 몸과 마음을 다시 하나로 합치는 시기는 얼마나 걸리나요?"}
61 | {"prompt": "국선도의 장부강화운동을 꾸준히 하면 몸에 어떤 변화가 생길까요?"}
62 | {"prompt": "국선도의 진공단법을 수행하면서 정리운동의 난도가 준비운동보다 높다는데, 그 이유는 무엇인가요?"}
63 | {"prompt": "정리운동을 수행하면서 어떻게 기혈순환을 유지할 수 있을까요?"}
64 | {"prompt": "정리운동을 수행하면서 얻을 수 있는 다른 이점들이 있을까요?"}
65 | {"prompt": "국선도를 함으로써 어떤 자세 개선이 이루어질 수 있을까요?"}
66 | {"prompt": "천화법과 내기전신행법의 차이점은 무엇인가요?"}
67 | {"prompt": "천화법을 사용하는 사람들은 어떤 이유로 이를 선택하는 건가요?"}
68 | {"prompt": "천화법의 성공적인 활용 사례를 알려주세요."}
69 | {"prompt": "천화법을 사용하면 단점은 없는 것인가요?"}
70 | {"prompt": "천화법을 활용한 정책 수립의 성공적인 예시를 알려주세요."}
71 | {"prompt": "천화법을 사용한 정책이 다른 국가에 영향을 미칠 수 있는가요?"}
72 | {"prompt": "천화법의 활용은 국가의 민주주의 발전에 어떤 역할을 하는 것인가요?"}
73 | {"prompt": "국선도를 연마하기 위해 필요한 신체적인 능력은 무엇이 있을까요?"}
74 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.13.1
2 | transformers
3 | accelerate
4 | colossalai==0.2.7
5 | openai
6 | langchain==0.0.113
7 | pandas>=1.4.1
8 | datasets
9 | jsonlines
10 | loralib
--------------------------------------------------------------------------------