├── .gitignore
├── LICENSE
├── README.md
├── model_utils
    ├── io_utils.py
    ├── modeling_base.py
    └── prm_model.py
├── setup.py
└── vllm_add_dummy_model
    ├── __init__.py
    └── prm_model.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # Distribution / packaging
 7 | dist/
 8 | build/
 9 | *.egg-info/
10 | 
11 | # Virtual environments
12 | venv/
13 | env/
14 | .env/
15 | .venv/
16 | 
17 | # IDE specific files
18 | .idea/
19 | .vscode/
20 | *.swp
21 | *.swo
22 | 
23 | # Local development settings
24 | .env
25 | .env.local
26 | 
27 | # Logs
28 | *.log
29 | 
30 | # Unit test / coverage reports
31 | htmlcov/
32 | .coverage
33 | .pytest_cache/
34 | 
35 | # Jupyter Notebook
36 | .ipynb_checkpoints
37 | 
38 | # macOS specific
39 | .DS_Store
40 | 
41 | # Windows specific
42 | Thumbs.db
43 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # PRM inference
  2 | 
  3 | ## huggingface inference
  4 | 
  5 | 1. clone skywork prm inference repo
  6 | ```shell
  7 | git clone https://github.com/SkyworkAI/skywork-o1-prm-inference.git
  8 | cd skywork-o1-prm-inference
  9 | ```
 10 | 2. run_prm_inference
 11 | 
 12 | ```python
 13 | from transformers import AutoTokenizer
 14 | from model_utils.prm_model import PRM_MODEL
 15 | from model_utils.io_utils import prepare_input, prepare_batch_input_for_model, derive_step_rewards
 16 | 
 17 | prm_model_path = "/path/to/prm_model"
 18 | tokenizer = AutoTokenizer.from_pretrained(prm_model_path, trust_remote_code=True)
 19 | datas = [
 20 |     {
 21 |         "problem"  : "Janet\u2019s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?",
 22 |         "response" : "To determine how much money Janet makes every day at the farmers' market, we need to follow these steps:\n1. **Calculate the total number of eggs laid by the ducks per day.**\n   Janet's ducks lay 16 eggs per day.\n2. **Determine the number of eggs Janet uses each day.**\n   - She eats 3 eggs for breakfast every morning.\n   - She bakes muffins for her friends every day with 4 eggs.\n   So, the total number of eggs used per day is:\n   \\[\n   3 + 4 = 7 \\text{ eggs}\n   \\]\n3. **Calculate the number of eggs Janet sells at the farmers' market each day.**\n   Subtract the number of eggs used from the total number of eggs laid:\n   \\[\n   16 - 7 = 9 \\text{ eggs}\n   \\]\n4. **Determine how much money Janet makes from selling the eggs.**\n   She sells each egg for $2, so the total amount of money she makes is:\n   \\[\n   9 \\times 2 = 18 \\text{ dollars}\n   \\]\nTherefore, the amount of money Janet makes every day at the farmers' market is \\(\\boxed{18}\\)."
 23 |     },
 24 |     {
 25 |         "problem"  : "Janet\u2019s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?",
 26 |         "response" : "To find out how much Janet makes every day at the farmers' market, we need to follow these steps:\n1. Calculate the total number of eggs laid by the ducks: \n   - Janet's ducks lay 16 eggs per day.\n2. Calculate the total number of eggs consumed by Janet and used for baking:\n   - Janet eats 3 eggs for breakfast.\n   - She bakes 4 eggs for muffins.\n   - Total eggs used: 3 (breakfast) + 4 (baking) = 7 eggs.\n3. Calculate the remaining number of eggs for sale:\n   - Total eggs laid: 16\n   - Eggs used: 7\n   - Remaining eggs: 16 - 7 = 9 eggs\n4. Calculate the total amount of money made at the farmers' market:\n   - Price per egg: $2\n   - Number of eggs sold: 9\n   - Total money made: 9 * $2 = $18\nTherefore, Janet makes $\\boxed{18}$ dollars every day at the farmers' market."
 27 |     }
 28 | ]
 29 | 
 30 | 
 31 | processed_data = [prepare_input(d["problem"], d["response"], tokenizer=tokenizer, step_token="\n") for d in datas]
 32 | input_ids, steps, reward_flags = zip(*processed_data)
 33 | 
 34 | model = PRM_MODEL.from_pretrained(prm_model_path, device_map="auto").eval()
 35 | input_ids, attention_mask, reward_flags = prepare_batch_input_for_model(input_ids, reward_flags, tokenizer.pad_token_id)
 36 | _, _, rewards = model(input_ids=input_ids, attention_mask=attention_mask, return_probs=True)
 37 | step_rewards = derive_step_rewards(rewards, reward_flags)
 38 | print("step_rewards:",step_rewards[0])
 39 | print("step_rewards:",step_rewards[1])
 40 | ```
 41 | 
 42 | ## vllm server for inference
 43 | 
 44 | 1. install vllm and install vllm prm plugin
 45 | ```shell
 46 | pip install vllm==v0.6.4.post1
 47 | git clone https://github.com/SkyworkAI/skywork-o1-prm-inference.git
 48 | cd skywork-o1-prm-inference
 49 | pip install -e .
 50 | ```
 51 | 
 52 | 2. start vllm server
 53 | ```shell
 54 | CUDA_VISIBLE_DEVICES=0,1,2,3 vllm serve /path/to/prm_model \
 55 |     --host 0.0.0.0 \
 56 |     --port 8081 \
 57 |     --tensor-parallel-size 4 \
 58 |     --gpu-memory-utilization 0.9 \
 59 |     --enable-prefix-caching \
 60 |     --dtype auto
 61 | ```
 62 | 
 63 | 3. request server for inference
 64 | 
 65 | ```python
 66 | from openai import OpenAI
 67 | from transformers import AutoTokenizer
 68 | from model_utils.io_utils import prepare_input, derive_step_rewards_vllm
 69 | 
 70 | prm_model_path = "/path/to/prm_model"
 71 | tokenizer = AutoTokenizer.from_pretrained(prm_model_path, trust_remote_code=True)
 72 | datas = [
 73 |     {
 74 |         "problem"  : "Janet\u2019s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?",
 75 |         "response" : "To determine how much money Janet makes every day at the farmers' market, we need to follow these steps:\n1. **Calculate the total number of eggs laid by the ducks per day.**\n   Janet's ducks lay 16 eggs per day.\n2. **Determine the number of eggs Janet uses each day.**\n   - She eats 3 eggs for breakfast every morning.\n   - She bakes muffins for her friends every day with 4 eggs.\n   So, the total number of eggs used per day is:\n   \\[\n   3 + 4 = 7 \\text{ eggs}\n   \\]\n3. **Calculate the number of eggs Janet sells at the farmers' market each day.**\n   Subtract the number of eggs used from the total number of eggs laid:\n   \\[\n   16 - 7 = 9 \\text{ eggs}\n   \\]\n4. **Determine how much money Janet makes from selling the eggs.**\n   She sells each egg for $2, so the total amount of money she makes is:\n   \\[\n   9 \\times 2 = 18 \\text{ dollars}\n   \\]\nTherefore, the amount of money Janet makes every day at the farmers' market is \\(\\boxed{18}\\)."
 76 |     },
 77 |     {
 78 |         "problem"  : "Janet\u2019s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?",
 79 |         "response" : "To find out how much Janet makes every day at the farmers' market, we need to follow these steps:\n1. Calculate the total number of eggs laid by the ducks: \n   - Janet's ducks lay 16 eggs per day.\n2. Calculate the total number of eggs consumed by Janet and used for baking:\n   - Janet eats 3 eggs for breakfast.\n   - She bakes 4 eggs for muffins.\n   - Total eggs used: 3 (breakfast) + 4 (baking) = 7 eggs.\n3. Calculate the remaining number of eggs for sale:\n   - Total eggs laid: 16\n   - Eggs used: 7\n   - Remaining eggs: 16 - 7 = 9 eggs\n4. Calculate the total amount of money made at the farmers' market:\n   - Price per egg: $2\n   - Number of eggs sold: 9\n   - Total money made: 9 * $2 = $18\nTherefore, Janet makes $\\boxed{18}$ dollars every day at the farmers' market."
 80 |     }
 81 | ]
 82 | 
 83 | # data preprocessing
 84 | processed_data = [prepare_input(d["problem"], d["response"], tokenizer=tokenizer, step_token="\n") for d in datas]
 85 | input_ids, steps, reward_flags = zip(*processed_data)
 86 | 
 87 | openai_api_key = "EMPTY"
 88 | openai_api_base = "http://localhost:8081/v1"
 89 | client = OpenAI(
 90 |     # defaults to os.environ.get("OPENAI_API_KEY")
 91 |     api_key=openai_api_key,
 92 |     base_url=openai_api_base,
 93 | )
 94 | models = client.models.list()
 95 | model = models.data[0].id
 96 | rewards = client.embeddings.create(
 97 |     input=input_ids,
 98 |     model=model,
 99 | )
100 | 
101 | step_rewards = derive_step_rewards_vllm(rewards, reward_flags)
102 | print("step_rewards:",step_rewards[0])
103 | print("step_rewards:",step_rewards[1])  
104 | ```


--------------------------------------------------------------------------------
/model_utils/io_utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | 
 4 | def prepare_input(problem, response, tokenizer, step_token):
 5 |     prompt_ids = tokenizer.encode(tokenizer.bos_token + problem + "\n")
 6 |     response_ids = []
 7 |     steps = []
 8 |     reward_flags = [0] * len(prompt_ids)
 9 |     step_token_id = tokenizer.encode(step_token)[-1]
10 |     for idx, step in enumerate(response.split(step_token)):
11 |         if step != "":
12 |             step_ids = tokenizer.encode(step)
13 |         else:
14 |             step_ids = []
15 |         step_ids += [step_token_id]
16 |         step = step + step_token
17 |         flag = [0] * len(step_ids)
18 |         flag[-1] = 1
19 |         response_ids.extend(step_ids)
20 |         reward_flags.extend(flag)
21 |         steps.append(step)
22 |     input_ids = prompt_ids + response_ids
23 |     return input_ids, steps, reward_flags
24 | 
25 | def prepare_batch_input_for_model(input_ids,reward_flags, pad_token_id):
26 |     padded_input_ids = torch.nn.utils.rnn.pad_sequence(
27 |         [torch.LongTensor(ids) for ids in input_ids], 
28 |         batch_first=True,
29 |         padding_value=pad_token_id
30 |     )
31 |     padded_attention_mask = torch.nn.utils.rnn.pad_sequence(
32 |         [torch.LongTensor([1] * len(ids)) for ids in input_ids], 
33 |         batch_first=True,
34 |         padding_value=0
35 |     )
36 |     padded_reward_flags = torch.nn.utils.rnn.pad_sequence(
37 |         [torch.LongTensor(reward_flag) for reward_flag in reward_flags], 
38 |         batch_first=True,
39 |         padding_value=0
40 |     )
41 |     return padded_input_ids, padded_attention_mask,padded_reward_flags
42 | 
43 | def derive_step_rewards(rewards, reward_flags):
44 |     batch_size = rewards.shape[0]
45 |     batch_step_rewards = []
46 |     for i in range(batch_size):
47 |         rewards_indices = torch.nonzero(reward_flags[i] == 1).view(-1)
48 |         step_rewards = [rewards[i][rewards_indices[j]].item() for j in range(len(rewards_indices))]
49 |         batch_step_rewards.append(step_rewards)
50 |     return batch_step_rewards
51 | 
52 | def sigmoid(x):
53 |     return 1/(np.exp(-x) + 1)
54 |     
55 | def derive_step_rewards_vllm(raw_rewards, batch_reward_flags):
56 |     batch_step_rewards = []
57 |     for idx,data in enumerate(raw_rewards.data):
58 |         rewards = data.embedding
59 |         reward_flags = batch_reward_flags[idx]
60 | 
61 |         step_rewards = [sigmoid(reward) for reward,flag in zip(rewards,reward_flags) if flag == 1]   
62 |         batch_step_rewards.append(step_rewards)
63 |     return batch_step_rewards
64 | 


--------------------------------------------------------------------------------
/model_utils/modeling_base.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 The HuggingFace Team. All rights reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import json
 15 | import logging
 16 | import os
 17 | from copy import deepcopy
 18 | from typing import Optional
 19 | import sys
 20 | 
 21 | import torch
 22 | import torch.nn as nn
 23 | from accelerate import PartialState
 24 | from huggingface_hub import hf_hub_download
 25 | from huggingface_hub.utils import (
 26 |     EntryNotFoundError,
 27 |     HFValidationError,
 28 |     LocalEntryNotFoundError,
 29 |     RepositoryNotFoundError,
 30 | )
 31 | from safetensors.torch import load_file as safe_load_file
 32 | from transformers import PreTrainedModel
 33 | 
 34 | if sys.version_info < (3, 8):
 35 |     _is_python_greater_3_8 = False
 36 | else:
 37 |     _is_python_greater_3_8 = True
 38 | 
 39 | def is_transformers_greater_than(current_version: str) -> bool:
 40 |     if _is_python_greater_3_8:
 41 |         from importlib.metadata import version
 42 | 
 43 |         _transformers_version = version("transformers")
 44 |     else:
 45 |         import pkg_resources
 46 | 
 47 |         _transformers_version = pkg_resources.get_distribution("transformers").version
 48 |     return _transformers_version > current_version
 49 | 
 50 | if is_transformers_greater_than("4.33.0"):
 51 |     from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
 52 | else:
 53 |     from transformers.deepspeed import is_deepspeed_zero3_enabled
 54 | 
 55 | LAYER_PATTERNS = [
 56 |     "transformer.h.{layer}",
 57 |     "model.decoder.layers.{layer}",
 58 |     "gpt_neox.layers.{layer}",
 59 |     "model.layers.{layer}",
 60 | ]
 61 | 
 62 | 
 63 | class PreTrainedModelWrapper(nn.Module):
 64 |     r"""
 65 |     A wrapper class around a (`transformers.PreTrainedModel`) to be compatible with the
 66 |     (`~transformers.PreTrained`) class in order to keep some attributes and methods of the
 67 |     (`~transformers.PreTrainedModel`) class.
 68 | 
 69 |     Attributes:
 70 |         pretrained_model: (`transformers.PreTrainedModel`)
 71 |             The model to be wrapped.
 72 |         parent_class: (`transformers.PreTrainedModel`)
 73 |             The parent class of the model to be wrapped.
 74 |         supported_args: (`list`)
 75 |             The list of arguments that are supported by the wrapper class.
 76 |     """
 77 | 
 78 |     transformers_parent_class = None
 79 |     supported_args = None
 80 |     supported_modules = ("v_head",)
 81 |     supported_rm_modules = ("score",)
 82 |     supported_pretrained_model_architectures = ((PreTrainedModel))
 83 | 
 84 |     def __init__(
 85 |         self, pretrained_model=None, score_module=None, supports_rm_adapter=False, rm_adapter_name=None, **kwargs
 86 |     ):
 87 |         super().__init__()
 88 |         self.pretrained_model = pretrained_model
 89 | 
 90 |         self.config = pretrained_model.config
 91 |         self.prepare_inputs_for_generation = pretrained_model.prepare_inputs_for_generation
 92 |         self.is_loaded_in_8bit = getattr(pretrained_model, "is_loaded_in_8bit", False)
 93 |         self.is_loaded_in_4bit = getattr(pretrained_model, "is_loaded_in_4bit", False)
 94 |         self.is_sequential_parallel = False
 95 | 
 96 |         if hasattr(pretrained_model, "gradient_checkpointing_disable"):
 97 |             self.gradient_checkpointing_disable = pretrained_model.gradient_checkpointing_disable
 98 | 
 99 |         if hasattr(pretrained_model, "gradient_checkpointing_enable"):
100 |             self.gradient_checkpointing_enable = pretrained_model.gradient_checkpointing_enable
101 | 
102 |         if hasattr(pretrained_model, "enable_input_require_grads"):
103 |             self.enable_input_require_grads = pretrained_model.enable_input_require_grads
104 | 
105 |         self.supports_rm_adapter = supports_rm_adapter
106 |         self.rm_adapter_name = rm_adapter_name
107 |         self.policy_adapter_name = "default"
108 |         if score_module is not None:
109 |             self.score = score_module
110 | 
111 |     @classmethod
112 |     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
113 |         r"""
114 |         Instantiates a new model from a pretrained model from `transformers`. The
115 |         pretrained model is loaded using the `from_pretrained` method of the
116 |         `transformers.PreTrainedModel` class. The arguments that are specific to the
117 |         `transformers.PreTrainedModel` class are passed along this method and filtered
118 |         out from the `kwargs` argument.
119 | 
120 | 
121 |         Args:
122 |             pretrained_model_name_or_path (`str` or `transformers.PreTrainedModel`):
123 |                 The path to the pretrained model or its name.
124 |             *model_args (`list`, *optional*)):
125 |                 Additional positional arguments passed along to the underlying model's
126 |                 `from_pretrained` method.
127 |             **kwargs (`dict`, *optional*):
128 |                 Additional keyword arguments passed along to the underlying model's
129 |                 `from_pretrained` method. We also pre-process the kwargs to extract
130 |                 the arguments that are specific to the `transformers.PreTrainedModel`
131 |                 class and the arguments that are specific to trl models. The kwargs
132 |                 also support `prepare_model_for_kbit_training` arguments from
133 |                 `peft` library.
134 |         """
135 |         if kwargs is not None:
136 |             peft_config = kwargs.pop("peft_config", None)
137 |             reward_adapter = kwargs.pop("reward_adapter", None)
138 |             reward_adapter_name = kwargs.pop("reward_adapter_name", "reward_adapter")
139 |             is_trainable = kwargs.pop("is_trainable", False)
140 |             trl_model_args, pretrained_kwargs, peft_quantization_kwargs = cls._split_kwargs(kwargs)
141 |             token = pretrained_kwargs.get("token", None)
142 |         else:
143 |             peft_config = None
144 |             is_trainable = False
145 |             trl_model_args = {}
146 |             pretrained_kwargs = {}
147 |             peft_quantization_kwargs = {}
148 |             token = None
149 | 
150 |         if reward_adapter is not None and not isinstance(reward_adapter, str):
151 |             raise ValueError(
152 |                 "The `reward_adapter` argument should be a string representing the name of local path or the Hub id to the Reward Modeling adapter."
153 |             )
154 | 
155 |         is_peft_model = False
156 | 
157 |         current_device = cls._get_current_device()
158 |         if isinstance(pretrained_model_name_or_path, str):
159 |             is_loaded_in_8bit = pretrained_kwargs["load_in_8bit"] if "load_in_8bit" in pretrained_kwargs else False
160 |             is_loaded_in_4bit = pretrained_kwargs["load_in_4bit"] if "load_in_4bit" in pretrained_kwargs else False
161 |         else:
162 |             is_loaded_in_8bit = getattr(pretrained_model_name_or_path, "is_loaded_in_8bit", False)
163 |             is_loaded_in_4bit = getattr(pretrained_model_name_or_path, "is_loaded_in_4bit", False)
164 | 
165 |         if (is_loaded_in_8bit or is_loaded_in_4bit) and "device_map" not in pretrained_kwargs:
166 |             # warn users
167 |             logging.warning(
168 |                 "The `device_map` argument is not provided. We will override the device_map argument."
169 |                 " to set the entire"
170 |                 " model on the current device. If you want to set the model on multiple devices, please provide"
171 |                 " a custom `device_map` argument."
172 |             )
173 |             pretrained_kwargs["device_map"] = {"": current_device}
174 | 
175 | 
176 | 
177 |         # First, load the pre-trained model using the parent-class
178 |         # either `AutoModelForCausalLM` or `AutoModelForSeq2SeqLM`
179 |         if isinstance(pretrained_model_name_or_path, str):
180 | 
181 |             remote_adapter_config = None
182 |             local_adapter_present = os.path.exists(os.path.join(pretrained_model_name_or_path, "adapter_config.json"))
183 |             pretrained_model = cls.transformers_parent_class.from_pretrained(
184 |                 pretrained_model_name_or_path, *model_args, **pretrained_kwargs
185 |             )
186 |  
187 |         elif isinstance(pretrained_model_name_or_path, cls.supported_pretrained_model_architectures):
188 |             pretrained_model = pretrained_model_name_or_path
189 |         else:
190 |             raise ValueError(
191 |                 "pretrained_model_name_or_path should be a string or a PreTrainedModel, "
192 |                 f"but is {type(pretrained_model_name_or_path)}"
193 |             )
194 | 
195 | 
196 |         # Add reward modeling adapter if specified
197 |         if not is_peft_model and reward_adapter is not None:
198 |             raise ValueError("reward_adapter can only be used with a PeftModel. ")
199 |         elif is_peft_model and reward_adapter is not None:
200 |             score_module = cls.add_and_load_reward_modeling_adapter(
201 |                 pretrained_model, reward_adapter, reward_adapter_name, token=token
202 |             )
203 |             multi_adapter_args = {
204 |                 "score_module": score_module,
205 |                 "supports_rm_adapter": True,
206 |                 "rm_adapter_name": reward_adapter_name,
207 |             }
208 |         else:
209 |             multi_adapter_args = {"supports_rm_adapter": False}
210 | 
211 |         # Then, create the full model by instantiating the wrapper class
212 |         model = cls(pretrained_model, **multi_adapter_args, **trl_model_args)
213 | 
214 |         # if resume_training, load the state_dict again - this is ok since the
215 |         # state_dict is removed from the model after loading it.
216 |         is_resuming_training = True
217 |         if isinstance(pretrained_model_name_or_path, str):
218 |             safe_filename = os.path.join(pretrained_model_name_or_path, "model.safetensors")
219 |             filename = os.path.join(pretrained_model_name_or_path, "pytorch_model.bin")
220 | 
221 |             sharded_index_filename = os.path.join(pretrained_model_name_or_path, "pytorch_model.bin.index.json")
222 |             safe_sharded_index_filename = os.path.join(pretrained_model_name_or_path, "model.safetensors.index.json")
223 |             is_sharded = False
224 |             use_safe = os.path.exists(safe_filename)
225 | 
226 |             if not (os.path.exists(filename) or os.path.exists(safe_filename)):
227 |                 # Try with `pytorch_model.bin`
228 |                 filename, files_to_download, is_sharded, is_resuming_training = cls._get_checkpoint_from_hub(
229 |                     pretrained_model,
230 |                     pretrained_model_name_or_path,
231 |                     sharded_index_filename,
232 |                     token=token,
233 |                 )
234 |                 # Try with safetensors
235 |                 if filename is None and files_to_download is None:
236 |                     safe_filename, files_to_download, is_sharded, is_resuming_training = cls._get_checkpoint_from_hub(
237 |                         pretrained_model,
238 |                         pretrained_model_name_or_path,
239 |                         safe_sharded_index_filename,
240 |                         token=token,
241 |                         model_name="model.safetensors",
242 |                         model_index_name="model.safetensors.index.json",
243 |                     )
244 |                     use_safe = True
245 |                 else:
246 |                     use_safe = False
247 | 
248 |             loading_func = safe_load_file if use_safe else torch.load
249 |             load_kwargs = {} if use_safe else {"map_location": "cpu"}
250 | 
251 |             if is_resuming_training:
252 |                 if is_sharded:
253 |                     # download each file and add it to the state_dict
254 |                     state_dict = {}
255 | 
256 |                     for shard_file in files_to_download:
257 |                         filename = hf_hub_download(
258 |                             pretrained_model_name_or_path,
259 |                             shard_file,
260 |                             token=token,
261 |                         )
262 |                         state_dict.update(loading_func(filename, **load_kwargs))
263 |                 else:
264 |                     state_dict = loading_func(filename if not use_safe else safe_filename, **load_kwargs)
265 | 
266 |         else:
267 |             state_dict = pretrained_model_name_or_path.state_dict()
268 | 
269 |         model.is_peft_model = is_peft_model
270 |         model.current_device = current_device
271 | 
272 |         if is_resuming_training:
273 |             model.post_init(state_dict=state_dict)
274 | 
275 |         return model
276 | 
277 |     @classmethod
278 |     def _get_checkpoint_from_hub(
279 |         cls,
280 |         pretrained_model,
281 |         pretrained_model_name_or_path,
282 |         index_filename,
283 |         token=None,
284 |         model_name="pytorch_model.bin",
285 |         model_index_name="pytorch_model.bin.index.json",
286 |     ):
287 |         files_to_download = None
288 |         filename = None
289 |         is_resuming_training = True
290 |         is_sharded = False
291 | 
292 |         try:
293 |             filename = hf_hub_download(
294 |                 pretrained_model_name_or_path,
295 |                 model_name,
296 |                 token=token,
297 |             )
298 |         # sharded
299 |         except (EntryNotFoundError, LocalEntryNotFoundError, HFValidationError, RepositoryNotFoundError):
300 |             if os.path.exists(index_filename):
301 |                 index_file_name = index_filename
302 |             else:
303 |                 try:
304 |                     index_file_name = hf_hub_download(
305 |                         pretrained_model_name_or_path,
306 |                         model_index_name,
307 |                         token=token,
308 |                     )
309 |                 except (EntryNotFoundError, LocalEntryNotFoundError, HFValidationError, RepositoryNotFoundError):
310 |                     # not continue training, do not have v_head weight
311 |                     is_resuming_training = False
312 |                     logging.warning(
313 |                         f"A {type(pretrained_model)} model is loaded from '{pretrained_model_name_or_path}', "
314 |                         f"and no v_head weight is found. This IS expected if you are not resuming PPO training."
315 |                     )
316 |             # load json
317 |             if is_resuming_training:
318 |                 with open(index_file_name) as f:
319 |                     index = json.load(f)
320 |                 # check filename with `v_head` or any known extra module:
321 |                 files_to_download = set()
322 |                 for k, v in index["weight_map"].items():
323 |                     if any(module in k for module in cls.supported_modules):
324 |                         files_to_download.add(v)
325 |                 is_sharded = True
326 | 
327 |         return filename, files_to_download, is_sharded, is_resuming_training
328 | 
329 |     @classmethod
330 |     def _get_current_device(cls):
331 |         r"""
332 |         Get the current device. For GPU, we return the local process index using the `accelerate.PartialState`
333 |         object to handle corner cases when running scripts in distributed environments.
334 | 
335 |         Returns:
336 |             current_device (`Union[int, str]`):
337 |                 The current device.
338 |         """
339 |         state = PartialState()
340 |         return state.local_process_index if torch.cuda.is_available() else "cpu"
341 | 
342 |     @classmethod
343 |     def _split_kwargs(cls, kwargs):
344 |         """
345 |         Separate the kwargs from the arguments that we support inside
346 |         `supported_args` and the ones that we don't.
347 |         """
348 |         check_peft_kwargs = False
349 | 
350 |         supported_kwargs = {}
351 |         unsupported_kwargs = {}
352 |         peft_kwargs = {}
353 | 
354 |         for key, value in kwargs.items():
355 |             if key in cls.supported_args:
356 |                 supported_kwargs[key] = value
357 |             else:
358 |                 unsupported_kwargs[key] = value
359 | 
360 |             if check_peft_kwargs:
361 |                 if key in prepare_model_for_kbit_training.__code__.co_varnames:
362 |                     peft_kwargs[key] = value
363 |                     if key in unsupported_kwargs:
364 |                         unsupported_kwargs.pop(key)
365 | 
366 |         return supported_kwargs, unsupported_kwargs, peft_kwargs
367 | 
368 |     @classmethod
369 |     def add_and_load_reward_modeling_adapter(
370 |         cls, pretrained_model, adapter_model_id, adapter_name="reward_model_adapter", token=None
371 |     ):
372 |         r"""
373 |         Add and load a reward modeling adapter. This method can only be used if the
374 |         model is a `PeftModel` and if you have initialized the model with the `reward_modeling_adapter_id`
375 |         argument, pointing to the id of the reward modeling adapter. The latest needs also to contain the
376 |         score head in order to produce the reward.
377 |         """
378 |         pretrained_model.load_adapter(adapter_model_id, adapter_name, is_trainable=False)
379 |         pretrained_model.train()
380 | 
381 |         filename = os.path.join(adapter_model_id, "adapter_model.bin")
382 |         safe_loading = False
383 |         if not os.path.exists(filename):
384 |             try:
385 |                 local_filename = hf_hub_download(
386 |                     adapter_model_id,
387 |                     "adapter_model.bin",
388 |                     token=token,
389 |                 )
390 |             except Exception:
391 |                 filename = os.path.join(adapter_model_id, "adapter_model.safetensors")
392 |                 safe_loading = True
393 |                 if not os.path.exists(filename):
394 |                     try:
395 |                         local_filename = hf_hub_download(
396 |                             adapter_model_id,
397 |                             "adapter_model.safetensors",
398 |                             token=token,
399 |                         )
400 |                     except Exception as exc:
401 |                         raise ValueError(
402 |                             "Could not find adapter model in the Hub, "
403 |                             "make sure you have the correct adapter model id."
404 |                         ) from exc
405 |                 else:
406 |                     local_filename = filename
407 |         else:
408 |             local_filename = filename
409 | 
410 |         loading_func = safe_load_file if safe_loading else torch.load
411 |         load_kwargs = {} if safe_loading else {"map_location": "cpu"}
412 | 
413 |         adapter_state_dict = loading_func(local_filename, **load_kwargs)
414 | 
415 |         for score_name_candidate in cls.supported_rm_modules:
416 |             if any(score_name_candidate in name for name in adapter_state_dict.keys()):
417 |                 score_name = score_name_candidate
418 |                 # we have found the correct head name and can break
419 |                 break
420 | 
421 |         score_dict = {}
422 | 
423 |         for name, param in adapter_state_dict.items():
424 |             if score_name in name:
425 |                 key_name = ".".join(name.split(".")[-1:])
426 |                 score_dict[key_name] = param.to(cls._get_current_device())
427 | 
428 |         num_labels, hidden_dim = score_dict["weight"].shape
429 |         has_bias = any("bias" in name for name in adapter_state_dict.keys())
430 | 
431 |         score = nn.Linear(hidden_dim, num_labels, bias=has_bias).to(
432 |             device=cls._get_current_device(),
433 |             dtype=pretrained_model.dtype,
434 |         )
435 |         score.load_state_dict(score_dict)
436 |         for param in score.parameters():
437 |             param.requires_grad = False
438 | 
439 |         return score
440 | 
441 |     def push_to_hub(self, *args, **kwargs):
442 |         r"""
443 |         Push the pretrained model to the hub. This method is a wrapper around
444 |         `transformers.PreTrainedModel.push_to_hub`. Please refer to the documentation
445 |         of `transformers.PreTrainedModel.push_to_hub` for more information.
446 | 
447 |         Args:
448 |             *args (`list`, *optional*):
449 |                 Positional arguments passed along to the underlying model's
450 |                 `push_to_hub` method.
451 |             **kwargs (`dict`, *optional*):
452 |                 Keyword arguments passed along to the underlying model's
453 |                 `push_to_hub` method.
454 |         """
455 |         raise NotImplementedError
456 | 
457 |     def save_pretrained(self, *args, **kwargs):
458 |         r"""
459 |         Save the pretrained model to a directory. This method is a wrapper around
460 |         `transformers.PreTrainedModel.save_pretrained`. Please refer to the documentation
461 |         of `transformers.PreTrainedModel.save_pretrained` for more information.
462 | 
463 |         Args:
464 |             *args (`list`, *optional*):
465 |                 Positional arguments passed along to the underlying model's
466 |                 `save_pretrained` method.
467 |             **kwargs (`dict`, *optional*):
468 |                 Keyword arguments passed along to the underlying model's
469 |                 `save_pretrained` method.
470 |         """
471 |         state_dict = kwargs.get("state_dict")
472 |         if state_dict is None:
473 |             state_dict = self.state_dict()
474 |             kwargs["state_dict"] = state_dict
475 | 
476 |         # if it is a peft model only save the `v_head` state_dict and
477 |         # pop the `state_dict` from the kwargs to avoid slient bugs with `peft`
478 |         if self.is_peft_model:
479 |             save_path = args[0]
480 |             save_path = os.path.join(save_path, "pytorch_model.bin")
481 |             torch.save(state_dict, save_path)
482 |             _ = kwargs.pop("state_dict", None)
483 | 
484 |         return self.pretrained_model.save_pretrained(*args, **kwargs)
485 | 
486 |     def state_dict(self, *args, **kwargs):
487 |         r"""
488 |         Return the state_dict of the pretrained model.
489 |         """
490 |         raise NotImplementedError
491 | 
492 |     def post_init(self, *args, **kwargs):
493 |         r"""
494 |         Post initialization method. This method is called after the model is
495 |         instantiated and loaded from a checkpoint. It can be used to perform
496 |         additional operations such as loading the state_dict.
497 |         """
498 |         raise NotImplementedError
499 | 
500 |     def compute_reward_score(self, input_ids, attention_mask=None, **kwargs):
501 |         r"""
502 |         Computes the reward score for a given input. The method has first to enable the adapter
503 |         and then compute the reward score. After that the model disables the reward modeling
504 |         adapter and enables the default ppo adapter again.
505 |         """
506 |         if not self.supports_rm_adapter:
507 |             raise ValueError("This model does not support reward modeling adapter.")
508 | 
509 |         # enable rm adapter
510 |         self.pretrained_model.set_adapter(self.rm_adapter_name)
511 |         self.pretrained_model.eval()
512 | 
513 |         with torch.no_grad():
514 |             base_model_output = self.pretrained_model(
515 |                 input_ids=input_ids,
516 |                 attention_mask=attention_mask,
517 |                 output_hidden_states=True,
518 |                 return_dict=True,
519 |                 **kwargs,
520 |             )
521 | 
522 |             last_hidden_states = base_model_output.hidden_states[-1]
523 |             scores = self.score(last_hidden_states)
524 | 
525 |         self.pretrained_model.set_adapter(self.policy_adapter_name)
526 |         self.pretrained_model.eval()
527 | 
528 |         return scores
529 | 
530 | 
531 | def create_reference_model(
532 |     model: PreTrainedModelWrapper, num_shared_layers: Optional[int] = None, pattern: Optional[str] = None
533 | ) -> PreTrainedModelWrapper:
534 |     """
535 |     Creates a static reference copy of a model. Note that model will be in `.eval()` mode.
536 | 
537 |     Args:
538 |         model (`PreTrainedModelWrapper`): The model to be copied.
539 |         num_shared_layers (`int`, *optional*): The number of initial layers that are shared between both models and kept frozen.
540 |         pattern (`str`, *optional*): The shared layers are selected with a string pattern
541 |             (e.g. "transformer.h.{layer}" for GPT2) and if a custom pattern is necessary it can be passed here.
542 | 
543 |     Returns
544 |         `PreTrainedModelWrapper`
545 |     """
546 |     if is_deepspeed_zero3_enabled():
547 |         raise ValueError(
548 |             "DeepSpeed ZeRO-3 is enabled and is not compatible with `create_reference_model()`. Please instantiate your reference model directly with `AutoCausalLM.from_pretrained()`."
549 |         )
550 | 
551 |     parameter_names = [n for n, _ in model.named_parameters()]
552 |     ref_model = deepcopy(model)
553 | 
554 |     # if no layers are shared, return copy of model
555 |     if num_shared_layers is None:
556 |         for param_name in parameter_names:
557 |             param = ref_model.get_parameter(param_name)
558 |             param.requires_grad = False
559 |         return ref_model.eval()
560 | 
561 |     # identify layer name pattern
562 |     if pattern is not None:
563 |         pattern = pattern.format(layer=num_shared_layers)
564 |     else:
565 |         for pattern_candidate in LAYER_PATTERNS:
566 |             pattern_candidate = pattern_candidate.format(layer=num_shared_layers)
567 |             if any(pattern_candidate in name for name in parameter_names):
568 |                 pattern = pattern_candidate
569 |                 break
570 | 
571 |     if pattern is None:
572 |         raise ValueError("Layer pattern could not be matched.")
573 | 
574 |     # divide parameters in shared and unshared parameter lists
575 |     shared_param_list = []
576 |     unshared_param_list = []
577 | 
578 |     shared_parameter = True
579 |     for name, _param in model.named_parameters():
580 |         if pattern in name:
581 |             shared_parameter = False
582 |         if shared_parameter:
583 |             shared_param_list.append(name)
584 |         else:
585 |             unshared_param_list.append(name)
586 | 
587 |     # create reference of the original parameter if they are shared
588 |     for param_name in shared_param_list:
589 |         param = model.get_parameter(param_name)
590 |         param.requires_grad = False
591 | 
592 |         _ref_param = ref_model.get_parameter(param_name)
593 | 
594 |     # for all other parameters just make sure they don't use gradients
595 |     for param_name in unshared_param_list:
596 |         param = ref_model.get_parameter(param_name)
597 |         param.requires_grad = False
598 | 
599 |     if pattern is not None and len(unshared_param_list) == 0:
600 |         logging.warning("Pattern passed or found, but no layers matched in the model. Check for a typo.")
601 | 
602 |     return ref_model.eval()
603 | 


--------------------------------------------------------------------------------
/model_utils/prm_model.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 The HuggingFace Team. All rights reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import torch
 15 | import torch.nn as nn
 16 | from transformers import AutoModelForCausalLM
 17 | from .modeling_base import PreTrainedModelWrapper
 18 | 
 19 | 
 20 | class ValueHead(nn.Module):
 21 |     r"""
 22 |     The ValueHead class implements a head for GPT2 that returns a scalar for each output token.
 23 |     """
 24 | 
 25 |     def __init__(self, config, **kwargs):
 26 |         super().__init__()
 27 |         if not hasattr(config, "summary_dropout_prob"):
 28 |             summary_dropout_prob = kwargs.pop("summary_dropout_prob", 0.1)
 29 |         else:
 30 |             summary_dropout_prob = config.summary_dropout_prob
 31 | 
 32 |         self.dropout = nn.Dropout(summary_dropout_prob) if summary_dropout_prob else nn.Identity()
 33 | 
 34 |         # some models such as OPT have a projection layer before the word embeddings - e.g. OPT-350m
 35 |         if hasattr(config, "hidden_size"):
 36 |             hidden_size = config.hidden_size
 37 |         if hasattr(config, "word_embed_proj_dim"):
 38 |             hidden_size = config.word_embed_proj_dim
 39 |         elif hasattr(config, "is_encoder_decoder"):
 40 |             if config.is_encoder_decoder and hasattr(config, "decoder"):
 41 |                 if hasattr(config.decoder, "hidden_size"):
 42 |                     hidden_size = config.decoder.hidden_size
 43 | 
 44 |         self.summary = nn.Linear(hidden_size, 1)
 45 | 
 46 |         self.flatten = nn.Flatten()
 47 | 
 48 |     def forward(self, hidden_states):
 49 |         output = self.dropout(hidden_states)
 50 | 
 51 |         # For now force upcast in fp32 if needed. Let's keep the
 52 |         # output in fp32 for numerical stability.
 53 |         if output.dtype != self.summary.weight.dtype:
 54 |             output = output.to(self.summary.weight.dtype)
 55 | 
 56 |         output = self.summary(output)
 57 |         return output
 58 | 
 59 | 
 60 | class PRM_MODEL(PreTrainedModelWrapper):
 61 | 
 62 |     transformers_parent_class = AutoModelForCausalLM
 63 |     lm_head_namings = ["lm_head", "embed_out"]
 64 |     supported_args = (
 65 |         "summary_dropout_prob",
 66 |         "v_head_initializer_range",
 67 |         "v_head_init_strategy",
 68 |     )
 69 | 
 70 |     def __init__(self, pretrained_model, **kwargs):
 71 |         r"""
 72 |         Initializes the model.
 73 | 
 74 |         Args:
 75 |             pretrained_model (`transformers.PreTrainedModel`):
 76 |                 The model to wrap. It should be a causal language model such as GPT2.
 77 |                 or any model mapped inside the `AutoModelForCausalLM` class.
 78 |             kwargs (`dict`, `optional`):
 79 |                 Additional keyword arguments, that are passed to the `ValueHead` class.
 80 |         """
 81 |         super().__init__(pretrained_model, **kwargs)
 82 |         v_head_kwargs, _, _ = self._split_kwargs(kwargs)
 83 | 
 84 |         if not any(hasattr(self.pretrained_model, attribute) for attribute in self.lm_head_namings):
 85 |             raise ValueError("The model does not have a language model head, please use a model that has one.")
 86 | 
 87 |         self.v_head = ValueHead(self.pretrained_model.config, **v_head_kwargs)
 88 | 
 89 |         self._init_weights(**v_head_kwargs)
 90 | 
 91 |     def _init_weights(self, **kwargs):
 92 |         r"""
 93 |         Initializes the weights of the value head. The default initialization strategy is random.
 94 |         Users can pass a different initialization strategy by passing the `v_head_init_strategy` argument
 95 |         when calling `.from_pretrained`. Supported strategies are:
 96 |         - `normal`: initializes the weights with a normal distribution.
 97 | 
 98 |         Args:
 99 |             **kwargs (`dict`, `optional`):
100 |                 Additional keyword arguments, that are passed to the `ValueHead` class. These arguments
101 |                 can contain the `v_head_init_strategy` argument as well as the `v_head_initializer_range`
102 |                 argument.
103 |         """
104 |         initializer_range = kwargs.pop("v_head_initializer_range", 0.2)
105 |         # random init by default
106 |         init_strategy = kwargs.pop("v_head_init_strategy", None)
107 |         if init_strategy is None:
108 |             # do nothing
109 |             pass
110 |         elif init_strategy == "normal":
111 |             self.v_head.summary.weight.data.normal_(mean=0.0, std=initializer_range)
112 |             self.v_head.summary.bias.data.zero_()
113 | 
114 |     def forward(
115 |         self,
116 |         input_ids=None,
117 |         past_key_values=None,
118 |         attention_mask=None,
119 |         return_past_key_values=False,
120 |         return_probs=False,
121 |         **kwargs,
122 |     ):
123 |         r"""
124 |         Applies a forward pass to the wrapped model and returns the logits of the value head.
125 | 
126 |         Args:
127 |             input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
128 |                 Indices of input sequence tokens in the vocabulary.
129 |             past_key_values (`tuple(tuple(torch.FloatTensor))`, `optional`):
130 |                 Contains pre-computed hidden-states (key and values in the attention blocks) as computed by the model
131 |                 (see `past_key_values` input) to speed up sequential decoding.
132 |             attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, `optional`):
133 |                 Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
134 |                 - 1 for tokens that are **not masked**,
135 |                 - 0 for tokens that are **masked**.
136 |             return_past_key_values (bool): A flag indicating if the computed hidden-states should be returned.
137 |             kwargs (`dict`, `optional`):
138 |                 Additional keyword arguments, that are passed to the wrapped model.
139 |         """
140 |         kwargs["output_hidden_states"] = True  # this had already been set in the LORA / PEFT examples
141 |         kwargs["past_key_values"] = past_key_values
142 | 
143 |         if self.is_peft_model and self.pretrained_model.active_peft_config.peft_type == "PREFIX_TUNING":
144 |             kwargs.pop("past_key_values")
145 | 
146 |         base_model_output = self.pretrained_model(
147 |             input_ids=input_ids,
148 |             attention_mask=attention_mask,
149 |             **kwargs,
150 |         )
151 | 
152 |         last_hidden_state = base_model_output.hidden_states[-1]
153 |         lm_logits = base_model_output.logits
154 |         loss = base_model_output.loss
155 | 
156 |         if last_hidden_state.device != self.v_head.summary.weight.device:
157 |             last_hidden_state = last_hidden_state.to(self.v_head.summary.weight.device)
158 | 
159 |         value = self.v_head(last_hidden_state).squeeze(-1) # logits_diff
160 | 
161 |         if return_probs:
162 |             value = torch.nn.functional.sigmoid(value) # convert logits_diff_to_Probs
163 | 
164 |         # force upcast in fp32 if logits are in half-precision
165 |         if lm_logits.dtype != torch.float32:
166 |             lm_logits = lm_logits.float()
167 | 
168 |         if return_past_key_values:
169 |             return (lm_logits, loss, value, base_model_output.past_key_values)
170 |         else:
171 |             return (lm_logits, loss, value)
172 | 
173 |     def generate(self, *args, **kwargs):
174 |         r"""
175 |         A simple wrapper around the `generate` method of the wrapped model.
176 |         Please refer to the [`generate`](https://huggingface.co/docs/transformers/internal/generation_utils)
177 |         method of the wrapped model for more information about the supported arguments.
178 | 
179 |         Args:
180 |             *args (`list`, *optional*):
181 |                 Positional arguments passed to the `generate` method of the wrapped model.
182 |             **kwargs (`dict`, *optional*):
183 |                 Keyword arguments passed to the `generate` method of the wrapped model.
184 |         """
185 |         return self.pretrained_model.generate(*args, **kwargs)
186 | 
187 |     def state_dict(self, *args, **kwargs):
188 |         r"""
189 |         Returns the state dictionary of the model. We add the state dictionary of the value head
190 |         to the state dictionary of the wrapped model by prepending the key with `v_head.`.
191 |         """
192 |         if not self.is_peft_model:
193 |             pretrained_model_state_dict = self.pretrained_model.state_dict(*args, **kwargs)
194 |         else:
195 |             # if it is a peft model, only save the v_head
196 |             pretrained_model_state_dict = {}
197 | 
198 |         v_head_state_dict = self.v_head.state_dict(*args, **kwargs)
199 |         for k, v in v_head_state_dict.items():
200 |             pretrained_model_state_dict[f"v_head.{k}"] = v
201 |         return pretrained_model_state_dict
202 | 
203 |     def push_to_hub(self, *args, **kwargs):
204 |         self.pretrained_model.v_head = self.v_head
205 | 
206 |         return self.pretrained_model.push_to_hub(*args, **kwargs)
207 | 
208 |     def post_init(self, state_dict):
209 |         r"""
210 |         We add the state dictionary of the value head to the state dictionary of the wrapped model
211 |         by prepending the key with `v_head.`. This function removes the `v_head.` prefix from the
212 |         keys of the value head state dictionary.
213 |         """
214 |         for k in list(state_dict.keys()):
215 |             if "v_head." in k:
216 |                 state_dict[k.replace("v_head.", "")] = state_dict.pop(k)
217 |         self.v_head.load_state_dict(state_dict, strict=False)
218 |         del state_dict
219 | 
220 |         if hasattr(self.pretrained_model, "hf_device_map"):
221 |             if (
222 |                 "cpu" in self.pretrained_model.hf_device_map.values()
223 |                 or "disk" in self.pretrained_model.hf_device_map.values()
224 |             ):
225 |                 raise ValueError(
226 |                     "The model is offloaded on CPU or disk - CPU & disk offloading is not supported for ValueHead models."
227 |                 )
228 | 
229 |             first_device = list(set(self.pretrained_model.hf_device_map.values()))[0]
230 |             if isinstance(first_device, int):
231 |                 first_device = f"cuda:{first_device}"
232 |             self.v_head = self.v_head.to(first_device)
233 | 
234 |             def set_device_hook(module, input, outputs):
235 |                 new_output = ()
236 |                 for output in outputs:
237 |                     if isinstance(output, torch.Tensor):
238 |                         new_output += (output.to(first_device),)
239 |                     else:
240 |                         new_output += (output,)
241 |                 return new_output
242 | 
243 |             self.register_forward_hook(set_device_hook)
244 | 
245 |             self.is_sequential_parallel = True
246 | 
247 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | setup(
 4 |     name='vllm_add_dummy_model',
 5 |     version='0.1',
 6 |     packages=['vllm_add_dummy_model'],
 7 |     entry_points={
 8 |         'vllm.general_plugins': [
 9 |             "register_dummy_model = vllm_add_dummy_model.prm_model:register"
10 |         ]
11 |     }
12 | )


--------------------------------------------------------------------------------
/vllm_add_dummy_model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SkyworkAI/skywork-o1-prm-inference/719b56b17447405e0f10e6c0360a581cf4ffa9c1/vllm_add_dummy_model/__init__.py


--------------------------------------------------------------------------------
/vllm_add_dummy_model/prm_model.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import torch
  3 | import re
  4 | import sys
  5 | import vllm
  6 | from torch import nn
  7 | from vllm.model_executor.layers.pooler import (
  8 |     Optional,
  9 |     List,
 10 |     PoolerConfig,
 11 |     PoolingType,
 12 |     PoolingMetadata,
 13 |     PoolingTensors,
 14 |     EmbeddingSequenceGroupOutput,
 15 |     PoolerOutput
 16 | )
 17 | from vllm.model_executor.models.qwen2_rm import (
 18 |     Qwen2Model,
 19 |     AutoWeightsLoader,
 20 |     VllmConfig,
 21 |     maybe_prefix,
 22 |     IntermediateTensors,
 23 |     PoolingMetadata,
 24 |     PoolingType,
 25 |     PoolerOutput,
 26 |     AttentionMetadata,
 27 |     Iterable,
 28 |     Union,
 29 |     Tuple,
 30 |     SupportsPP
 31 | )
 32 | 
 33 | class ValueHead(nn.Module):
 34 |     r"""
 35 |     The ValueHead class implements a head for GPT2 that returns a scalar for each output token.
 36 |     """
 37 | 
 38 |     def __init__(self, config, **kwargs):
 39 |         super().__init__()
 40 |         if not hasattr(config, "summary_dropout_prob"):
 41 |             summary_dropout_prob = kwargs.pop("summary_dropout_prob", 0.1)
 42 |         else:
 43 |             summary_dropout_prob = config.summary_dropout_prob
 44 | 
 45 |         self.dropout = nn.Dropout(summary_dropout_prob) if summary_dropout_prob else nn.Identity()
 46 | 
 47 |         # some models such as OPT have a projection layer before the word embeddings - e.g. OPT-350m
 48 |         if hasattr(config, "hidden_size"):
 49 |             hidden_size = config.hidden_size
 50 | 
 51 |         self.summary = nn.Linear(hidden_size, 1)
 52 | 
 53 |         self.flatten = nn.Flatten()
 54 | 
 55 |     def forward(self, hidden_states):
 56 |         output = self.dropout(hidden_states)
 57 | 
 58 |         # For now force upcast in fp32 if needed. Let's keep the
 59 |         # output in fp32 for numerical stability.
 60 |         if output.dtype != self.summary.weight.dtype:
 61 |             output = output.to(self.summary.weight.dtype)
 62 | 
 63 |         # print('enter here')
 64 |         # print('output1.shape: ', output.shape)
 65 |         output = self.summary(output)
 66 |         # print('output2.shape: ', output.shape)
 67 |         return output
 68 | 
 69 |     
 70 | 
 71 | 
 72 | class Pooler(nn.Module):
 73 |     """A layer that pools specific information from hidden states.
 74 | 
 75 |     This layer does the following:
 76 |     1. Extracts specific tokens or aggregates data based on pooling method.
 77 |     2. Normalizes output if specified.
 78 |     3. Returns structured results as `PoolerOutput`.
 79 | 
 80 |     Attributes:
 81 |         pooling_type: The type of pooling to use.
 82 |         normalize: Whether to normalize the pooled data.
 83 |     """
 84 | 
 85 |     def __init__(
 86 |         self,
 87 |         pooling_type: PoolingType,
 88 |         normalize: bool,
 89 |         softmax: bool,
 90 |         step_tag_id: Optional[int] = None,
 91 |         returned_token_ids: Optional[List[int]] = None,
 92 |     ):
 93 |         super().__init__()
 94 | 
 95 |         self.pooling_type = pooling_type
 96 |         self.normalize = normalize
 97 |         self.softmax = softmax
 98 |         self.step_tag_id = step_tag_id
 99 |         self.returned_token_ids = returned_token_ids
100 | 
101 |     @classmethod
102 |     def from_config_with_defaults(
103 |         cls,
104 |         pooler_config: PoolerConfig,
105 |         pooling_type: PoolingType,
106 |         normalize: bool,
107 |         softmax: bool,
108 |         step_tag_id: Optional[int] = None,
109 |         returned_token_ids: Optional[List[int]] = None,
110 |     ) -> Optional["Pooler"]:
111 |         if pooler_config is None:
112 |             return None
113 |         return cls(
114 |             pooling_type=PoolingType[pooler_config.pooling_type]
115 |             if pooler_config.pooling_type is not None else pooling_type,
116 |             normalize=pooler_config.normalize
117 |             if pooler_config.normalize is not None else normalize,
118 |             softmax=pooler_config.softmax
119 |             if pooler_config.softmax is not None else softmax,
120 |             step_tag_id=pooler_config.step_tag_id
121 |             if pooler_config.step_tag_id is not None else step_tag_id,
122 |             returned_token_ids=pooler_config.returned_token_ids
123 |             if pooler_config.returned_token_ids is not None else
124 |             returned_token_ids,
125 |         )
126 | 
127 |     def forward(
128 |         self,
129 |         hidden_states: torch.Tensor,
130 |         pooling_metadata: PoolingMetadata,
131 |     ) -> PoolerOutput:
132 |         """Pools specific information from hidden states based on metadata."""
133 | 
134 |         prompt_lens = PoolingTensors.from_pooling_metadata(
135 |             pooling_metadata, hidden_states.device).prompt_lens
136 | 
137 |         if self.pooling_type is PoolingType.CLS:
138 |             first_token_flat_indices = torch.zeros_like(prompt_lens)
139 |             first_token_flat_indices[1:] += torch.cumsum(prompt_lens,
140 |                                                         dim=0)[:-1]
141 |             pooled_data = hidden_states[first_token_flat_indices]
142 |         elif self.pooling_type == PoolingType.LAST:
143 |             last_token_flat_indices = torch.cumsum(prompt_lens, dim=0) - 1
144 |             pooled_data = hidden_states[last_token_flat_indices]
145 |         elif self.pooling_type == PoolingType.ALL:
146 |             offset = 0
147 |             pooled_data = []
148 |             for prompt_len in prompt_lens:
149 |                 pooled_data.append(hidden_states[offset:offset + prompt_len])
150 |                 offset += prompt_len
151 |             # pooled_data = torch.stack(pooled_data_lst)
152 |         elif self.pooling_type == PoolingType.MEAN:
153 |             # Calculate mean pooling
154 |             cumsum = torch.cumsum(hidden_states, dim=0)
155 |             start_indices = torch.cat([
156 |                 torch.tensor([0], device=hidden_states.device),
157 |                 torch.cumsum(prompt_lens[:-1], dim=0)
158 |             ])
159 |             end_indices = torch.cumsum(prompt_lens, dim=0)
160 |             pooled_data = (
161 |                 cumsum[end_indices - 1] - cumsum[start_indices] +
162 |                 hidden_states[start_indices]) / prompt_lens.unsqueeze(1)
163 |         elif self.pooling_type == PoolingType.STEP:
164 |             returned_token_ids = self.returned_token_ids
165 |             if returned_token_ids is not None and len(returned_token_ids) > 0:
166 |                 hidden_states = hidden_states[:, returned_token_ids]
167 | 
168 |             step_tag_id = self.step_tag_id
169 | 
170 |             offset = 0
171 |             pooled_data_lst = []
172 |             for prompt_len, seq_data_i in zip(
173 |                     prompt_lens, pooling_metadata.seq_data.values()):
174 |                 pooled_data_i = hidden_states[offset:offset + prompt_len]
175 |                 if step_tag_id is not None:
176 |                     token_ids = torch.tensor(seq_data_i.prompt_token_ids)
177 |                     pooled_data_i = pooled_data_i[token_ids == step_tag_id]
178 | 
179 |                 offset += prompt_len
180 |                 pooled_data_lst.append(pooled_data_i)
181 | 
182 |             pooled_data = torch.stack(pooled_data_lst)
183 |         else:
184 |             raise ValueError(f"Invalid pooling type: {self.pooling_type}")
185 | 
186 |         if self.normalize:
187 |             pooled_data = nn.functional.normalize(pooled_data, p=2, dim=1)
188 | 
189 |         if self.softmax:
190 |             pooled_data = nn.functional.softmax(pooled_data, dim=-1)
191 | 
192 |         pooled_outputs = [
193 |             EmbeddingSequenceGroupOutput(data.tolist()) for data in pooled_data
194 |         ]
195 | 
196 |         return PoolerOutput(outputs=pooled_outputs)
197 | 
198 | class Qwen2ForPrmModel(nn.Module, SupportsPP):
199 |     packed_modules_mapping = {
200 |         "qkv_proj": [
201 |             "q_proj",
202 |             "k_proj",
203 |             "v_proj",
204 |         ],
205 |         "gate_up_proj": [
206 |             "gate_proj",
207 |             "up_proj",
208 |         ],
209 |     }
210 | 
211 |     # LoRA specific attributes
212 |     supported_lora_modules = [
213 |         "qkv_proj",
214 |         "o_proj",
215 |         "gate_up_proj",
216 |         "down_proj",
217 |     ]
218 |     embedding_modules = {}
219 |     embedding_padding_modules = []
220 | 
221 |     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
222 |         super().__init__()
223 |         config = vllm_config.model_config.hf_config
224 |         cache_config = vllm_config.cache_config
225 |         quant_config = vllm_config.quant_config
226 |         lora_config = vllm_config.lora_config
227 |         pooler_config = vllm_config.model_config.pooler_config
228 |         # TODO (@robertgshaw2): see if this can be moved out
229 |         if (cache_config.sliding_window is not None
230 |                 and hasattr(config, "max_window_layers")):
231 |             raise ValueError("Sliding window for some but all layers is not "
232 |                             "supported. This model uses sliding window "
233 |                             "but `max_window_layers` = {} is less than "
234 |                             "`num_hidden_layers` = {}. Please open an issue "
235 |                             "to discuss this feature.".format(
236 |                                 config.max_window_layers,
237 |                                 config.num_hidden_layers,
238 |                             ))
239 | 
240 |         self.config = config
241 |         self.lora_config = lora_config
242 | 
243 |         self.quant_config = quant_config
244 |         self.model = Qwen2Model(vllm_config=vllm_config,
245 |                                 prefix=maybe_prefix(prefix, "model"))
246 |         self.v_head = ValueHead(self.config)
247 | 
248 |         self._pooler = Pooler.from_config_with_defaults(
249 |             pooler_config,
250 |             pooling_type=PoolingType.ALL,
251 |             normalize=False,
252 |             softmax=False)
253 |         self.make_empty_intermediate_tensors = (
254 |             self.model.make_empty_intermediate_tensors)
255 | 
256 |     def forward(
257 |         self,
258 |         input_ids: torch.Tensor,
259 |         positions: torch.Tensor,
260 |         kv_caches: List[torch.Tensor],
261 |         attn_metadata: AttentionMetadata,
262 |         intermediate_tensors: Optional[IntermediateTensors] = None,
263 |     ) -> Union[torch.Tensor, IntermediateTensors]:
264 |         hidden_states = self.model(input_ids, positions, kv_caches,
265 |                                 attn_metadata, intermediate_tensors)
266 |         logits = self.v_head(hidden_states)
267 |         return logits
268 | 
269 |     def pooler(
270 |         self,
271 |         hidden_states: torch.Tensor,
272 |         pooling_metadata: PoolingMetadata,
273 |     ) -> Optional[PoolerOutput]:
274 |         return self._pooler(hidden_states, pooling_metadata)
275 | 
276 |     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
277 |         loader = AutoWeightsLoader(self,
278 |                                 ignore_unexpected_prefixes=["lm_head."])
279 |         loader.load_weights(weights)
280 | 
281 | def register():
282 |     from vllm import ModelRegistry
283 |     if "Qwen2ForPrmModel" not in ModelRegistry.get_supported_archs():
284 |         ModelRegistry.register_model("Qwen2ForPrmModel", "vllm_add_dummy_model.prm_model:Qwen2ForPrmModel")
285 | 


--------------------------------------------------------------------------------