├── synthesize_deep_reasoning ├── synthesize.sh ├── server.sh ├── config.yaml ├── helper.py ├── model_utils.py └── synthesize.py ├── LICENSE ├── README.md └── .gitignore /synthesize_deep_reasoning/synthesize.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export HOST_IP=0.0.0.0 3 | model2=${model2:-""} 4 | cd ${workdir} 5 | python synthesize.py --port $port --rank ${rank} --total-ranks ${total} --config_file $cname --model $model --posterior_model $model2 # port rank total 6 | -------------------------------------------------------------------------------- /synthesize_deep_reasoning/server.sh: -------------------------------------------------------------------------------- 1 | 2 | set -ex 3 | export HOST_IP=0.0.0.0 4 | cname=/path/to/config.yaml 5 | 6 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m vllm.entrypoints.openai.api_server --model $model --port $port --disable-log-requests --max-model-len 32000 -tp 4 --gpu-memory-utilization 0.6 --trust-remote-code & 7 | CUDA_VISIBLE_DEVICES=4,5,6,7 python -m vllm.entrypoints.openai.api_server --model $model2 --port 2$port --disable-log-requests --max-model-len 32000 -tp 2 --gpu-memory-utilization 0.6 --trust-remote-code & 8 | -------------------------------------------------------------------------------- /synthesize_deep_reasoning/config.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | file_path: '/path/to/QAcollection.json' 3 | 4 | output: 5 | file_prefix: '/path/to/output/file/folder/' 6 | 7 | processing: 8 | stop_thresh: 0.25 9 | max_step: 10 10 | num_rollouts: 1 11 | num_expansion: 2 12 | 13 | model: 14 | # supported model_types: "hf", "openai", "anthropic", "vllm" 15 | model_type: "vllm_server" 16 | model_name: "/path/to/Qwen2___5-32B-Instruct" 17 | model_url: "http://127.0.0.1:8701/v1/completions" 18 | model_args: 19 | beamsearch: 0 20 | port: 5757 21 | max_tokens: 8000 22 | top_k: 40 23 | top_p: 0.85 24 | temperature_range: [0.8, 0.8] 25 | prompt_type: "tokenizer" 26 | 27 | judge_model: 28 | # supported model_types: "hf", "openai", "anthropic", "vllm" 29 | use: true 30 | model_type: "vllm_server" 31 | model_name: "/path/to/model/for/perplexity" 32 | model_url: "http://127.0.0.1:8701/v1/completions" 33 | model_args: 34 | beamsearch: 0 35 | port: 5757 36 | max_tokens: 100 37 | top_k: -1 38 | top_p: 1 39 | temperature_range: [1.0, 1.0] 40 | prompt_type: "tokenizer" 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 TIGER Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # REverse-Engineered Reasoning for Open-Ended Generation 2 | The official code of "REverse-Engineered Reasoning for Open-Ended Generation". 3 | 4 | ## Release Progress 5 | - [x] Deep Reasoning Synthesis 6 | 7 | - [ ] Evaluation 8 | 9 | - [ ] VeRL-based Distributed SFT training 10 | 11 | ## Synthesis of Deep Reasoning 12 | cd into the folder `synthesize_deep_reasoning` 13 | 14 | - **Step 0: Update the config.** 15 | 16 | Check `config.yaml`: 17 | ``` 18 | stop_thresh: 0.25 # PPL stopping criterion 19 | max_step: 10 # max-step stopping criterion 20 | num_rollouts: 1 # num initial thinking rollouts per query, not tested 21 | num_expansion: 2 # num expanded nodes for each segment edit 22 | file_path: '/path/to/QAcollection.json' 23 | file_prefix: '/path/to/output/file/folder/' 24 | ``` 25 | JSON format: a list of dicts, where each dict has three keys: `question`, `solution`, `index` 26 | 27 | - **Step 1: Start the vLLM servers.** 28 | ```bash 29 | export model=/path/to/generator 30 | export model2=/path/to/basemodel/for/PPL 31 | bash server.sh 32 | ``` 33 | We use Qwen2.5-32B-Instruct as the generator, and Qwen3-8B-Base as the model for computing perplexity. We find it faster to offload the PPL computation to a smaller model. 34 | 35 | - **Step 2: Run the Deep Reasoning Synthesis with Ray-scheduled workers.** 36 | ```bash 37 | export workdir=$(pwd) 38 | export model=/path/to/generator 39 | export model2=/path/to/basemodel/for/PPL 40 | export port=2233 41 | export rank=0 42 | export total=1 43 | export cname=/path/to/config.yaml 44 | bash synthesize.sh 45 | ``` 46 | The synthesized trajectories will be dumped to the `file_prefix` path. 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
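For Step 0, a minimal sketch of the expected `QAcollection.json` layout is shown below. The field names follow the README; note that `synthesize.py` reads the unique id via `item['extra_info']['index']`, so the sketch carries the index in both places. All paths and string values are placeholders, not real data.

```python
# Hedged sketch of the input file pointed to by config.yaml's `file_path`.
# Values are placeholders; the nested extra_info.index mirrors what synthesize.py reads.
import json

records = [
    {
        "question": "Write a short product announcement for a new e-ink reader.",
        "solution": "Meet our new reader: a glare-free screen, weeks of battery life, ...",
        "index": 0,
        "extra_info": {"index": 0},
    },
]

with open("/path/to/QAcollection.json", "w", encoding="utf-8") as f:
    json.dump(records, f, ensure_ascii=False, indent=2)
```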
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. 
For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | -------------------------------------------------------------------------------- /synthesize_deep_reasoning/helper.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | class Node: 4 | def __init__(self, ref, raw_q, gold=None, info=None): 5 | if info is not None: 6 | self.memory = info 7 | else: self.memory = {} 8 | self.memory.update(dict(q=raw_q, ref=ref,history=[])) 9 | self.ref = ref 10 | self.raw_q = raw_q 11 | 12 | def segment_response(inp, sep='\n\n'): 13 | segments = [] 14 | temp = '' 15 | i = 0 16 | n = len(inp) 17 | 18 | while i < n: 19 | if inp[i:i+2] == '\\[': 20 | # Start of a LaTeX block, collect until we find '\\]' 21 | j = i + 2 22 | while j < n and inp[j:j+2] != '\\]': 23 | j += 1 24 | # Add the LaTeX block including '\\[' and '\\]' 25 | # segments.append(inp[i:j+2]) 26 | temp += inp[i:j+2] 27 | i = j + 2 # Move past '\\]' 28 | elif inp[i:i+sep.count('\n')] == sep: 29 | segments.append(temp) 30 | temp = '' 31 | j = i+1 32 | while j0: 56 | prefix = "".join(buffer) 57 | buffer = [] 58 | seg = prefix + seg 59 | final.append(seg) 60 | if len(buffer)>0: 61 | prefix = "".join(buffer) 62 | buffer = [] 63 | if final: final[-1] += prefix 64 | else: final = [prefix] 65 | return final 66 | 67 | def load_config(config_path): 68 | """ 69 | Load configuration from a YAML file. 70 | 71 | Args: 72 | config_path (str): Path to the YAML configuration file. 73 | 74 | Returns: 75 | dict: A dictionary containing the configuration. 76 | """ 77 | with open(config_path, "r") as file: 78 | return yaml.safe_load(file) 79 | 80 | 81 | 82 | def make_prompt(tokenizer, messages): 83 | prompt = tokenizer.apply_chat_template( 84 | messages, tokenize=False, add_generation_prompt=True 85 | ) 86 | # prompt = prompt.split('')[0] 87 | if prompt.strip().endswith(""): 88 | prompt = prompt.strip()[:-len("")] # split('')[0] 89 | return prompt 90 | 91 | 92 | def breakdown_steps(a): 93 | steps = segment_response(a) 94 | if len(steps)<=1: 95 | steps = segment_response(a, '\n') 96 | final = "" 97 | allsteps = [] 98 | for idx, step in enumerate(steps): 99 | allsteps.append(f" {step}\n") 100 | final += allsteps[-1] 101 | return final, allsteps, steps 102 | 103 | 104 | def segment_offsets(offsets, segments, logps): 105 | """ 106 | Splits the 'offsets' list into sublists based on the cumulative sums of 'segments'. 107 | 108 | Args: 109 | offsets (list[int]): A sorted list of integers. 110 | segments (list[int]): A list of segment lengths. 111 | 112 | Returns: 113 | list[list[int]]: A list of lists, where each sublist contains offsets 114 | belonging to the corresponding segment. 115 | """ 116 | 117 | # Calculate the cumulative sum of segments to get the upper bounds. 118 | # The first element is the length of the first segment, the second is the 119 | # sum of the first two, and so on. 120 | segment_boundaries = np.cumsum(segments) 121 | 122 | # This will hold the final list of lists 123 | result = [] 124 | 125 | # Pointer for the current position in the offsets list 126 | offset_idx = 0 127 | 128 | # This will be the starting boundary for each segment's range. 129 | # It starts at 0 for the first segment. 
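# (Illustrative example, assuming one logprob per offset: with segments = [12, 7] the
#  boundaries become [12, 19]; every token whose start offset is < 12 falls into the first
#  segment, the remaining tokens up to offset 19 fall into the second, and seg_logp_list
#  returns the matching slices of `logps`.)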
130 | lower_bound = 0 131 | 132 | # Iterate through each segment's upper boundary 133 | for upper_bound in segment_boundaries: 134 | 135 | # This sublist will store the offsets for the current segment 136 | current_segment_offsets = [] 137 | 138 | # Go through the offsets list starting from where we last left off 139 | while offset_idx < len(offsets) and offsets[offset_idx] < upper_bound: 140 | current_segment_offsets.append(offsets[offset_idx]) 141 | offset_idx += 1 142 | 143 | result.append(current_segment_offsets) 144 | 145 | # The next segment's range will start from the end of the current one 146 | lower_bound = upper_bound 147 | 148 | token_belongs_to_segment = result 149 | seg_logp_list = [] # list of list of logps for each segment 150 | cnt = 0 151 | for seg_i, token_included in enumerate(token_belongs_to_segment): 152 | num_tokens = len(token_included) 153 | assert num_tokens>0 154 | seg_logp_list.append(logps[cnt:cnt+num_tokens]) 155 | cnt += num_tokens 156 | return seg_logp_list, token_belongs_to_segment 157 | 158 | 159 | def equals(a, b): 160 | flag = a==b 161 | if not flag: 162 | flag = False 163 | try: 164 | xx = eval(a) 165 | yy = eval(str(b)) 166 | flag = abs(xx-yy)<1e-4 167 | except: 168 | pass 169 | 170 | return flag 171 | -------------------------------------------------------------------------------- /synthesize_deep_reasoning/model_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from transformers import AutoTokenizer, AutoModelForCausalLM 3 | import random 4 | import os 5 | from transformers import set_seed 6 | import requests 7 | from concurrent.futures import ThreadPoolExecutor, as_completed 8 | from tqdm import tqdm 9 | from typing import List 10 | import pdb 11 | import copy 12 | import time 13 | import numpy as np 14 | import json 15 | # Set your Hugging Face token here 16 | os.environ["HUGGINGFACE_HUB_TOKEN"] = "hf_yourkey" 17 | 18 | # For reproducibility 19 | SEED = 1234 20 | set_seed(SEED) 21 | random.seed(42) 22 | 23 | class LM: 24 | def __init__(self, model_name: str = "Qwen/Qwen2.5-Math-7B-Instruct", model_type: str = "hf", model_url="", num_rollouts: int = 5, tokenizer=None, **model_args): 25 | self.model_type = model_type.lower() 26 | self.model_name = model_name 27 | 28 | self.max_tokens = model_args['max_tokens'] 29 | self.temperature_range = model_args['temperature_range'] 30 | self.topp = model_args['top_p'] 31 | # self.topk = model_args['top_k'] 32 | self.port = model_args.get("port", 0) 33 | self.do_bs = model_args.get('beamsearch', 0) 34 | self.url = model_url 35 | # if self.port!="0": 36 | self.url = f"http://127.0.0.1:{self.port}/v1/completions" 37 | print(f"running server on {self.url}") 38 | self.num_rollouts = num_rollouts 39 | 40 | self.payload = { 41 | "model":self.model_name, 42 | "max_tokens": self.max_tokens, 43 | "top_p": self.topp, 44 | "n": self.num_rollouts 45 | } 46 | self.tokenizer = tokenizer 47 | 48 | self.__dict__.update(model_args) 49 | print("Updated model args:", self.__dict__) 50 | 51 | if self.model_type == "vllm": 52 | #raise NotImplementedError("VLLM is not implemented yet") 53 | from vllm import LLM, SamplingParams 54 | self.llm = LLM(model=model_name, enable_prefix_caching=True) 55 | self.SamplingParams = SamplingParams 56 | elif self.model_type == "hf": 57 | self.tokenizer = AutoTokenizer.from_pretrained(model_name) 58 | self.model = AutoModelForCausalLM.from_pretrained( 59 | model_name, torch_dtype=torch.float16, device_map="cuda" 60 | ) 61 | elif 
self.model_type == "openai": 62 | import openai 63 | self.client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 64 | elif self.model_type == "anthropic": 65 | import anthropic 66 | self.client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) 67 | elif self.model_type == "vllm_server": 68 | pass 69 | else: 70 | raise ValueError("Invalid model_type. Choose 'vllm', 'hf', 'openai', or 'anthropic'.") 71 | 72 | def generate(self, prompt, num_rollouts=None, isgreedy=False, **kwargs): 73 | if num_rollouts is None: 74 | num_rollouts = self.num_rollouts 75 | if self.model_type == 'vllm_server': 76 | return self.generate_vllm_server(prompt, num_rollouts, isgreedy=isgreedy, **kwargs) 77 | elif self.model_type == "vllm": 78 | return self.generate_vllm(prompt, num_rollouts) 79 | elif self.model_type == "hf": 80 | return self.generate_hf(prompt, num_rollouts) 81 | elif self.model_type == "anthropic" or self.model_type == "openai": 82 | return self.generate_api(prompt, num_rollouts) 83 | 84 | def generate_hf(self, prompt, num_rollouts): 85 | inputs = self.tokenizer(prompt, return_tensors="pt").to('cuda') 86 | print(prompt) 87 | results = [] 88 | for _ in range(num_rollouts): 89 | temperature = random.uniform(self.temperature_range[0], self.temperature_range[1]) 90 | outputs = self.model.generate( 91 | **inputs, do_sample=True, max_new_tokens=self.max_tokens, temperature=temperature, 92 | num_return_sequences=1 93 | ) 94 | generated_tokens = outputs[0][inputs['input_ids'].shape[1]:] 95 | result = self.tokenizer.decode(generated_tokens, skip_special_tokens=True) 96 | results.append(result) 97 | pdb.set_trace() 98 | return results 99 | 100 | def generate_vllm(self, prompt, num_rollouts): 101 | #raise NotImplementedError("VLLM is not implemented yet") 102 | # print(prompt) 103 | temperature = random.choice(self.temperature_range) 104 | sampling_params = self.SamplingParams( 105 | temperature=temperature, 106 | top_k=self.topk, 107 | top_p=self.topp, 108 | max_tokens=self.max_tokens, 109 | n=num_rollouts, 110 | seed=SEED, 111 | # stop=['\n'], 112 | ) 113 | st = time.time() 114 | outputs = self.llm.generate(prompt, sampling_params) 115 | ed = time.time() 116 | print(f'{num_rollouts} responses Time taken: {ed-st}') 117 | result = [completion.text for output in outputs for completion in output.outputs] 118 | return result, temperature 119 | 120 | def generate_vllm_server(self, prompt, num_rollouts=None, isgreedy=False, special_stop=None, prompt_only=False): 121 | 122 | temperature = np.random.uniform(low=self.temperature_range[0], high=self.temperature_range[1]) 123 | # temperature = random.choice(self.temperature_range) 124 | payload = copy.copy(self.payload) 125 | 126 | payload.update({ 127 | "temperature": temperature, 128 | # "messages": [ 129 | # {"role": "system", "content": systemprompt}, 130 | # {"role": "user", "content": query} 131 | # ], 132 | # "max_tokens": 4096, 133 | "prompt": prompt, 134 | 'logprobs': 1 if not isgreedy else 0, 135 | }) 136 | if num_rollouts is not None: 137 | payload['n'] = num_rollouts 138 | 139 | if isgreedy: 140 | payload['top_k'] = 1 141 | payload['top_p'] = 1 142 | payload['temperature'] = 0 143 | payload['n'] = 1 144 | 145 | if special_stop: 146 | payload['stop'] = special_stop 147 | 148 | if prompt_only: 149 | payload.update({ 150 | 'prompt': prompt, 151 | 'n': 1, 152 | 'temperature': 1.0, 153 | 'prompt_logprobs': 1 154 | }) 155 | 156 | # print(f'===> submitting request @{self.url}') 157 | response = requests.post(self.url, 158 | headers={"User-Agent": 
"Test Client"}, 159 | json=payload, 160 | stream=False) 161 | # {"object":"error","message":"[{'type': 'missing', 'loc': ('body', 'model'), 'msg': 'Field required' 162 | if response.status_code == 200: 163 | result = response.json() 164 | # print(num_rollouts, "Generated Text:", result) 165 | else: 166 | result = dict(choices=[]) 167 | print(f"Error: {response.status_code}, {response.text}") 168 | return ([],[], []), None 169 | message = json.loads(response.text)['message'] 170 | 171 | if 'context length' in message: 172 | encoded = self.tokenizer(prompt) 173 | q_tokens = len(encoded['input_ids']) 174 | a_tokens_max = 4096-5-q_tokens 175 | payload['max_tokens'] = a_tokens_max 176 | 177 | response = requests.post(self.url, 178 | headers={"User-Agent": "Test Client"}, 179 | json=payload, 180 | stream=False 181 | ) 182 | 183 | if response.status_code == 200: 184 | result = response.json() 185 | # print("Generated Text:", result) 186 | else: 187 | result = dict(choices=[]) 188 | print(f"Error: {response.status_code}, {response.text}") 189 | 190 | 191 | result_ = [item['text'] for item in result['choices']] 192 | logps_ = [item['prompt_logprobs'] for item in result['choices']] if prompt_only else [item['logprobs']['token_logprobs'] for item in result['choices']] 193 | offsets_ = result if prompt_only else [item['logprobs']['text_offset'] for item in result['choices']] 194 | # offsets_ = [item['prompt_logprobs'] for item in result['choices']] 195 | 196 | offsets_ = result 197 | return (result_,logps_, offsets_), temperature 198 | 199 | def generate_vl_vllm_server(self, conversation, num_rollouts=None, isgreedy=False, special_stop=None, prompt_only=False): 200 | 201 | temperature = np.random.uniform(low=self.temperature_range[0], high=self.temperature_range[1]) 202 | # temperature = random.choice(self.temperature_range) 203 | payload = copy.copy(self.payload) 204 | 205 | payload.update({ 206 | "temperature": temperature, 207 | "messages": conversation, 208 | # "max_tokens": 4096, 209 | # "prompt": prompt, 210 | 'logprobs': 1 if not isgreedy else 0, 211 | }) 212 | 213 | if num_rollouts is not None: 214 | payload['n'] = num_rollouts 215 | 216 | if isgreedy: 217 | payload['top_k'] = 1 218 | payload['top_p'] = 1 219 | payload['temperature'] = 0 220 | payload['n'] = 1 221 | 222 | if special_stop: 223 | payload['stop'] = special_stop 224 | 225 | 226 | # print(f'===> submitting request @{self.url}') 227 | response = requests.post(self.url.replace("completions","chat/completions"), 228 | headers={"User-Agent": "Test Client"}, 229 | json=payload, 230 | stream=False) 231 | 232 | if response.status_code == 200: 233 | result = response.json() 234 | # print(num_rollouts, "Generated Text:", result) 235 | else: 236 | result = dict(choices=[]) 237 | print(f"Error: {response.status_code}, {response.text}") 238 | # import pdb; pdb.set_trace() 239 | return ([],[], []), None 240 | 241 | result_ = [item['message']['content'] for item in result['choices']] 242 | logps_ = [[x['logprob'] for x in item['logprobs']['content']] for item in result['choices']] 243 | offsets_ = None # result if prompt_only else [item['logprobs']['text_offset'] for item in result['choices']] 244 | # offsets_ = [item['prompt_logprobs'] for item in result['choices']] 245 | 246 | offsets_ = result 247 | return (result_,logps_, offsets_), temperature 248 | 249 | def generate_api(self, prompt: str, num_rollouts) -> List[str]: 250 | def send_request(prompt): 251 | temperature = random.choice(self.temperature_range) 252 | if self.model_type == 
"openai": 253 | response = self.client.chat.completions.create( 254 | model=self.model_name, 255 | messages=[{"role": "user", "content": prompt}], 256 | max_tokens=self.max_tokens, 257 | temperature=temperature 258 | ) 259 | output = response.choices[0].message.content 260 | elif self.model_type == "anthropicc": 261 | response = self.client.messages.create( 262 | model=self.model_name, 263 | messages=[{"role": "user", "content": prompt}], 264 | max_tokens=self.max_tokens, 265 | temperature=temperature 266 | ) 267 | output = response.content[0].text 268 | return output 269 | 270 | responses = [] 271 | with ThreadPoolExecutor(max_workers=num_rollouts) as executor: 272 | futures = [executor.submit(send_request, prompt) for _ in range(num_rollouts)] 273 | for future in tqdm(as_completed(futures), total=len(futures)): 274 | responses.append(future.result()) 275 | 276 | return responses 277 | -------------------------------------------------------------------------------- /synthesize_deep_reasoning/synthesize.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import copy 4 | import json 5 | import logging 6 | import multiprocessing 7 | import os 8 | from glob import glob 9 | from typing import Any, Dict, List, Tuple, Optional 10 | 11 | import numpy as np 12 | import pandas as pd 13 | import ray 14 | import transformers 15 | import time 16 | 17 | from helper import * 18 | from model_utils import LM 19 | 20 | default_sys = "You are a helpful assistant." 21 | boxed_sysprompt = "Please reason step by step, and put your final answer within \\boxed{}." 22 | 23 | templates = { 24 | # serves as the standard inference without reference 25 | "standard_inference_en": """You are an expert in many fields. Suppose you will give a specific final response, I need you to also write down the thought process behind this solution. 26 | Here is a task: 27 | {} 28 | 29 | Now, you need to think aloud and brainstorm in the mind. The thinking process involves thoroughly exploring questions through a systematic long thinking process. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Present your complete thought process within a single and unique `` tag. 30 | 31 | Your thought process must adhere to the following requirements: 32 | 33 | 1. **Narrate in the first-person as if you are thinking aloud and brainstorming** 34 | Stick to the narrative of "I". Imagine you are brainstorming and thinking in the mind. Use verbalized, simple language. 35 | 36 | 2. **Unify the thinking process and the writing solution:** 37 | Your thought process must precisely correspond to a part of the writing solution. The reader should be able to clearly see how your thoughts progressively "grew" into the finished piece, making the copy feel like the inevitable product of your thinking. 38 | 39 | 3. **Tone of Voice: Planning, Sincere, Natural, and Accessible** 40 | Imagine you are analyzing and planning what to do before you start to wrtie the solution. Your language should be plain and easy to understand, avoiding obscure professional jargon to explain complex thought processes clearly. 41 | 42 | 4. **Logical Flow: Clear and Progressive** 43 | 44 | 5. 
**Thinking Framework for deep thinking** 45 | To ensure your thinking is clear and deep, to showcase your thinking and planning to fulfill the task, below is what you might cover when you are thinking aloud and brainstorming. 46 | 47 | Understanding the user intent and the task: Before putting pen to paper, I need to thoroughly consider the fundamental purpose of the writing. I first need to discern the user's true goal behind their literal request. Next, I will consider: Who am I talking to? I will create a precise profile of the target reader, understanding their pain points, aspirations, and reading context. Then, I will establish the Core Objective: What specific emotional, cognitive, and behavioral changes do I most want the reader to experience after reading? 48 | 49 | Establishing the content: I need to brainstorm a core creative idea and communication strategy centered around my objective. Then, I will think about what content and key information I need to convey to the reader to fulfill the writing task, and what source materials this will involve. 50 | 51 | Building the structure: I need to design a clear narrative path for the reader, like a "blueprint." First, I will plan the article's skeleton (e.g., using a framework like the Golden Circle "Why-How-What," the AIDA model "Attention-Interest-Desire-Action," or a narrative structure "Beginning-Development-Climax-Resolution"). Then, I will plan the key modules: How will the introduction hook the reader? How will the body be layered and the arguments arranged? How will the conclusion summarize, elevate the message, and provide a clear Call to Action (CTA)? 52 | 53 | Draft: unless it is a really easy request, otherwise I need to consider writing a draft based on the plan above, before you give the final writing solution. I will translate my plan into paragraphs, considering the key points, content, and sentence structure for each. This initial draft should look like a prototype of the target text. This draft will be way shorter than the final writing solution within controlled length, but it must also avoid being too vague or general or simply copying the final text. I will begin writing this draft after a `--- The Draft ---` separator at the end of my thinking process. The draft will be included in the same `` block. After writing the draft, I will further critique what can be improved, and analyze what details can be enriched (and hence make it more likely to eventually arrive at the given solution) 54 | 55 | 6. Throughout the thinking process, I want to involve deep thinking and planning, and use deliberate self-critique/self-reflection in my thinking process. Trigger these by frequently using patterns such as `wait`, `maybe`, `let me`, etc. For example: 56 | - Hmm, maybe .. (other concrete thinking regarding the given request) 57 | - Let me think .. 58 | - Wait no .. 59 | - But wait ..(might find something wrong with your previous thoughts) 60 | - Wait, that's a bit ..(reflections about previous decisions). Let me think .. (are thinking of other possibilities) 61 | - Wait, the user said ..(backtracking of previous information). So .. 62 | - Hmm...Alternatively, maybe ..(branching on other possibilities) 63 | - But .. 64 | 65 | Now record your clear, complete, and logical thinking process within `` tags. 66 | In the thinking process, make sure NO PAST TENSES, NO PAST TENSES, because this is the thought process before you are to write a final solution. You are planning what you will and you need to do. 
67 | Imagine you're thinking aloud and brainstorming. Write it as an internal monologue or a stream of consciousness. Do not use bullet points, numbers, or formal section headings. 68 | """, 69 | "standard_inference_cn": """你是一名各种领域专家,设想有一个用户请求和一个回答。现在你需要针对回答解释思考过程,特别是如何针对这个请求进行深入思考、深入规划的。 70 | 下面是用户请求:\n{} 71 | 72 | 现在你需要头脑风暴,在(单独且唯一的)``标签中呈现你的完整思考过程。 73 | 74 | 思考过程必须遵循以下要求: 75 | 关于叙述视角:使用第一人称,想象你在大脑里头脑风暴,演绎自己的创作思考过程。用口语化的表述和通俗的语言。 76 | 77 | 关于语言风格:未来时、真诚、自然、易懂 78 | 设想你在动笔前分析和规划的思考过程,所以应该是用未来、计划性或者“我应该”这种语气。请用真诚、坦率的口吻,像一位经验丰富的前辈在传授经验。语言要平实、易懂,避免使用晦涩的专业术语,把复杂的思考过程说明白。 79 | 80 | 关于思考的逻辑:清晰、层层递进 81 | 整个思考过程需要展现出清晰的因果链条,层层递进,解释“为什么这么想”以及“这样做预计会带来什么效果”。思考过程中,基于上面的写作框架中的核心步骤,不断进行细节拆分,使用多样化的逻辑连接词,例如“首先”、“其次”、“然后”、来逐步递进思考过程,完善细节。避免反复使用相同的连接词。 82 | 83 | 思维框架: 84 | 为了让思考过程清晰、有深度,我建议你采用下面的创作框架来组织思路。这能让你的思考过程更接近一位真实专家的工作流: 85 | 86 | 首先思考,我为何而写?在动笔前,我会先彻底想清楚写作的根本目的。我需要先洞察用户字面需求背后的真实目标,接着思考:我在对谁说话?精准描绘出目标读者的画像,理解他们的痛点、渴望和阅读场景。然后,确立核心目标: 我最希望读者读完后,在情感、认知和行动上发生什么具体变化? 87 | 88 | 然后确立内容,我要写什么?我需要围绕写作目标构思核心创意和沟通策略,规划内容。然后思考,为了完成用户请求,我需要向读者传递包括哪些内容和关键信息,分别涉及到什么素材。 89 | 90 | 接着搭建结构,思考我要怎么写?我需要设计一个清晰的行文路径,像“施工图”一样引导读者。首先,我需要规划文章骨架(例如:黄金圈法则 "Why-How-What"、AIDA 模型 "Attention-Interest-Desire-Action"、故事结构 "开端-发展-高潮-结尾"等)。然后,我要考虑布局关键模块: 开头如何破题?主体如何分层展开、安排论据?结尾如何总结、升华,并给出清晰的行动号召 (Call to Action)? 91 | 92 | 再然后,除非是很显然很容易的请求,否则考虑先写一个草稿。我需要落实到每一个段落,具体考虑有什么要点,写什么内容,句子如何组织。要让这份初稿看起来像是上面的文案的雏形,但是要避免照搬上面的文案,又要避免语言笼统。`--- 草稿 ---`分割线后开始写你的草稿,但是草稿部分和上面的思考过程都要放在同一个``标签内 93 | 草稿结束后再次思考有什么可以进一步调整的细节、或者进一步优化的地方,这也是为什么要和真正最终回答有所区分,应当是最终回答的雏形 94 | 95 | 为了充分思考和深入推理,我会多使用自我反思和自我评判来进一步展开细节、分支其他方面或者回溯思考之前的一些陈述。我会频繁利用一些触发自我反思和自我批判的词语:“不过”、“或者”、“可能”,用这些词来触发更加细节、更加深入的思考,下面是一些例子: 96 | - 嗯,也许……(关于给定请求的其他具体思考) 97 | - 让我想想…… 98 | - 等等,不对…… 99 | - 不过等等……(可能会发现你之前的想法有问题) 100 | - 等等,这有点……(对先前决定的反思)。让我想想……(正在思考其他可能性) 101 | - 等等,用户说……(回溯之前的信息)。所以…… 102 | - 嗯……或者,也许……(思考其他分支可能性) 103 | - 但是…… 104 | 格式上,将清晰完整有逻辑的思考过程在``标签中记录。 105 | 在思考过程中,确保不要使用过去时,不要使用过去时,因为这是在你写最终解决方案之前的思考过程。你正在计划你将要做什么和需要做什么。 106 | 想象你正在出声思考和进行头脑风暴。把它写成内心独白或意识流。不要使用项目符号、编号或正式的章节标题。下面,设想你是首次拿到这个用户请求,然后开始你的思考(不要暗示你在解释一个回答。 107 | """, 108 | "initial_thinking_en": """You are an expert in many fields. Suppose you will give a specific final response, I need you to also write down the thought process behind this solution. 109 | Here is a task: 110 | {} 111 | 112 | Here is the solution you will create: 113 | {} 114 | 115 | Now, you need to write down the thinking process behind this solution, as if you are thinking aloud and brainstorming in the mind. The thinking process involves thoroughly exploring questions through a systematic long thinking process. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Present your complete thought process within a single and unique `` tag. 116 | 117 | Your thought process must adhere to the following requirements: 118 | 119 | 1. **Narrate in the first-person as if you are thinking aloud and brainstorming** 120 | Stick to the narrative of "I". Imagine you are brainstorming and thinking in the mind. Use verbalized, simple language. 121 | 122 | 2. **Unify the thinking process and the writing solution:** 123 | Your thought process must precisely correspond to a part of the writing solution. The reader should be able to clearly see how your thoughts progressively "grew" into the finished piece, making the copy feel like the inevitable product of your thinking. 124 | 125 | 3. 
**Tone of Voice: Planning, Sincere, Natural, and Accessible** 126 | Imagine you are analyzing and planning what to do before you start to wrtie the solution. Your language should be plain and easy to understand, avoiding obscure professional jargon to explain complex thought processes clearly. 127 | 128 | 4. **Logical Flow: Clear and Progressive** 129 | 130 | 5. **Thinking Framework for deep thinking** 131 | To ensure your thinking is clear and deep, to showcase your thinking and planning to fulfill the task, below is what you might cover when you are thinking aloud and brainstorming. 132 | 133 | Understanding the user intent and the task: Before putting pen to paper, I need to thoroughly consider the fundamental purpose of the writing. I first need to discern the user's true goal behind their literal request. Next, I will consider: Who am I talking to? I will create a precise profile of the target reader, understanding their pain points, aspirations, and reading context. Then, I will establish the Core Objective: What specific emotional, cognitive, and behavioral changes do I most want the reader to experience after reading? 134 | 135 | Establishing the content: I need to brainstorm a core creative idea and communication strategy centered around my objective. Then, I will think about what content and key information I need to convey to the reader to fulfill the writing task, and what source materials this will involve. 136 | 137 | Building the structure: I need to design a clear narrative path for the reader, like a "blueprint." First, I will plan the article's skeleton (e.g., using a framework like the Golden Circle "Why-How-What," the AIDA model "Attention-Interest-Desire-Action," or a narrative structure "Beginning-Development-Climax-Resolution"). Then, I will plan the key modules: How will the introduction hook the reader? How will the body be layered and the arguments arranged? How will the conclusion summarize, elevate the message, and provide a clear Call to Action (CTA)? 138 | 139 | Outline: If the task output might be relatively long, I will consider writing an outline (or a draft) which naturally derives from the plan above. Specifically, the outline will ground my plan into paragraphs, summarizing the key content for each paragraph and what are the key points here, sentence structure or anything important for the paragraph. 140 | I PROMISE I will NOT copy the solution I will NOT copy the solution, this outline (or draft) should only look like a prototype or outline of the target text. After finishing this outline, I will check again if there are any details or notes I should pay attention to when writing the final solution. 141 | I will begin writing this draft after a `--- Outline (or Draft) ---` separator at the end of my thinking process. The draft will be included in the same `` block. 142 | 143 | 144 | 6. Throughout the thinking process, I want to involve deep thinking and planning, and use deliberate self-critique/self-reflection in my thinking process. Trigger these by regularly using patterns such as `wait`, `maybe`, `let me`, etc. For example: 145 | - Hmm, maybe .. (other concrete thinking regarding the given request) 146 | - Let me think .. 147 | - Wait no .. 148 | - But wait ..(might find something wrong with your previous thoughts) 149 | - Wait, that's a bit ..(reflections about previous decisions). Let me think .. (are thinking of other possibilities) 150 | - Wait, the user said ..(backtracing of previous information). So .. 
151 | - Hmm...Alternatively, maybe ..(branching on other possibilities) 152 | - But .. 153 | But I promise I will use diverse triggers and will NOT use same triggers repeatedly. I will use these when analyzing user needs, establishing content and structure and when I consider alternatives, backtracing and the details. I will NOT use them when I write the draft or I am approaching the end of thinking. 154 | 155 | In the thinking process, make sure NO PAST TENSES, NO PAST TENSES, because this is the thought process before you are to write a final solution. You are planning what you will and you need to do. 156 | Imagine you're thinking aloud and brainstorming. Write it as an internal monologue or a stream of consciousness. Do not use bullet points, numbers, or formal section headings. 157 | Now record your thinking process within `` tags. 158 | """, 159 | "initial_thinking_cn": """你是一名各种领域专家,设想有一个用户请求,你为此正在头脑风暴并且,并且把你的深入思考记录下来。 160 | 下面是用户请求:\n{} 161 | 162 | 假设下面是你会完成的文案:\n{} 163 | 164 | 现在你需要写出对应的思考过程,就像在大脑里头脑风暴。在(单独且唯一的)``标签中呈现你的完整思考过程。 165 | 166 | 思考过程必须遵循以下要求: 167 | 1. 关于叙述视角:使用第一人称,想象你在大脑里头脑风暴,演绎自己的创作思考过程。用口语化的表述和通俗的语言。 168 | 169 | 2. 关于思维与作品的统一:思考即作品,作品即思考 170 | 你的每一个思考步骤,都必须在最终的文案中找到精准的对应。要让读者清晰地看到,你的思考是如何一步步“长”成这篇作品的,整个复盘过程要让人觉得,这篇文案正是这些思考的必然产物。 171 | 172 | 3. 关于语言风格:未来时、真诚、自然、易懂 173 | 设想你在动笔前分析和规划的思考过程,所以应该是用未来、计划性或者“我应该”这种语气。请用真诚、坦率的口吻,像一位经验丰富的前辈在传授经验。语言要平实、易懂,避免使用晦涩的专业术语,把复杂的思考过程说明白。 174 | 175 | 4. 关于思考的逻辑:清晰、层层递进 176 | 整个思考过程需要展现出清晰的因果链条,层层递进,解释“为什么这么想”以及“这样做预计会带来什么效果”。思考过程中,基于上面的写作框架中的核心步骤,不断进行细节拆分,使用多样化的逻辑连接词,例如“首先”、“其次”、“然后”、来逐步递进思考过程,完善细节。避免反复使用相同的连接词。 177 | 178 | 5. 思维框架: 179 | 对于给定的用户请求,一个清晰、有深度、细节丰富的思考过程可能包含下面这些内容和思考方向: 180 | 181 | 为何而写?在动笔前,我会先彻底想清楚写作的根本目的。我需要先洞察用户字面需求背后的真实目标,接着思考:我在对谁说话?精准描绘出目标读者的画像,理解他们的痛点、渴望和阅读场景。然后,确立核心目标: 我最希望读者读完后,在情感、认知和行动上发生什么具体变化? 182 | 183 | 确立内容,我要写什么?我需要围绕写作目标构思核心创意和沟通策略,规划内容。然后思考,为了完成用户请求,我需要向读者传递包括哪些内容和关键信息,分别涉及到什么素材。 184 | 185 | 搭建结构,思考我要怎么写?我需要设计一个清晰的行文路径,像“施工图”一样引导读者。首先,我需要规划文章骨架(例如:黄金圈法则 "Why-How-What"、AIDA 模型 "Attention-Interest-Desire-Action"、故事结构 "开端-发展-高潮-结尾"等)。然后,我要考虑布局关键模块: 开头如何破题?主体如何分层展开、安排论据?结尾如何总结、升华,并给出清晰的行动号召 (Call to Action)? 186 | 187 | 如果是需要输出相对比较长的回答,我会考虑先写一个提纲(或者草稿),会对于参考回答进行提纲挈领,并且列出来每个段落或者部分有什么要点,写什么内容,句子如何组织。 188 | 我**绝对不会照抄绝对不会照抄**参考回答。我会让这个看起来像是一个雏形或者大纲,而不是照搬上面的文案。写完这个提纲之后,我可能会总结一下最终的回答还有没有什么细节需要主要 189 | 我会在`--- 提纲(或者草稿) ---`分割线后开始,和上面的思考过程都要放在同一个``标签内 190 | 191 | 192 | 6. 为了充分思考和深入推理,我会多使用自我反思和自我评判来进一步展开细节、分支其他方面或者回溯思考之前的一些陈述。我会利用一些触发自我反思和自我批判的词语:“不过”、“或者”、“可能”,用这些词来触发更加细节、更加深入的思考,下面是一些例子: 193 | - 嗯,也许……(关于给定请求的其他具体思考) 194 | - 让我想想…… 195 | - 等等,不对…… 196 | - 不过等等……(可能会发现你之前的想法有问题) 197 | - 等等,这有点……(对先前决定的反思)。让我想想……(正在思考其他可能性) 198 | - 等等,用户说……(回溯之前的信息)。所以…… 199 | - 嗯……或者,也许……(思考其他分支可能性) 200 | - 但是…… 201 | 但是我保证会多样化而且不会反复使用相同的触发词。我会在思考用户需求、确定内容和搭建结构的时候考虑使用这些,特别是需要回溯思考、分支或者展开细节的时候使用。如果是在写草稿,或者是快要完成思考的阶段,我不去使用这些触发词。 202 | 203 | 现在,想象我正在内心独白进行头脑风暴……在``标签中。 204 | """, 205 | # writing的核心是align 206 | "segment_edits_en": """ 207 | Your task is to receive a writing assignment, a target sample text, and an existing thinking process, and then to refine and enrich a specific paragraph within that thinking process. 208 | 209 | ---> **Writing Task** \n{} 210 | ---> **Target Text** \n{} 211 | ---> **Thinking Process** 212 | {} 213 | 214 | {} 215 | 216 | {} 217 | 218 | Follow this three-step method to construct your response: 219 | 220 | **Step 1:** Locate the paragraph you need to revise within the existing thinking process. In relation to the surrounding context, what is the primary function of this paragraph? 
221 | 222 | **Step 2:** Read the "Target Text" and the "paragraph to be revised" side-by-side. Ask yourself a key question: Does the thinking process reflected in this paragraph lack crucial steps, or is there content that could be further optimized and detailed to better align with the Target Text? 223 | 224 | **Step 3:** Improve and optimize the paragraph (that represents a part of the thinking process). 225 | - Based on the analysis, refine the initial target paragraph into a new one, base remain the tone. Put the refinement into tags. 226 | - To help involve deep thinking and planning, consider deliberate self-critique/self-reflection in your thinking process. Trigger these by frequently using patterns such as `wait`, `maybe`, `let me`, etc. For example: 227 | - Hmm, maybe .. (other concrete thinking regarding the given request) 228 | - Let me think .. 229 | - Wait no .. 230 | - But wait ..(might find something wrong with your previous thoughts) 231 | - Wait, that's a bit ..(reflections about previous decisions). Let me think .. (are thinking of other possibilities) 232 | - Wait, the user said ..(backtracking of previous information). So .. 233 | - Hmm...Alternatively, maybe ..(branching on other possibilities) 234 | - But .. 235 | - If the function of the paragraph being improved is to serve as a first draft of the text, you must focus on enhancing the text's logic and completeness. The draft should not be a general outline but should express specific content and state a clear point of view. Consider whether the current draft is an appropriate prototype for the Target Text: it should be neither too vague nor a direct copy, but should reflect a foundational version. 236 | 237 | Based on the guide above, you are to refine **only** the section marked for replacement below. 238 | 239 | {} 240 | 241 | 242 | In your response, first, present your analysis following the three-step method within `` tags. Finally, place the corresponding, refined paragraph of the **thinking process** within `` tags. 243 | Notes: a. Avoid repeating. Reduce the use of the same connection words, avoid repeating the same meanings over and over again. Ensure that your revised content does not repeat information from the context. 244 | b. please keep the first a few words of the original paragraph, especially the connection words 245 | c. use self-critique trigger words, such as `wait`, `maybe`, `let me`, etc. 246 | """, 247 | # 不能让他直接refine,还是要进行分析 248 | "segment_edits_cn": """你会接收一个用户请求、一篇目标范文和一个已有的思考过程,然后对该思考过程的某个段落进行优化和丰富。 249 | ---> 用户请求 250 | {} 251 | ---> 目标文案 252 | {} 253 | ---> 思考过程 254 | {} 255 | ---- 256 | 257 | {} 258 | 259 | ---- 260 | {} 261 | 262 | 遵循以下三步法来构建我的回答: 263 | 264 | 第一步:定位我需要修改的段落在现有思考过程中的位置,相对于目前的上下文而言,这个段落主要是什么功能。 265 | 第二步:并排阅读“目标文案”和“需要修改的段落”。问自己一个核心问题:需要修改的段落所反映的思考过程,是否缺少了关键的思考步骤,或者有没有可以进一步优化、进一步细化的内容,能够更好地对应到目标文案? 266 | 第三步:改进和优化现有的思考过程。 267 | - 根据第二步中的分析,修改这个段落,注意放在标签重。 268 | - 为了充分思考和深入推理,我会多使用自我反思和自我评判来进一步展开细节、分支其他方面或者回溯思考之前的一些陈述。我会频繁利用一些触发自我反思和自我批判的词语:“不过”、“或者”、“可能”,用这些词来触发更加细节、更加深入的思考,下面是一些例子: 269 | - 嗯,也许……(关于给定请求的其他具体思考) 270 | - 让我想想…… 271 | - 等等,不对…… 272 | - 不过等等……(可能会发现你之前的想法有问题) 273 | - 等等,这有点……(对先前决定的反思)。让我想想……(正在思考其他可能性) 274 | - 等等,用户说……(回溯之前的信息)。所以…… 275 | - 嗯……或者,也许……(思考其他分支可能性) 276 | - 但是…… 277 | - 如果需要改进的段落的功能是作为文案的初稿,务必注意改进和优化文案的逻辑性、完整性:初稿不应该是笼统的大纲,而应该具体地表达内容、陈述观点。考虑当前的初稿是否是目标文案的一个恰当的草稿:既不能太笼统,也不能照抄,而应该反映出是一个雏形。 278 | 279 | 基于上述指南,仅仅针对下面需要替换的部分进行优化。 280 | 281 | {} 282 | 283 | 284 | 在下面的回答中,首先遵循三步法进行分析,放在``标签中,最后将我修改后的**思考过程的对应段落**放在``标签。 285 | 注意:1. 
尽可能避免重复,减少反复使用的衔接词,修改后的内容不要和上下文内容有重复。 286 | 2. 务必保留段落最开始的几个词,特别是连接词或语气词。 287 | 3. 多使用反思触发词激发更深入的思考 288 | """, 289 | } 290 | 291 | think_prefix = "\n" 292 | 293 | import re 294 | 295 | def contains_chinese(text: str) -> bool: 296 | """ 297 | Checks if a string contains any Chinese characters. 298 | 299 | Args: 300 | text: The input string. 301 | 302 | Returns: 303 | True if the string contains at least one Chinese character, False otherwise. 304 | """ 305 | # The \u4e00-\u9fff range covers the CJK Unified Ideographs. 306 | # This is the most common range for Chinese characters. 307 | return bool(re.search(r'[\u4e00-\u9fff]', text)) 308 | 309 | @ray.remote 310 | def generate(inputs, model: LM, num_rollouts=None, isgreedy=True, **kwargs): 311 | results = [] 312 | 313 | if len(inputs)==1: 314 | completions, temperature = model.generate(inputs, num_rollouts, isgreedy, **kwargs) 315 | results = completions 316 | else: 317 | for inp in inputs: 318 | completions, temperature = model.generate(inp, num_rollouts, isgreedy, **kwargs) 319 | results.append(completions) 320 | return results 321 | 322 | 323 | lm_tokenizer, lm_model, post_tokenizer, post_model = None, None, None, None 324 | def get_model_output( 325 | template_role: str, 326 | system_prompt: Optional[str], 327 | template_inputs: List[List[Any]], 328 | tokenizer: Any, 329 | model: Any, 330 | prompt_suffix: str, 331 | # NOTE: use_fewshot is an unused parameter in this function. 332 | use_fewshot: bool, 333 | num_rollouts: int, 334 | is_greedy: bool = True, 335 | **kwargs, 336 | ) -> Tuple[Any, List[str]]: 337 | """ 338 | Constructs prompts from templates and submits them to the model for generation. 339 | 340 | Args: 341 | template_role: The key for the desired prompt template. 342 | system_prompt: An optional system message to guide the model's behavior. 343 | template_inputs: A list of lists, where each inner list contains the arguments for a prompt template. 344 | tokenizer: The model's tokenizer. 345 | model: The language model instance. 346 | prompt_suffix: A string to append to each prompt after formatting. 347 | use_fewshot: (Unused) A flag that was likely intended for few-shot prompting. 348 | num_rollouts: The number of sequences to generate for each prompt. 349 | is_greedy: A flag to control the decoding strategy. 350 | **kwargs: Additional arguments passed to the generation function. 351 | 352 | Returns: 353 | A tuple containing the Ray object for the asynchronous generation task 354 | and the list of fully constructed prompts sent to the model. 355 | """ 356 | template = templates[template_role] 357 | 358 | # Format each input using the specified template. 359 | formatted_queries = [template.format(*inp) for inp in template_inputs] 360 | 361 | prompts = [] 362 | for query in formatted_queries: 363 | # Create the standard message format for model interaction. 364 | messages = [{"role": "user", "content": query}] 365 | 366 | if system_prompt: 367 | messages.insert(0, {"role": "system", "content": system_prompt}) 368 | 369 | # Construct the final prompt string. 370 | prompt = make_prompt(tokenizer, messages) 371 | prompt += prompt_suffix 372 | prompts.append(prompt) 373 | # if kwargs.get('log',False): 374 | # print(prompts) 375 | # Launch the remote generation task using Ray. 
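# (generate.remote(...) below returns a Ray ObjectRef immediately rather than the completions
#  themselves; callers such as direct_rollout() and PosteriorManager.submit() resolve it later
#  with ray.get(), so several prompts can be kept in flight at once.)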
376 | generation_task = generate.remote(prompts, model, num_rollouts, isgreedy=is_greedy, **kwargs) 377 | return generation_task, prompts 378 | 379 | class RefinementProcessor: 380 | """Handles the iterative refinement process for a single generated response.""" 381 | 382 | def __init__(self, node: Any, tokenizer: Any, model: Any, post_tokenizer: Any, post_model: Any, stop_threshold: float, max_steps: int, num_expansion: int = 2): 383 | self.node = node 384 | self.tokenizer = tokenizer 385 | self.model = model 386 | self.post_tokenizer = post_tokenizer 387 | self.post_model = post_model 388 | self.stop_threshold = stop_threshold 389 | self.max_steps = max_steps 390 | self.num_expansion = num_expansion 391 | 392 | def run(self, initial_info: Dict[str, Any]) -> Dict[str, Any]: 393 | """ 394 | Runs the full iterative refinement loop on a generated "thinking" process. 395 | 396 | Args: 397 | initial_info: A dictionary containing the initial model rollout and its metadata. 398 | 399 | Returns: 400 | An updated info dictionary containing the results of the refinement process. 401 | """ 402 | # The 'initial_ppl' is a tuple where the first element is the log perplexity. 403 | initial_perplexity = initial_info.get('initial_ppl', (1000.0,)) 404 | print(f"===> Initial PPL: {initial_perplexity[0]}") 405 | if initial_perplexity[0] < self.stop_threshold: 406 | print("===> Skipping refinement due to low initial PPL.") 407 | return initial_info 408 | 409 | thinking_segments = initial_info.get('thinking_segments', []) 410 | if not thinking_segments: 411 | return initial_info 412 | 413 | print(f"====> Starting refinement on {len(thinking_segments)} thinking steps.") 414 | 415 | finalized_thinking_steps = [] 416 | best_ppl_so_far = initial_perplexity 417 | 418 | # Iterate through each segment of the thinking process to refine it. 419 | for i in range(min(self.max_steps, len(thinking_segments))): 420 | # Reconstruct the thinking process parts: before, current, and after the segment being refined. 421 | before_segment = "\n\n".join(finalized_thinking_steps) 422 | segment_to_refine = thinking_segments[i].strip() 423 | after_segments = "\n\n".join(s.strip() for s in thinking_segments[i+1:]) 424 | 425 | # Generate and evaluate several possible refinements for the current segment. 426 | best_candidate, best_ppl, all_candidates = self._refine_one_step( 427 | before_segment, segment_to_refine, after_segments 428 | ) 429 | 430 | # Decide whether to keep the original segment or use a generated refinement. 431 | choice = 'original' 432 | chosen_text = segment_to_refine 433 | if best_candidate is not None and best_ppl[0] < best_ppl_so_far[0]: 434 | best_ppl_so_far = best_ppl 435 | chosen_text = best_candidate['refinement'] 436 | choice = f"refinement_No.{best_candidate['id']}" 437 | 438 | finalized_thinking_steps.append(chosen_text) 439 | 440 | # Log the details of this refinement step. 
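# (A candidate refinement is adopted only when its log-PPL beats the best value seen so far,
#  e.g. a candidate scoring 0.21 would replace a running best of 0.30; otherwise `choice`
#  stays 'original'. The record below keeps both outcomes for later inspection.)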
441 | initial_info[f"refine_thinking_step_No.{i+1}"] = { 442 | 'segment_to_refine': segment_to_refine, 443 | 'choice': choice, 444 | 'chosen_refinement': chosen_text, 445 | 'after_avg_token_logp': best_ppl_so_far[0], 446 | 'possible_refinements': all_candidates, 447 | } 448 | 449 | if best_ppl_so_far[0] < self.stop_threshold: 450 | print("===> Stopping refinement early as PPL threshold reached.") 451 | break 452 | 453 | if len(finalized_thinking_steps) Tuple[Optional[Dict], Tuple[float, List], List[Dict]]: 470 | """Generates and evaluates possible refinements for a single thinking segment.""" 471 | num_samples = self.num_expansion 472 | # The arguments are: question, reference_answer, text_before, text_to_replace, text_after, original_text 473 | prompt_args = (self.node.memory['q'], self.node.memory['ref'], before, current, after, current) 474 | role_suffix = '_cn' if self.node.memory['is_chinese'] else '_en' 475 | pre_trigger = "Let's find out what can be improved and enriched to better align with the target text.\n" if role_suffix=='_en' else "让我看看这段有什么可以做修改、优化、补充的地方,从而更加贴合目标文本\n" 476 | rollouts_obj, _ = get_model_output( 477 | template_role='segment_edits'+role_suffix, 478 | system_prompt=default_sys, 479 | template_inputs=[prompt_args], 480 | tokenizer=self.tokenizer, 481 | model=self.model, 482 | prompt_suffix=pre_trigger+"\n", 483 | use_fewshot=False, 484 | num_rollouts=num_samples, 485 | is_greedy=False 486 | ) 487 | # The return from ray.get should be (texts, logps, offsets), but only texts are used here. 488 | rollout_texts, _, _ = ray.get(rollouts_obj) 489 | del rollouts_obj 490 | 491 | refinement_candidates = [] 492 | best_candidate_info = None 493 | # Initialize with a high perplexity value. 494 | best_perplexity = (float('inf'), []) 495 | if np.random.uniform()<0.5: 496 | eng_trigger = "Wait" 497 | elif np.random.uniform()<0.8: 498 | eng_trigger = "But wait" 499 | else: 500 | eng_trigger = "Meanwhile" 501 | trigger = eng_trigger if role_suffix == '_en' else "等等我再想想" 502 | for i, rollout_text in enumerate(rollout_texts): 503 | # if np.random.uniform()<0.25: # extra wait 504 | # temp = rollout_text.split('')[0].strip() 505 | # rollouts_obj2, _ = get_model_output( 506 | # template_role='segment_edits'+role_suffix, 507 | # system_prompt=default_sys, 508 | # template_inputs=[prompt_args], 509 | # tokenizer=self.tokenizer, 510 | # model=self.model, 511 | # prompt_suffix=pre_trigger+"\n"+rollout_text+f"\n\n{trigger}", 512 | # use_fewshot=False, 513 | # num_rollouts=1, 514 | # is_greedy=False 515 | # ) 516 | # # The return from ray.get should be (texts, logps, offsets), but only texts are used here. 517 | # rollout_texts2, _, _ = ray.get(rollouts_obj2) 518 | # del rollouts_obj2 519 | # new_roll = f"{temp}\n\n{trigger}{rollout_texts2[0]}" 520 | # rollout_text = new_roll 521 | 522 | # Extract the refined text from within the tags. 523 | last_block_start = rollout_text.rfind("") 524 | if last_block_start == -1: 525 | print("Warning: tag not found in output, skipping.") 526 | continue 527 | 528 | block_end = rollout_text.rfind("") 529 | start_pos = last_block_start + len("") 530 | refinement_text = rollout_text[start_pos:block_end if block_end != -1 else None].strip() 531 | 532 | # Create the full "thinking" process with the new refinement. 
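# (Only the current segment is swapped out: the already-finalized text before it and the
#  untouched segments after it are re-attached around the candidate, and the recomposed
#  thinking is then scored by the perplexity the posterior model assigns to the reference answer.)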
533 | recomposed_thinking = f"{refinement_text}" 534 | if before: recomposed_thinking = f"{before}\n\n{recomposed_thinking}" 535 | if after: recomposed_thinking = f"{recomposed_thinking}\n\n{after}" 536 | 537 | # Evaluate the new thinking process by calculating the perplexity of the reference answer. 538 | manager = PosteriorManager('standard_inference'+role_suffix, default_sys, [[self.node.memory['q']]], self.post_tokenizer, self.post_model, self.node.memory['ref']) 539 | _, posterior_prefix = manager.prepare(think_prefix, recomposed_thinking, "") 540 | ppl_obj, _ = manager.submit(posterior_prefix) 541 | # The 'compute' method returns a tuple (log_perplexity, debug_info). 542 | new_perplexity_result = manager.compute(ppl_obj) 543 | 544 | print(f"Refinement candidate {i} | New log PPL: {new_perplexity_result[0]}") 545 | candidate_info = { 546 | 'id': i, 547 | 'refinement': refinement_text, 548 | 'generator': self.model.model_name, 549 | 'raw_output': rollout_text, 550 | 'raw_input_for_posterior': posterior_prefix, 551 | 'avg_token_logp': new_perplexity_result[0], 552 | } 553 | refinement_candidates.append({f'expansion_No.{i}_of_segment': candidate_info}) 554 | 555 | # If this candidate is better than the best one so far, update it. 556 | if new_perplexity_result[0] < best_perplexity[0]: 557 | best_perplexity = new_perplexity_result 558 | best_candidate_info = candidate_info 559 | 560 | return best_candidate_info, best_perplexity, refinement_candidates 561 | 562 | class PosteriorManager: 563 | """Calculates the log probability of a reference answer given a thinking process.""" 564 | def __init__(self, role: str, system_prompt: str, inputs: List[List[Any]], tokenizer: Any, model: Any, ref_answer: str): 565 | self.role = role 566 | self.system_prompt = system_prompt 567 | self.inputs = inputs 568 | self.tokenizer = tokenizer 569 | self.model = model 570 | self.ref_answer = ref_answer 571 | _, _, answer_steps = breakdown_steps(ref_answer) 572 | self.answer_prefix = answer_steps[0] 573 | self.pred_answer = "".join(answer_steps[1:]) 574 | 575 | def prepare(self, think_prefix: str, thinking_process: str, ref_answer: str = None) -> Tuple[str, str]: 576 | """Prepares the prompt for posterior probability calculation.""" 577 | # The part of the prompt before the reference answer. 578 | self.prefix_before_answer = f"{think_prefix}{thinking_process}\n\n\n{self.answer_prefix}" 579 | # The full prompt including the reference answer. 580 | 581 | self.posterior_prefix = f"{self.prefix_before_answer}{self.pred_answer}\n" 582 | return self.prefix_before_answer, self.posterior_prefix 583 | 584 | def submit(self, prefix: str) -> Tuple[Any, List[str]]: 585 | """Submits the prompt to the model to get token log probabilities.""" 586 | self.rollouts_obj, self.real_input_prompts = get_model_output( 587 | self.role, self.system_prompt, self.inputs, self.tokenizer, self.model, prefix, 588 | use_fewshot=False, num_rollouts=1, is_greedy=False, prompt_only=True 589 | ) 590 | return self.rollouts_obj, self.real_input_prompts 591 | 592 | def compute(self, rollouts_obj: Any) -> Tuple[float, List[Dict]]: 593 | """Computes the log perplexity of the reference answer from the model's output.""" 594 | _, prompt_logprobs, _ = ray.get(rollouts_obj) 595 | if not prompt_logprobs: return 1000.0, [] 596 | 597 | current_prompt_logprobs = prompt_logprobs[0] 598 | 599 | # This complex logic is used to find the exact tokens corresponding to the reference answer. 
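# (The tokenizer's character offsets are scanned backwards from the end of the prompt:
#  `tokens_to_go_back` locates where the reference answer starts, `answer_token_span` counts
#  how many tokens it covers, and only those tokens' logprobs enter the mean below.)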
600 | offsets = self.tokenizer(self.posterior_prefix, return_offsets_mapping=True).offset_mapping 601 | start_char_index = len(self.prefix_before_answer) 602 | end_char_index = start_char_index + len(self.pred_answer) 603 | 604 | # Find the number of tokens to count backward from the end of the prompt to find the answer's start. 605 | tokens_to_go_back = 0 606 | for i, (start, end) in enumerate(offsets[::-1]): 607 | if start <= start_char_index: 608 | tokens_to_go_back = i + 1 609 | break 610 | 611 | # Find how many tokens the reference answer spans. 612 | answer_token_span = 0 613 | for j, (start, end) in enumerate(offsets[-tokens_to_go_back:]): 614 | if end >= end_char_index: 615 | answer_token_span = j + 1 616 | break 617 | 618 | answer_logps = [] 619 | # Sum the log probabilities of the tokens that make up the reference answer. 620 | logprob_slice = current_prompt_logprobs[-tokens_to_go_back : -tokens_to_go_back + answer_token_span] 621 | for logp_dict in logprob_slice: 622 | # The dictionary may have multiple keys; the first one corresponds to the prompt token. 623 | major_key = list(logp_dict)[0] 624 | logp = logp_dict[major_key]['logprob'] 625 | answer_logps.append(np.clip(logp, -2.0, 0.0)) 626 | 627 | # Calculate the negative mean log probability (log perplexity). A lower value is better. 628 | log_perplexity = -np.mean(answer_logps) if answer_logps else 1000.0 629 | 630 | # For debugging, gather the log probability info for tokens surrounding the answer. 631 | debug_slice = current_prompt_logprobs[-tokens_to_go_back-2 : -tokens_to_go_back + answer_token_span + 2] 632 | debug_logprob_info = [logp_dict[list(logp_dict)[0]] for logp_dict in debug_slice] 633 | return log_perplexity, debug_logprob_info 634 | 635 | def direct_rollout(node, prefix, n_sample=1, role=None, log=False): 636 | 637 | role, sysp, in_keys = ('standard_inference' if role is None else role), default_sys, ['q', 'ref'] 638 | node.memory['gen_role'] = role 639 | 640 | inputs = [node.memory[k] for k in in_keys] 641 | tok, model = lm_tokenizer, lm_model 642 | node.memory['generator'] = model.model_name 643 | inp = [inputs] 644 | # print(f"===> {role}: {n_sample} for {len(inp)} queries") 645 | rollouts_obj, real_input_prompts = get_model_output(role, sysp, inp, tok, model, prefix, False, n_sample, is_greedy=False) 646 | rollout_texts, rollout_logps, rollout_offsets = ray.get(rollouts_obj) 647 | del rollouts_obj 648 | return rollout_texts, rollout_logps, rollout_offsets, real_input_prompts 649 | 650 | 651 | 652 | @ray.remote 653 | def process_item( 654 | item: Dict[str, Any], 655 | file_prefix: str, 656 | rank: int, 657 | n_sample: int, 658 | configs=dict() 659 | ): 660 | uid = item['extra_info']['index'] 661 | output_fname = f"{file_prefix}_{uid}_rk{rank}" 662 | # if glob(f"{output_fname}*"): 663 | # for fp in glob(f"{file_prefix}_{uid}*"): 664 | for fp in glob(f"{output_fname}*"): 665 | try: 666 | tmp = json.load(open(fp)) 667 | if "alist" in tmp: 668 | print(f"Skipping existing item: {uid}") 669 | return True 670 | except Exception as e: 671 | print(e) 672 | print(f'wrong loading {fp}') 673 | continue 674 | 675 | stop_thresh = configs['processing']['stop_thresh'] 676 | max_step = configs['processing']['max_step'] 677 | num_expansion = configs['processing']['num_expansion'] 678 | q = item['question'] 679 | is_chinese = contains_chinese(q) 680 | has_think = "" in item['solution'] 681 | if has_think: 682 | ref = item['solution'].split('')[-1].strip() 683 | else: 684 | ref = item['solution'] 685 | node = Node(ref=ref, 
raw_q=item['question'], info={'uid': item['extra_info']['index'], 'old_solution': item['solution'] if has_think else None, 'is_chinese': is_chinese}) 686 | node.memory.update({k:v for k,v in item.items() if not isinstance(v, np.ndarray) if k not in {'solution'}}) 687 | 688 | fname = output_fname 689 | 690 | tok, model = lm_tokenizer, lm_model 691 | 692 | # 1. Initial Rollout 693 | # n_sample = 1 694 | generation_role = 'initial_thinking_'+('cn' if is_chinese else 'en') 695 | if is_chinese: 696 | if np.random.uniform()>0.5: pre_trigger = "好的" 697 | else: pre_trigger = "嗯" 698 | else: 699 | if np.random.uniform()>0.8: pre_trigger = "Okay, I am given" 700 | elif np.random.uniform()>0.4: pre_trigger = "Alright, the user" 701 | else: pre_trigger = "Alright" 702 | rollout_texts, rollout_logps, rollout_offsets, real_input_prompts = direct_rollout(node, "\n"+pre_trigger, n_sample, role=generation_role, log=True) 703 | # print(rollout_texts[0]) 704 | outcomes = [] 705 | expanded_prompts = [pp for pp in real_input_prompts for _ in range(n_sample)] 706 | flag = True 707 | for roll, inpprompt, token_logps, token_to_text_offsets in zip(rollout_texts, expanded_prompts, rollout_logps, rollout_offsets): 708 | ntoken = len(token_logps) 709 | if ntoken<100: 710 | print("num token too short, skip") 711 | return False 712 | info = dict(ntokens=ntoken,) 713 | 714 | # separate thinking and code answer 715 | roll = pre_trigger + roll 716 | thinking = roll.split('')[0] 717 | answer_code = roll.split('')[-1].split('')[-1].strip() 718 | 719 | # breakdown thinking to steps 720 | aa,bb,thinking_segments = breakdown_steps(thinking) 721 | if len(thinking_segments)==1: 722 | return False 723 | # get PPL of initial rollout 724 | role, sysp = 'standard_inference_'+('cn' if is_chinese else 'en'), default_sys 725 | inp = [[node.memory['q']]] 726 | manager = PosteriorManager(role, sysp, inp, post_tokenizer, post_model, node.memory['ref']) 727 | 728 | posterior_prefix1, posterior_prefix = manager.prepare(think_prefix, thinking) 729 | rollouts_obj, real_input_prompts = manager.submit(posterior_prefix) 730 | noreplace_log_ppl = manager.compute(rollouts_obj) 731 | ppl = noreplace_log_ppl 732 | if ppl[0]