├── .gitignore ├── LICENSE ├── README.md ├── common └── __init__.py ├── global_config.py ├── gradio_server.py ├── human_simulator.py ├── imgs └── webui-snapshot.png ├── init_prompt.json ├── misc ├── ali_pay.png ├── placeholder └── wechat_pay.png ├── models ├── __init__.py ├── aquila_fa.py ├── aquila_hf.py ├── baichuan_hf.py ├── chatglm_hf.py ├── falcon_hf.py └── vicuna_bin.py ├── prompts ├── __init__.py ├── chatgpt_query.py ├── human_simulator.py ├── llm_query.py └── service_init.py ├── recurrent_llm.py ├── recurrentgpt.py ├── requirements.txt └── utils ├── __init__.py ├── aquila_util.py ├── baichuan_util.py ├── chatglm_util.py ├── falcon_util.py ├── openai_util.py └── vicuna_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 MK 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## **Recurrent-LLM** 2 | The Open Source LLM implementation of paper: 3 | 4 | **RecurrentGPT: Interactive Generation of (Arbitrarily) Long Text**. 5 | 6 | [[Paper](https://arxiv.org/pdf/2305.13304v1.pdf)] [[arxiv](https://arxiv.org/abs/2305.13304v1)] [[HuggingFace](https://huggingface.co/papers/2305.13304)] [[Offical](https://github.com/aiwaves-cn/RecurrentGPT)] 7 | 8 | The fixed-size context of Transformer makes GPT models incapable of generating arbitrarily long text. In this paper, we introduce RecurrentGPT, a language-based simulacrum of the recurrence mechanism in RNNs. RecurrentGPT is built upon a large language model (LLM) such as ChatGPT and uses natural language to simulate the Long Short-Term Memory mechanism in an LSTM. 
At each timestep, RecurrentGPT generates a paragraph of text and updates its language-based long-short term memory stored on the hard drive and the prompt, respectively. This recurrence mechanism enables RecurrentGPT to generate texts of arbitrary length without forgetting. Since human users can easily observe and edit the natural language memories, RecurrentGPT is interpretable and enables interactive generation of long text. RecurrentGPT is an initial step towards next-generation computer-assisted writing systems beyond local editing suggestions. In addition to producing AI-generated content (AIGC), we also demonstrate the possibility of using RecurrentGPT as an interactive fiction that directly interacts with consumers. We call this usage of generative models by ``AI As Contents'' (AIAC), which we believe is the next form of conventional AIGC. We further demonstrate the possibility of using RecurrentGPT to create personalized interactive fiction that directly interacts with readers instead of interacting with writers. More broadly, RecurrentGPT demonstrates the utility of borrowing ideas from popular model designs in cognitive science and deep learning for prompting LLMs. 9 | 10 | Transformer的固定尺寸上下文使得GPT模型无法生成任意长的文本。在本文中,我们介绍了RecurrentGPT,一个基于语言的模拟RNNs中的递归机制。RecurrentGPT建立在大型语言模型(LLM)之上,如ChatGPT,并使用自然语言来模拟LSTM中的长短时记忆机制。在每个时间段,RecurrentGPT生成一段文字,并更新其基于语言的长短时记忆,分别存储在硬盘和提示器上。这种递归机制使RecurrentGPT能够生成任意长度的文本而不被遗忘。由于人类用户可以很容易地观察和编辑自然语言记忆,因此RecurrentGPT是可解释的,并能互动地生成长文本。RecurrentGPT是朝着超越本地编辑建议的下一代计算机辅助写作系统迈出的第一步。除了制作人工智能生成的内容(AIGC),我们还展示了使用RecurrentGPT作为直接与消费者互动的互动小说的可能性。我们称这种生成模型的使用为 "AI As Contents"(AIAC),我们认为这是传统AIGC的下一个形式。我们进一步展示了使用RecurrentGPT创造个性化互动小说的可能性,这种小说直接与读者互动,而不是与作者互动。更广泛地说,RecurrentGPT证明了从认知科学和深度学习中流行的模型设计中借用思想来提示LLM的效用。 11 | 12 | --- 13 | 14 | ## **Table of Contents** 15 | - [**Recurrent-LLM**](#recurrent-llm) 16 | - [**Table of Contents**](#table-of-contents) 17 | - [**Requirements**](#requirements) 18 | - [**Configuration**](#configuration) 19 | - [**Global Config**](#global-config) 20 | - [**Supported LLM options**](#supported-llm-options) 21 | - [**OpenAI ChatGPT**](#openai-chatgpt) 22 | - [**Vicuna**](#vicuna) 23 | - [**ChatGLM**](#chatglm) 24 | - [**Baichuan**](#baichuan) 25 | - [**Aquila**](#aquila) 26 | - [**Falcon**](#falcon) 27 | - [**Usage**](#usage) 28 | - [**start web server**](#start-web-server) 29 | - [**WebUI**](#webui) 30 | - [**Star-History**](#star-history) 31 | - [**License**](#license) 32 | 33 | ## **Requirements** 34 | 35 | ``` 36 | pip install transformers@git+https://github.com/huggingface/transformers.git 37 | pip install peft@git+https://github.com/huggingface/peft.git 38 | pip install accelerate@git+https://github.com/huggingface/accelerate.git 39 | pip install bitsandbytes==0.39.0 40 | 41 | pip install -U flagai 42 | pip install bminf 43 | ``` 44 | 45 | ## **Configuration** 46 | 47 | ### **Global Config** 48 | [[global_config.py](./global_config.py)] 49 | 50 | ``` 51 | lang_opt = "zh" # zh or en. make English or Chinese Novel 52 | llm_model_opt = "openai" # default is openai, it also can be other open-source LLMs as below 53 | ``` 54 | 55 | ### **Supported LLM options** 56 | 57 | - [x] openai 58 | - [x] vicuna 59 | - [x] chatglm 60 | - [x] baichuan 61 | - [x] aquila 62 | - [x] falcon 63 | 64 | #### **OpenAI ChatGPT** 65 | 66 | you should apply an openai api key first. then 67 | ``` 68 | export OPENAI_API_KEY = "your key" 69 | ``` 70 | 71 | #### **Vicuna** 72 | 73 | download vicuna model. 
and configure it in [models/vicuna_bin.py](models/vicuna_bin.py) 74 | 75 | #### **ChatGLM** 76 | 77 | ```python 78 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) 79 | model_config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True) 80 | model = AutoModel.from_pretrained(model_name_or_path, config=model_config, trust_remote_code=True) 81 | ``` 82 | 83 | #### **Baichuan** 84 | 85 | ```python 86 | tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/baichuan-7B", trust_remote_code=True) 87 | model = AutoModelForCausalLM.from_pretrained("baichuan-inc/baichuan-7B", device_map="auto", trust_remote_code=True) 88 | ``` 89 | 90 | #### **Aquila** 91 | 92 | ```python 93 | loader = AutoLoader( 94 | "lm", 95 | model_dir=state_dict, 96 | model_name=model_name, 97 | use_cache=True, 98 | fp16=True) 99 | model = loader.get_model() 100 | tokenizer = loader.get_tokenizer() 101 | model.eval() 102 | ``` 103 | If you want to use bminf, add the code below: 104 | ```python 105 | with torch.cuda.device(0): 106 | model = bminf.wrapper(model, quantization=False, memory_limit=2 << 30) 107 | ``` 108 | 109 | 110 | #### **Falcon** 111 | See [models/falcon_hf.py](models/falcon_hf.py) for loading Falcon, either the GPTQ-quantized checkpoint (`TheBloke/falcon-7b-instruct-GPTQ`) or the standard Hugging Face checkpoint (`tiiuae/falcon-7b`). 112 | 113 | ## **Usage** 114 | 115 | ### **start web server** 116 | 117 | ``` 118 | python gradio_server.py 119 | ``` 120 | 121 | 122 | ## **WebUI** 123 | 124 | ![webui-snapshot](./imgs/webui-snapshot.png) 125 | 126 | ------ 127 | ## **Star-History** 128 | 129 | ![star-history](https://api.star-history.com/svg?repos=jackaduma/Recurrent-LLM&type=Date "star-history") 130 | 131 | ------ 132 | 133 | ## Donation 134 | If this project helps you reduce development time, you can buy me a cup of coffee :) 135 | 136 | AliPay(支付宝) 137 |
138 | ![ali_pay](./misc/ali_pay.png) 139 |
140 | 141 | WechatPay(微信) 142 |
143 | ![wechat_pay](./misc/wechat_pay.png) 144 |
145 | 146 | ------ 147 | 148 | ## **License** 149 | 150 | [MIT](LICENSE) © Kun 151 | -------------------------------------------------------------------------------- /common/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | import torch 7 | 8 | def torch_gc(): 9 | if torch.cuda.is_available(): 10 | # with torch.cuda.device(DEVICE): 11 | torch.cuda.empty_cache() 12 | torch.cuda.ipc_collect() 13 | elif torch.backends.mps.is_available(): 14 | try: 15 | from torch.mps import empty_cache 16 | empty_cache() 17 | except Exception as e: 18 | print(e) 19 | print("如果您使用的是 macOS 建议将 pytorch 版本升级至 2.0.0 或更高版本,以支持及时清理 torch 产生的内存占用。") -------------------------------------------------------------------------------- /global_config.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | #################################################### 6 | 7 | # lang_opt = "zh1" 8 | lang_opt = "zh2" 9 | # lang_opt = "en" 10 | 11 | #################################################### 12 | 13 | # llm_model_opt = "openai" 14 | # llm_model_opt = "vicuna" 15 | llm_model_opt = "chatglm" 16 | # llm_model_opt = "baichuan" 17 | # llm_model_opt = "aquila" 18 | # llm_model_opt = "falcon" 19 | 20 | #################################################### -------------------------------------------------------------------------------- /gradio_server.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | import gradio as gr 6 | import random 7 | from sentence_transformers import SentenceTransformer 8 | from human_simulator import Human 9 | from prompts.service_init import get_init_prompt 10 | from utils import get_init, parse_instructions 11 | from global_config import lang_opt, llm_model_opt 12 | 13 | if "openai" == llm_model_opt: 14 | from recurrentgpt import RecurrentGPT as AIWriter 15 | llm_model = None 16 | llm_tokenizer = None 17 | 18 | elif "vicuna" == llm_model_opt: 19 | from recurrent_llm import RecurrentLLM as AIWriter 20 | from models.vicuna_bin import load_model 21 | llm_tokenizer, llm_model = load_model() 22 | 23 | elif "chatglm" == llm_model_opt: 24 | from recurrent_llm import RecurrentLLM as AIWriter 25 | from models.chatglm_hf import load_model 26 | llm_tokenizer, llm_model = load_model() 27 | 28 | elif "baichuan" == llm_model_opt: 29 | from recurrent_llm import RecurrentLLM as AIWriter 30 | from models.baichuan_hf import load_model 31 | llm_tokenizer, llm_model = load_model() 32 | 33 | elif "aquila" == llm_model_opt: 34 | from recurrent_llm import RecurrentLLM as AIWriter 35 | from models.aquila_fa import load_model 36 | # from models.aquila_hf import load_model 37 | llm_tokenizer, llm_model = load_model() 38 | 39 | elif "falcon" == llm_model_opt: 40 | from recurrent_llm import RecurrentLLM 41 | from models.falcon_hf import load_model 42 | llm_tokenizer, llm_model = load_model() 43 | 44 | else: 45 | raise Exception("not supported llm model name: {}".format(llm_model_opt)) 46 | 47 | # from urllib.parse import quote_plus 48 | # from pymongo import MongoClient 49 | 50 | # uri = "mongodb://%s:%s@%s" % (quote_plus("xxx"), 51 | # quote_plus("xxx"), "localhost") 52 | # client = MongoClient(uri, maxPoolSize=None) 53 | # db = client.recurrentGPT_db 54 | # log = db.log 55 | 56 | _CACHE = {} 57 | 58 | 59 | # Build the 
semantic search model 60 | embedder = SentenceTransformer('multi-qa-mpnet-base-cos-v1') 61 | 62 | 63 | def init_prompt(novel_type, description): 64 | if description == "": 65 | description = "" 66 | else: 67 | description = " about " + description 68 | 69 | return get_init_prompt(lang_opt, novel_type, description) 70 | 71 | 72 | def init(novel_type, description, request: gr.Request): 73 | if novel_type == "": 74 | novel_type = "Science Fiction" if "en" == lang_opt else "科幻故事" 75 | global _CACHE 76 | cookie = request.headers['cookie'] 77 | cookie = cookie.split('; _gat_gtag')[0] 78 | # prepare first init 79 | init_paragraphs = get_init(text=init_prompt( 80 | novel_type, description), model=llm_model, tokenizer=llm_tokenizer) 81 | # print(init_paragraphs) 82 | start_input_to_human = { 83 | 'output_paragraph': init_paragraphs['Paragraph 3'], 84 | 'input_paragraph': '\n\n'.join([init_paragraphs['Paragraph 1'], init_paragraphs['Paragraph 2'], init_paragraphs['Paragraph 3']]), 85 | 'output_memory': init_paragraphs['Summary'], 86 | "output_instruction": [init_paragraphs['Instruction 1'], init_paragraphs['Instruction 2'], init_paragraphs['Instruction 3']] 87 | } 88 | 89 | _CACHE[cookie] = {"start_input_to_human": start_input_to_human, 90 | "init_paragraphs": init_paragraphs} 91 | written_paras = f"""Title: {init_paragraphs['name']} 92 | 93 | Outline: {init_paragraphs['Outline']} 94 | 95 | Paragraphs: 96 | 97 | {start_input_to_human['input_paragraph']}""" if "en" == lang_opt else f"""标题: {init_paragraphs['name']} 98 | 99 | 梗概: {init_paragraphs['Outline']} 100 | 101 | 段落: 102 | 103 | {start_input_to_human['input_paragraph']}""" 104 | long_memory = parse_instructions( 105 | [init_paragraphs['Paragraph 1'], init_paragraphs['Paragraph 2'], init_paragraphs['Paragraph 3']]) 106 | # short memory, long memory, current written paragraphs, 3 next instructions 107 | return start_input_to_human['output_memory'], long_memory, written_paras, init_paragraphs['Instruction 1'], init_paragraphs['Instruction 2'], init_paragraphs['Instruction 3'] 108 | 109 | 110 | def step(short_memory, long_memory, instruction1, instruction2, instruction3, current_paras, request: gr.Request, ): 111 | if current_paras == "": 112 | return "", "", "", "", "", "" 113 | global _CACHE 114 | # print(list(_CACHE.keys())) 115 | # print(request.headers.get('cookie')) 116 | cookie = request.headers['cookie'] 117 | cookie = cookie.split('; _gat_gtag')[0] 118 | cache = _CACHE[cookie] 119 | 120 | if "writer" not in cache: 121 | start_input_to_human = cache["start_input_to_human"] 122 | start_input_to_human['output_instruction'] = [ 123 | instruction1, instruction2, instruction3] 124 | init_paragraphs = cache["init_paragraphs"] 125 | human = Human(input=start_input_to_human, 126 | memory=None, embedder=embedder, model=llm_model, tokenizer=llm_tokenizer) 127 | human.step() 128 | start_short_memory = init_paragraphs['Summary'] 129 | writer_start_input = human.output 130 | 131 | # Init writerGPT 132 | writer = AIWriter(input=writer_start_input, short_memory=start_short_memory, long_memory=[ 133 | init_paragraphs['Paragraph 1'], init_paragraphs['Paragraph 2'], init_paragraphs['Paragraph 3']], memory_index=None, embedder=embedder, 134 | model=llm_model, tokenizer=llm_tokenizer) 135 | cache["writer"] = writer 136 | cache["human"] = human 137 | writer.step() 138 | else: 139 | human = cache["human"] 140 | writer = cache["writer"] 141 | output = writer.output 142 | output['output_memory'] = short_memory 143 | # randomly select one instruction out of three 
144 | instruction_index = random.randint(0, 2) 145 | output['output_instruction'] = [instruction1, 146 | instruction2, instruction3][instruction_index] 147 | human.input = output 148 | human.step() 149 | writer.input = human.output 150 | writer.step() 151 | 152 | long_memory = [[v] for v in writer.long_memory] 153 | # short memory, long memory, current written paragraphs, 3 next instructions 154 | return writer.output['output_memory'], long_memory, current_paras + '\n\n' + writer.output['input_paragraph'], human.output['output_instruction'], *writer.output['output_instruction'] 155 | 156 | 157 | def controled_step(short_memory, long_memory, selected_instruction, current_paras, request: gr.Request, ): 158 | if current_paras == "": 159 | return "", "", "", "", "", "" 160 | global _CACHE 161 | # print(list(_CACHE.keys())) 162 | # print(request.headers.get('cookie')) 163 | cookie = request.headers['cookie'] 164 | cookie = cookie.split('; _gat_gtag')[0] 165 | cache = _CACHE[cookie] 166 | if "writer" not in cache: 167 | start_input_to_human = cache["start_input_to_human"] 168 | start_input_to_human['output_instruction'] = selected_instruction 169 | init_paragraphs = cache["init_paragraphs"] 170 | human = Human(input=start_input_to_human, 171 | memory=None, embedder=embedder, model=llm_model, tokenizer=llm_tokenizer) 172 | human.step() 173 | start_short_memory = init_paragraphs['Summary'] 174 | writer_start_input = human.output 175 | 176 | # Init writerGPT 177 | writer = AIWriter(input=writer_start_input, short_memory=start_short_memory, long_memory=[ 178 | init_paragraphs['Paragraph 1'], init_paragraphs['Paragraph 2'], init_paragraphs['Paragraph 3']], memory_index=None, embedder=embedder, 179 | model=llm_model, tokenizer=llm_tokenizer) 180 | cache["writer"] = writer 181 | cache["human"] = human 182 | writer.step() 183 | else: 184 | human = cache["human"] 185 | writer = cache["writer"] 186 | output = writer.output 187 | output['output_memory'] = short_memory 188 | output['output_instruction'] = selected_instruction 189 | human.input = output 190 | human.step() 191 | writer.input = human.output 192 | writer.step() 193 | 194 | # short memory, long memory, current written paragraphs, 3 next instructions 195 | return writer.output['output_memory'], parse_instructions(writer.long_memory), current_paras + '\n\n' + writer.output['input_paragraph'], *writer.output['output_instruction'] 196 | 197 | 198 | # SelectData is a subclass of EventData 199 | def on_select(instruction1, instruction2, instruction3, evt: gr.SelectData): 200 | selected_plan = int(evt.value.replace("Instruction ", "") 201 | ) if "en" == lang_opt else int(evt.value.replace("指令 ", "")) 202 | selected_plan = [instruction1, instruction2, instruction3][selected_plan-1] 203 | return selected_plan 204 | 205 | 206 | def reload_model(choice): 207 | pass 208 | 209 | 210 | with gr.Blocks(title="RecurrentGPT", css="footer {visibility: hidden}", theme="default") as demo: 211 | if "en" == lang_opt: 212 | gr.Markdown( 213 | """ 214 | # Recurrent-LLM 215 | Interactive Generation of (Arbitrarily) Long Texts with Human-in-the-Loop 216 | """) 217 | elif lang_opt in ["zh1", "zh2"]: 218 | gr.Markdown( 219 | """ 220 | # Recurrent-LLM 221 | 可以根据题目和简介自动续写文章 222 | 也可以手动选择剧情走向进行续写 223 | """) 224 | 225 | with gr.Tab("Auto-Generation"): 226 | with gr.Row(): 227 | with gr.Column(): 228 | with gr.Box(): 229 | with gr.Row(): 230 | with gr.Column(scale=1, min_width=200): 231 | novel_type = gr.Textbox( 232 | label="Novel Type", placeholder="e.g. 
science fiction") if "en" == lang_opt else gr.Textbox( 233 | label="请输入文本", placeholder="可以自己填写或者从EXamples中选择一个填入") 234 | with gr.Column(scale=2, min_width=400): 235 | description = gr.Textbox( 236 | label="Description") if "en" == lang_opt else gr.Textbox(label="剧情简介(非必选项)") 237 | btn_init = gr.Button( 238 | "Init Novel Generation", variant="primary") if "en" == lang_opt else gr.Button( 239 | "点击开始运行", variant="primary") 240 | if "en" == lang_opt: 241 | gr.Examples(["Science Fiction", "Romance", "Mystery", "Fantasy", 242 | "Historical", "Horror", "Thriller", "Western", "Young Adult", ], inputs=[novel_type]) 243 | elif lang_opt in ["zh1", "zh2"]: 244 | gr.Examples(["科幻故事", "青春伤痛文学", "爱到死去活来", "搞笑", 245 | "幽默", "鬼故事", "喜剧", "童话", "魔法世界", ], inputs=[novel_type]) 246 | else: 247 | raise Exception(f"not supported language: {lang_opt}") 248 | 249 | written_paras = gr.Textbox( 250 | label="Written Paragraphs (editable)", max_lines=21, lines=21) if "en" == lang_opt else gr.Textbox( 251 | label="文章内容", max_lines=21, lines=21) 252 | with gr.Column(): 253 | with gr.Box(): 254 | if "en" == lang_opt: 255 | gr.Markdown("### Memory Module\n") 256 | elif lang_opt in ["zh1", "zh2"]: 257 | gr.Markdown("### 剧情模型\n") 258 | 259 | short_memory = gr.Textbox( 260 | label="Short-Term Memory (editable)", max_lines=3, lines=3) if "en" == lang_opt else gr.Textbox( 261 | label="短期记忆 (可编辑)", max_lines=3, lines=3) 262 | long_memory = gr.Textbox( 263 | label="Long-Term Memory (editable)", max_lines=6, lines=6) if "en" == lang_opt else gr.Textbox( 264 | label="长期记忆 (可编辑)", max_lines=6, lines=6) 265 | # long_memory = gr.Dataframe( 266 | # # label="Long-Term Memory (editable)", 267 | # headers=["Long-Term Memory (editable)"], 268 | # datatype=["str"], 269 | # row_count=3, 270 | # max_rows=3, 271 | # col_count=(1, "fixed"), 272 | # type="array", 273 | # ) 274 | with gr.Box(): 275 | if "en" == lang_opt: 276 | gr.Markdown("### Instruction Module\n") 277 | elif lang_opt in ["zh1", "zh2"]: 278 | gr.Markdown("### 选项模型\n") 279 | 280 | with gr.Row(): 281 | instruction1 = gr.Textbox( 282 | label="Instruction 1 (editable)", max_lines=4, lines=4) if "en" == lang_opt else gr.Textbox( 283 | label="指令1(可编辑)", max_lines=4, lines=4) 284 | instruction2 = gr.Textbox( 285 | label="Instruction 2 (editable)", max_lines=4, lines=4) if "en" == lang_opt else gr.Textbox( 286 | label="指令2(可编辑)", max_lines=4, lines=4) 287 | instruction3 = gr.Textbox( 288 | label="Instruction 3 (editable)", max_lines=4, lines=4) if "en" == lang_opt else gr.Textbox( 289 | label="指令3(可编辑)", max_lines=4, lines=4) 290 | selected_plan = gr.Textbox( 291 | label="Revised Instruction (from last step)", max_lines=2, lines=2) if "en" == lang_opt else gr.Textbox( 292 | label="选项说明 (来自上一步)", max_lines=2, lines=2) 293 | 294 | btn_step = gr.Button("Next Step", variant="primary") if "en" == lang_opt else gr.Button( 295 | "下一步", variant="primary") 296 | 297 | btn_init.click(init, inputs=[novel_type, description], outputs=[ 298 | short_memory, long_memory, written_paras, instruction1, instruction2, instruction3]) 299 | btn_step.click(step, inputs=[short_memory, long_memory, instruction1, instruction2, instruction3, written_paras], outputs=[ 300 | short_memory, long_memory, written_paras, selected_plan, instruction1, instruction2, instruction3]) 301 | 302 | with gr.Tab("Human-in-the-Loop"): 303 | with gr.Row(): 304 | with gr.Column(): 305 | with gr.Box(): 306 | with gr.Row(): 307 | with gr.Column(scale=1, min_width=200): 308 | novel_type = gr.Textbox( 309 | label="Novel Type", 
placeholder="e.g. science fiction") if "en" == lang_opt else gr.Textbox( 310 | label="请输入文本", placeholder="可以自己填写或者从EXamples中选择一个填入") 311 | with gr.Column(scale=2, min_width=400): 312 | description = gr.Textbox( 313 | label="Description") if "en" == lang_opt else gr.Textbox(label="剧情简介(非必选项)") 314 | btn_init = gr.Button( 315 | "Init Novel Generation", variant="primary") if "en" == lang_opt else gr.Button( 316 | "点击开始运行", variant="primary") 317 | 318 | if "en" == lang_opt: 319 | gr.Examples(["Science Fiction", "Romance", "Mystery", "Fantasy", 320 | "Historical", "Horror", "Thriller", "Western", "Young Adult", ], inputs=[novel_type]) 321 | elif lang_opt in ["zh1", "zh2"]: 322 | gr.Examples(["科幻小说", "爱情小说", "推理小说", "奇幻小说", 323 | "玄幻小说", "恐怖", "悬疑", "惊悚", "武侠小说", ], inputs=[novel_type]) 324 | 325 | written_paras = gr.Textbox( 326 | label="Written Paragraphs (editable)", max_lines=23, lines=23) if "en" == lang_opt else gr.Textbox( 327 | label="文章内容 (可编辑)", max_lines=23, lines=23) 328 | with gr.Column(): 329 | with gr.Box(): 330 | if "en" == lang_opt: 331 | gr.Markdown("### Memory Module\n") 332 | elif lang_opt in ["zh1", "zh2"]: 333 | gr.Markdown("### 剧情模型\n") 334 | 335 | short_memory = gr.Textbox( 336 | label="Short-Term Memory (editable)", max_lines=3, lines=3) if "en" == lang_opt else gr.Textbox( 337 | label="短期记忆 (可编辑)", max_lines=3, lines=3) 338 | long_memory = gr.Textbox( 339 | label="Long-Term Memory (editable)", max_lines=6, lines=6) if "en" == lang_opt else gr.Textbox( 340 | label="长期记忆 (可编辑)", max_lines=6, lines=6) 341 | with gr.Box(): 342 | if "en" == lang_opt: 343 | gr.Markdown("### Instruction Module\n") 344 | elif lang_opt in ["zh1", "zh2"]: 345 | gr.Markdown("### 选项模型\n") 346 | 347 | with gr.Row(): 348 | instruction1 = gr.Textbox( 349 | label="Instruction 1", max_lines=3, lines=3, interactive=False) if "en" == lang_opt else gr.Textbox( 350 | label="指令1", max_lines=3, lines=3, interactive=False) 351 | instruction2 = gr.Textbox( 352 | label="Instruction 2", max_lines=3, lines=3, interactive=False) if "en" == lang_opt else gr.Textbox( 353 | label="指令2", max_lines=3, lines=3, interactive=False) 354 | instruction3 = gr.Textbox( 355 | label="Instruction 3", max_lines=3, lines=3, interactive=False) if "en" == lang_opt else gr.Textbox( 356 | label="指令3", max_lines=3, lines=3, interactive=False) 357 | with gr.Row(): 358 | with gr.Column(scale=1, min_width=100): 359 | selected_plan = gr.Radio( 360 | ["Instruction 1", "Instruction 2", "Instruction 3"], label="Instruction Selection",) if "en" == lang_opt else gr.Radio(["指令 1", "指令 2", "指令 3"], label="指令 选择",) 361 | # info="Select the instruction you want to revise and use for the next step generation.") 362 | with gr.Column(scale=3, min_width=300): 363 | selected_instruction = gr.Textbox( 364 | label="Selected Instruction (editable)", max_lines=5, lines=5) if "en" == lang_opt else gr.Textbox( 365 | label="在上一步骤中被选择的 (可编辑)", max_lines=5, lines=5) 366 | 367 | btn_step = gr.Button("Next Step", variant="primary") if "en" == lang_opt else gr.Button( 368 | "下一步", variant="primary") 369 | 370 | btn_init.click(init, inputs=[novel_type, description], outputs=[ 371 | short_memory, long_memory, written_paras, instruction1, instruction2, instruction3]) 372 | btn_step.click(controled_step, inputs=[short_memory, long_memory, selected_instruction, written_paras], outputs=[ 373 | short_memory, long_memory, written_paras, instruction1, instruction2, instruction3]) 374 | selected_plan.select(on_select, inputs=[ 375 | instruction1, instruction2, instruction3], 
outputs=[selected_instruction]) 376 | 377 | with gr.Tab("Model-Config"): 378 | model_opt_radio = gr.Radio(["OpenAI", "ChatGLM-6B", "Vicuna-7B"], value="OpenAI", label="model", 379 | info="select language you preferred. Default is English.", 380 | interactive=True 381 | ) 382 | 383 | reload_button = gr.Button("Reload/重新加载") 384 | reload_button.click(reload_model, show_progress=True, 385 | inputs=[model_opt_radio], 386 | outputs=[novel_type]) 387 | 388 | demo.queue(concurrency_count=1) 389 | 390 | if __name__ == "__main__": 391 | demo.launch(server_port=8005, share=True, 392 | debug=True, 393 | server_name="0.0.0.0", show_api=False) 394 | -------------------------------------------------------------------------------- /human_simulator.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | from utils import get_content_between_a_b, parse_instructions 7 | from prompts.human_simulator import get_input_text 8 | from global_config import lang_opt, llm_model_opt 9 | 10 | if "openai" == llm_model_opt: 11 | from utils.openai_util import get_api_response 12 | elif "vicuna" == llm_model_opt: 13 | from utils.vicuna_util import get_api_response 14 | elif "chatglm" == llm_model_opt: 15 | from utils.chatglm_util import get_api_response 16 | elif "baichuan" == llm_model_opt: 17 | from utils.baichuan_util import get_api_response 18 | elif "aquila" == llm_model_opt: 19 | from utils.aquila_util import get_api_response 20 | elif "falcon" == llm_model_opt: 21 | from utils.falcon_util import get_api_response 22 | else: 23 | raise Exception("not supported llm model name: {}".format(llm_model_opt)) 24 | 25 | 26 | class Human: 27 | 28 | def __init__(self, input, memory, embedder, model, tokenizer): 29 | self.input = input 30 | if memory: 31 | self.memory = memory 32 | else: 33 | self.memory = self.input['output_memory'] 34 | self.embedder = embedder 35 | self.model = model 36 | self.tokenizer = tokenizer 37 | self.output = {} 38 | 39 | def prepare_input(self): 40 | previous_paragraph = self.input["input_paragraph"] 41 | writer_new_paragraph = self.input["output_paragraph"] 42 | memory = self.input["output_memory"] 43 | user_edited_plan = self.input["output_instruction"] 44 | 45 | input_text = get_input_text( 46 | lang_opt, previous_paragraph, memory, writer_new_paragraph, user_edited_plan) 47 | 48 | return input_text 49 | 50 | def parse_plan(self, response): 51 | plan = get_content_between_a_b('Selected Plan:', 'Reason', response) 52 | return plan 53 | 54 | def select_plan(self, response_file): # TODO ??? 55 | 56 | previous_paragraph = self.input["input_paragraph"] 57 | writer_new_paragraph = self.input["output_paragraph"] 58 | memory = self.input["output_memory"] 59 | previous_plans = self.input["output_instruction"] 60 | prompt = f""" 61 | Now imagine you are a helpful assistant that help a novelist with decision making. You will be given a previously written paragraph and a paragraph written by a ChatGPT writing assistant, a summary of the main storyline maintained by the ChatGPT assistant, and 3 different possible plans of what to write next. 62 | I need you to: 63 | Select the most interesting and suitable plan proposed by the ChatGPT assistant. 
64 | 65 | Previously written paragraph: 66 | {previous_paragraph} 67 | 68 | The summary of the main storyline maintained by your ChatGPT assistant: 69 | {memory} 70 | 71 | The new paragraph written by your ChatGPT assistant: 72 | {writer_new_paragraph} 73 | 74 | Three plans of what to write next proposed by your ChatGPT assistant: 75 | {parse_instructions(previous_plans)} 76 | 77 | Now start choosing, organize your output by strictly following the output format as below: 78 | 79 | Selected Plan: 80 | 81 | 82 | Reason: 83 | 84 | """ 85 | print(prompt+'\n'+'\n') 86 | 87 | response = get_api_response(self.model, self.tokenizer, prompt) 88 | 89 | plan = self.parse_plan(response) 90 | while plan == None: 91 | response = get_api_response(self.model, self.tokenizer, prompt) 92 | plan = self.parse_plan(response) 93 | 94 | if response_file: 95 | with open(response_file, 'a', encoding='utf-8') as f: 96 | f.write(f"Selected plan here:\n{response}\n\n") 97 | 98 | return plan 99 | 100 | def parse_output(self, text): 101 | try: 102 | if text.splitlines()[0].startswith('Extended Paragraph'): 103 | new_paragraph = get_content_between_a_b( 104 | 'Extended Paragraph:', 'Selected Plan', text) 105 | else: 106 | new_paragraph = text.splitlines()[0] 107 | 108 | lines = text.splitlines() 109 | if lines[-1] != '\n' and lines[-1].startswith('Revised Plan:'): 110 | revised_plan = lines[-1][len("Revised Plan:"):] 111 | elif lines[-1] != '\n': 112 | revised_plan = lines[-1] 113 | 114 | output = { 115 | "output_paragraph": new_paragraph, 116 | # "selected_plan": selected_plan, 117 | "output_instruction": revised_plan, 118 | # "memory":self.input["output_memory"] 119 | } 120 | 121 | return output 122 | except: 123 | return None 124 | 125 | def step(self, response_file=None): 126 | 127 | prompt = self.prepare_input() 128 | print(prompt+'\n'+'\n') 129 | 130 | response = get_api_response(self.model, self.tokenizer, prompt) 131 | self.output = self.parse_output(response) 132 | while self.output == None: 133 | response = get_api_response(self.model, self.tokenizer, prompt) 134 | self.output = self.parse_output(response) 135 | if response_file: 136 | with open(response_file, 'a', encoding='utf-8') as f: 137 | f.write(f"Human's output here:\n{response}\n\n") 138 | -------------------------------------------------------------------------------- /imgs/webui-snapshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackaduma/Recurrent-LLM/1a857f430efd68f5dfbd65f581191e43a1655f1d/imgs/webui-snapshot.png -------------------------------------------------------------------------------- /init_prompt.json: -------------------------------------------------------------------------------- 1 | {"init_prompt": "\nPlease write a {type} novel about {topic} with about 50 chapters. Follow the format below precisely:\n\n Begin with the name of the novel.\n Next, write an outline for the first chapter. The outline should describe the background and the beginning of the novel.\n Write the first three paragraphs with their indication of the novel based on your outline. Write in a novelistic style and take your time to set the scene.\n Write a summary that captures the key information of the three paragraphs.\n Finally, write three different instructions for what to write next, each containing around five sentences. 
Each instruction should present a possible, interesting continuation of the story.\n The output format should follow these guidelines:\n Name: \n Outline: \n Paragraph 1: \n Paragraph 2: \n Paragraph 3: \n Summary: \n Instruction 1: \n Instruction 2: \n Instruction 3: \n \n Make sure to be precise and follow the output format strictly.\n \n "} -------------------------------------------------------------------------------- /misc/ali_pay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackaduma/Recurrent-LLM/1a857f430efd68f5dfbd65f581191e43a1655f1d/misc/ali_pay.png -------------------------------------------------------------------------------- /misc/placeholder: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /misc/wechat_pay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackaduma/Recurrent-LLM/1a857f430efd68f5dfbd65f581191e43a1655f1d/misc/wechat_pay.png -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | -------------------------------------------------------------------------------- /models/aquila_fa.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | import os 6 | import torch 7 | from flagai.auto_model.auto_loader import AutoLoader 8 | from flagai.model.predictor.predictor import Predictor 9 | from flagai.model.predictor.aquila import aquila_generate 10 | from flagai.data.tokenizer import Tokenizer 11 | import bminf 12 | 13 | 14 | 15 | max_token: int = 128 # 10000 # 64 16 | temperature: float = 0.75 17 | top_p = 0.9 18 | 19 | state_dict = "./checkpoints_in" 20 | model_name = 'aquilachat-7b' 21 | 22 | def load_model(): 23 | loader = AutoLoader( 24 | "lm", 25 | model_dir=state_dict, 26 | model_name=model_name, 27 | use_cache=True, 28 | fp16=True) 29 | model = loader.get_model() 30 | tokenizer = loader.get_tokenizer() 31 | cache_dir = os.path.join(state_dict, model_name) 32 | 33 | model.eval() 34 | 35 | with torch.cuda.device(0): 36 | model = bminf.wrapper(model, quantization=False, memory_limit=2 << 30) 37 | 38 | return tokenizer, model -------------------------------------------------------------------------------- /models/aquila_hf.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | import torch 7 | from transformers import AutoTokenizer, AutoModelForCausalLM 8 | 9 | # trust_remote_code: remote code depends old version transformers 10 | """ 11 | File "/root/.cache/huggingface/modules/transformers_modules/qhduan/aquilachat-7b/9d8fcc4f12b6bb6ea0c8a494ba85110f78804739/modeling_aquila.py", line 33, in 12 | from transformers.models.llama.configuration_llama import LlamaConfig 13 | ModuleNotFoundError: No module named 'transformers.models.llama' 14 | """ 15 | def load_model(): 16 | tokenizer = AutoTokenizer.from_pretrained('qhduan/aquilachat-7b') 17 | model = AutoModelForCausalLM.from_pretrained('qhduan/aquilachat-7b', trust_remote_code=True) 18 | model = model.eval().half().cuda() 19 | 20 | return tokenizer, model 
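# Usage sketch: one way the (tokenizer, model) pair returned by load_model()
# could be driven for a single generation step. The prompt string (borrowed from
# the commented example in models/baichuan_hf.py) and the sampling parameters
# (max_new_tokens, do_sample, top_p) are illustrative assumptions.
if __name__ == "__main__":
    tokenizer, model = load_model()
    inputs = tokenizer("登鹳雀楼->王之涣\n夜雨寄北->", return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=64, do_sample=True, top_p=0.9)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))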
-------------------------------------------------------------------------------- /models/baichuan_hf.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | from transformers import AutoModelForCausalLM, AutoTokenizer 6 | from peft import PeftModel 7 | 8 | max_token: int = 10000 # 10000 # 64 9 | temperature: float = 0.75 10 | top_p = 0.9 11 | use_lora = False 12 | 13 | 14 | # def load_model(): 15 | # model_name_or_path = "baichuan-inc/baichuan-7B" 16 | # # model_name_or_path = "~/.cache/huggingface/hub/models--baichuan-inc--baichuan-7B/snapshots/39916f64eb892ccdc1982b0eef845b3b8fd43f6b/" 17 | # tokenizer = AutoTokenizer.from_pretrained( 18 | # model_name_or_path, 19 | # trust_remote_code=True) 20 | # model = AutoModelForCausalLM.from_pretrained( 21 | # model_name_or_path, 22 | # device_map="auto", 23 | # trust_remote_code=True) 24 | 25 | # # inputs = tokenizer('登鹳雀楼->王之涣\n夜雨寄北->', return_tensors='pt') 26 | # # inputs = inputs.to('cuda:0') 27 | # # pred = model.generate(**inputs, max_new_tokens=64,repetition_penalty=1.1) 28 | # # print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True)) 29 | 30 | # return tokenizer, model 31 | 32 | 33 | def load_model(use_lora=True, LOAD_IN_8BIT=False): 34 | """ 35 | params: 36 | use_lora=True, LOAD_IN_8BIT=False 37 | use_lora=False. LOAD_IN_8BIT=True 38 | """ 39 | tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/baichuan-7B", 40 | trust_remote_code=True) 41 | model = AutoModelForCausalLM.from_pretrained("baichuan-inc/baichuan-7B", 42 | device_map="auto", 43 | trust_remote_code=True, 44 | load_in_8bit=LOAD_IN_8BIT, # if not have enough GPU memory, then use 8bit 45 | ) 46 | 47 | if use_lora: 48 | model = PeftModel.from_pretrained(model, "hiyouga/baichuan-7b-sft") 49 | 50 | return tokenizer, model 51 | -------------------------------------------------------------------------------- /models/chatglm_hf.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | import torch 6 | from transformers import AutoTokenizer, AutoConfig, AutoModel 7 | 8 | model_name_or_path = "THUDM/chatglm-6b-int8" 9 | max_token: int = 10000 10 | temperature: float = 0.75 11 | top_p = 0.9 12 | use_lora = False 13 | 14 | def auto_configure_device_map(num_gpus: int, use_lora: bool): 15 | # transformer.word_embeddings 占用1层 16 | # transformer.final_layernorm 和 lm_head 占用1层 17 | # transformer.layers 占用 28 层 18 | # 总共30层分配到num_gpus张卡上 19 | num_trans_layers = 28 20 | per_gpu_layers = 30 / num_gpus 21 | 22 | # bugfix: PEFT加载lora模型出现的层命名不同 23 | # if LLM_LORA_PATH and use_lora: 24 | # layer_prefix = 'base_model.model.transformer' 25 | # else: 26 | layer_prefix = 'transformer' 27 | 28 | # bugfix: 在linux中调用torch.embedding传入的weight,input不在同一device上,导致RuntimeError 29 | # windows下 model.device 会被设置成 transformer.word_embeddings.device 30 | # linux下 model.device 会被设置成 lm_head.device 31 | # 在调用chat或者stream_chat时,input_ids会被放到model.device上 32 | # 如果transformer.word_embeddings.device和model.device不同,则会导致RuntimeError 33 | # 因此这里将transformer.word_embeddings,transformer.final_layernorm,lm_head都放到第一张卡上 34 | device_map = {f'{layer_prefix}.word_embeddings': 0, 35 | f'{layer_prefix}.final_layernorm': 0, 'lm_head': 0, 36 | f'base_model.model.lm_head': 0, } 37 | 38 | used = 2 39 | gpu_target = 0 40 | for i in range(num_trans_layers): 41 | if used >= per_gpu_layers: 42 | gpu_target += 1 43 | used = 0 44 | assert 
gpu_target < num_gpus 45 | device_map[f'{layer_prefix}.layers.{i}'] = gpu_target 46 | used += 1 47 | 48 | return device_map 49 | 50 | def load_model(llm_device="cuda", device_map=None): 51 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,trust_remote_code=True) 52 | model_config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True) 53 | model = AutoModel.from_pretrained(model_name_or_path, config=model_config, trust_remote_code=True) 54 | 55 | if torch.cuda.is_available() and llm_device.lower().startswith("cuda"): 56 | # 根据当前设备GPU数量决定是否进行多卡部署 57 | num_gpus = torch.cuda.device_count() 58 | if num_gpus < 2 and device_map is None: 59 | model = model.half().cuda() 60 | else: 61 | from accelerate import dispatch_model 62 | 63 | # model = AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True, 64 | # config=model_config, **kwargs) 65 | # 可传入device_map自定义每张卡的部署情况 66 | if device_map is None: 67 | device_map = auto_configure_device_map(num_gpus, use_lora) 68 | 69 | model = dispatch_model( 70 | model.half(), device_map=device_map) 71 | else: 72 | model = model.float().to(llm_device) 73 | 74 | model = model.eval() 75 | 76 | return tokenizer, model -------------------------------------------------------------------------------- /models/falcon_hf.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | import torch 7 | from transformers import AutoTokenizer, AutoModelForCausalLM 8 | from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig 9 | 10 | max_token: int = 10000 # 10000 # 64 11 | temperature: float = 0.75 12 | top_p = 0.9 13 | use_lora = False 14 | 15 | # model_name_or_path = "Hannes-Epoch/falcon-7b-instruct-8bit" # not work, miss file 16 | 17 | 18 | def load_model(opt="gptq"): 19 | if "pt" == opt: 20 | return load_pt_model() 21 | elif "gptq" == opt: 22 | return load_gptq_model() 23 | else: 24 | raise Exception("not supported opt: {}".format(opt)) 25 | 26 | ######################################################################################################## 27 | 28 | def load_gptq_model(): 29 | model_name_or_path = "TheBloke/falcon-7b-instruct-GPTQ" 30 | # You could also download the model locally, and access it there 31 | # model_name_or_path = "/path/to/TheBloke_falcon-7b-instruct-GPTQ" 32 | 33 | model_basename = "gptq_model-4bit-64g" 34 | 35 | use_triton = False 36 | 37 | tokenizer = AutoTokenizer.from_pretrained( 38 | model_name_or_path, use_fast=True) 39 | 40 | model = AutoGPTQForCausalLM.from_quantized(model_name_or_path, 41 | model_basename=model_basename, 42 | use_safetensors=True, 43 | trust_remote_code=True, 44 | device="cuda:0", 45 | use_triton=use_triton, 46 | quantize_config=None) 47 | 48 | return tokenizer, model 49 | 50 | 51 | ######################################################################################################## 52 | 53 | def load_pt_model(): 54 | model_name_or_path = "tiiuae/falcon-7b" 55 | # model_name_or_path = "tiiuae/falcon-7b-instruct" 56 | 57 | tokenizer = AutoTokenizer.from_pretrained( 58 | model_name_or_path, 59 | trust_remote_code=True, 60 | ) 61 | model = AutoModelForCausalLM.from_pretrained( 62 | model_name_or_path, 63 | trust_remote_code=True, 64 | device_map='auto', 65 | # load_in_8bit=True, # not working "RWForCausalLM.__init__() got an unexpected keyword argument 'load_in_8bit'" 66 | ) 67 | 68 | return tokenizer, model 69 | 70 | 
######################################################################################################## -------------------------------------------------------------------------------- /models/vicuna_bin.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | 7 | from llama_cpp import Llama, LlamaCache 8 | from common import torch_gc 9 | 10 | 11 | max_token: int = 10000 12 | temperature: float = 0.75 13 | top_p = 0.9 14 | 15 | def load_model(): 16 | model_name_or_path = "/root/下载/ggml-vic13b-q5_1.bin" 17 | 18 | params = { 19 | 'model_path': str(model_name_or_path), 20 | 'n_ctx': 2048, 21 | 'seed': 0, 22 | 'n_threads': 8, 23 | 'n_gpu_layers': 40, 24 | 'n_batch': 512, 25 | 'verbose': True, 26 | } 27 | model = Llama(**params) 28 | model.set_cache(LlamaCache) 29 | 30 | tokenizer = model.tokenizer() 31 | 32 | return tokenizer, model -------------------------------------------------------------------------------- /prompts/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | -------------------------------------------------------------------------------- /prompts/chatgpt_query.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | def get_input_text(lang_opt, short_memory, input_paragraph, input_instruction, input_long_term_memory, new_character_prompt): 6 | if "en" == lang_opt: 7 | input_text = f"""I need you to help me write a novel. Now I give you a memory (a brief summary) of 400 words, you should use it to store the key content of what has been written so that you can keep track of very long context. For each time, I will give you your current memory (a brief summary of previous stories. You should use it to store the key content of what has been written so that you can keep track of very long context), the previously written paragraph, and instructions on what to write in the next paragraph. 8 | I need you to write: 9 | 1. Output Paragraph: the next paragraph of the novel. The output paragraph should contain around 20 sentences and should follow the input instructions. 10 | 2. Output Memory: The updated memory. You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that you should write the updated memory. The updated memory should be similar to the input memory except the parts you previously thought that should be deleted or added. The updated memory should only store key information. The updated memory should never exceed 20 sentences! 11 | 3. Output Instruction: instructions of what to write next (after what you have written). You should output 3 different instructions, each is a possible interesting continuation of the story. Each output instruction should contain around 5 sentences 12 | Here are the inputs: 13 | 14 | Input Memory: 15 | {short_memory} 16 | 17 | Input Paragraph: 18 | {input_paragraph} 19 | 20 | Input Instruction: 21 | {input_instruction} 22 | 23 | Input Related Paragraphs: 24 | {input_long_term_memory} 25 | 26 | Now start writing, organize your output by strictly following the output format as below: 27 | Output Paragraph: 28 | , around 20 sentences. 
29 | 30 | Output Memory: 31 | Rational: ; 32 | Updated Memory: , around 10 to 20 sentences 33 | 34 | Output Instruction: 35 | Instruction 1: , around 5 sentences 36 | Instruction 2: , around 5 sentences 37 | Instruction 3: , around 5 sentences 38 | 39 | Very important!! The updated memory should only store key information. The updated memory should never contain over 500 words! 40 | Finally, remember that you are writing a novel. Write like a novelist and do not move too fast when writing the output instructions for the next paragraph. Remember that the chapter will contain over 10 paragraphs and the novel will contain over 100 chapters. And this is just the beginning. Just write some interesting staffs that will happen next. Also, think about what plot can be attractive for common readers when writing output instructions. 41 | 42 | Very Important: 43 | You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that, you start rewrite the input memory to get the updated memory. 44 | {new_character_prompt} 45 | """ 46 | 47 | elif "zh1" == lang_opt: 48 | input_text = f"""I need you to help me write a novel. Now I give you a memory (a brief summary) of 400 words, you should use it to store the key content of what has been written so that you can keep track of very long context. For each time, I will give you your current memory (a brief summary of previous stories. You should use it to store the key content of what has been written so that you can keep track of very long context), the previously written paragraph, and instructions on what to write in the next paragraph. 49 | I need you to write: 50 | 1. Output Paragraph: the next paragraph of the novel. The output paragraph should contain around 20 sentences and should follow the input instructions. 51 | 2. Output Memory: The updated memory. You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that you should write the updated memory. The updated memory should be similar to the input memory except the parts you previously thought that should be deleted or added. The updated memory should only store key information. The updated memory should never exceed 20 sentences! 52 | 3. Output Instruction: instructions of what to write next (after what you have written). You should output 3 different instructions, each is a possible interesting continuation of the story. Each output instruction should contain around 5 sentences 53 | 4. 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 54 | Here are the inputs: 55 | 56 | Input Memory: 57 | {short_memory} 58 | 59 | Input Paragraph: 60 | {input_paragraph} 61 | 62 | Input Instruction: 63 | {input_instruction} 64 | 65 | Input Related Paragraphs: 66 | {input_long_term_memory} 67 | 68 | Now start writing, organize your output by strictly following the output format as below: 69 | Output Paragraph: 70 | , around 20 sentences. 71 | 72 | Output Memory: 73 | Rational: ; 74 | Updated Memory: , around 10 to 20 sentences 75 | 76 | Output Instruction: 77 | Instruction 1: , around 5 sentences 78 | Instruction 2: , around 5 sentences 79 | Instruction 3: , around 5 sentences 80 | 81 | Very important!! The updated memory should only store key information. The updated memory should never contain over 500 words! 82 | Finally, remember that you are writing a novel. 
Write like a novelist and do not move too fast when writing the output instructions for the next paragraph. Remember that the chapter will contain over 10 paragraphs and the novel will contain over 100 chapters. And this is just the beginning. Just write some interesting staffs that will happen next. Also, think about what plot can be attractive for common readers when writing output instructions. 83 | 84 | Very Important: 85 | You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that, you start rewrite the input memory to get the updated memory. 86 | 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 87 | {new_character_prompt} 88 | """ 89 | 90 | elif "zh2" == lang_opt: 91 | input_text = f"""我需要你帮我写一部小说。现在我给你一个400字的记忆(一个简短的总结),你应该用它来存储已经写好的关键内容,这样你就可以记录很长的上下文。每一次,我都会给你当前的记忆(以前的故事的简要总结。你应该用它来存储所写内容的关键内容,这样你就能记下很长的上下文),之前写的段落,以及下一段要写的内容的指示。 92 | 我需要你来写: 93 | 1. 输出段落:小说的下一个段落。输出段应包含约20句话,并应遵循输入指示。 94 | 2. 输出记忆: 更新后的记忆。你应该首先解释输入记忆中的哪些句子不再需要,为什么,然后解释需要添加到记忆中的内容,为什么。之后,你应该写出更新的记忆。除了你之前认为应该删除或添加的部分,更新后的记忆应该与输入的记忆相似。更新后的记忆应该只存储关键信息。更新后的记忆不应该超过20个句子! 95 | 3. 输出指令:接下来要写什么的指令(在你写完之后)。你应该输出3个不同的指令,每个指令都是故事的一个可能的有趣的延续。每个输出指令应该包含大约5个句子 96 | 下面是输入的内容: 97 | 98 | 输入内存: 99 | {short_memory} 100 | 101 | 输入段落: 102 | {input_paragraph} 103 | 104 | 输入指令: 105 | {input_instruction}。 106 | 107 | 输入相关段落: 108 | {input_long_term_memory} 109 | 110 | 现在开始写,严格按照下面的输出格式来组织你的输出: 111 | 输出段落: 112 | <输出段落的字符串>,大约20句话。 113 | 114 | 输出记忆: 115 | 理性: <解释如何更新内存的字符串>; 116 | 更新的记忆: <更新内存的字符串>,大约10到20句话 117 | 118 | 输出指令: 119 | 指令1:<指令1的内容>,大约5句话 120 | 指令2:<指令2的内容>,大约5句话 121 | 指令3:<指令3的内容>,大约5句话 122 | 123 | 非常重要!! 更新的内存应该只存储关键信息。更新后的记忆不应该包含超过500个字!!!! 124 | 最后,记住你在写一本小说。像小说家一样写作,在写下一段的输出指令时不要走得太快。记住,这一章将包含10多段,而小说将包含100多章。而这仅仅是个开始。就要写一些接下来会发生的有趣的职员。另外,在写输出说明时,要考虑什么情节能吸引普通读者。 125 | 126 | 非常重要: 127 | 你应该首先解释输入存储器中的哪些句子不再需要,为什么,然后解释需要添加到存储器中的内容,为什么。之后,你开始重写输入内存,得到更新的内存。 128 | {new_character_prompt} 129 | """ 130 | 131 | else: 132 | raise Exception("not supported lang_opt: {}".format(lang_opt)) 133 | 134 | return input_text -------------------------------------------------------------------------------- /prompts/human_simulator.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | def get_input_text(lang_opt, previous_paragraph, memory, writer_new_paragraph, user_edited_plan): 7 | if "en" == lang_opt: 8 | input_text = f""" 9 | Now imagine you are a novelist writing a Chinese novel with the help of ChatGPT. You will be given a previously written paragraph (wrote by you), and a paragraph written by your ChatGPT assistant, a summary of the main storyline maintained by your ChatGPT assistant, and a plan of what to write next proposed by your ChatGPT assistant. 10 | I need you to write: 11 | 1. Extended Paragraph: Extend the new paragraph written by the ChatGPT assistant to twice the length of the paragraph written by your ChatGPT assistant. 12 | 2. Selected Plan: Copy the plan proposed by your ChatGPT assistant. 13 | 3. Revised Plan: Revise the selected plan into an outline of the next paragraph. 
14 | 15 | Previously written paragraph: 16 | {previous_paragraph} 17 | 18 | The summary of the main storyline maintained by your ChatGPT assistant: 19 | {memory} 20 | 21 | The new paragraph written by your ChatGPT assistant: 22 | {writer_new_paragraph} 23 | 24 | The plan of what to write next proposed by your ChatGPT assistant: 25 | {user_edited_plan} 26 | 27 | Now start writing, organize your output by strictly following the output format as below,所有输出仍然保持是中文: 28 | 29 | Extended Paragraph: 30 | , around 40-50 sentences. 31 | 32 | Selected Plan: 33 | 34 | 35 | Revised Plan: 36 | , keep it short, around 5-7 sentences. 37 | 38 | Very Important: 39 | Remember that you are writing a novel. Write like a novelist and do not move too fast when writing the plan for the next paragraph. Think about how the plan can be attractive for common readers when selecting and extending the plan. Remember to follow the length constraints! Remember that the chapter will contain over 10 paragraphs and the novel will contain over 100 chapters. And the next paragraph will be the second paragraph of the second chapter. You need to leave space for future stories. 40 | 41 | """ 42 | 43 | elif "zh1" == lang_opt: 44 | input_text = f""" 45 | Now imagine you are a novelist writing a Chinese novel with the help of ChatGPT. You will be given a previously written paragraph (wrote by you), and a paragraph written by your ChatGPT assistant, a summary of the main storyline maintained by your ChatGPT assistant, and a plan of what to write next proposed by your ChatGPT assistant. 46 | I need you to write: 47 | 1. Extended Paragraph: Extend the new paragraph written by the ChatGPT assistant to twice the length of the paragraph written by your ChatGPT assistant. 48 | 2. Selected Plan: Copy the plan proposed by your ChatGPT assistant. 49 | 3. Revised Plan: Revise the selected plan into an outline of the next paragraph. 50 | 4. 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 51 | 52 | Previously written paragraph: 53 | {previous_paragraph} 54 | 55 | The summary of the main storyline maintained by your ChatGPT assistant: 56 | {memory} 57 | 58 | The new paragraph written by your ChatGPT assistant: 59 | {writer_new_paragraph} 60 | 61 | The plan of what to write next proposed by your ChatGPT assistant: 62 | {user_edited_plan} 63 | 64 | Now start writing, organize your output by strictly following the output format as below,所有输出仍然保持是中文: 65 | 66 | Extended Paragraph: 67 | , around 40-50 sentences. 68 | 69 | Selected Plan: 70 | 71 | 72 | Revised Plan: 73 | , keep it short, around 5-7 sentences. 74 | 75 | Very Important: 76 | Remember that you are writing a novel. Write like a novelist and do not move too fast when writing the plan for the next paragraph. Think about how the plan can be attractive for common readers when selecting and extending the plan. Remember to follow the length constraints! Remember that the chapter will contain over 10 paragraphs and the novel will contain over 100 chapters. And the next paragraph will be the second paragraph of the second chapter. You need to leave space for future stories. 77 | 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 78 | 79 | """ 80 | 81 | elif "zh2" == lang_opt: 82 | input_text = f""" 83 | 现在想象一下,你是一个小说家,在ChatGPT的帮助下写一本中文小说。你会得到一个先前写好的段落(由你写),和一个由你的ChatGPT助手写的段落,一个由你的ChatGPT助手保持的主要故事情节的总结,以及一个由你的ChatGPT助手提出的下一步写作计划。 84 | 我需要你写: 85 | 1. 扩展段落: 将ChatGPT助手写的新段落延长到你的ChatGPT助手所写段落的两倍。 86 | 2. 选定计划: 复制您的ChatGPT助手提出的计划。 87 | 3. 
修订的计划: 将选定的计划修改为下一段的纲要。 88 | 89 | 以前写的段落: 90 | {previous_paragraph} 91 | 92 | 由你的ChatGPT助手维护的主要故事情节的摘要: 93 | {memory} 94 | 95 | 您的ChatGPT助理写的新段落: 96 | {writer_new_paragraph} 97 | 98 | 您的ChatGPT助理提出的下一步写作计划: 99 | {user_edited_plan} 100 | 101 | 现在开始写,严格按照下面的输出格式来组织你的输出,所有输出仍然保持是中文: 102 | 103 | 扩展段落: 104 | <输出段落的字符串>,大约40-50个句子。 105 | 106 | 选定的计划: 107 | <在此复制计划> 108 | 109 | 修改后的计划: 110 | <修改后的计划字符串>,保持简短,大约5-7句话。 111 | 112 | 非常重要: 113 | 记住你在写一本小说。像小说家一样写作,在写下一段的计划时不要走得太快。在选择和扩展计划时,要考虑计划如何对普通读者具有吸引力。记住要遵循长度限制! 记住,这一章将包含10多段,而小说将包含100多章。而下一段将是第二章的第二段。你需要为未来的故事留出空间。 114 | 115 | """ 116 | 117 | else: 118 | raise Exception("not supported lang_opt: {}".format(lang_opt)) 119 | 120 | return input_text 121 | -------------------------------------------------------------------------------- /prompts/llm_query.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | def get_input_text(lang_opt, short_memory, input_paragraph, input_instruction, input_long_term_memory, new_character_prompt): 7 | if "en" == lang_opt: 8 | input_text = f"""I need you to help me write a novel. Now I give you a memory (a brief summary) of 400 words, you should use it to store the key content of what has been written so that you can keep track of very long context. For each time, I will give you your current memory (a brief summary of previous stories. You should use it to store the key content of what has been written so that you can keep track of very long context), the previously written paragraph, and instructions on what to write in the next paragraph. 9 | I need you to write: 10 | 1. Output Paragraph: the next paragraph of the novel. The output paragraph should contain around 20 sentences and should follow the input instructions. 11 | 2. Output Memory: The updated memory. You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that you should write the updated memory. The updated memory should be similar to the input memory except the parts you previously thought that should be deleted or added. The updated memory should only store key information. The updated memory should never exceed 20 sentences! 12 | 3. Output Instruction: instructions of what to write next (after what you have written). You should output 3 different instructions, each is a possible interesting continuation of the story. Each output instruction should contain around 5 sentences 13 | Here are the inputs: 14 | 15 | Input Memory: 16 | {short_memory} 17 | 18 | Input Paragraph: 19 | {input_paragraph} 20 | 21 | Input Instruction: 22 | {input_instruction} 23 | 24 | Input Related Paragraphs: 25 | {input_long_term_memory} 26 | 27 | Now start writing, organize your output by strictly following the output format as below: 28 | Output Paragraph: 29 | , around 20 sentences. 30 | 31 | Output Memory: 32 | Rational: ; 33 | Updated Memory: , around 10 to 20 sentences 34 | 35 | Output Instruction: 36 | Instruction 1: , around 5 sentences 37 | Instruction 2: , around 5 sentences 38 | Instruction 3: , around 5 sentences 39 | 40 | Very important!! The updated memory should only store key information. The updated memory should never contain over 500 words! 41 | Finally, remember that you are writing a novel. Write like a novelist and do not move too fast when writing the output instructions for the next paragraph. 
Remember that the chapter will contain over 10 paragraphs and the novel will contain over 100 chapters. And this is just the beginning. Just write some interesting staffs that will happen next. Also, think about what plot can be attractive for common readers when writing output instructions. 42 | 43 | Very Important: 44 | You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that, you start rewrite the input memory to get the updated memory. 45 | {new_character_prompt} 46 | """ 47 | 48 | elif "zh1" == lang_opt: 49 | input_text = f"""I need you to help me write a novel. Now I give you a memory (a brief summary) of 400 words, you should use it to store the key content of what has been written so that you can keep track of very long context. For each time, I will give you your current memory (a brief summary of previous stories. You should use it to store the key content of what has been written so that you can keep track of very long context), the previously written paragraph, and instructions on what to write in the next paragraph. 50 | I need you to write: 51 | 1. Output Paragraph: the next paragraph of the novel. The output paragraph should contain around 20 sentences and should follow the input instructions. 52 | 2. Output Memory: The updated memory. You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that you should write the updated memory. The updated memory should be similar to the input memory except the parts you previously thought that should be deleted or added. The updated memory should only store key information. The updated memory should never exceed 20 sentences! 53 | 3. Output Instruction: instructions of what to write next (after what you have written). You should output 3 different instructions, each is a possible interesting continuation of the story. Each output instruction should contain around 5 sentences 54 | 4. 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 55 | Here are the inputs: 56 | 57 | Input Memory: 58 | {short_memory} 59 | 60 | Input Paragraph: 61 | {input_paragraph} 62 | 63 | Input Instruction: 64 | {input_instruction} 65 | 66 | Input Related Paragraphs: 67 | {input_long_term_memory} 68 | 69 | Now start writing, organize your output by strictly following the output format as below: 70 | Output Paragraph: 71 | , around 20 sentences. 72 | 73 | Output Memory: 74 | Rational: ; 75 | Updated Memory: , around 10 to 20 sentences 76 | 77 | Output Instruction: 78 | Instruction 1: , around 5 sentences 79 | Instruction 2: , around 5 sentences 80 | Instruction 3: , around 5 sentences 81 | 82 | Very important!! The updated memory should only store key information. The updated memory should never contain over 500 words! 83 | Finally, remember that you are writing a novel. Write like a novelist and do not move too fast when writing the output instructions for the next paragraph. Remember that the chapter will contain over 10 paragraphs and the novel will contain over 100 chapters. And this is just the beginning. Just write some interesting staffs that will happen next. Also, think about what plot can be attractive for common readers when writing output instructions. 84 | 85 | Very Important: 86 | You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. 
After that, you start rewrite the input memory to get the updated memory. 87 | 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 88 | {new_character_prompt} 89 | """ 90 | 91 | elif "zh2" == lang_opt: 92 | input_text = f"""我需要你帮我写一部小说。现在我给你一个400字的记忆(一个简短的总结),你应该用它来存储已经写好的关键内容,这样你就可以记录很长的上下文。每一次,我都会给你当前的记忆(以前的故事的简要总结。你应该用它来存储所写内容的关键内容,这样你就能记下很长的上下文),之前写的段落,以及下一段要写的内容的指示。 93 | 我需要你来写: 94 | 1. 输出段落:小说的下一个段落。输出段应包含约20句话,并应遵循输入指示。 95 | 2. 输出记忆: 更新后的记忆。你应该首先解释输入记忆中的哪些句子不再需要,为什么,然后解释需要添加到记忆中的内容,为什么。之后,你应该写出更新的记忆。除了你之前认为应该删除或添加的部分,更新后的记忆应该与输入的记忆相似。更新后的记忆应该只存储关键信息。更新后的记忆不应该超过20个句子! 96 | 3. 输出指令:接下来要写什么的指令(在你写完之后)。你应该输出3个不同的指令,每个指令都是故事的一个可能的有趣的延续。每个输出指令应该包含大约5个句子 97 | 下面是输入的内容: 98 | 99 | 输入内存: 100 | {short_memory} 101 | 102 | 输入段落: 103 | {input_paragraph} 104 | 105 | 输入指令: 106 | {input_instruction}。 107 | 108 | 输入相关段落: 109 | {input_long_term_memory} 110 | 111 | 现在开始写,严格按照下面的输出格式来组织你的输出: 112 | 输出段落: 113 | <输出段落的字符串>,大约20句话。 114 | 115 | 输出记忆: 116 | 理性: <解释如何更新内存的字符串>; 117 | 更新的记忆: <更新内存的字符串>,大约10到20句话 118 | 119 | 输出指令: 120 | 指令1:<指令1的内容>,大约5句话 121 | 指令2:<指令2的内容>,大约5句话 122 | 指令3:<指令3的内容>,大约5句话 123 | 124 | 非常重要!! 更新的内存应该只存储关键信息。更新后的记忆不应该包含超过500个字!!!! 125 | 最后,记住你在写一本小说。像小说家一样写作,在写下一段的输出指令时不要走得太快。记住,这一章将包含10多段,而小说将包含100多章。而这仅仅是个开始。就要写一些接下来会发生的有趣的职员。另外,在写输出说明时,要考虑什么情节能吸引普通读者。 126 | 127 | 非常重要: 128 | 你应该首先解释输入存储器中的哪些句子不再需要,为什么,然后解释需要添加到存储器中的内容,为什么。之后,你开始重写输入内存,得到更新的内存。 129 | {new_character_prompt} 130 | """ 131 | 132 | else: 133 | raise Exception("not supported lang_opt: {}".format(lang_opt)) 134 | 135 | return input_text -------------------------------------------------------------------------------- /prompts/service_init.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | def get_init_prompt(lang_opt, novel_type, description): 7 | if "en" == lang_opt: 8 | return f""" 9 | Please write a {novel_type} novel{description} with 50 chapters. Follow the format below precisely: 10 | 11 | Begin with the name of the novel. 12 | Next, write an outline for the first chapter. The outline should describe the background and the beginning of the novel. 13 | Write the first three paragraphs with their indication of the novel based on your outline. Write in a novelistic style and take your time to set the scene. 14 | Write a summary that captures the key information of the three paragraphs. 15 | Finally, write three different instructions for what to write next, each containing around five sentences. Each instruction should present a possible, interesting continuation of the story. 16 | The output format should follow these guidelines: 17 | Name: 18 | Outline: 19 | Paragraph 1: 20 | Paragraph 2: 21 | Paragraph 3: 22 | Summary: 23 | Instruction 1: 24 | Instruction 2: 25 | Instruction 3: 26 | 27 | Make sure to be precise and follow the output format strictly. 28 | 29 | """ 30 | elif "zh1" == lang_opt: 31 | return f""" 32 | Please write a {novel_type} novel{description} with 50 chapters. Follow the format below precisely: 33 | 34 | Begin with the name of the novel. 35 | Next, write an outline for the first chapter. The outline should describe the background and the beginning of the novel. 36 | Write the first three paragraphs with their indication of the novel based on your outline. Write in a novelistic style and take your time to set the scene. 37 | Write a summary that captures the key information of the three paragraphs. 
38 | Finally, write three different instructions for what to write next, each containing around five sentences. Each instruction should present a possible, interesting continuation of the story. 39 | The output format should follow these guidelines: 40 | 名称: 41 | 概述: 42 | 段落1: 43 | 段落2: 44 | 段落3: 45 | 总结: 46 | 指令1: 47 | 指令2: 48 | 指令3: 49 | 50 | Make sure to be precise and follow the output format strictly. 51 | 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 52 | 53 | """ 54 | 55 | elif "zh2" == lang_opt: 56 | return f""" 57 | 请写一篇{novel_type}的小说{description},有50个章节。准确遵循以下格式: 58 | 59 | 以小说的名称开始。 60 | 接下来,写出第一章的大纲。大纲应描述小说的背景和开头。 61 | 根据你的提纲写出前三段,并说明小说的内容。用小说的风格来写,慢慢地设置场景。 62 | 写一个摘要,抓住这三段的关键信息。 63 | 最后,写出三个不同的指示,说明接下来要写什么,每个指示包含大约五句话。每个指示都应该提出一个可能的、有趣的故事的延续。 64 | 输出格式应遵循这些准则: 65 | 名称: <小说的名称> 66 | 概述: <第一章的大纲> 67 | 段落1: <第1段的内容> 68 | 段落2: <第2段的内容> 69 | 段落3: <第3段的内容> 70 | 总结: <摘要的内容>。 71 | 指令1: <指令1的内容> 72 | 指令2: <指令2的内容> 73 | 指令3:<指令3的内容> 74 | 75 | 请务必准确无误,并严格遵守输出格式。 76 | """ 77 | 78 | else: 79 | raise Exception(f"not supported language: {lang_opt}") -------------------------------------------------------------------------------- /recurrent_llm.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | import torch 6 | import random 7 | from sentence_transformers import util 8 | 9 | from utils import get_content_between_a_b 10 | from prompts.llm_query import get_input_text 11 | from global_config import lang_opt, llm_model_opt 12 | 13 | if "openai" == llm_model_opt: 14 | from utils.openai_util import get_api_response 15 | elif "vicuna" == llm_model_opt: 16 | from utils.vicuna_util import get_api_response 17 | elif "chatglm" == llm_model_opt: 18 | from utils.chatglm_util import get_api_response 19 | elif "baichuan" == llm_model_opt: 20 | from utils.baichuan_util import get_api_response 21 | elif "aquila" == llm_model_opt: 22 | from utils.aquila_util import get_api_response 23 | elif "falcon" == llm_model_opt: 24 | from utils.falcon_util import get_api_response 25 | else: 26 | raise Exception("not supported llm model name: {}".format(llm_model_opt)) 27 | 28 | 29 | class RecurrentLLM: 30 | 31 | def __init__(self, input, short_memory, long_memory, memory_index, embedder, model, tokenizer): 32 | print("AIWriter loaded by RecurrentLLM") 33 | self.input = input 34 | self.short_memory = short_memory 35 | self.long_memory = long_memory 36 | self.embedder = embedder 37 | self.model = model 38 | self.tokenizer = tokenizer 39 | if self.long_memory and not memory_index: 40 | self.memory_index = self.embedder.encode( 41 | self.long_memory, convert_to_tensor=True) 42 | self.output = {} 43 | 44 | def prepare_input(self, new_character_prob=0.1, top_k=2): 45 | 46 | input_paragraph = self.input["output_paragraph"] 47 | input_instruction = self.input["output_instruction"] 48 | 49 | instruction_embedding = self.embedder.encode( 50 | input_instruction, convert_to_tensor=True) 51 | 52 | # get the top 3 most similar paragraphs from memory 53 | 54 | memory_scores = util.cos_sim( 55 | instruction_embedding, self.memory_index)[0] 56 | top_k_idx = torch.topk(memory_scores, k=top_k)[1] 57 | top_k_memory = [self.long_memory[idx] for idx in top_k_idx] 58 | # combine the top 3 paragraphs 59 | input_long_term_memory = '\n'.join( 60 | [f"Related Paragraphs {i+1} :" + selected_memory for i, selected_memory in enumerate(top_k_memory)]) 61 | # randomly decide if a new character should be introduced 62 | if random.random() < 
new_character_prob: 63 | new_character_prompt = f"If it is reasonable, you can introduce a new character in the output paragrah and add it into the memory." 64 | else: 65 | new_character_prompt = "" 66 | 67 | input_text = get_input_text(lang_opt, self.short_memory, input_paragraph, input_instruction, input_long_term_memory, new_character_prompt) 68 | 69 | return input_text 70 | 71 | def parse_output(self, output): 72 | try: 73 | output_paragraph = get_content_between_a_b( 74 | 'Output Paragraph:', 'Output Memory', output) 75 | output_memory_updated = get_content_between_a_b( 76 | 'Updated Memory:', 'Output Instruction:', output) 77 | self.short_memory = output_memory_updated 78 | ins_1 = get_content_between_a_b( 79 | 'Instruction 1:', 'Instruction 2', output) 80 | ins_2 = get_content_between_a_b( 81 | 'Instruction 2:', 'Instruction 3', output) 82 | lines = output.splitlines() 83 | # content of Instruction 3 may be in the same line with I3 or in the next line 84 | if lines[-1] != '\n' and lines[-1].startswith('Instruction 3'): 85 | ins_3 = lines[-1][len("Instruction 3:"):] 86 | elif lines[-1] != '\n': 87 | ins_3 = lines[-1] 88 | 89 | output_instructions = [ins_1, ins_2, ins_3] 90 | assert len(output_instructions) == 3 91 | 92 | output = { 93 | "input_paragraph": self.input["output_paragraph"], 94 | "output_memory": output_memory_updated, # feed to human 95 | "output_paragraph": output_paragraph, 96 | "output_instruction": [instruction.strip() for instruction in output_instructions] 97 | } 98 | 99 | return output 100 | except: 101 | return None 102 | 103 | def step(self, response_file=None): 104 | 105 | prompt = self.prepare_input() 106 | 107 | print(prompt+'\n'+'\n') 108 | 109 | response = get_api_response(self.model, self.tokenizer, prompt) 110 | 111 | self.output = self.parse_output(response) 112 | while self.output == None: 113 | response = get_api_response(self.model, self.tokenizer, prompt) 114 | self.output = self.parse_output(response) 115 | if response_file: 116 | with open(response_file, 'a', encoding='utf-8') as f: 117 | f.write(f"Writer's output here:\n{response}\n\n") 118 | 119 | self.long_memory.append(self.input["output_paragraph"]) 120 | self.memory_index = self.embedder.encode( 121 | self.long_memory, convert_to_tensor=True) 122 | -------------------------------------------------------------------------------- /recurrentgpt.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | import torch 6 | import random 7 | from sentence_transformers import util 8 | 9 | from utils import get_content_between_a_b, get_api_response 10 | from prompts.chatgpt_query import get_input_text 11 | from global_config import lang_opt 12 | 13 | 14 | class RecurrentGPT: 15 | 16 | def __init__(self, input, short_memory, long_memory, memory_index, embedder): 17 | print("AIWriter loaded by RecurrentGPT") 18 | self.input = input 19 | self.short_memory = short_memory 20 | self.long_memory = long_memory 21 | self.embedder = embedder 22 | if self.long_memory and not memory_index: 23 | self.memory_index = self.embedder.encode( 24 | self.long_memory, convert_to_tensor=True) 25 | self.output = {} 26 | 27 | def prepare_input(self, new_character_prob=0.1, top_k=2): 28 | 29 | input_paragraph = self.input["output_paragraph"] 30 | input_instruction = self.input["output_instruction"] 31 | 32 | instruction_embedding = self.embedder.encode( 33 | input_instruction, convert_to_tensor=True) 34 | 35 | # get the top 3 most 
similar paragraphs from memory
36 | 
37 |         memory_scores = util.cos_sim(
38 |             instruction_embedding, self.memory_index)[0]
39 |         top_k_idx = torch.topk(memory_scores, k=top_k)[1]
40 |         top_k_memory = [self.long_memory[idx] for idx in top_k_idx]
41 |         # combine the top-k most similar paragraphs
42 |         input_long_term_memory = '\n'.join(
43 |             [f"Related Paragraphs {i+1} :" + selected_memory for i, selected_memory in enumerate(top_k_memory)])
44 |         # randomly decide if a new character should be introduced
45 |         if random.random() < new_character_prob:
46 |             new_character_prompt = f"If it is reasonable, you can introduce a new character in the output paragraph and add it into the memory."
47 |         else:
48 |             new_character_prompt = ""
49 | 
50 |         input_text = get_input_text(lang_opt, self.short_memory, input_paragraph, input_instruction, input_long_term_memory, new_character_prompt)
51 | 
52 |         return input_text
53 | 
54 |     def parse_output(self, output):
55 |         try:
56 |             output_paragraph = get_content_between_a_b(
57 |                 'Output Paragraph:', 'Output Memory', output)
58 |             output_memory_updated = get_content_between_a_b(
59 |                 'Updated Memory:', 'Output Instruction:', output)
60 |             self.short_memory = output_memory_updated
61 |             ins_1 = get_content_between_a_b(
62 |                 'Instruction 1:', 'Instruction 2', output)
63 |             ins_2 = get_content_between_a_b(
64 |                 'Instruction 2:', 'Instruction 3', output)
65 |             lines = output.splitlines()
66 |             # content of Instruction 3 may be in the same line with I3 or in the next line
67 |             if lines[-1] != '\n' and lines[-1].startswith('Instruction 3'):
68 |                 ins_3 = lines[-1][len("Instruction 3:"):]
69 |             elif lines[-1] != '\n':
70 |                 ins_3 = lines[-1]
71 | 
72 |             output_instructions = [ins_1, ins_2, ins_3]
73 |             assert len(output_instructions) == 3
74 | 
75 |             output = {
76 |                 "input_paragraph": self.input["output_paragraph"],
77 |                 "output_memory": output_memory_updated,  # feed to human
78 |                 "output_paragraph": output_paragraph,
79 |                 "output_instruction": [instruction.strip() for instruction in output_instructions]
80 |             }
81 | 
82 |             return output
83 |         except Exception:
84 |             return None
85 | 
86 |     def step(self, response_file=None):
87 | 
88 |         prompt = self.prepare_input()
89 | 
90 |         print(prompt+'\n'+'\n')
91 | 
92 |         response = get_api_response(None, None, prompt)  # the OpenAI helper ignores its model/tokenizer arguments
93 | 
94 |         self.output = self.parse_output(response)
95 |         while self.output is None:
96 |             response = get_api_response(None, None, prompt)
97 |             self.output = self.parse_output(response)
98 |         if response_file:
99 |             with open(response_file, 'a', encoding='utf-8') as f:
100 |                 f.write(f"Writer's output here:\n{response}\n\n")
101 | 
102 |         self.long_memory.append(self.input["output_paragraph"])
103 |         self.memory_index = self.embedder.encode(
104 |             self.long_memory, convert_to_tensor=True)
105 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | sentence-transformers
2 | openai
3 | 
4 | 
5 | bitsandbytes==0.39.0
6 | transformers @ git+https://github.com/huggingface/transformers.git
7 | peft @ git+https://github.com/huggingface/peft.git
8 | accelerate @ git+https://github.com/huggingface/accelerate.git
9 | 
10 | 
11 | llama-cpp-python @ git+https://github.com/abetlen/llama-cpp-python.git
12 | 
13 | 
14 | flagai
15 | bminf
16 | 
17 | 
18 | auto-gptq
19 | einops
--------------------------------------------------------------------------------
/utils/__init__.py:
-------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | import re 6 | from global_config import lang_opt, llm_model_opt 7 | 8 | if "openai" == llm_model_opt: 9 | from utils.openai_util import get_api_response 10 | elif "vicuna" == llm_model_opt: 11 | from utils.vicuna_util import get_api_response 12 | elif "chatglm" == llm_model_opt: 13 | from utils.chatglm_util import get_api_response 14 | elif "baichuan" == llm_model_opt: 15 | from utils.baichuan_util import get_api_response 16 | elif "aquila" == llm_model_opt: 17 | from utils.aquila_util import get_api_response 18 | elif "falcon" == llm_model_opt: 19 | from utils.falcon_util import get_api_response 20 | else: 21 | raise Exception("not supported llm model name: {}".format(llm_model_opt)) 22 | 23 | 24 | def get_content_between_a_b(a, b, text): 25 | if "en" == lang_opt: 26 | if "vicuna" == llm_model_opt: 27 | return re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL).group(1).strip() 28 | elif "openai" == llm_model_opt: 29 | return re.search(f"{a}(.*?)\n{b}", text, re.DOTALL).group(1).strip() 30 | elif llm_model_opt in ["chatglm", "baichuan", "aquila", "falcon"]: 31 | return re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL).group(1).strip() 32 | else: 33 | raise Exception( 34 | "not supported llm model name: {}".format(llm_model_opt)) 35 | 36 | elif lang_opt in ["zh1", "zh2"]: 37 | if "vicuna" == llm_model_opt: 38 | match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL) 39 | elif "openai" == llm_model_opt: 40 | match = re.search(f"{a}(.*?)\n{b}", text, re.DOTALL) 41 | elif llm_model_opt in ["chatglm", "baichuan", "aquila", "falcon"]: 42 | match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL) 43 | else: 44 | raise Exception( 45 | "not supported llm model name: {}".format(llm_model_opt)) 46 | 47 | if match: 48 | return match.group(1).strip() 49 | else: 50 | if "1" in a or "2" in a or "3" in a: 51 | a = ''.join(a.split(" ")) 52 | if "1" in b or "2" in b or "3" in b: 53 | b = "".join(b.split(" ")) 54 | 55 | if "vicuna" == llm_model_opt: 56 | match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL) 57 | elif "openai" == llm_model_opt: 58 | match = re.search(f"{a}(.*?)\n{b}", text, re.DOTALL) 59 | elif llm_model_opt in ["chatglm", "baichuan", "aquila", "falcon"]: 60 | match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL) 61 | else: 62 | raise Exception( 63 | "not supported llm model name: {}".format(llm_model_opt)) 64 | 65 | if match: 66 | return match.group(1).strip() 67 | else: 68 | # 处理找不到匹配内容的情况 69 | return "翻译时出现错误请重试" # 或者返回其他默认值或采取其他的处理方式 70 | else: 71 | raise Exception(f"not supported language: {lang_opt}") 72 | 73 | 74 | def get_init(init_text=None, text=None, response_file=None, model=None, tokenizer=None): 75 | """ 76 | init_text: if the title, outline, and the first 3 paragraphs are given in a .txt file, directly read 77 | text: if no .txt file is given, use init prompt to generate 78 | """ 79 | if not init_text: 80 | response = get_api_response(model, tokenizer, text) 81 | print("response: {}".format(response)) 82 | 83 | if response_file: 84 | with open(response_file, 'a', encoding='utf-8') as f: 85 | f.write(f"Init output here:\n{response}\n\n") 86 | else: 87 | with open(init_text, 'r', encoding='utf-8') as f: 88 | response = f.read() 89 | f.close() 90 | paragraphs = { 91 | "name": "", 92 | "Outline": "", 93 | "Paragraph 1": "", 94 | "Paragraph 2": "", 95 | "Paragraph 3": "", 96 | "Summary": "", 97 | "Instruction 1": "", 98 | 
"Instruction 2": "", 99 | "Instruction 3": "" 100 | } 101 | 102 | if "en" == lang_opt: 103 | paragraphs['name'] = get_content_between_a_b( 104 | 'Name:', 'Outline', response) 105 | 106 | paragraphs['Paragraph 1'] = get_content_between_a_b( 107 | 'Paragraph 1:', 'Paragraph 2:', response) 108 | paragraphs['Paragraph 2'] = get_content_between_a_b( 109 | 'Paragraph 2:', 'Paragraph 3:', response) 110 | paragraphs['Paragraph 3'] = get_content_between_a_b( 111 | 'Paragraph 3:', 'Summary', response) 112 | paragraphs['Summary'] = get_content_between_a_b( 113 | 'Summary:', 'Instruction 1', response) 114 | paragraphs['Instruction 1'] = get_content_between_a_b( 115 | 'Instruction 1:', 'Instruction 2', response) 116 | paragraphs['Instruction 2'] = get_content_between_a_b( 117 | 'Instruction 2:', 'Instruction 3', response) 118 | lines = response.splitlines() 119 | # content of Instruction 3 may be in the same line with I3 or in the next line 120 | if lines[-1] != '\n' and lines[-1].startswith('Instruction 3'): 121 | paragraphs['Instruction 3'] = lines[-1][len("Instruction 3:"):] 122 | elif lines[-1] != '\n': 123 | paragraphs['Instruction 3'] = lines[-1] 124 | # Sometimes it gives Chapter outline, sometimes it doesn't 125 | for line in lines: 126 | if line.startswith('Chapter'): 127 | paragraphs['Outline'] = get_content_between_a_b( 128 | 'Outline:', 'Chapter', response) 129 | break 130 | if paragraphs['Outline'] == '': 131 | paragraphs['Outline'] = get_content_between_a_b( 132 | 'Outline:', 'Paragraph', response) 133 | 134 | elif lang_opt in ["zh1", "zh2"]: 135 | paragraphs['name'] = get_content_between_a_b('名称:', '概述:', response) 136 | 137 | paragraphs['Paragraph 1'] = get_content_between_a_b( 138 | '段落 1:', '段落 2:', response) 139 | paragraphs['Paragraph 2'] = get_content_between_a_b( 140 | '段落 2:', '段落 3:', response) 141 | paragraphs['Paragraph 3'] = get_content_between_a_b( 142 | '段落 3:', '总结:', response) 143 | paragraphs['Summary'] = get_content_between_a_b( 144 | '总结:', '指令 1', response) 145 | paragraphs['Instruction 1'] = get_content_between_a_b( 146 | '指令 1:', '指令 2:', response) 147 | paragraphs['Instruction 2'] = get_content_between_a_b( 148 | '指令 2:', '指令 3:', response) 149 | lines = response.splitlines() 150 | # content of Instruction 3 may be in the same line with I3 or in the next line 151 | if lines[-1] != '\n' and lines[-1].startswith('Instruction 3'): 152 | paragraphs['Instruction 3'] = lines[-1][len("Instruction 3:"):] 153 | elif lines[-1] != '\n': 154 | paragraphs['Instruction 3'] = lines[-1] 155 | # Sometimes it gives Chapter outline, sometimes it doesn't 156 | for line in lines: 157 | if line.startswith('Chapter'): 158 | paragraphs['Outline'] = get_content_between_a_b( 159 | '概述:', 'Chapter', response) 160 | break 161 | if paragraphs['Outline'] == '': 162 | paragraphs['Outline'] = get_content_between_a_b( 163 | '概述:', '段落', response) 164 | 165 | return paragraphs 166 | 167 | 168 | def get_chatgpt_response(model, prompt): 169 | response = "" 170 | for data in model.ask(prompt): 171 | response = data["message"] 172 | model.delete_conversation(model.conversation_id) 173 | model.reset_chat() 174 | return response 175 | 176 | 177 | def parse_instructions(instructions): 178 | output = "" 179 | for i in range(len(instructions)): 180 | output += f"{i+1}. 
{instructions[i]}\n" 181 | return output 182 | -------------------------------------------------------------------------------- /utils/aquila_util.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | import torch 7 | from flagai.model.predictor.predictor import Predictor 8 | from flagai.model.predictor.aquila import aquila_generate 9 | from models.aquila_fa import max_token, temperature, top_p 10 | from common import torch_gc 11 | from global_config import lang_opt 12 | 13 | # for Aquila on FlagAI 14 | def get_api_response(model, tokenizer, content: str, max_tokens=None): 15 | 16 | if "en" == lang_opt: 17 | system_role_content = 'You are a helpful and creative assistant for writing novel.' 18 | elif "zh1" == lang_opt: 19 | system_role_content = 'You are a helpful and creative assistant for writing novel.\ 20 | You are must always in Chinese.重要,你需要使用中文与我进行交流。' 21 | elif "zh2" == lang_opt: 22 | system_role_content = '你是写小说的好帮手,有创意的助手。' 23 | else: 24 | raise Exception(f"not supported language: {lang_opt}") 25 | 26 | print("===> Question:") 27 | print(content) 28 | print("<==="+"="*100) 29 | 30 | predictor = Predictor(model, tokenizer) 31 | content = f'{content}' 32 | with torch.no_grad(): 33 | out = predictor.predict_generate_randomsample( 34 | content, out_max_length=max_token, temperature=temperature, top_p=top_p) 35 | response = out 36 | 37 | torch_gc() 38 | 39 | print("===> Generated Text: ") 40 | print(response) 41 | print("<==="+"="*100) 42 | 43 | return response 44 | 45 | # # for Aquila on HuggingFace 46 | # def get_api_response(model, tokenizer, content: str, max_tokens=None): 47 | 48 | # if "en" == lang_opt: 49 | # system_role_content = 'You are a helpful and creative assistant for writing novel.' 
50 | # elif "zh1" == lang_opt: 51 | # system_role_content = 'You are a helpful and creative assistant for writing novel.\ 52 | # You are must always in Chinese.重要,你需要使用中文与我进行交流。' 53 | # elif "zh2" == lang_opt: 54 | # system_role_content = '你是写小说的好帮手,有创意的助手。' 55 | # else: 56 | # raise Exception(f"not supported language: {lang_opt}") 57 | 58 | # print("===> Question:") 59 | # print(content) 60 | # print("<==="+"="*100) 61 | 62 | # with torch.no_grad(): 63 | # ret = model.generate( 64 | # **tokenizer(content, return_tensors='pt').to('cuda'), 65 | # do_sample=False, 66 | # max_new_tokens=max_token, 67 | # temperature=temperature, 68 | # top_p=top_p, 69 | # use_cache=True 70 | # ) 71 | # output_ids = ret[0].detach().cpu().numpy().tolist() 72 | # if 100007 in output_ids: 73 | # output_ids = output_ids[:output_ids.index(100007)] 74 | # elif 0 in output_ids: 75 | # output_ids = output_ids[:output_ids.index(0)] 76 | # response = tokenizer.decode(output_ids) 77 | 78 | # torch_gc() 79 | 80 | # print("===> Generated Text: ") 81 | # print(response) 82 | # print("<==="+"="*100) 83 | 84 | # return response 85 | -------------------------------------------------------------------------------- /utils/baichuan_util.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | from transformers import TextStreamer 6 | 7 | from models.baichuan_hf import max_token, temperature, top_p 8 | from common import torch_gc 9 | from global_config import lang_opt 10 | 11 | 12 | def get_api_response(model, tokenizer, content: str, max_tokens=None): 13 | 14 | if "en" == lang_opt: 15 | system_role_content = 'You are a helpful and creative assistant for writing novel.' 16 | elif "zh1" == lang_opt: 17 | system_role_content = 'You are a helpful and creative assistant for writing novel.\ 18 | You are must always in Chinese.重要,你需要使用中文与我进行交流。' 19 | elif "zh2" == lang_opt: 20 | system_role_content = '你是写小说的好帮手,有创意的助手。' 21 | else: 22 | raise Exception(f"not supported language: {lang_opt}") 23 | 24 | print("===> Question:") 25 | print(content) 26 | print("<==="+"="*100) 27 | 28 | streamer = TextStreamer(tokenizer, 29 | skip_prompt=True, 30 | skip_special_tokens=True 31 | ) 32 | 33 | # inputs = tokenizer(content, return_tensors='pt') 34 | inputs = tokenizer(":{}\n:".format(content), return_tensors='pt') 35 | # inputs = inputs.to('cuda') # UserWarning: You are calling .generate() with the `input_ids` being on a device type different than your model's device. `input_ids` is on cuda, whereas the model is on cpu. You may experience unexpected behaviors or slower generation. Please make sure that you have put `input_ids` to the correct device by calling for example input_ids = input_ids.to('cpu') before running `.generate()`. 
36 | inputs = inputs.to('cpu') 37 | generate_ids = model.generate(**inputs, 38 | max_new_tokens=max_token, 39 | top_p=top_p, 40 | temperature=temperature, 41 | repetition_penalty=1.1, 42 | streamer=streamer, 43 | ) 44 | response = tokenizer.decode( 45 | generate_ids.cpu()[0], skip_special_tokens=True) 46 | 47 | torch_gc() 48 | 49 | print("===> Generated Text: ") 50 | print(response) 51 | print("<==="+"="*100) 52 | 53 | return response 54 | -------------------------------------------------------------------------------- /utils/chatglm_util.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | from models.chatglm_hf import max_token, temperature, top_p 7 | from common import torch_gc 8 | from global_config import lang_opt 9 | 10 | 11 | def get_api_response(model, tokenizer, content: str, max_tokens=None): 12 | 13 | if "en" == lang_opt: 14 | system_role_content = 'You are a helpful and creative assistant for writing novel.' 15 | elif "zh1" == lang_opt: 16 | system_role_content = 'You are a helpful and creative assistant for writing novel.\ 17 | You are must always in Chinese.重要,你需要使用中文与我进行交流。' 18 | elif "zh2" == lang_opt: 19 | system_role_content = '你是写小说的好帮手,有创意的助手。' 20 | else: 21 | raise Exception(f"not supported language: {lang_opt}") 22 | 23 | print("===> Question:") 24 | print(content) 25 | print("<==="+"="*100) 26 | 27 | response, history = model.chat( 28 | tokenizer, 29 | content, 30 | history=[], 31 | max_length=max_token, 32 | temperature=temperature, 33 | top_p=top_p, 34 | ) 35 | 36 | torch_gc() 37 | 38 | print("===> Generated Text: ") 39 | print(response) 40 | print("<==="+"="*100) 41 | 42 | return response 43 | -------------------------------------------------------------------------------- /utils/falcon_util.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | from models.falcon_hf import max_token, temperature, top_p 6 | from common import torch_gc 7 | from global_config import lang_opt 8 | 9 | 10 | def get_api_response(model, tokenizer, content: str, max_tokens=None): 11 | 12 | if "en" == lang_opt: 13 | system_role_content = 'You are a helpful and creative assistant for writing novel.' 
14 | elif "zh1" == lang_opt: 15 | system_role_content = 'You are a helpful and creative assistant for writing novel.\ 16 | You are must always in Chinese.重要,你需要使用中文与我进行交流。' 17 | elif "zh2" == lang_opt: 18 | system_role_content = '你是写小说的好帮手,有创意的助手。' 19 | else: 20 | raise Exception(f"not supported language: {lang_opt}") 21 | 22 | print("===> Question:") 23 | print(content) 24 | print("<==="+"="*100) 25 | 26 | inputs = tokenizer(content, 27 | return_tensors='pt', 28 | return_token_type_ids=False, # ValueError: The following model_kwargs are not used by the model: ['token_type_ids'] (note: typos in the generate arguments will also show up in this list) 29 | ) 30 | inputs = inputs.to('cuda:0') 31 | output = model.generate(**inputs, 32 | max_new_tokens=max_token, 33 | top_p=top_p, 34 | temperature=temperature, 35 | repetition_penalty=1.1, 36 | # eos_token_id=tokenizer.eos_token_id, 37 | ) 38 | response = tokenizer.decode(output.cpu()[0], skip_special_tokens=True) 39 | 40 | torch_gc() 41 | 42 | print("===> Generated Text: ") 43 | print(response) 44 | print("<==="+"="*100) 45 | 46 | return response 47 | -------------------------------------------------------------------------------- /utils/openai_util.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | import openai 7 | 8 | from global_config import lang_opt 9 | 10 | 11 | def get_api_response(model, tokenizer, content: str, max_tokens=None): 12 | 13 | if "en" == lang_opt: 14 | system_role_content = 'You are a helpful and creative assistant for writing novel.' 15 | elif "zh1" == lang_opt: 16 | system_role_content = 'You are a helpful and creative assistant for writing novel.\ 17 | You are must always in Chinese.重要,你需要使用中文与我进行交流。' 18 | elif "zh2" == lang_opt: 19 | system_role_content = '你是写小说的好帮手,有创意的助手。' 20 | else: 21 | raise Exception(f"not supported language: {lang_opt}") 22 | 23 | response = openai.ChatCompletion.create( 24 | model='gpt-3.5-turbo', 25 | messages=[{ 26 | 'role': 'system', 27 | 'content': system_role_content 28 | }, { 29 | 'role': 'user', 30 | 'content': content, 31 | }], 32 | temperature=0.5, 33 | max_tokens=max_tokens 34 | ) 35 | 36 | return response['choices'][0]['message']['content'] 37 | 38 | 39 | -------------------------------------------------------------------------------- /utils/vicuna_util.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | from models.vicuna_bin import max_token, temperature, top_p 7 | from common import torch_gc 8 | from global_config import lang_opt 9 | 10 | 11 | def get_api_response(model, tokenizer, content: str, max_tokens=None): 12 | 13 | if "en" == lang_opt: 14 | system_role_content = 'You are a helpful and creative assistant for writing novel.' 
15 | elif "zh1" == lang_opt: 16 | system_role_content = 'You are a helpful and creative assistant for writing novel.\ 17 | You are must always in Chinese.重要,你需要使用中文与我进行交流。' 18 | elif "zh2" == lang_opt: 19 | system_role_content = '你是写小说的好帮手,有创意的助手。' 20 | else: 21 | raise Exception(f"not supported language: {lang_opt}") 22 | 23 | print("===> Question:") 24 | print(content) 25 | print("<==="+"="*100) 26 | 27 | content = content.encode() 28 | tokens = model.tokenize(content) 29 | 30 | output = b"" 31 | count = 0 32 | token_count = 10000 33 | top_k = 40 34 | repetition_penalty = 1.1 35 | for token in model.generate(tokens, 36 | top_k=top_k, 37 | top_p=top_p, 38 | temp=temperature, 39 | repeat_penalty=repetition_penalty): 40 | text = model.detokenize([token]) 41 | # print(text) 42 | output += text 43 | 44 | count += 1 45 | if count >= token_count or (token == model.token_eos()): 46 | break 47 | 48 | response = output.decode() 49 | # print("===> [vicuna][generate] response: {}".format(response)) 50 | 51 | torch_gc() 52 | 53 | print("===> Generated Text: ") 54 | print(response) 55 | print("<==="+"="*100) 56 | 57 | return response 58 | 59 | 60 | --------------------------------------------------------------------------------